mirror of
https://github.com/RPCS3/rpcs3.git
synced 2026-02-08 08:44:46 +01:00
Merge branch 'master' into nastys-patch-17
This commit is contained in:
commit
173edea60c
|
|
@ -394,7 +394,7 @@ namespace fmt
|
|||
}
|
||||
|
||||
#if !defined(_MSC_VER) || defined(__clang__)
|
||||
[[noreturn]] ~throw_exception();
|
||||
[[noreturn]] ~throw_exception() = default;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -192,6 +192,7 @@ if(BUILD_RPCS3_TESTS)
|
|||
tests/test_tuple.cpp
|
||||
tests/test_simple_array.cpp
|
||||
tests/test_address_range.cpp
|
||||
tests/test_rsx_cfg.cpp
|
||||
)
|
||||
|
||||
target_link_libraries(rpcs3_test
|
||||
|
|
|
|||
|
|
@ -157,7 +157,7 @@ std::array<u8, PASSPHRASE_KEY_LEN> sc_combine_laid_paid(s64 laid, s64 paid)
|
|||
{
|
||||
const std::string paid_laid = fmt::format("%016llx%016llx", laid, paid);
|
||||
std::array<u8, PASSPHRASE_KEY_LEN> out{};
|
||||
hex_to_bytes(out.data(), paid_laid.c_str(), PASSPHRASE_KEY_LEN * 2);
|
||||
hex_to_bytes(out.data(), paid_laid, PASSPHRASE_KEY_LEN * 2);
|
||||
return out;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -516,6 +516,7 @@ target_sources(rpcs3_emu PRIVATE
|
|||
RSX/Overlays/overlay_video.cpp
|
||||
RSX/Overlays/Shaders/shader_loading_dialog.cpp
|
||||
RSX/Overlays/Shaders/shader_loading_dialog_native.cpp
|
||||
RSX/Program/Assembler/FPToCFG.cpp
|
||||
RSX/Program/CgBinaryProgram.cpp
|
||||
RSX/Program/CgBinaryFragmentProgram.cpp
|
||||
RSX/Program/CgBinaryVertexProgram.cpp
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
#include "util/types.hpp"
|
||||
#include "Emu/Memory/vm_ptr.h"
|
||||
#include "Emu/Cell/ErrorCodes.h"
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
|
||||
|
|
|
|||
|
|
@ -3718,7 +3718,7 @@ extern void ppu_finalize(const ppu_module<lv2_obj>& info, bool force_mem_release
|
|||
#endif
|
||||
}
|
||||
|
||||
extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_module<lv2_obj>*>* loaded_modules)
|
||||
extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_module<lv2_obj>*>* loaded_modules, bool is_fast_compilation)
|
||||
{
|
||||
if (g_cfg.core.ppu_decoder != ppu_decoder_type::llvm)
|
||||
{
|
||||
|
|
@ -4166,6 +4166,12 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
|||
break;
|
||||
}
|
||||
|
||||
if (is_fast_compilation)
|
||||
{
|
||||
// Skip overlays in fast mode
|
||||
break;
|
||||
}
|
||||
|
||||
if (!wait_for_memory())
|
||||
{
|
||||
// Emulation stopped
|
||||
|
|
@ -4460,7 +4466,7 @@ extern void ppu_initialize()
|
|||
|
||||
progress_dialog.reset();
|
||||
|
||||
ppu_precompile(dir_queue, &module_list);
|
||||
ppu_precompile(dir_queue, &module_list, false);
|
||||
|
||||
if (Emu.IsStopped())
|
||||
{
|
||||
|
|
|
|||
|
|
@ -416,7 +416,6 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module<lv2_obj>& info)
|
|||
assert(ptr_inst->getResultElementType() == m_ir->getPtrTy());
|
||||
|
||||
const auto faddr = m_ir->CreateLoad(ptr_inst->getResultElementType(), ptr_inst);
|
||||
const auto faddr_int = m_ir->CreatePtrToInt(faddr, get_type<uptr>());
|
||||
const auto pos_32 = m_reloc ? m_ir->CreateAdd(func_pc, m_seg0) : func_pc;
|
||||
const auto pos = m_ir->CreateShl(pos_32, 1);
|
||||
const auto ptr = m_ir->CreatePtrAdd(m_exec, pos);
|
||||
|
|
@ -427,7 +426,7 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module<lv2_obj>& info)
|
|||
const auto seg_val = m_ir->CreateTrunc(m_ir->CreateLShr(m_seg0, 13), get_type<u16>());
|
||||
|
||||
// Store to jumptable
|
||||
m_ir->CreateStore(faddr_int, ptr);
|
||||
m_ir->CreateStore(faddr, ptr);
|
||||
m_ir->CreateStore(seg_val, seg_ptr);
|
||||
|
||||
// Increment index and branch back to loop
|
||||
|
|
@ -596,7 +595,7 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
|
|||
|
||||
if (g_cfg.core.ppu_prof)
|
||||
{
|
||||
m_ir->CreateStore(m_ir->getInt32(target_last), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_cia - m_locals)));
|
||||
m_ir->CreateStore(GetAddr(target_last - m_addr), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_cia - m_locals)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3069,6 +3069,39 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
values[op.rt] = pos + 4;
|
||||
}
|
||||
|
||||
const u32 pos_next = wa;
|
||||
|
||||
bool is_no_return = false;
|
||||
|
||||
if (pos_next >= lsa && pos_next < limit)
|
||||
{
|
||||
const u32 data_next = ls[pos_next / 4];
|
||||
const auto type_next = g_spu_itype.decode(data_next);
|
||||
const auto flag_next = g_spu_iflag.decode(data_next);
|
||||
const auto op_next = spu_opcode_t{data_next};
|
||||
|
||||
if (!(type_next & spu_itype::zregmod) && !(type_next & spu_itype::branch))
|
||||
{
|
||||
if (auto iflags = g_spu_iflag.decode(data_next))
|
||||
{
|
||||
if (+flag_next & +spu_iflag::use_ra)
|
||||
{
|
||||
is_no_return = is_no_return || (op_next.ra >= 4 && op_next.ra < 10);
|
||||
}
|
||||
|
||||
if (+flag_next & +spu_iflag::use_rb)
|
||||
{
|
||||
is_no_return = is_no_return || (op_next.rb >= 4 && op_next.rb < 10);
|
||||
}
|
||||
|
||||
if (type_next & spu_itype::_quadrop && +iflags & +spu_iflag::use_rc)
|
||||
{
|
||||
// Instruction is flagged use_rc: range-check the rc field itself (both bounds), like the ra/rb checks above
is_no_return = is_no_return || (op_next.rc >= 4 && op_next.rc < 10);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (af & vf::is_const)
|
||||
{
|
||||
const u32 target = spu_branch_target(av);
|
||||
|
|
@ -3105,7 +3138,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
limit = std::min<u32>(limit, target);
|
||||
}
|
||||
|
||||
if (sl && g_cfg.core.spu_block_size != spu_block_size_type::safe)
|
||||
if (!is_no_return && sl && g_cfg.core.spu_block_size != spu_block_size_type::safe)
|
||||
{
|
||||
m_ret_info[pos / 4 + 1] = true;
|
||||
m_entry_info[pos / 4 + 1] = true;
|
||||
|
|
@ -3122,7 +3155,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
u64 dabs = 0;
|
||||
u64 drel = 0;
|
||||
|
||||
for (u32 i = start; i < limit; i += 4)
|
||||
for (u32 i = start, abs_fail = 0, rel_fail = 0; i < limit; i += 4)
|
||||
{
|
||||
const u32 target = ls[i / 4];
|
||||
|
||||
|
|
@ -3132,16 +3165,39 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
break;
|
||||
}
|
||||
|
||||
if (target >= SPU_LS_SIZE && target <= 0u - SPU_LS_SIZE)
|
||||
{
|
||||
if (g_spu_itype.decode(target) != spu_itype::UNK)
|
||||
{
|
||||
// End of jumptable: valid instruction
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (target >= lsa && target < SPU_LS_SIZE)
|
||||
{
|
||||
// Possible jump table entry (absolute)
|
||||
jt_abs.push_back(target);
|
||||
if (!abs_fail)
|
||||
{
|
||||
jt_abs.push_back(target);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
abs_fail++;
|
||||
}
|
||||
|
||||
if (target + start >= lsa && target + start < SPU_LS_SIZE)
|
||||
{
|
||||
// Possible jump table entry (relative)
|
||||
jt_rel.push_back(target + start);
|
||||
if (!rel_fail)
|
||||
{
|
||||
jt_rel.push_back(target + start);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
rel_fail++;
|
||||
}
|
||||
|
||||
if (std::max(jt_abs.size(), jt_rel.size()) * 4 + start <= i)
|
||||
|
|
@ -3153,6 +3209,35 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
}
|
||||
}
|
||||
|
||||
for (usz i = 0; i < jt_abs.size(); i++)
|
||||
{
|
||||
if (jt_abs[i] == start + jt_abs.size() * 4)
|
||||
{
|
||||
// If jumptable contains absolute address of code start after the jumptable itself
|
||||
// It is likely an absolute-type jumptable
|
||||
|
||||
bool is_good_conclusion = true;
|
||||
|
||||
// For verification: make sure there is none like this in relative table
|
||||
|
||||
for (u32 target : jt_rel)
|
||||
{
|
||||
if (target == start + jt_rel.size() * 4)
|
||||
{
|
||||
is_good_conclusion = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_good_conclusion)
|
||||
{
|
||||
jt_rel.clear();
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Choose position after the jt as an anchor and compute the average distance
|
||||
for (u32 target : jt_abs)
|
||||
{
|
||||
|
|
@ -3251,9 +3336,9 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
spu_log.notice("[0x%x] At 0x%x: ignoring indirect branch (SYNC)", entry_point, pos);
|
||||
}
|
||||
|
||||
if (type == spu_itype::BI || sl)
|
||||
if (type == spu_itype::BI || sl || is_no_return)
|
||||
{
|
||||
if (type == spu_itype::BI || g_cfg.core.spu_block_size == spu_block_size_type::safe)
|
||||
if (type == spu_itype::BI || g_cfg.core.spu_block_size == spu_block_size_type::safe || is_no_return)
|
||||
{
|
||||
m_targets[pos];
|
||||
}
|
||||
|
|
@ -3290,9 +3375,42 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
break;
|
||||
}
|
||||
|
||||
const u32 pos_next = wa;
|
||||
|
||||
bool is_no_return = false;
|
||||
|
||||
if (pos_next >= lsa && pos_next < limit)
|
||||
{
|
||||
const u32 data_next = ls[pos_next / 4];
|
||||
const auto type_next = g_spu_itype.decode(data_next);
|
||||
const auto flag_next = g_spu_iflag.decode(data_next);
|
||||
const auto op_next = spu_opcode_t{data_next};
|
||||
|
||||
if (!(type_next & spu_itype::zregmod) && !(type_next & spu_itype::branch))
|
||||
{
|
||||
if (auto iflags = g_spu_iflag.decode(data_next))
|
||||
{
|
||||
if (+flag_next & +spu_iflag::use_ra)
|
||||
{
|
||||
is_no_return = is_no_return || (op_next.ra >= 4 && op_next.ra < 10);
|
||||
}
|
||||
|
||||
if (+flag_next & +spu_iflag::use_rb)
|
||||
{
|
||||
is_no_return = is_no_return || (op_next.rb >= 4 && op_next.rb < 10);
|
||||
}
|
||||
|
||||
if (type_next & spu_itype::_quadrop && +iflags & +spu_iflag::use_rc)
|
||||
{
|
||||
is_no_return = is_no_return || (op_next.rc >= 4 && op_next.rc < 10);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_targets[pos].push_back(target);
|
||||
|
||||
if (g_cfg.core.spu_block_size != spu_block_size_type::safe)
|
||||
if (!is_no_return && g_cfg.core.spu_block_size != spu_block_size_type::safe)
|
||||
{
|
||||
m_ret_info[pos / 4 + 1] = true;
|
||||
m_entry_info[pos / 4 + 1] = true;
|
||||
|
|
@ -3300,7 +3418,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
add_block(pos + 4);
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_block_size == spu_block_size_type::giga && !sync)
|
||||
if (!is_no_return && g_cfg.core.spu_block_size == spu_block_size_type::giga && !sync)
|
||||
{
|
||||
m_entry_info[target / 4] = true;
|
||||
add_block(target);
|
||||
|
|
@ -4860,20 +4978,27 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
bool select_16_or_0_at_runtime = false;
|
||||
bool put_active = false; // PUTLLC happened
|
||||
bool get_rdatomic = false; // True if MFC_RdAtomicStat was read after GETLLAR
|
||||
u32 required_pc = SPU_LS_SIZE; // Require program to be location specific for this optimization (SPU_LS_SIZE - no requirement)
|
||||
u32 mem_count = 0;
|
||||
u32 break_cause = 100;
|
||||
u32 break_pc = SPU_LS_SIZE;
|
||||
|
||||
// Return old state for error reporting
|
||||
atomic16_t discard()
|
||||
{
|
||||
const u32 pc = lsa_pc;
|
||||
const u32 last_pc = lsa_last_pc;
|
||||
const u32 cause = break_cause;
|
||||
const u32 break_pos = break_pc;
|
||||
|
||||
const atomic16_t old = *this;
|
||||
*this = atomic16_t{};
|
||||
|
||||
// Keep some members
|
||||
lsa_pc = pc;
|
||||
lsa_last_pc = last_pc;
|
||||
this->lsa_pc = pc;
|
||||
this->lsa_last_pc = last_pc;
|
||||
this->break_cause = cause;
|
||||
this->break_pc = break_pos;
|
||||
return old;
|
||||
}
|
||||
|
||||
|
|
@ -5080,15 +5205,17 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
{
|
||||
if (previous.active && likely_putllc_loop && getllar_starts.contains(previous.lsa_pc))
|
||||
{
|
||||
const bool is_first = !std::exchange(getllar_starts[previous.lsa_pc], true);
|
||||
had_putllc_evaluation = true;
|
||||
|
||||
if (!is_first)
|
||||
if (cause != 24)
|
||||
{
|
||||
atomic16->break_cause = cause;
|
||||
atomic16->break_pc = pos;
|
||||
return;
|
||||
}
|
||||
|
||||
had_putllc_evaluation = true;
|
||||
|
||||
cause = atomic16->break_cause;
|
||||
getllar_starts[previous.lsa_pc] = true;
|
||||
g_fxo->get<putllc16_statistics_t>().breaking_reason[cause]++;
|
||||
|
||||
if (!spu_log.notice)
|
||||
|
|
@ -5096,7 +5223,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
return;
|
||||
}
|
||||
|
||||
std::string break_error = fmt::format("PUTLLC pattern breakage [%x mem=%d lsa_const=%d cause=%u] (lsa_pc=0x%x)", pos, previous.mem_count, u32{!previous.ls_offs.is_const()} * 2 + previous.lsa.is_const(), cause, previous.lsa_pc);
|
||||
std::string break_error = fmt::format("PUTLLC pattern breakage [%x mem=%d lsa_const=%d cause=%u] (lsa_pc=0x%x)", atomic16->break_pc, previous.mem_count, u32{!previous.ls_offs.is_const()} * 2 + previous.lsa.is_const(), cause, previous.lsa_pc);
|
||||
|
||||
const auto values = sort_breakig_reasons(g_fxo->get<putllc16_statistics_t>().breaking_reason);
|
||||
|
||||
|
|
@ -6258,6 +6385,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
existing.ls_invalid |= atomic16->ls_invalid;
|
||||
existing.ls_access |= atomic16->ls_access;
|
||||
existing.mem_count = std::max<u32>(existing.mem_count, atomic16->mem_count);
|
||||
existing.required_pc = std::min<u32>(existing.required_pc, atomic16->required_pc);
|
||||
existing.select_16_or_0_at_runtime |= atomic16->select_16_or_0_at_runtime;
|
||||
}
|
||||
|
||||
|
|
@ -6272,6 +6400,24 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
invalidate = false;
|
||||
}
|
||||
}
|
||||
else if (atomic16->break_cause != 100 && atomic16->lsa_pc != SPU_LS_SIZE)
|
||||
{
|
||||
const auto it = atomic16_all.find(pos);
|
||||
|
||||
if (it == atomic16_all.end())
|
||||
{
|
||||
// Ensure future failure
|
||||
atomic16_all.emplace(pos, *atomic16);
|
||||
break_putllc16(24, FN(x.active = true, x)(as_rvalue(*atomic16)));
|
||||
}
|
||||
else if (it->second.active && atomic16->break_cause != 100)
|
||||
{
|
||||
it->second = *atomic16;
|
||||
break_putllc16(24, FN(x.active = true, x)(as_rvalue(*atomic16)));
|
||||
}
|
||||
|
||||
atomic16->break_cause = 100;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
|
@ -6342,6 +6488,10 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
|
||||
// Do not clear lower 16 bytes addressing because the program can move on 4-byte basis
|
||||
const u32 offs = spu_branch_target(pos - result.lower_bound, op.si16);
|
||||
const u32 true_offs = spu_branch_target(pos, op.si16);
|
||||
|
||||
// Make this optimization depend on the location of the program
|
||||
atomic16->required_pc = result.lower_bound;
|
||||
|
||||
if (atomic16->lsa.is_const() && [&]()
|
||||
{
|
||||
|
|
@ -6366,6 +6516,10 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
{
|
||||
// Ignore memory access in this case
|
||||
}
|
||||
else if (atomic16->lsa.is_const() && !atomic16->lsa.compare_with_mask_indifference(true_offs, SPU_LS_MASK_128))
|
||||
{
|
||||
// Same
|
||||
}
|
||||
else if (atomic16->ls_invalid && is_store)
|
||||
{
|
||||
break_putllc16(35, atomic16->set_invalid_ls(is_store));
|
||||
|
|
@ -7119,27 +7273,33 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
continue;
|
||||
}
|
||||
|
||||
union putllc16_or_0_info
|
||||
{
|
||||
u64 data;
|
||||
bf_t<u64, 32, 18> required_pc;
|
||||
bf_t<u64, 30, 2> type;
|
||||
bf_t<u64, 29, 1> runtime16_select;
|
||||
bf_t<u64, 28, 1> no_notify;
|
||||
bf_t<u64, 18, 8> reg;
|
||||
bf_t<u64, 0, 18> off18;
|
||||
bf_t<u64, 0, 8> reg2;
|
||||
} value{};
|
||||
|
||||
auto& stats = g_fxo->get<putllc16_statistics_t>();
|
||||
had_putllc_evaluation = true;
|
||||
|
||||
if (!pattern.ls_write)
|
||||
{
|
||||
if (pattern.required_pc != SPU_LS_SIZE)
|
||||
{
|
||||
value.required_pc = pattern.required_pc;
|
||||
}
|
||||
|
||||
spu_log.success("PUTLLC0 Pattern Detected! (put_pc=0x%x, %s) (putllc0=%d, putllc16+0=%d, all=%d)", pattern.put_pc, func_hash, ++stats.nowrite, ++stats.single, +stats.all);
|
||||
add_pattern(false, inst_attr::putllc0, pattern.put_pc - lsa);
|
||||
add_pattern(false, inst_attr::putllc0, pattern.put_pc - lsa, value.data);
|
||||
continue;
|
||||
}
|
||||
|
||||
union putllc16_info
|
||||
{
|
||||
u32 data;
|
||||
bf_t<u32, 30, 2> type;
|
||||
bf_t<u32, 29, 1> runtime16_select;
|
||||
bf_t<u32, 28, 1> no_notify;
|
||||
bf_t<u32, 18, 8> reg;
|
||||
bf_t<u32, 0, 18> off18;
|
||||
bf_t<u32, 0, 8> reg2;
|
||||
} value{};
|
||||
|
||||
enum : u32
|
||||
{
|
||||
v_const = 0,
|
||||
|
|
@ -7170,6 +7330,11 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
value.runtime16_select = pattern.select_16_or_0_at_runtime;
|
||||
value.reg = s_reg_max;
|
||||
|
||||
if (pattern.required_pc != SPU_LS_SIZE)
|
||||
{
|
||||
value.required_pc = pattern.required_pc;
|
||||
}
|
||||
|
||||
if (pattern.ls.is_const())
|
||||
{
|
||||
ensure(pattern.reg == s_reg_max && pattern.reg2 == s_reg_max && pattern.ls_offs.is_const(), "Unexpected register usage");
|
||||
|
|
@ -7201,7 +7366,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
if (g_cfg.core.spu_accurate_reservations)
|
||||
{
|
||||
// Because enabling it is a hack, as it turns out
|
||||
continue;
|
||||
// continue;
|
||||
}
|
||||
|
||||
add_pattern(false, inst_attr::putllc16, pattern.put_pc - result.entry_point, value.data);
|
||||
|
|
@ -7225,7 +7390,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
|
||||
if (inst_attr attr = m_inst_attrs[(read_pc - entry_point) / 4]; attr == inst_attr::none)
|
||||
{
|
||||
add_pattern(false, inst_attr::rchcnt_loop, read_pc - result.entry_point);
|
||||
add_pattern(false, inst_attr::rchcnt_loop, read_pc - result.entry_point, 0);
|
||||
|
||||
spu_log.error("Channel Loop Pattern Detected! Report to developers! (read_pc=0x%x, branch_pc=0x%x, branch_target=0x%x, 0x%x-%s)", read_pc, pattern.branch_pc, pattern.branch_target, entry_point, func_hash);
|
||||
}
|
||||
|
|
@ -7241,6 +7406,26 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
// Blocks starting from 0x0 or invalid instruction won't be compiled, may need special interpreter fallback
|
||||
}
|
||||
|
||||
if (!m_patterns.empty())
|
||||
{
|
||||
std::string out_dump;
|
||||
dump(result, out_dump);
|
||||
spu_log.notice("Dump SPU Function with pattern(s):\n%s", out_dump);
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < result.data.size(); i++)
|
||||
{
|
||||
const be_t<u32> ls_val = ls[result.lower_bound / 4 + i];
|
||||
|
||||
if (result.data[i] && std::bit_cast<u32>(ls_val) != result.data[i])
|
||||
{
|
||||
std::string out_dump;
|
||||
dump(result, out_dump);
|
||||
spu_log.error("SPU Function Dump:\n%s", out_dump);
|
||||
fmt::throw_exception("SPU Analyzer failed: Instruction mismatch at 0x%x [read: 0x%x vs LS: 0x%x] (i=0x%x)", result.lower_bound + i * 4, std::bit_cast<be_t<u32>>(result.data[i]), ls_val, i);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -8290,19 +8475,10 @@ std::array<reg_state_t, s_reg_max>& block_reg_info::evaluate_start_state(const s
|
|||
return walkby_state;
|
||||
}
|
||||
|
||||
void spu_recompiler_base::add_pattern(bool fill_all, inst_attr attr, u32 start, u32 end)
|
||||
void spu_recompiler_base::add_pattern(bool fill_all, inst_attr attr, u32 start, u64 info)
|
||||
{
|
||||
if (end == umax)
|
||||
{
|
||||
end = start;
|
||||
}
|
||||
|
||||
m_patterns[start] = pattern_info{utils::address_range32::start_end(start, end)};
|
||||
|
||||
for (u32 i = start; i <= (fill_all ? end : start); i += 4)
|
||||
{
|
||||
m_inst_attrs[i / 4] = attr;
|
||||
}
|
||||
m_patterns[start] = pattern_info{info};
|
||||
m_inst_attrs[start / 4] = attr;
|
||||
}
|
||||
|
||||
extern std::string format_spu_func_info(u32 addr, cpu_thread* spu)
|
||||
|
|
|
|||
|
|
@ -1080,7 +1080,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
|||
m_ir->SetInsertPoint(_body);
|
||||
}
|
||||
|
||||
void putllc16_pattern(const spu_program& /*prog*/, utils::address_range32 range)
|
||||
void putllc16_pattern(const spu_program& /*prog*/, u64 pattern_info)
|
||||
{
|
||||
// Prevent store elimination
|
||||
m_block->store_context_ctr[s_reg_mfc_eal]++;
|
||||
|
|
@ -1109,16 +1109,17 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
|||
}
|
||||
};
|
||||
|
||||
const union putllc16_info
|
||||
const union putllc16_or_0_info
|
||||
{
|
||||
u32 data;
|
||||
bf_t<u32, 30, 2> type;
|
||||
bf_t<u32, 29, 1> runtime16_select;
|
||||
bf_t<u32, 28, 1> no_notify;
|
||||
bf_t<u32, 18, 8> reg;
|
||||
bf_t<u32, 0, 18> off18;
|
||||
bf_t<u32, 0, 8> reg2;
|
||||
} info = std::bit_cast<putllc16_info>(range.end);
|
||||
u64 data;
|
||||
bf_t<u64, 32, 18> required_pc;
|
||||
bf_t<u64, 30, 2> type;
|
||||
bf_t<u64, 29, 1> runtime16_select;
|
||||
bf_t<u64, 28, 1> no_notify;
|
||||
bf_t<u64, 18, 8> reg;
|
||||
bf_t<u64, 0, 18> off18;
|
||||
bf_t<u64, 0, 8> reg2;
|
||||
} info = std::bit_cast<putllc16_or_0_info>(pattern_info);
|
||||
|
||||
enum : u32
|
||||
{
|
||||
|
|
@ -1150,8 +1151,10 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
|||
value_t<u32> eal_val;
|
||||
eal_val.value = _eal;
|
||||
|
||||
auto get_reg32 = [&](u32 reg)
|
||||
auto get_reg32 = [&](u64 reg_)
|
||||
{
|
||||
const u32 reg = static_cast<u32>(reg_);
|
||||
|
||||
if (get_reg_type(reg) != get_type<u32[4]>())
|
||||
{
|
||||
return get_reg_fixed(reg, get_type<u32>());
|
||||
|
|
@ -1170,6 +1173,19 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
|||
}
|
||||
else if (info.type == v_relative)
|
||||
{
|
||||
if (info.required_pc && info.required_pc != SPU_LS_SIZE)
|
||||
{
|
||||
const auto short_op = llvm::BasicBlock::Create(m_context, "__putllc16_short_op", m_function);
|
||||
const auto heavy_op = llvm::BasicBlock::Create(m_context, "__putllc16_heavy_op", m_function);
|
||||
|
||||
m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->getInt32(info.required_pc), m_base_pc), heavy_op, short_op);
|
||||
m_ir->SetInsertPoint(heavy_op);
|
||||
update_pc();
|
||||
call("spu_exec_mfc_cmd", &exec_mfc_cmd<false>, m_thread);
|
||||
m_ir->CreateBr(_final);
|
||||
m_ir->SetInsertPoint(short_op);
|
||||
}
|
||||
|
||||
dest = m_ir->CreateAnd(get_pc(spu_branch_target(info.off18 + m_base)), 0x3fff0);
|
||||
}
|
||||
else if (info.type == v_reg_offs)
|
||||
|
|
@ -1268,17 +1284,18 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
|||
const auto _new = m_ir->CreateAlignedLoad(get_type<u128>(), _ptr(m_lsptr, dest), llvm::MaybeAlign{16});
|
||||
const auto _rdata = m_ir->CreateAlignedLoad(get_type<u128>(), _ptr(spu_ptr(&spu_thread::rdata), m_ir->CreateAnd(diff, 0x70)), llvm::MaybeAlign{16});
|
||||
|
||||
const bool is_accurate_op = !!g_cfg.core.spu_accurate_reservations;
|
||||
const bool is_accurate_op = true || !!g_cfg.core.spu_accurate_reservations;
|
||||
|
||||
const auto compare_data_change_res = is_accurate_op ? m_ir->getTrue() : m_ir->CreateICmpNE(_new, _rdata);
|
||||
const auto compare_data_change_res = m_ir->CreateICmpNE(_new, _rdata);
|
||||
const auto second_test_for_complete_op = is_accurate_op ? m_ir->getTrue() : compare_data_change_res;
|
||||
|
||||
if (info.runtime16_select)
|
||||
{
|
||||
m_ir->CreateCondBr(m_ir->CreateAnd(m_ir->CreateICmpULT(diff, m_ir->getInt64(128)), compare_data_change_res), _begin_op, _inc_res, m_md_likely);
|
||||
m_ir->CreateCondBr(m_ir->CreateAnd(m_ir->CreateICmpULT(diff, m_ir->getInt64(128)), second_test_for_complete_op), _begin_op, _inc_res, m_md_likely);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_ir->CreateCondBr(compare_data_change_res, _begin_op, _inc_res, m_md_unlikely);
|
||||
m_ir->CreateCondBr(second_test_for_complete_op, _begin_op, _inc_res, m_md_unlikely);
|
||||
}
|
||||
|
||||
m_ir->SetInsertPoint(_begin_op);
|
||||
|
|
@ -1323,7 +1340,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
|||
|
||||
if (!info.no_notify)
|
||||
{
|
||||
const auto notify_block = llvm::BasicBlock::Create(m_context, "__putllc16_block_notify", m_function);
|
||||
const auto notify_next = llvm::BasicBlock::Create(m_context, "__putllc16_block_notify_next", m_function);
|
||||
|
||||
m_ir->CreateCondBr(compare_data_change_res, notify_block, notify_next);
|
||||
m_ir->SetInsertPoint(notify_block);
|
||||
call("atomic_wait_engine::notify_all", static_cast<void(*)(const void*)>(atomic_wait_engine::notify_all), rptr);
|
||||
m_ir->CreateBr(notify_next);
|
||||
m_ir->SetInsertPoint(notify_next);
|
||||
}
|
||||
|
||||
m_ir->CreateBr(_success);
|
||||
|
|
@ -1373,7 +1397,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
|||
m_ir->SetInsertPoint(_final);
|
||||
}
|
||||
|
||||
void putllc0_pattern(const spu_program& /*prog*/, utils::address_range32 /*range*/)
|
||||
void putllc0_pattern(const spu_program& /*prog*/, u64 pattern_info)
|
||||
{
|
||||
// Prevent store elimination
|
||||
m_block->store_context_ctr[s_reg_mfc_eal]++;
|
||||
|
|
@ -1401,6 +1425,18 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
|||
}
|
||||
};
|
||||
|
||||
const union putllc16_or_0_info
|
||||
{
|
||||
u64 data;
|
||||
bf_t<u64, 32, 18> required_pc;
|
||||
bf_t<u64, 30, 2> type;
|
||||
bf_t<u64, 29, 1> runtime16_select;
|
||||
bf_t<u64, 28, 1> no_notify;
|
||||
bf_t<u64, 18, 8> reg;
|
||||
bf_t<u64, 0, 18> off18;
|
||||
bf_t<u64, 0, 8> reg2;
|
||||
} info = std::bit_cast<putllc16_or_0_info>(pattern_info);
|
||||
|
||||
const auto _next = llvm::BasicBlock::Create(m_context, "", m_function);
|
||||
const auto _next0 = llvm::BasicBlock::Create(m_context, "", m_function);
|
||||
const auto _fail = llvm::BasicBlock::Create(m_context, "", m_function);
|
||||
|
|
@ -1409,6 +1445,19 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
|||
const auto _eal = (get_reg_fixed<u32>(s_reg_mfc_eal) & -128).eval(m_ir);
|
||||
const auto _raddr = m_ir->CreateLoad(get_type<u32>(), spu_ptr(&spu_thread::raddr));
|
||||
|
||||
if (info.required_pc && info.required_pc != SPU_LS_SIZE)
|
||||
{
|
||||
const auto short_op = llvm::BasicBlock::Create(m_context, "__putllc0_short_op", m_function);
|
||||
const auto heavy_op = llvm::BasicBlock::Create(m_context, "__putllc0_heavy_op", m_function);
|
||||
|
||||
m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->getInt32(info.required_pc), m_base_pc), heavy_op, short_op);
|
||||
m_ir->SetInsertPoint(heavy_op);
|
||||
update_pc();
|
||||
call("spu_exec_mfc_cmd", &exec_mfc_cmd<false>, m_thread);
|
||||
m_ir->CreateBr(_final);
|
||||
m_ir->SetInsertPoint(short_op);
|
||||
}
|
||||
|
||||
m_ir->CreateCondBr(m_ir->CreateAnd(m_ir->CreateICmpEQ(_eal, _raddr), m_ir->CreateIsNotNull(_raddr)), _next, _fail, m_md_likely);
|
||||
m_ir->SetInsertPoint(_next);
|
||||
|
||||
|
|
@ -2143,12 +2192,12 @@ public:
|
|||
{
|
||||
case inst_attr::putllc0:
|
||||
{
|
||||
putllc0_pattern(func, m_patterns.at(m_pos - start).range);
|
||||
putllc0_pattern(func, m_patterns.at(m_pos - start).info);
|
||||
continue;
|
||||
}
|
||||
case inst_attr::putllc16:
|
||||
{
|
||||
putllc16_pattern(func, m_patterns.at(m_pos - start).range);
|
||||
putllc16_pattern(func, m_patterns.at(m_pos - start).info);
|
||||
continue;
|
||||
}
|
||||
case inst_attr::omit:
|
||||
|
|
|
|||
|
|
@ -397,12 +397,12 @@ protected:
|
|||
|
||||
struct pattern_info
|
||||
{
|
||||
utils::address_range32 range;
|
||||
u64 info;
|
||||
};
|
||||
|
||||
std::unordered_map<u32, pattern_info> m_patterns;
|
||||
|
||||
void add_pattern(bool fill_all, inst_attr attr, u32 start, u32 end = -1);
|
||||
void add_pattern(bool fill_all, inst_attr attr, u32 start, u64 info);
|
||||
|
||||
private:
|
||||
// For private use
|
||||
|
|
|
|||
|
|
@ -1036,7 +1036,6 @@ lv2_file::open_result_t lv2_file::open(std::string_view vpath, s32 flags, s32 mo
|
|||
error_code sys_fs_open(ppu_thread& ppu, vm::cptr<char> path, s32 flags, vm::ptr<u32> fd, s32 mode, vm::cptr<void> arg, u64 size)
|
||||
{
|
||||
ppu.state += cpu_flag::wait;
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
sys_fs.warning("sys_fs_open(path=%s, flags=%#o, fd=*0x%x, mode=%#o, arg=*0x%x, size=0x%llx)", path, flags, fd, mode, arg, size);
|
||||
|
||||
|
|
@ -1085,7 +1084,6 @@ error_code sys_fs_open(ppu_thread& ppu, vm::cptr<char> path, s32 flags, vm::ptr<
|
|||
error_code sys_fs_read(ppu_thread& ppu, u32 fd, vm::ptr<void> buf, u64 nbytes, vm::ptr<u64> nread)
|
||||
{
|
||||
ppu.state += cpu_flag::wait;
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
sys_fs.trace("sys_fs_read(fd=%d, buf=*0x%x, nbytes=0x%llx, nread=*0x%x)", fd, buf, nbytes, nread);
|
||||
|
||||
|
|
@ -1122,6 +1120,11 @@ error_code sys_fs_read(ppu_thread& ppu, u32 fd, vm::ptr<void> buf, u64 nbytes, v
|
|||
return CELL_OK;
|
||||
}
|
||||
|
||||
if (nbytes >= 0x100000 && file->type != lv2_file_type::regular)
|
||||
{
|
||||
lv2_obj::sleep(ppu);
|
||||
}
|
||||
|
||||
std::unique_lock lock(file->mp->mutex);
|
||||
|
||||
if (!file->file)
|
||||
|
|
@ -1154,7 +1157,6 @@ error_code sys_fs_read(ppu_thread& ppu, u32 fd, vm::ptr<void> buf, u64 nbytes, v
|
|||
error_code sys_fs_write(ppu_thread& ppu, u32 fd, vm::cptr<void> buf, u64 nbytes, vm::ptr<u64> nwrite)
|
||||
{
|
||||
ppu.state += cpu_flag::wait;
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
sys_fs.trace("sys_fs_write(fd=%d, buf=*0x%x, nbytes=0x%llx, nwrite=*0x%x)", fd, buf, nbytes, nwrite);
|
||||
|
||||
|
|
@ -1237,7 +1239,6 @@ error_code sys_fs_write(ppu_thread& ppu, u32 fd, vm::cptr<void> buf, u64 nbytes,
|
|||
error_code sys_fs_close(ppu_thread& ppu, u32 fd)
|
||||
{
|
||||
ppu.state += cpu_flag::wait;
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
const auto file = idm::get_unlocked<lv2_fs_object, lv2_file>(fd);
|
||||
|
||||
|
|
@ -1314,7 +1315,6 @@ error_code sys_fs_close(ppu_thread& ppu, u32 fd)
|
|||
error_code sys_fs_opendir(ppu_thread& ppu, vm::cptr<char> path, vm::ptr<u32> fd)
|
||||
{
|
||||
ppu.state += cpu_flag::wait;
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
sys_fs.warning("sys_fs_opendir(path=%s, fd=*0x%x)", path, fd);
|
||||
|
||||
|
|
@ -1491,7 +1491,6 @@ error_code sys_fs_readdir(ppu_thread& ppu, u32 fd, vm::ptr<CellFsDirent> dir, vm
|
|||
error_code sys_fs_closedir(ppu_thread& ppu, u32 fd)
|
||||
{
|
||||
ppu.state += cpu_flag::wait;
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
sys_fs.warning("sys_fs_closedir(fd=%d)", fd);
|
||||
|
||||
|
|
@ -1506,7 +1505,6 @@ error_code sys_fs_closedir(ppu_thread& ppu, u32 fd)
|
|||
error_code sys_fs_stat(ppu_thread& ppu, vm::cptr<char> path, vm::ptr<CellFsStat> sb)
|
||||
{
|
||||
ppu.state += cpu_flag::wait;
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
sys_fs.warning("sys_fs_stat(path=%s, sb=*0x%x)", path, sb);
|
||||
|
||||
|
|
@ -1610,7 +1608,6 @@ error_code sys_fs_stat(ppu_thread& ppu, vm::cptr<char> path, vm::ptr<CellFsStat>
|
|||
error_code sys_fs_fstat(ppu_thread& ppu, u32 fd, vm::ptr<CellFsStat> sb)
|
||||
{
|
||||
ppu.state += cpu_flag::wait;
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
sys_fs.warning("sys_fs_fstat(fd=%d, sb=*0x%x)", fd, sb);
|
||||
|
||||
|
|
@ -1666,7 +1663,6 @@ error_code sys_fs_link(ppu_thread&, vm::cptr<char> from, vm::cptr<char> to)
|
|||
error_code sys_fs_mkdir(ppu_thread& ppu, vm::cptr<char> path, s32 mode)
|
||||
{
|
||||
ppu.state += cpu_flag::wait;
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
sys_fs.warning("sys_fs_mkdir(path=%s, mode=%#o)", path, mode);
|
||||
|
||||
|
|
@ -1728,7 +1724,6 @@ error_code sys_fs_mkdir(ppu_thread& ppu, vm::cptr<char> path, s32 mode)
|
|||
error_code sys_fs_rename(ppu_thread& ppu, vm::cptr<char> from, vm::cptr<char> to)
|
||||
{
|
||||
ppu.state += cpu_flag::wait;
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
sys_fs.warning("sys_fs_rename(from=%s, to=%s)", from, to);
|
||||
|
||||
|
|
@ -1794,7 +1789,6 @@ error_code sys_fs_rename(ppu_thread& ppu, vm::cptr<char> from, vm::cptr<char> to
|
|||
error_code sys_fs_rmdir(ppu_thread& ppu, vm::cptr<char> path)
|
||||
{
|
||||
ppu.state += cpu_flag::wait;
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
sys_fs.warning("sys_fs_rmdir(path=%s)", path);
|
||||
|
||||
|
|
@ -1850,7 +1844,6 @@ error_code sys_fs_rmdir(ppu_thread& ppu, vm::cptr<char> path)
|
|||
error_code sys_fs_unlink(ppu_thread& ppu, vm::cptr<char> path)
|
||||
{
|
||||
ppu.state += cpu_flag::wait;
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
sys_fs.warning("sys_fs_unlink(path=%s)", path);
|
||||
|
||||
|
|
@ -1951,8 +1944,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr<void> _arg, u32
|
|||
case 0x8000000a: // cellFsReadWithOffset
|
||||
case 0x8000000b: // cellFsWriteWithOffset
|
||||
{
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
const auto arg = vm::static_ptr_cast<lv2_file_op_rw>(_arg);
|
||||
|
||||
if (_size < arg.size())
|
||||
|
|
@ -1992,6 +1983,11 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr<void> _arg, u32
|
|||
sys_fs.error("%s type: Writing %u bytes to FD=%d (path=%s)", file->type, arg->size, file->name.data());
|
||||
}
|
||||
|
||||
if (op == 0x8000000a && file->type != lv2_file_type::regular && arg->size >= 0x100000)
|
||||
{
|
||||
lv2_obj::sleep(ppu);
|
||||
}
|
||||
|
||||
std::unique_lock wlock(file->mp->mutex, std::defer_lock);
|
||||
std::shared_lock rlock(file->mp->mutex, std::defer_lock);
|
||||
|
||||
|
|
@ -2047,8 +2043,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr<void> _arg, u32
|
|||
|
||||
case 0x80000009: // cellFsSdataOpenByFd
|
||||
{
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
const auto arg = vm::static_ptr_cast<lv2_file_op_09>(_arg);
|
||||
|
||||
if (_size < arg.size())
|
||||
|
|
@ -2102,8 +2096,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr<void> _arg, u32
|
|||
|
||||
case 0xc0000002: // cellFsGetFreeSize (TODO)
|
||||
{
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
const auto arg = vm::static_ptr_cast<lv2_file_c0000002>(_arg);
|
||||
|
||||
const auto& mp = g_fxo->get<lv2_fs_mount_info_map>().lookup("/dev_hdd0");
|
||||
|
|
@ -2418,8 +2410,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr<void> _arg, u32
|
|||
|
||||
case 0xe0000012: // cellFsGetDirectoryEntries
|
||||
{
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
const auto arg = vm::static_ptr_cast<lv2_file_op_dir::dir_info>(_arg);
|
||||
|
||||
if (_size < arg.size())
|
||||
|
|
@ -2434,8 +2424,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr<void> _arg, u32
|
|||
return CELL_EBADF;
|
||||
}
|
||||
|
||||
ppu.check_state();
|
||||
|
||||
u32 read_count = 0;
|
||||
|
||||
// NOTE: This function is actually capable of reading only one entry at a time
|
||||
|
|
@ -2593,7 +2581,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr<void> _arg, u32
|
|||
error_code sys_fs_lseek(ppu_thread& ppu, u32 fd, s64 offset, s32 whence, vm::ptr<u64> pos)
|
||||
{
|
||||
ppu.state += cpu_flag::wait;
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
sys_fs.trace("sys_fs_lseek(fd=%d, offset=0x%llx, whence=0x%x, pos=*0x%x)", fd, offset, whence, pos);
|
||||
|
||||
|
|
@ -2639,7 +2626,6 @@ error_code sys_fs_lseek(ppu_thread& ppu, u32 fd, s64 offset, s32 whence, vm::ptr
|
|||
error_code sys_fs_fdatasync(ppu_thread& ppu, u32 fd)
|
||||
{
|
||||
ppu.state += cpu_flag::wait;
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
sys_fs.trace("sys_fs_fdadasync(fd=%d)", fd);
|
||||
|
||||
|
|
@ -2650,6 +2636,8 @@ error_code sys_fs_fdatasync(ppu_thread& ppu, u32 fd)
|
|||
return CELL_EBADF;
|
||||
}
|
||||
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
std::lock_guard lock(file->mp->mutex);
|
||||
|
||||
if (!file->file)
|
||||
|
|
@ -2664,7 +2652,6 @@ error_code sys_fs_fdatasync(ppu_thread& ppu, u32 fd)
|
|||
error_code sys_fs_fsync(ppu_thread& ppu, u32 fd)
|
||||
{
|
||||
ppu.state += cpu_flag::wait;
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
sys_fs.trace("sys_fs_fsync(fd=%d)", fd);
|
||||
|
||||
|
|
@ -2675,6 +2662,8 @@ error_code sys_fs_fsync(ppu_thread& ppu, u32 fd)
|
|||
return CELL_EBADF;
|
||||
}
|
||||
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
std::lock_guard lock(file->mp->mutex);
|
||||
|
||||
if (!file->file)
|
||||
|
|
@ -2763,7 +2752,6 @@ error_code sys_fs_get_block_size(ppu_thread& ppu, vm::cptr<char> path, vm::ptr<u
|
|||
error_code sys_fs_truncate(ppu_thread& ppu, vm::cptr<char> path, u64 size)
|
||||
{
|
||||
ppu.state += cpu_flag::wait;
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
sys_fs.warning("sys_fs_truncate(path=%s, size=0x%llx)", path, size);
|
||||
|
||||
|
|
@ -2815,7 +2803,6 @@ error_code sys_fs_truncate(ppu_thread& ppu, vm::cptr<char> path, u64 size)
|
|||
error_code sys_fs_ftruncate(ppu_thread& ppu, u32 fd, u64 size)
|
||||
{
|
||||
ppu.state += cpu_flag::wait;
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
sys_fs.warning("sys_fs_ftruncate(fd=%d, size=0x%llx)", fd, size);
|
||||
|
||||
|
|
@ -3021,7 +3008,6 @@ error_code sys_fs_disk_free(ppu_thread& ppu, vm::cptr<char> path, vm::ptr<u64> t
|
|||
error_code sys_fs_utime(ppu_thread& ppu, vm::cptr<char> path, vm::cptr<CellFsUtimbuf> timep)
|
||||
{
|
||||
ppu.state += cpu_flag::wait;
|
||||
lv2_obj::sleep(ppu);
|
||||
|
||||
sys_fs.warning("sys_fs_utime(path=%s, timep=*0x%x)", path, timep);
|
||||
sys_fs.warning("** actime=%u, modtime=%u", timep->actime, timep->modtime);
|
||||
|
|
|
|||
|
|
@ -80,6 +80,7 @@ namespace rsx
|
|||
// Reinterprets the buffer contents as a span of T.
// The element count is the byte size (m_size) divided by sizeof(T); any trailing
// bytes that do not make up a whole T are not part of the returned span.
std::span<T> as_span() const
{
	auto base = data();

	// The low address bits must be clear for the cast below to be usable.
	// NOTE(review): the mask test assumes sizeof(T) is a power of two - confirm for all T in use.
	ensure((reinterpret_cast<uintptr_t>(base) & (sizeof(T) - 1)) == 0, "IO buffer span cast requires naturally aligned pointers.");

	return { utils::bless<T>(base), m_size / sizeof(T) };
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,11 +3,53 @@
|
|||
#include <util/types.hpp>
|
||||
#include <functional>
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "reverse_ptr.hpp"
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
namespace aligned_allocator
|
||||
{
|
||||
template <size_t Align>
|
||||
void* malloc(size_t size)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
return _aligned_malloc(size, Align);
|
||||
#else
|
||||
return std::aligned_alloc(Align, size);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <size_t Align>
|
||||
void* realloc(void* prev_ptr, [[maybe_unused]] size_t prev_size, size_t new_size)
|
||||
{
|
||||
if (prev_size >= new_size)
|
||||
{
|
||||
return prev_ptr;
|
||||
}
|
||||
|
||||
ensure(reinterpret_cast<usz>(prev_ptr) % Align == 0, "Pointer not aligned to Align");
|
||||
#ifdef _WIN32
|
||||
return _aligned_realloc(prev_ptr, new_size, Align);
|
||||
#else
|
||||
void* ret = std::aligned_alloc(Align, new_size);
|
||||
std::memcpy(ret, prev_ptr, std::min(prev_size, new_size));
|
||||
std::free(prev_ptr);
|
||||
return ret;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void free(void* ptr)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
_aligned_free(ptr);
|
||||
#else
|
||||
std::free(ptr);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template <typename C, typename T>
|
||||
concept span_like =
|
||||
requires(C& c) {
|
||||
|
|
@ -15,7 +57,13 @@ namespace rsx
|
|||
{ c.size() } -> std::integral;
|
||||
};
|
||||
|
||||
template <typename Ty>
|
||||
template <typename T, typename U>
|
||||
concept is_trivially_comparable_v =
|
||||
requires (T t1, U t2) {
|
||||
{ t1 == t2 } -> std::same_as<bool>;
|
||||
};
|
||||
|
||||
template <typename Ty, size_t Align=alignof(Ty)>
|
||||
requires std::is_trivially_destructible_v<Ty> && std::is_trivially_copyable_v<Ty>
|
||||
struct simple_array
|
||||
{
|
||||
|
|
@ -28,7 +76,7 @@ namespace rsx
|
|||
|
||||
private:
|
||||
static constexpr u32 _local_capacity = std::max<u32>(64u / sizeof(Ty), 1u);
|
||||
char _local_storage[_local_capacity * sizeof(Ty)];
|
||||
alignas(Align) char _local_storage[_local_capacity * sizeof(Ty)];
|
||||
|
||||
u32 _capacity = _local_capacity;
|
||||
Ty* _data = _local_capacity ? reinterpret_cast<Ty*>(_local_storage) : nullptr;
|
||||
|
|
@ -128,7 +176,7 @@ namespace rsx
|
|||
{
|
||||
if (!is_local_storage())
|
||||
{
|
||||
free(_data);
|
||||
aligned_allocator::free(_data);
|
||||
}
|
||||
|
||||
_data = nullptr;
|
||||
|
|
@ -196,13 +244,13 @@ namespace rsx
|
|||
if (is_local_storage())
|
||||
{
|
||||
// Switch to heap storage
|
||||
ensure(_data = static_cast<Ty*>(std::malloc(sizeof(Ty) * size)));
|
||||
ensure(_data = static_cast<Ty*>(aligned_allocator::malloc<Align>(sizeof(Ty) * size)));
|
||||
std::memcpy(static_cast<void*>(_data), _local_storage, size_bytes());
|
||||
}
|
||||
else
|
||||
{
|
||||
// Extend heap storage
|
||||
ensure(_data = static_cast<Ty*>(std::realloc(_data, sizeof(Ty) * size))); // "realloc() failed!"
|
||||
ensure(_data = static_cast<Ty*>(aligned_allocator::realloc<Align>(_data, size_bytes(), sizeof(Ty) * size))); // "realloc() failed!"
|
||||
}
|
||||
|
||||
_capacity = size;
|
||||
|
|
@ -457,6 +505,50 @@ namespace rsx
|
|||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Note that find and find_if return pointers to objects and not iterators for simplified usage.
|
||||
* It is functionally equivalent to retrieve a nullptr meaning empty object stored and nullptr meaning not found for all practical uses of this container.
|
||||
*/
|
||||
template <typename T = Ty>
|
||||
requires is_trivially_comparable_v<Ty, T>
|
||||
Ty* find(const T& value)
|
||||
{
|
||||
for (auto it = begin(); it != end(); ++it)
|
||||
{
|
||||
if (*it == value)
|
||||
{
|
||||
return &(*it);
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Remove when we switch to C++23
|
||||
template <typename T = Ty>
|
||||
requires is_trivially_comparable_v<Ty, T>
|
||||
const Ty* find(const T& value) const
|
||||
{
|
||||
return const_cast<simple_array<Ty, Align>*>(this)->find(value);
|
||||
}
|
||||
|
||||
// Linear scan; returns a pointer to the first element for which 'predicate' holds,
// or nullptr when no element satisfies it.
Ty* find_if(std::predicate<const Ty&> auto predicate)
{
	for (auto& entry : *this)
	{
		if (std::invoke(predicate, entry))
		{
			return &entry;
		}
	}

	return nullptr;
}
|
||||
|
||||
// Remove with C++23
|
||||
const Ty* find_if(std::predicate<const Ty&> auto predicate) const
|
||||
{
|
||||
return const_cast<simple_array<Ty, Align>*>(this)->find_if(predicate);
|
||||
}
|
||||
|
||||
bool erase_if(std::predicate<const Ty&> auto predicate)
|
||||
{
|
||||
if (!_size)
|
||||
|
|
|
|||
|
|
@ -338,10 +338,10 @@ namespace gl
|
|||
params.logd = rsx::ceil_log2(depth);
|
||||
set_parameters(cmd);
|
||||
|
||||
const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size);
|
||||
const u32 texels_per_dword = std::max<u32>(4u / sizeof(_BlockType), 1u); // For block sizes less than 4 bytes wide
|
||||
const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation) / texels_per_dword;
|
||||
compute_task::run(cmd, linear_invocations);
|
||||
const u32 word_count_per_invocation = std::max<u32>(sizeof(_BlockType) / 4u, 1u);
|
||||
const u32 num_bytes_per_invocation = (word_count_per_invocation * 4u * optimal_group_size);
|
||||
const u32 workgroup_invocations = utils::aligned_div(data_length, num_bytes_per_invocation);
|
||||
compute_task::run(cmd, workgroup_invocations);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -590,7 +590,7 @@ namespace gl
|
|||
|
||||
void fill_texture(gl::command_context& cmd, texture* dst, int format,
|
||||
const std::vector<rsx::subresource_layout> &input_layouts,
|
||||
bool is_swizzled, GLenum gl_format, GLenum gl_type, rsx::simple_array<std::byte>& staging_buffer)
|
||||
bool is_swizzled, GLenum gl_format, GLenum gl_type, std::span<std::byte> staging_buffer)
|
||||
{
|
||||
const auto& driver_caps = gl::get_driver_caps();
|
||||
rsx::texture_uploader_capabilities caps
|
||||
|
|
@ -841,7 +841,7 @@ namespace gl
|
|||
void upload_texture(gl::command_context& cmd, texture* dst, u32 gcm_format, bool is_swizzled, const std::vector<rsx::subresource_layout>& subresources_layout)
|
||||
{
|
||||
// Calculate staging buffer size
|
||||
rsx::simple_array<std::byte> data_upload_buf;
|
||||
rsx::simple_array<std::byte, sizeof(u128)> data_upload_buf;
|
||||
|
||||
rsx::texture_uploader_capabilities caps { .supports_dxt = gl::get_driver_caps().EXT_texture_compression_s3tc_supported };
|
||||
if (rsx::is_compressed_host_format(caps, gcm_format))
|
||||
|
|
|
|||
39
rpcs3/Emu/RSX/Program/Assembler/CFG.h
Normal file
39
rpcs3/Emu/RSX/Program/Assembler/CFG.h
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
#pragma once
|
||||
|
||||
#include <util/asm.hpp>
|
||||
#include "IR.h"
|
||||
|
||||
#include <list>
|
||||
|
||||
struct RSXFragmentProgram;
|
||||
|
||||
namespace rsx::assembler
|
||||
{
|
||||
struct FlowGraph
|
||||
{
|
||||
std::list<BasicBlock> blocks;
|
||||
|
||||
BasicBlock* push(BasicBlock* parent = nullptr, u32 pc = 0, EdgeType edge_type = EdgeType::NONE)
|
||||
{
|
||||
if (!parent && !blocks.empty())
|
||||
{
|
||||
parent = &blocks.back();
|
||||
}
|
||||
|
||||
blocks.push_back({});
|
||||
BasicBlock* new_block = &blocks.back();
|
||||
|
||||
if (parent)
|
||||
{
|
||||
parent->insert_succ(new_block, edge_type);
|
||||
new_block->insert_pred(parent, edge_type);
|
||||
}
|
||||
|
||||
new_block->id = pc;
|
||||
return new_block;
|
||||
}
|
||||
};
|
||||
|
||||
FlowGraph deconstruct_fragment_program(const RSXFragmentProgram& prog);
|
||||
}
|
||||
|
||||
193
rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp
Normal file
193
rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
#include "stdafx.h"
|
||||
|
||||
#include "CFG.h"
|
||||
|
||||
#include "Emu/RSX/Common/simple_array.hpp"
|
||||
#include "Emu/RSX/Program/RSXFragmentProgram.h"
|
||||
|
||||
#include <util/asm.hpp>
|
||||
#include <util/v128.hpp>
|
||||
#include <span>
|
||||
|
||||
#if defined(ARCH_ARM64)
|
||||
#if !defined(_MSC_VER)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
|
||||
#pragma GCC diagnostic ignored "-Wold-style-cast"
|
||||
#endif
|
||||
#undef FORCE_INLINE
|
||||
#include "Emu/CPU/sse2neon.h"
|
||||
#if !defined(_MSC_VER)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace rsx::assembler
|
||||
{
|
||||
// Undoes the RSX half-word shuffle used for FP instructions:
// the two bytes of every 16-bit half-word in the 128-bit input are exchanged.
inline v128 decode_instruction(const v128& raw_inst)
{
	const __m128i packed = static_cast<__m128i>(raw_inst);

	// Low byte of each half-word moved up into the high byte position
	const __m128i moved_up = _mm_and_si128(_mm_set1_epi32(0xff00ff00), _mm_slli_epi32(packed, 8));

	// High byte of each half-word moved down into the low byte position
	const __m128i moved_down = _mm_and_si128(_mm_set1_epi32(0x00ff00ff), _mm_srli_epi32(packed, 8));

	const __m128i swapped = _mm_or_si128(moved_up, moved_down);
	return v128::loadu(&swapped);
}
|
||||
|
||||
/**
 * Splits a fragment program into basic blocks connected by control-flow edges.
 *
 * The ucode returned by prog.get_data() is walked one 128-bit instruction at a time until an
 * instruction with the 'end' bit set has been processed. NOPs are dropped. IFE, LOOP and REP open
 * new blocks (body, optional else, and end); every other instruction is appended to the current block.
 * Instructions that reference a constant register are followed by a 128-bit literal, which is copied
 * into the second half of the IR instruction's bytecode and skipped by the walker.
 *
 * NOTE(review): termination relies solely on the end bit; no program length is consulted here.
 * NOTE(review): Instruction::opcode is not populated by this function; consumers decode it from bytecode.
 *
 * @param prog Fragment program to analyse.
 * @return Graph whose blocks are sorted by ascending id (instruction index). Successor edges are
 *         sorted by ascending target id, predecessor edges by descending source id.
 */
FlowGraph deconstruct_fragment_program(const RSXFragmentProgram& prog)
{
	// For a flowgraph, we don't care at all about the actual contents, just flow control instructions.
	OPDEST dst{};
	SRC0 src0{};
	SRC1 src1{};
	SRC2 src2{};

	u32 pc = 0; // Program counter, in units of 128-bit instruction slots
	bool end = false;

	// Flow control data
	rsx::simple_array<BasicBlock*> end_blocks;
	rsx::simple_array<BasicBlock*> else_blocks;

	// Data block
	u32* data = static_cast<u32*>(prog.get_data());

	// Output
	FlowGraph graph{};
	BasicBlock* bb = graph.push();

	// Looks up an already created block by its id (instruction index)
	auto find_block_for_pc = [&](u32 id) -> BasicBlock*
	{
		auto found = std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == id));
		if (found != graph.blocks.end())
		{
			return &(*found);
		}
		return nullptr;
	};

	// Links 'parent' to the block with the given id, creating that block only if it does not exist yet
	auto safe_insert_block = [&](BasicBlock* parent, u32 id, EdgeType edge_type) -> BasicBlock*
	{
		if (auto found = find_block_for_pc(id))
		{
			parent->insert_succ(found, edge_type);
			found->insert_pred(parent, edge_type);
			return found;
		}

		return graph.push(parent, id, edge_type);
	};

	// True when any source operand of the current instruction reads the embedded literal
	auto includes_literal_constant = [&]()
	{
		return src0.reg_type == RSX_FP_REGISTER_TYPE_CONSTANT ||
			src1.reg_type == RSX_FP_REGISTER_TYPE_CONSTANT ||
			src2.reg_type == RSX_FP_REGISTER_TYPE_CONSTANT;
	};

	while (!end)
	{
		// Switch to a pending end/else block once its first instruction is reached
		BasicBlock** found = end_blocks.find_if(FN(x->id == pc));

		if (!found)
		{
			found = else_blocks.find_if(FN(x->id == pc));
		}

		if (found)
		{
			bb = *found;
		}

		const v128 raw_inst = v128::loadu(data, pc);
		v128 decoded = decode_instruction(raw_inst);

		dst.HEX = decoded._u32[0];
		src0.HEX = decoded._u32[1];
		src1.HEX = decoded._u32[2];
		src2.HEX = decoded._u32[3];

		end = !!dst.end;
		const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6);

		if (opcode == RSX_FP_OPCODE_NOP)
		{
			pc++;
			continue;
		}

		bb->instructions.push_back({});
		auto& ir_inst = bb->instructions.back();
		std::memcpy(ir_inst.bytecode, &decoded._u32[0], 16);
		ir_inst.length = 4;
		ir_inst.addr = pc * 16;

		switch (opcode)
		{
		case RSX_FP_OPCODE_BRK:
			break;
		case RSX_FP_OPCODE_CAL:
			// Unimplemented. Also unused by the RSX compiler
			fmt::throw_exception("Unimplemented FP CAL instruction.");
			break;
		case RSX_FP_OPCODE_FENCT:
			break;
		case RSX_FP_OPCODE_FENCB:
			break;
		case RSX_FP_OPCODE_RET:
			// Outside a subroutine, this doesn't mean much. The main block can conditionally return to stop execution early.
			// This will not alter flow control.
			break;
		case RSX_FP_OPCODE_IFE:
		{
			// Inserts if and else and end blocks
			auto parent = bb;
			bb = safe_insert_block(parent, pc + 1, EdgeType::IF);
			if (src2.end_offset != src1.else_offset)
			{
				else_blocks.push_back(safe_insert_block(parent, src1.else_offset >> 2, EdgeType::ELSE));
			}
			end_blocks.push_back(safe_insert_block(parent, src2.end_offset >> 2, EdgeType::ENDIF));
			break;
		}
		case RSX_FP_OPCODE_LOOP:
		case RSX_FP_OPCODE_REP:
		{
			// Inserts for and end blocks
			auto parent = bb;
			bb = safe_insert_block(parent, pc + 1, EdgeType::LOOP);
			end_blocks.push_back(safe_insert_block(parent, src2.end_offset >> 2, EdgeType::ENDLOOP));
			break;
		}
		default:
			if (includes_literal_constant())
			{
				// The literal occupies the slot immediately AFTER the instruction.
				// 'pc' still addresses the instruction itself at this point, so fetch pc + 1.
				const v128 constant_literal = v128::loadu(data, pc + 1);
				v128 decoded_literal = decode_instruction(constant_literal);

				std::memcpy(ir_inst.bytecode + 4, &decoded_literal._u32[0], 16);
				ir_inst.length += 4;

				// Step over the literal so it is not decoded as an instruction
				pc++;
			}
		}

		pc++;
	}

	// Sort edges for each block by distance
	for (auto& block : graph.blocks)
	{
		std::sort(block.pred.begin(), block.pred.end(), FN(x.from->id > y.from->id));
		std::sort(block.succ.begin(), block.succ.end(), FN(x.to->id < y.to->id));
	}

	// Sort block nodes by distance
	graph.blocks.sort(FN(x.id < y.id));
	return graph;
}
|
||||
}
|
||||
95
rpcs3/Emu/RSX/Program/Assembler/IR.h
Normal file
95
rpcs3/Emu/RSX/Program/Assembler/IR.h
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
#pragma once
|
||||
|
||||
#include <util/asm.hpp>
|
||||
|
||||
namespace rsx::assembler
|
||||
{
|
||||
struct BasicBlock;
|
||||
|
||||
// Identifies a single register: its numeric id plus a flag carried in 'f16'.
// NOTE(review): 'f16' presumably marks a half-precision register - confirm against users.
struct Register
{
	int id{ 0 };
	bool f16{ false };
};
|
||||
|
||||
struct RegisterRef
|
||||
{
|
||||
Register reg{};
|
||||
|
||||
// Vector information
|
||||
union
|
||||
{
|
||||
u32 mask;
|
||||
|
||||
struct
|
||||
{
|
||||
bool x : 1;
|
||||
bool y : 1;
|
||||
bool z : 1;
|
||||
bool w : 1;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
struct Instruction
|
||||
{
|
||||
// Raw data. Every instruction is max 128 bits.
|
||||
// Each instruction can also have 128 bits of literal/embedded data.
|
||||
u32 bytecode[8]{ {} };
|
||||
u32 addr = 0;
|
||||
|
||||
// Decoded
|
||||
u32 opcode = 0;
|
||||
u8 length = 4; // Length in dwords
|
||||
|
||||
// Padding
|
||||
u8 reserved0 = 0;
|
||||
u16 reserved1 = 0;
|
||||
|
||||
// References
|
||||
std::vector<RegisterRef> srcs;
|
||||
std::vector<RegisterRef> dsts;
|
||||
};
|
||||
|
||||
// Kind of control-flow transfer an edge between two basic blocks represents.
enum class EdgeType
{
	NONE = 0,
	IF = 1,
	ELSE = 2,
	ENDIF = 3,
	LOOP = 4,
	ENDLOOP = 5,
};
|
||||
|
||||
struct FlowEdge
|
||||
{
|
||||
EdgeType type = EdgeType::NONE;
|
||||
BasicBlock* from = nullptr;
|
||||
BasicBlock* to = nullptr;
|
||||
};
|
||||
|
||||
struct BasicBlock
|
||||
{
|
||||
u32 id = 0;
|
||||
std::vector<Instruction> instructions; // Program instructions for the RSX processor
|
||||
std::vector<FlowEdge> succ; // Forward edges. Sorted closest first.
|
||||
std::vector<FlowEdge> pred; // Back edges. Sorted closest first.
|
||||
|
||||
std::vector<Instruction> prologue; // Prologue, created by passes
|
||||
std::vector<Instruction> epilogue; // Epilogue, created by passes
|
||||
|
||||
FlowEdge* insert_succ(BasicBlock* b, EdgeType type = EdgeType::NONE)
|
||||
{
|
||||
FlowEdge e{ .type = type, .from = this, .to = b };
|
||||
succ.push_back(e);
|
||||
return &succ.back();
|
||||
}
|
||||
|
||||
FlowEdge* insert_pred(BasicBlock* b, EdgeType type = EdgeType::NONE)
|
||||
{
|
||||
FlowEdge e{ .type = type, .from = b, .to = this };
|
||||
pred.push_back(e);
|
||||
return &pred.back();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
@ -234,7 +234,8 @@ std::string FragmentProgramDecompiler::AddCond()
|
|||
|
||||
std::string FragmentProgramDecompiler::AddConst()
|
||||
{
|
||||
const u32 constant_id = m_size + (4 * sizeof(u32));
|
||||
ensure(m_instruction->length == 8);
|
||||
const u32 constant_id = m_instruction->addr + 16;
|
||||
u32 index = umax;
|
||||
|
||||
if (auto found = m_constant_offsets.find(constant_id);
|
||||
|
|
@ -249,9 +250,6 @@ std::string FragmentProgramDecompiler::AddConst()
|
|||
m_constant_offsets[constant_id] = index;
|
||||
}
|
||||
|
||||
// Skip next instruction, its just a literal
|
||||
m_offset = 2 * 4 * sizeof(u32);
|
||||
|
||||
// Return the next offset index
|
||||
return "_fetch_constant(" + std::to_string(index) + ")";
|
||||
}
|
||||
|
|
@ -1297,7 +1295,7 @@ bool FragmentProgramDecompiler::handle_tex_srb(u32 opcode)
|
|||
|
||||
std::string FragmentProgramDecompiler::Decompile()
|
||||
{
|
||||
auto data = static_cast<be_t<u32>*>(m_prog.get_data());
|
||||
const auto graph = rsx::assembler::deconstruct_fragment_program(m_prog);
|
||||
m_size = 0;
|
||||
m_location = 0;
|
||||
m_loop_count = 0;
|
||||
|
|
@ -1314,141 +1312,130 @@ std::string FragmentProgramDecompiler::Decompile()
|
|||
|
||||
int forced_unit = FORCE_NONE;
|
||||
|
||||
while (true)
|
||||
for (const auto &block : graph.blocks)
|
||||
{
|
||||
for (auto found = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size);
|
||||
found != m_end_offsets.end();
|
||||
found = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size))
|
||||
// TODO: Handle block prologue if any
|
||||
if (!block.pred.empty())
|
||||
{
|
||||
m_end_offsets.erase(found);
|
||||
m_code_level--;
|
||||
AddCode("}");
|
||||
m_loop_count--;
|
||||
// CFG guarantees predecessors are sorted, closest one first
|
||||
for (const auto& pred : block.pred)
|
||||
{
|
||||
switch (pred.type)
|
||||
{
|
||||
case rsx::assembler::EdgeType::ENDLOOP:
|
||||
m_loop_count--;
|
||||
[[ fallthrough ]];
|
||||
case rsx::assembler::EdgeType::ENDIF:
|
||||
m_code_level--;
|
||||
AddCode("}");
|
||||
break;
|
||||
case rsx::assembler::EdgeType::LOOP:
|
||||
m_loop_count++;
|
||||
[[ fallthrough ]];
|
||||
case rsx::assembler::EdgeType::IF:
|
||||
// Instruction will be inserted by the SIP decoder
|
||||
AddCode("{");
|
||||
m_code_level++;
|
||||
break;
|
||||
case rsx::assembler::EdgeType::ELSE:
|
||||
// This one needs more testing
|
||||
m_code_level--;
|
||||
AddCode("}");
|
||||
AddCode("else");
|
||||
AddCode("{");
|
||||
m_code_level++;
|
||||
break;
|
||||
default:
|
||||
// Start a new block anyway
|
||||
fmt::throw_exception("Unexpected block found");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto found = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size);
|
||||
found != m_else_offsets.end();
|
||||
found = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size))
|
||||
for (const auto& inst : block.instructions)
|
||||
{
|
||||
m_else_offsets.erase(found);
|
||||
m_code_level--;
|
||||
AddCode("}");
|
||||
AddCode("else");
|
||||
AddCode("{");
|
||||
m_code_level++;
|
||||
}
|
||||
m_instruction = &inst;
|
||||
|
||||
dst.HEX = GetData(data[0]);
|
||||
src0.HEX = GetData(data[1]);
|
||||
src1.HEX = GetData(data[2]);
|
||||
src2.HEX = GetData(data[3]);
|
||||
dst.HEX = inst.bytecode[0];
|
||||
src0.HEX = inst.bytecode[1];
|
||||
src1.HEX = inst.bytecode[2];
|
||||
src2.HEX = inst.bytecode[3];
|
||||
|
||||
m_offset = 4 * sizeof(u32);
|
||||
opflags = 0;
|
||||
opflags = 0;
|
||||
|
||||
const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6);
|
||||
const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6);
|
||||
|
||||
auto SIP = [&]()
|
||||
{
|
||||
switch (opcode)
|
||||
{
|
||||
case RSX_FP_OPCODE_BRK:
|
||||
if (m_loop_count) AddFlowOp("break");
|
||||
else rsx_log.error("BRK opcode found outside of a loop");
|
||||
break;
|
||||
case RSX_FP_OPCODE_CAL:
|
||||
rsx_log.error("Unimplemented SIP instruction: CAL");
|
||||
break;
|
||||
case RSX_FP_OPCODE_FENCT:
|
||||
AddCode("//FENCT");
|
||||
forced_unit = FORCE_SCT;
|
||||
break;
|
||||
case RSX_FP_OPCODE_FENCB:
|
||||
AddCode("//FENCB");
|
||||
forced_unit = FORCE_SCB;
|
||||
break;
|
||||
case RSX_FP_OPCODE_IFE:
|
||||
AddCode("if($cond)");
|
||||
break;
|
||||
case RSX_FP_OPCODE_LOOP:
|
||||
AddCode(fmt::format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) //LOOP",
|
||||
m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment));
|
||||
break;
|
||||
case RSX_FP_OPCODE_REP:
|
||||
AddCode(fmt::format("if($cond) for(int i%u = %u; i%u < %u; i%u += %u) //REP",
|
||||
m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment));
|
||||
break;
|
||||
case RSX_FP_OPCODE_RET:
|
||||
AddFlowOp("return");
|
||||
break;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
auto SIP = [&]()
|
||||
{
|
||||
switch (opcode)
|
||||
{
|
||||
case RSX_FP_OPCODE_BRK:
|
||||
if (m_loop_count) AddFlowOp("break");
|
||||
else rsx_log.error("BRK opcode found outside of a loop");
|
||||
case RSX_FP_OPCODE_NOP:
|
||||
break;
|
||||
case RSX_FP_OPCODE_CAL:
|
||||
rsx_log.error("Unimplemented SIP instruction: CAL");
|
||||
case RSX_FP_OPCODE_KIL:
|
||||
properties.has_discard_op = true;
|
||||
AddFlowOp("_kill()");
|
||||
break;
|
||||
case RSX_FP_OPCODE_FENCT:
|
||||
AddCode("//FENCT");
|
||||
forced_unit = FORCE_SCT;
|
||||
break;
|
||||
case RSX_FP_OPCODE_FENCB:
|
||||
AddCode("//FENCB");
|
||||
forced_unit = FORCE_SCB;
|
||||
break;
|
||||
case RSX_FP_OPCODE_IFE:
|
||||
AddCode("if($cond)");
|
||||
if (src2.end_offset != src1.else_offset)
|
||||
m_else_offsets.push_back(src1.else_offset << 2);
|
||||
m_end_offsets.push_back(src2.end_offset << 2);
|
||||
AddCode("{");
|
||||
m_code_level++;
|
||||
break;
|
||||
case RSX_FP_OPCODE_LOOP:
|
||||
if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt)
|
||||
{
|
||||
AddCode(fmt::format("//$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //LOOP",
|
||||
m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset));
|
||||
}
|
||||
else
|
||||
{
|
||||
AddCode(fmt::format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) //LOOP",
|
||||
m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment));
|
||||
m_loop_count++;
|
||||
m_end_offsets.push_back(src2.end_offset << 2);
|
||||
AddCode("{");
|
||||
m_code_level++;
|
||||
}
|
||||
break;
|
||||
case RSX_FP_OPCODE_REP:
|
||||
if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt)
|
||||
{
|
||||
AddCode(fmt::format("//$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //REP",
|
||||
m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset));
|
||||
}
|
||||
else
|
||||
{
|
||||
AddCode(fmt::format("if($cond) for(int i%u = %u; i%u < %u; i%u += %u) //REP",
|
||||
m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment));
|
||||
m_loop_count++;
|
||||
m_end_offsets.push_back(src2.end_offset << 2);
|
||||
AddCode("{");
|
||||
m_code_level++;
|
||||
}
|
||||
break;
|
||||
case RSX_FP_OPCODE_RET:
|
||||
AddFlowOp("return");
|
||||
break;
|
||||
|
||||
default:
|
||||
return false;
|
||||
int prev_force_unit = forced_unit;
|
||||
|
||||
// Some instructions do not respect forced unit
|
||||
// Tested with Tales of Vesperia
|
||||
if (SIP()) break;
|
||||
if (handle_tex_srb(opcode)) break;
|
||||
|
||||
// FENCT/FENCB do not actually reject instructions if they dont match the forced unit
|
||||
// Looks like they are optimization hints and not hard-coded forced paths
|
||||
if (handle_sct_scb(opcode)) break;
|
||||
forced_unit = FORCE_NONE;
|
||||
|
||||
rsx_log.error("Unknown/illegal instruction: 0x%x (forced unit %d)", opcode, prev_force_unit);
|
||||
break;
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
switch (opcode)
|
||||
{
|
||||
case RSX_FP_OPCODE_NOP:
|
||||
break;
|
||||
case RSX_FP_OPCODE_KIL:
|
||||
properties.has_discard_op = true;
|
||||
AddFlowOp("_kill()");
|
||||
break;
|
||||
default:
|
||||
int prev_force_unit = forced_unit;
|
||||
|
||||
// Some instructions do not respect forced unit
|
||||
// Tested with Tales of Vesperia
|
||||
if (SIP()) break;
|
||||
if (handle_tex_srb(opcode)) break;
|
||||
|
||||
// FENCT/FENCB do not actually reject instructions if they dont match the forced unit
|
||||
// Looks like they are optimization hints and not hard-coded forced paths
|
||||
if (handle_sct_scb(opcode)) break;
|
||||
forced_unit = FORCE_NONE;
|
||||
|
||||
rsx_log.error("Unknown/illegal instruction: 0x%x (forced unit %d)", opcode, prev_force_unit);
|
||||
break;
|
||||
m_size += m_instruction->length * 4;
|
||||
if (dst.end) break;
|
||||
}
|
||||
|
||||
m_size += m_offset;
|
||||
|
||||
if (dst.end) break;
|
||||
|
||||
ensure(m_offset % sizeof(u32) == 0);
|
||||
data += m_offset / sizeof(u32);
|
||||
// TODO: Handle block epilogue if needed
|
||||
}
|
||||
|
||||
while (m_code_level > 1)
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@
|
|||
#include "FragmentProgramRegister.h"
|
||||
#include "RSXFragmentProgram.h"
|
||||
|
||||
#include "Assembler/CFG.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
|
||||
|
|
@ -39,17 +41,16 @@ class FragmentProgramDecompiler
|
|||
SRC2 src2;
|
||||
u32 opflags;
|
||||
|
||||
const rsx::assembler::Instruction* m_instruction;
|
||||
|
||||
std::string main;
|
||||
u32& m_size;
|
||||
u32 m_const_index = 0;
|
||||
u32 m_offset;
|
||||
u32 m_location = 0;
|
||||
bool m_is_valid_ucode = true;
|
||||
|
||||
u32 m_loop_count;
|
||||
int m_code_level;
|
||||
std::vector<u32> m_end_offsets;
|
||||
std::vector<u32> m_else_offsets;
|
||||
std::unordered_map<u32, u32> m_constant_offsets;
|
||||
|
||||
std::array<rsx::MixedPrecisionRegister, 64> temp_registers;
|
||||
|
|
|
|||
|
|
@ -103,34 +103,50 @@ uint get_z_index(const in uint x_, const in uint y_, const in uint z_)
|
|||
|
||||
#if USE_16BIT_ADDRESSING
|
||||
|
||||
void write16(inout uint accumulator, const in uint subword, const in uint src_id, const in uint dst_id)
|
||||
void decode_16b(const in uint texel_id, in uint x, const in uint y, const in uint z)
|
||||
{
|
||||
const uint masks[] = { 0x0000FFFF, 0xFFFF0000 };
|
||||
accumulator |= data_in[src_id / 2] & masks[subword];
|
||||
uint accumulator = 0;
|
||||
|
||||
if (subword == 1)
|
||||
const uint subword_count = min(invocation.size.x, 2);
|
||||
for (uint subword = 0; subword < subword_count; ++subword, ++x)
|
||||
{
|
||||
data_out[dst_id / 2] = %f(accumulator);
|
||||
uint src_texel_id = get_z_index(x, y, z);
|
||||
uint src_id = (src_texel_id + invocation.data_offset);
|
||||
int src_bit_offset = int(src_id % 2) << 4;
|
||||
uint src_value = bitfieldExtract(data_in[src_id / 2], src_bit_offset, 16);
|
||||
accumulator = bitfieldInsert(accumulator, src_value, int(subword << 4), 16);
|
||||
}
|
||||
|
||||
data_out[texel_id / 2] = %f(accumulator);
|
||||
}
|
||||
|
||||
#elif USE_8BIT_ADDRESSING
|
||||
|
||||
void write8(inout uint accumulator, const in uint subword, const in uint src_id, const in uint dst_id)
|
||||
void decode_8b(const in uint texel_id, in uint x, const in uint y, const in uint z)
|
||||
{
|
||||
const uint masks[] = { 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000 };
|
||||
accumulator |= data_in[src_id / 4] & masks[subword];
|
||||
uint accumulator = 0;
|
||||
|
||||
if (subword == 3)
|
||||
const uint subword_count = min(invocation.size.x, 4);
|
||||
for (uint subword = 0; subword < subword_count; ++subword, ++x)
|
||||
{
|
||||
data_out[dst_id / 4] = accumulator;
|
||||
uint src_texel_id = get_z_index(x, y, z);
|
||||
uint src_id = (src_texel_id + invocation.data_offset);
|
||||
int src_bit_offset = int(src_id % 4) << 3;
|
||||
uint src_value = bitfieldExtract(data_in[src_id / 4], src_bit_offset, 8);
|
||||
accumulator = bitfieldInsert(accumulator, src_value, int(subword << 3), 8);
|
||||
}
|
||||
|
||||
data_out[texel_id / 4] = accumulator;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void write32(const in uint word_count, in uint src_id, in uint dst_id)
|
||||
void decode_32b(const in uint texel_id, const in uint word_count, const in uint x, const in uint y, const in uint z)
|
||||
{
|
||||
uint src_texel_id = get_z_index(x, y, z);
|
||||
uint dst_id = (texel_id * word_count);
|
||||
uint src_id = (src_texel_id + invocation.data_offset) * word_count;
|
||||
|
||||
for (uint i = 0; i < word_count; ++i)
|
||||
{
|
||||
uint value = data_in[src_id++];
|
||||
|
|
@ -165,23 +181,11 @@ void main()
|
|||
uint x = (slice_offset % row_length);
|
||||
|
||||
#if USE_8BIT_ADDRESSING
|
||||
for (uint subword = 0, accumulator = 0; subword < 4; ++subword, ++x) {
|
||||
decode_8b(texel_id, x, y, z);
|
||||
#elif USE_16BIT_ADDRESSING
|
||||
for (uint subword = 0, accumulator = 0; subword < 2; ++subword, ++x) {
|
||||
#endif
|
||||
|
||||
uint src_texel_id = get_z_index(x, y, z);
|
||||
uint dst_id = (texel_id * word_count);
|
||||
uint src_id = (src_texel_id + invocation.data_offset) * word_count;
|
||||
|
||||
#if USE_8BIT_ADDRESSING
|
||||
write8(accumulator, subword, src_id, dst_id);
|
||||
}
|
||||
#elif USE_16BIT_ADDRESSING
|
||||
write16(accumulator, subword, src_id, dst_id);
|
||||
}
|
||||
decode_16b(texel_id, x, y, z);
|
||||
#else
|
||||
write32(word_count, src_id, dst_id);
|
||||
decode_32b(texel_id, word_count, x, y, z);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -475,10 +475,10 @@ namespace vk
|
|||
params.logh = rsx::ceil_log2(height);
|
||||
params.logd = rsx::ceil_log2(depth);
|
||||
|
||||
const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size);
|
||||
const u32 texels_per_dword = std::max<u32>(4u / sizeof(_BlockType), 1u); // For block sizes less than 4 bytes wide
|
||||
const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation) / texels_per_dword;
|
||||
compute_task::run(cmd, linear_invocations);
|
||||
const u32 word_count_per_invocation = std::max<u32>(sizeof(_BlockType) / 4u, 1u);
|
||||
const u32 num_bytes_per_invocation = (word_count_per_invocation * 4u * optimal_group_size);
|
||||
const u32 workgroup_invocations = utils::aligned_div(data_length, num_bytes_per_invocation);
|
||||
compute_task::run(cmd, workgroup_invocations);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -69,8 +69,9 @@ namespace vk
|
|||
|
||||
void on_query_pool_released(std::unique_ptr<vk::query_pool>& pool);
|
||||
|
||||
template<template<class> class _List>
|
||||
void free_queries(vk::command_buffer& cmd, _List<u32>& list)
|
||||
template<typename T>
|
||||
requires std::ranges::range<T> && std::same_as<std::ranges::range_value_t<T>, u32> // List of u32
|
||||
void free_queries(vk::command_buffer& cmd, T& list)
|
||||
{
|
||||
for (const auto index : list)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -39,11 +39,20 @@ namespace vk
|
|||
return false;
|
||||
}
|
||||
|
||||
buffer::buffer(const vk::render_device& dev, u64 size, const memory_type_info& memory_type, u32 access_flags, VkBufferUsageFlags usage, VkBufferCreateFlags flags, vmm_allocation_pool allocation_pool)
|
||||
buffer::buffer(
|
||||
const vk::render_device& dev,
|
||||
u64 size,
|
||||
const memory_type_info& memory_type,
|
||||
u32 access_flags,
|
||||
VkBufferUsageFlags usage,
|
||||
VkBufferCreateFlags flags,
|
||||
vmm_allocation_pool allocation_pool)
|
||||
: m_device(dev)
|
||||
{
|
||||
const bool nullable = !!(flags & VK_BUFFER_CREATE_ALLOW_NULL_RPCS3);
|
||||
|
||||
info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
||||
info.flags = flags;
|
||||
info.flags = flags & ~VK_BUFFER_CREATE_SPECIAL_FLAGS_RPCS3;
|
||||
info.size = size;
|
||||
info.usage = usage;
|
||||
info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
||||
|
|
@ -60,8 +69,18 @@ namespace vk
|
|||
fmt::throw_exception("No compatible memory type was found!");
|
||||
}
|
||||
|
||||
memory = std::make_unique<memory_block>(m_device, memory_reqs.size, memory_reqs.alignment, allocation_type_info, allocation_pool);
|
||||
vkBindBufferMemory(dev, value, memory->get_vk_device_memory(), memory->get_vk_device_memory_offset());
|
||||
memory = std::make_unique<memory_block>(m_device, memory_reqs.size, memory_reqs.alignment, allocation_type_info, allocation_pool, nullable);
|
||||
if (auto device_memory = memory->get_vk_device_memory();
|
||||
device_memory != VK_NULL_HANDLE)
|
||||
{
|
||||
vkBindBufferMemory(dev, value, device_memory, memory->get_vk_device_memory_offset());
|
||||
}
|
||||
else
|
||||
{
|
||||
ensure(nullable);
|
||||
vkDestroyBuffer(m_device, value, nullptr);
|
||||
value = VK_NULL_HANDLE;
|
||||
}
|
||||
}
|
||||
|
||||
buffer::buffer(const vk::render_device& dev, VkBufferUsageFlags usage, void* host_pointer, u64 size)
|
||||
|
|
|
|||
|
|
@ -7,6 +7,13 @@
|
|||
|
||||
namespace vk
|
||||
{
|
||||
// Extra buffer creation flags that live next to the VkBufferCreateFlags bits.
// vk::buffer masks VK_BUFFER_CREATE_SPECIAL_FLAGS_RPCS3 out of the flags before
// storing them in its VkBufferCreateInfo.
enum : u32
{
	// Top bit of the 32-bit flag word (same value as 0x80000000).
	// When set, vk::buffer may end up with a null handle instead of requiring bound memory.
	VK_BUFFER_CREATE_ALLOW_NULL_RPCS3 = 1u << 31,

	// Union of every RPCS3-specific flag declared above
	VK_BUFFER_CREATE_SPECIAL_FLAGS_RPCS3 = VK_BUFFER_CREATE_ALLOW_NULL_RPCS3
};
|
||||
|
||||
struct buffer_view : public unique_resource
|
||||
{
|
||||
VkBufferView value;
|
||||
|
|
@ -30,8 +37,21 @@ namespace vk
|
|||
VkBufferCreateInfo info = {};
|
||||
std::unique_ptr<vk::memory_block> memory;
|
||||
|
||||
buffer(const vk::render_device& dev, u64 size, const memory_type_info& memory_type, u32 access_flags, VkBufferUsageFlags usage, VkBufferCreateFlags flags, vmm_allocation_pool allocation_pool);
|
||||
buffer(const vk::render_device& dev, VkBufferUsageFlags usage, void* host_pointer, u64 size);
|
||||
buffer(
|
||||
const vk::render_device& dev,
|
||||
u64 size,
|
||||
const memory_type_info& memory_type,
|
||||
u32 access_flags,
|
||||
VkBufferUsageFlags usage,
|
||||
VkBufferCreateFlags flags,
|
||||
vmm_allocation_pool allocation_pool);
|
||||
|
||||
buffer(
|
||||
const vk::render_device& dev,
|
||||
VkBufferUsageFlags usage,
|
||||
void* host_pointer,
|
||||
u64 size);
|
||||
|
||||
~buffer();
|
||||
|
||||
void* map(u64 offset, u64 size);
|
||||
|
|
|
|||
|
|
@ -47,9 +47,28 @@ namespace vk
|
|||
usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
|
||||
memory_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
|
||||
memory_index = memory_map.device_local;
|
||||
m_prefer_writethrough = false;
|
||||
}
|
||||
|
||||
heap = std::make_unique<buffer>(*g_render_device, size, memory_index, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM);
|
||||
VkFlags create_flags = 0;
|
||||
if (m_prefer_writethrough)
|
||||
{
|
||||
create_flags |= VK_BUFFER_CREATE_ALLOW_NULL_RPCS3;
|
||||
}
|
||||
|
||||
heap = std::make_unique<buffer>(*g_render_device, size, memory_index, memory_flags, usage, create_flags, VMM_ALLOCATION_POOL_SYSTEM);
|
||||
|
||||
if (!heap->value)
|
||||
{
|
||||
rsx_log.warning("Could not place heap '%s' into Re-BAR memory. Will attempt to use regular host-visible memory.", m_name);
|
||||
ensure(m_prefer_writethrough);
|
||||
|
||||
// We failed to place the buffer in rebar memory. Try again in host-visible.
|
||||
m_prefer_writethrough = false;
|
||||
auto gc = get_resource_manager();
|
||||
gc->dispose(heap);
|
||||
heap = std::make_unique<buffer>(*g_render_device, size, memory_map.host_visible_coherent, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM);
|
||||
}
|
||||
|
||||
initial_size = size;
|
||||
notify_on_grow = bool(notify);
|
||||
|
|
@ -112,6 +131,7 @@ namespace vk
|
|||
auto gc = get_resource_manager();
|
||||
if (shadow)
|
||||
{
|
||||
ensure(!m_prefer_writethrough);
|
||||
rsx_log.warning("Buffer usage %u is not heap-compatible using this driver, explicit staging buffer in use", usage);
|
||||
|
||||
gc->dispose(shadow);
|
||||
|
|
@ -122,7 +142,25 @@ namespace vk
|
|||
}
|
||||
|
||||
gc->dispose(heap);
|
||||
heap = std::make_unique<buffer>(*g_render_device, aligned_new_size, memory_index, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM);
|
||||
|
||||
VkFlags create_flags = 0;
|
||||
if (m_prefer_writethrough)
|
||||
{
|
||||
create_flags |= VK_BUFFER_CREATE_ALLOW_NULL_RPCS3;
|
||||
}
|
||||
|
||||
heap = std::make_unique<buffer>(*g_render_device, aligned_new_size, memory_index, memory_flags, usage, create_flags, VMM_ALLOCATION_POOL_SYSTEM);
|
||||
|
||||
if (!heap->value)
|
||||
{
|
||||
rsx_log.warning("Could not place heap '%s' into Re-BAR memory. Will attempt to use regular host-visible memory.", m_name);
|
||||
ensure(m_prefer_writethrough);
|
||||
|
||||
// We failed to place the buffer in rebar memory. Try again in host-visible.
|
||||
m_prefer_writethrough = false;
|
||||
gc->dispose(heap);
|
||||
heap = std::make_unique<buffer>(*g_render_device, aligned_new_size, memory_map.host_visible_coherent, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM);
|
||||
}
|
||||
|
||||
if (notify_on_grow)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ atomic_t<u64> g_watchdog_hold_ctr{0};
|
|||
extern bool ppu_load_exec(const ppu_exec_object&, bool virtual_load, const std::string&, utils::serial* = nullptr);
|
||||
extern void spu_load_exec(const spu_exec_object&);
|
||||
extern void spu_load_rel_exec(const spu_rel_object&);
|
||||
extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_module<lv2_obj>*>* loaded_prx);
|
||||
extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_module<lv2_obj>*>* loaded_prx, bool is_fast_compilation);
|
||||
extern bool ppu_initialize(const ppu_module<lv2_obj>&, bool check_only = false, u64 file_size = 0);
|
||||
extern void ppu_finalize(const ppu_module<lv2_obj>&);
|
||||
extern void ppu_unload_prx(const lv2_prx&);
|
||||
|
|
@ -1684,7 +1684,7 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch,
|
|||
}
|
||||
}
|
||||
|
||||
g_fxo->init<named_thread>("SPRX Loader"sv, [this, dir_queue]() mutable
|
||||
g_fxo->init<named_thread>("SPRX Loader"sv, [this, dir_queue, is_fast = m_precompilation_option.is_fast]() mutable
|
||||
{
|
||||
std::vector<ppu_module<lv2_obj>*> mod_list;
|
||||
|
||||
|
|
@ -1705,7 +1705,7 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch,
|
|||
return;
|
||||
}
|
||||
|
||||
ppu_precompile(dir_queue, mod_list.empty() ? nullptr : &mod_list);
|
||||
ppu_precompile(dir_queue, mod_list.empty() ? nullptr : &mod_list, is_fast);
|
||||
|
||||
if (Emu.IsStopped())
|
||||
{
|
||||
|
|
@ -3230,6 +3230,7 @@ void Emulator::Kill(bool allow_autoexit, bool savestate, savestate_stage* save_s
|
|||
read_used_savestate_versions();
|
||||
m_savestate_extension_flags1 = {};
|
||||
m_emu_state_close_pending = false;
|
||||
m_precompilation_option = {};
|
||||
|
||||
// Enable logging
|
||||
rpcs3::utils::configure_logs(true);
|
||||
|
|
@ -3824,6 +3825,7 @@ void Emulator::Kill(bool allow_autoexit, bool savestate, savestate_stage* save_s
|
|||
read_used_savestate_versions();
|
||||
m_savestate_extension_flags1 = {};
|
||||
m_emu_state_close_pending = false;
|
||||
m_precompilation_option = {};
|
||||
|
||||
initialize_timebased_time(0, true);
|
||||
|
||||
|
|
|
|||
|
|
@ -120,6 +120,11 @@ namespace utils
|
|||
struct serial;
|
||||
};
|
||||
|
||||
// Options carried by the emulator for a cache precompilation boot.
// Kept as a plain aggregate so callers can use designated initializers.
struct emu_precompilation_option_t
{
	// Forwarded to ppu_precompile as its is_fast_compilation argument; off by default
	bool is_fast{false};
};
|
||||
|
||||
class Emulator final
|
||||
{
|
||||
atomic_t<system_state> m_state{system_state::stopped};
|
||||
|
|
@ -188,6 +193,7 @@ class Emulator final
|
|||
};
|
||||
|
||||
bs_t<SaveStateExtentionFlags1> m_savestate_extension_flags1{};
|
||||
emu_precompilation_option_t m_precompilation_option{};
|
||||
|
||||
public:
|
||||
static constexpr std::string_view game_id_boot_prefix = "%RPCS3_GAMEID%:";
|
||||
|
|
@ -245,6 +251,11 @@ public:
|
|||
m_state = system_state::running;
|
||||
}
|
||||
|
||||
// Stores the precompilation options in m_precompilation_option, replacing
// whatever was set before.
void SetPrecompileCacheOption(emu_precompilation_option_t option)
{
	this->m_precompilation_option = option;
}
|
||||
|
||||
void Init();
|
||||
|
||||
std::vector<std::string> argv;
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ struct cfg_root : cfg::node
|
|||
cfg::_int<0, 16> spu_delay_penalty{ this, "SPU delay penalty", 3 }; // Number of milliseconds to block a thread if a virtual 'core' isn't free
|
||||
cfg::_bool spu_loop_detection{ this, "SPU loop detection", false }; // Try to detect wait loops and trigger thread yield
|
||||
cfg::_int<1, 6> max_spurs_threads{ this, "Max SPURS Threads", 6, true }; // HACK. If less then 6, max number of running SPURS threads in each thread group.
|
||||
cfg::_enum<spu_block_size_type> spu_block_size{ this, "SPU Block Size", spu_block_size_type::safe };
|
||||
cfg::_enum<spu_block_size_type> spu_block_size{ this, "SPU Analyzer Block Size", spu_block_size_type::mega };
|
||||
cfg::_bool spu_accurate_dma{ this, "Accurate SPU DMA", false };
|
||||
cfg::_bool spu_accurate_reservations{ this, "Accurate SPU Reservations", true };
|
||||
cfg::_bool accurate_cache_line_stores{ this, "Accurate Cache Line Stores", false };
|
||||
|
|
|
|||
|
|
@ -101,6 +101,48 @@ namespace rpcs3::utils
|
|||
return worker();
|
||||
}
|
||||
|
||||
std::vector<std::pair<std::string, u64>> get_vfs_disk_usage()
|
||||
{
|
||||
std::vector<std::pair<std::string, u64>> disk_usage;
|
||||
|
||||
if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_hdd0_dir(), 1); data_size != umax)
|
||||
{
|
||||
disk_usage.push_back({"dev_hdd0", data_size});
|
||||
}
|
||||
|
||||
if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_hdd1_dir(), 1); data_size != umax)
|
||||
{
|
||||
disk_usage.push_back({"dev_hdd1", data_size});
|
||||
}
|
||||
|
||||
if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_flash_dir(), 1); data_size != umax)
|
||||
{
|
||||
disk_usage.push_back({"dev_flash", data_size});
|
||||
}
|
||||
|
||||
if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_flash2_dir(), 1); data_size != umax)
|
||||
{
|
||||
disk_usage.push_back({"dev_flash2", data_size});
|
||||
}
|
||||
|
||||
if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_flash3_dir(), 1); data_size != umax)
|
||||
{
|
||||
disk_usage.push_back({"dev_flash3", data_size});
|
||||
}
|
||||
|
||||
if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_bdvd_dir(), 1); data_size != umax)
|
||||
{
|
||||
disk_usage.push_back({"dev_bdvd", data_size});
|
||||
}
|
||||
|
||||
if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_games_dir(), 1); data_size != umax)
|
||||
{
|
||||
disk_usage.push_back({"games", data_size});
|
||||
}
|
||||
|
||||
return disk_usage;
|
||||
}
|
||||
|
||||
std::string get_emu_dir()
|
||||
{
|
||||
const std::string& emu_dir_ = g_cfg_vfs.emulator_dir;
|
||||
|
|
@ -122,6 +164,36 @@ namespace rpcs3::utils
|
|||
return g_cfg_vfs.get(g_cfg_vfs.dev_hdd1, get_emu_dir());
|
||||
}
|
||||
|
||||
std::string get_flash_dir()
|
||||
{
|
||||
return g_cfg_vfs.get(g_cfg_vfs.dev_flash, get_emu_dir());
|
||||
}
|
||||
|
||||
std::string get_flash2_dir()
|
||||
{
|
||||
return g_cfg_vfs.get(g_cfg_vfs.dev_flash2, get_emu_dir());
|
||||
}
|
||||
|
||||
std::string get_flash3_dir()
|
||||
{
|
||||
return g_cfg_vfs.get(g_cfg_vfs.dev_flash3, get_emu_dir());
|
||||
}
|
||||
|
||||
std::string get_bdvd_dir()
|
||||
{
|
||||
return g_cfg_vfs.get(g_cfg_vfs.dev_bdvd, get_emu_dir());
|
||||
}
|
||||
|
||||
// Returns the size of the cache directory as reported by fs::get_dir_size,
// or 0 when that query yields umax.
u64 get_cache_disk_usage()
{
	const u64 cache_bytes = fs::get_dir_size(rpcs3::utils::get_cache_dir(), 1);
	return cache_bytes != umax ? cache_bytes : 0;
}
|
||||
|
||||
std::string get_cache_dir()
|
||||
{
|
||||
return fs::get_cache_dir() + "cache/";
|
||||
|
|
|
|||
|
|
@ -23,10 +23,19 @@ namespace rpcs3::utils
|
|||
|
||||
bool install_pkg(const std::string& path);
|
||||
|
||||
// VFS directories and disk usage
|
||||
std::vector<std::pair<std::string, u64>> get_vfs_disk_usage();
|
||||
std::string get_emu_dir();
|
||||
std::string get_games_dir();
|
||||
std::string get_hdd0_dir();
|
||||
std::string get_hdd1_dir();
|
||||
std::string get_flash_dir();
|
||||
std::string get_flash2_dir();
|
||||
std::string get_flash3_dir();
|
||||
std::string get_bdvd_dir();
|
||||
|
||||
// Cache directories and disk usage
|
||||
u64 get_cache_disk_usage();
|
||||
std::string get_cache_dir();
|
||||
std::string get_cache_dir(std::string_view module_path);
|
||||
|
||||
|
|
|
|||
|
|
@ -156,6 +156,7 @@
|
|||
<ClCompile Include="Emu\RSX\Overlays\Shaders\shader_loading_dialog.cpp" />
|
||||
<ClCompile Include="Emu\RSX\Overlays\Shaders\shader_loading_dialog_native.cpp" />
|
||||
<ClCompile Include="Emu\RSX\Overlays\Trophies\overlay_trophy_list_dialog.cpp" />
|
||||
<ClCompile Include="Emu\RSX\Program\Assembler\FPToCFG.cpp" />
|
||||
<ClCompile Include="Emu\RSX\Program\FragmentProgramRegister.cpp" />
|
||||
<ClCompile Include="Emu\RSX\Program\ProgramStateCache.cpp" />
|
||||
<ClCompile Include="Emu\RSX\Program\program_util.cpp" />
|
||||
|
|
@ -699,6 +700,8 @@
|
|||
<ClInclude Include="Emu\RSX\Overlays\overlay_progress_bar.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Overlays\overlay_video.h" />
|
||||
<ClInclude Include="Emu\RSX\Overlays\Trophies\overlay_trophy_list_dialog.h" />
|
||||
<ClInclude Include="Emu\RSX\Program\Assembler\CFG.h" />
|
||||
<ClInclude Include="Emu\RSX\Program\Assembler\IR.h" />
|
||||
<ClInclude Include="Emu\RSX\Program\FragmentProgramRegister.h" />
|
||||
<ClInclude Include="Emu\RSX\Program\GLSLTypes.h" />
|
||||
<ClInclude Include="Emu\RSX\Program\ProgramStateCache.h" />
|
||||
|
|
|
|||
|
|
@ -133,6 +133,9 @@
|
|||
<Filter Include="Emu\GPU\RSX\Program\MSAA">
|
||||
<UniqueIdentifier>{ce6d6b90-8313-4273-b46c-d92bd450c002}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Emu\GPU\RSX\Program\Assembler">
|
||||
<UniqueIdentifier>{d99df916-8a99-428b-869a-9f14ac0ab411}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="Crypto\aes.cpp">
|
||||
|
|
@ -1372,6 +1375,9 @@
|
|||
<ClCompile Include="Emu\Io\evdev_gun_handler.cpp">
|
||||
<Filter>Emu\Io</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Emu\RSX\Program\Assembler\FPToCFG.cpp">
|
||||
<Filter>Emu\GPU\RSX\Program\Assembler</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="Crypto\aes.h">
|
||||
|
|
@ -2764,6 +2770,12 @@
|
|||
<ClInclude Include="util\pair.hpp">
|
||||
<Filter>Utilities</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\Program\Assembler\CFG.h">
|
||||
<Filter>Emu\GPU\RSX\Program\Assembler</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\Program\Assembler\IR.h">
|
||||
<Filter>Emu\GPU\RSX\Program\Assembler</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl">
|
||||
|
|
|
|||
|
|
@ -975,9 +975,9 @@ QString emu_settings::GetLocalizedSetting(const QString& original, emu_settings_
|
|||
case emu_settings_type::SPUBlockSize:
|
||||
switch (static_cast<spu_block_size_type>(index))
|
||||
{
|
||||
case spu_block_size_type::safe: return tr("Safe", "SPU block size");
|
||||
case spu_block_size_type::mega: return tr("Mega", "SPU block size");
|
||||
case spu_block_size_type::giga: return tr("Giga", "SPU block size");
|
||||
case spu_block_size_type::safe: return tr("Safe", "SPU Analyzer Block Size");
|
||||
case spu_block_size_type::mega: return tr("Mega", "SPU Analyzer Block Size");
|
||||
case spu_block_size_type::giga: return tr("Giga", "SPU Analyzer Block Size");
|
||||
}
|
||||
break;
|
||||
case emu_settings_type::ThreadSchedulerMode:
|
||||
|
|
|
|||
|
|
@ -239,7 +239,7 @@ inline static const std::map<emu_settings_type, cfg_location> settings_location
|
|||
{ emu_settings_type::XFloatAccuracy, { "Core", "XFloat Accuracy"}},
|
||||
{ emu_settings_type::MFCCommandsShuffling, { "Core", "MFC Commands Shuffling Limit"}},
|
||||
{ emu_settings_type::SetDAZandFTZ, { "Core", "Set DAZ and FTZ"}},
|
||||
{ emu_settings_type::SPUBlockSize, { "Core", "SPU Block Size"}},
|
||||
{ emu_settings_type::SPUBlockSize, { "Core", "SPU Analyzer Block Size"}},
|
||||
{ emu_settings_type::SPUCache, { "Core", "SPU Cache"}},
|
||||
{ emu_settings_type::DebugConsoleMode, { "Core", "Debug Console Mode"}},
|
||||
{ emu_settings_type::MaxSPURSThreads, { "Core", "Max SPURS Threads"}},
|
||||
|
|
|
|||
|
|
@ -2011,10 +2011,11 @@ void game_list_frame::ShowContextMenu(const QPoint &pos)
|
|||
menu.exec(global_pos);
|
||||
}
|
||||
|
||||
bool game_list_frame::CreateCPUCaches(const std::string& path, const std::string& serial)
|
||||
bool game_list_frame::CreateCPUCaches(const std::string& path, const std::string& serial, bool is_fast_compilation)
|
||||
{
|
||||
Emu.GracefulShutdown(false);
|
||||
Emu.SetForceBoot(true);
|
||||
Emu.SetPrecompileCacheOption(emu_precompilation_option_t{.is_fast = is_fast_compilation});
|
||||
|
||||
if (const auto error = Emu.BootGame(fs::is_file(path) ? fs::get_parent_dir(path) : path, serial, true); error != game_boot_result::no_errors)
|
||||
{
|
||||
|
|
@ -2026,9 +2027,9 @@ bool game_list_frame::CreateCPUCaches(const std::string& path, const std::string
|
|||
return true;
|
||||
}
|
||||
|
||||
bool game_list_frame::CreateCPUCaches(const game_info& game)
|
||||
bool game_list_frame::CreateCPUCaches(const game_info& game, bool is_fast_compilation)
|
||||
{
|
||||
return game && CreateCPUCaches(game->info.path, game->info.serial);
|
||||
return game && CreateCPUCaches(game->info.path, game->info.serial, is_fast_compilation);
|
||||
}
|
||||
|
||||
bool game_list_frame::RemoveCustomConfiguration(const std::string& title_id, const game_info& game, bool is_interactive)
|
||||
|
|
@ -2404,6 +2405,9 @@ void game_list_frame::BatchActionBySerials(progress_dialog* pdlg, const std::set
|
|||
connect(pdlg, &progress_dialog::canceled, this, [pdlg](){ pdlg->deleteLater(); });
|
||||
QApplication::beep();
|
||||
|
||||
// Signal termination back to the callback
|
||||
action("");
|
||||
|
||||
if (refresh_on_finish && index)
|
||||
{
|
||||
Refresh(true);
|
||||
|
|
@ -2414,7 +2418,7 @@ void game_list_frame::BatchActionBySerials(progress_dialog* pdlg, const std::set
|
|||
QTimer::singleShot(1, this, *periodic_func);
|
||||
}
|
||||
|
||||
void game_list_frame::BatchCreateCPUCaches(const std::vector<game_info>& game_data)
|
||||
void game_list_frame::BatchCreateCPUCaches(const std::vector<game_info>& game_data, bool is_fast_compilation)
|
||||
{
|
||||
std::set<std::string> serials;
|
||||
|
||||
|
|
@ -2433,11 +2437,13 @@ void game_list_frame::BatchCreateCPUCaches(const std::vector<game_info>& game_da
|
|||
if (total == 0)
|
||||
{
|
||||
QMessageBox::information(this, tr("LLVM Cache Batch Creation"), tr("No titles found"), QMessageBox::Ok);
|
||||
Q_EMIT NotifyBatchedGameActionFinished();
|
||||
return;
|
||||
}
|
||||
|
||||
if (!m_gui_settings->GetBootConfirmation(this))
|
||||
{
|
||||
Q_EMIT NotifyBatchedGameActionFinished();
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -2459,13 +2465,19 @@ void game_list_frame::BatchCreateCPUCaches(const std::vector<game_info>& game_da
|
|||
BatchActionBySerials(pdlg, serials, tr("%0\nProgress: %1/%2 caches compiled").arg(main_label),
|
||||
[&, game_data](const std::string& serial)
|
||||
{
|
||||
if (serial.empty())
|
||||
{
|
||||
Q_EMIT NotifyBatchedGameActionFinished();
|
||||
return false;
|
||||
}
|
||||
|
||||
if (Emu.IsStopped(true))
|
||||
{
|
||||
const auto it = std::find_if(m_game_data.begin(), m_game_data.end(), FN(x->info.serial == serial));
|
||||
|
||||
if (it != m_game_data.end())
|
||||
{
|
||||
return CreateCPUCaches((*it)->info.path, serial);
|
||||
return CreateCPUCaches((*it)->info.path, serial, is_fast_compilation);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2512,7 +2524,7 @@ void game_list_frame::BatchRemovePPUCaches()
|
|||
BatchActionBySerials(pdlg, serials, tr("%0/%1 caches cleared"),
|
||||
[this](const std::string& serial)
|
||||
{
|
||||
return Emu.IsStopped(true) && RemovePPUCache(GetCacheDirBySerial(serial));
|
||||
return !serial.empty() &&Emu.IsStopped(true) && RemovePPUCache(GetCacheDirBySerial(serial));
|
||||
},
|
||||
[this](u32, u32)
|
||||
{
|
||||
|
|
@ -2551,7 +2563,7 @@ void game_list_frame::BatchRemoveSPUCaches()
|
|||
BatchActionBySerials(pdlg, serials, tr("%0/%1 caches cleared"),
|
||||
[this](const std::string& serial)
|
||||
{
|
||||
return Emu.IsStopped(true) && RemoveSPUCache(GetCacheDirBySerial(serial));
|
||||
return !serial.empty() && Emu.IsStopped(true) && RemoveSPUCache(GetCacheDirBySerial(serial));
|
||||
},
|
||||
[this](u32 removed, u32 total)
|
||||
{
|
||||
|
|
@ -2586,7 +2598,7 @@ void game_list_frame::BatchRemoveCustomConfigurations()
|
|||
BatchActionBySerials(pdlg, serials, tr("%0/%1 custom configurations cleared"),
|
||||
[this](const std::string& serial)
|
||||
{
|
||||
return Emu.IsStopped(true) && RemoveCustomConfiguration(serial);
|
||||
return !serial.empty() && Emu.IsStopped(true) && RemoveCustomConfiguration(serial);
|
||||
},
|
||||
[this](u32 removed, u32 total)
|
||||
{
|
||||
|
|
@ -2620,7 +2632,7 @@ void game_list_frame::BatchRemoveCustomPadConfigurations()
|
|||
BatchActionBySerials(pdlg, serials, tr("%0/%1 custom pad configurations cleared"),
|
||||
[this](const std::string& serial)
|
||||
{
|
||||
return Emu.IsStopped(true) && RemoveCustomPadConfiguration(serial);
|
||||
return !serial.empty() && Emu.IsStopped(true) && RemoveCustomPadConfiguration(serial);
|
||||
},
|
||||
[this](u32 removed, u32 total)
|
||||
{
|
||||
|
|
@ -2659,7 +2671,7 @@ void game_list_frame::BatchRemoveShaderCaches()
|
|||
BatchActionBySerials(pdlg, serials, tr("%0/%1 shader caches cleared"),
|
||||
[this](const std::string& serial)
|
||||
{
|
||||
return Emu.IsStopped(true) && RemoveShadersCache(GetCacheDirBySerial(serial));
|
||||
return !serial.empty() && Emu.IsStopped(true) && RemoveShadersCache(GetCacheDirBySerial(serial));
|
||||
},
|
||||
[this](u32 removed, u32 total)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ public:
|
|||
bool IsEntryVisible(const game_info& game, bool search_fallback = false) const;
|
||||
|
||||
public Q_SLOTS:
|
||||
void BatchCreateCPUCaches(const std::vector<game_info>& game_data = {});
|
||||
void BatchCreateCPUCaches(const std::vector<game_info>& game_data = {}, bool is_fast_compilation = false);
|
||||
void BatchRemovePPUCaches();
|
||||
void BatchRemoveSPUCaches();
|
||||
void BatchRemoveCustomConfigurations();
|
||||
|
|
@ -96,6 +96,7 @@ Q_SIGNALS:
|
|||
void FocusToSearchBar();
|
||||
void Refreshed();
|
||||
void RequestSaveStateManager(const game_info& game);
|
||||
void NotifyBatchedGameActionFinished();
|
||||
|
||||
public:
|
||||
template <typename KeyType>
|
||||
|
|
@ -135,8 +136,8 @@ private:
|
|||
bool RemovePPUCache(const std::string& base_dir, bool is_interactive = false);
|
||||
bool RemoveSPUCache(const std::string& base_dir, bool is_interactive = false);
|
||||
void RemoveHDD1Cache(const std::string& base_dir, const std::string& title_id, bool is_interactive = false);
|
||||
static bool CreateCPUCaches(const std::string& path, const std::string& serial = {});
|
||||
static bool CreateCPUCaches(const game_info& game);
|
||||
static bool CreateCPUCaches(const std::string& path, const std::string& serial = {}, bool is_fast_compilation = false);
|
||||
static bool CreateCPUCaches(const game_info& game, bool is_fast_compilation = false);
|
||||
|
||||
static bool RemoveContentPath(const std::string& path, const std::string& desc);
|
||||
static u32 RemoveContentPathList(const std::vector<std::string>& path_list, const std::string& desc);
|
||||
|
|
|
|||
|
|
@ -4,10 +4,14 @@
|
|||
#include "hex_validator.h"
|
||||
#include "memory_viewer_panel.h"
|
||||
|
||||
#include "Emu/System.h"
|
||||
#include "Emu/system_utils.hpp"
|
||||
#include "Utilities/lockless.h"
|
||||
#include "util/asm.hpp"
|
||||
|
||||
#include <QtConcurrent>
|
||||
#include <QMenu>
|
||||
#include <QMessageBox>
|
||||
#include <QActionGroup>
|
||||
#include <QScrollBar>
|
||||
#include <QVBoxLayout>
|
||||
|
|
@ -17,6 +21,8 @@
|
|||
#include <deque>
|
||||
#include <mutex>
|
||||
|
||||
LOG_CHANNEL(sys_log, "SYS");
|
||||
|
||||
extern fs::file g_tty;
|
||||
extern atomic_t<s64> g_tty_size;
|
||||
extern std::array<std::deque<std::string>, 16> g_tty_input;
|
||||
|
|
@ -165,6 +171,28 @@ log_frame::log_frame(std::shared_ptr<gui_settings> _gui_settings, QWidget* paren
|
|||
connect(m_timer, &QTimer::timeout, this, &log_frame::UpdateUI);
|
||||
}
|
||||
|
||||
void log_frame::show_disk_usage(const std::vector<std::pair<std::string, u64>>& vfs_disk_usage, u64 cache_disk_usage)
|
||||
{
|
||||
QString text;
|
||||
u64 tot_data_size = 0;
|
||||
|
||||
for (const auto& [dev, data_size] : vfs_disk_usage)
|
||||
{
|
||||
text += tr("\n %0: %1").arg(QString::fromStdString(dev)).arg(gui::utils::format_byte_size(data_size));
|
||||
tot_data_size += data_size;
|
||||
}
|
||||
|
||||
if (!text.isEmpty())
|
||||
{
|
||||
text = tr("\n VFS disk usage: %0%1").arg(gui::utils::format_byte_size(tot_data_size)).arg(text);
|
||||
}
|
||||
|
||||
text += tr("\n Cache disk usage: %0").arg(gui::utils::format_byte_size(cache_disk_usage));
|
||||
|
||||
sys_log.success("%s", text);
|
||||
QMessageBox::information(this, tr("Disk usage"), text);
|
||||
}
|
||||
|
||||
void log_frame::SetLogLevel(logs::level lev) const
|
||||
{
|
||||
switch (lev)
|
||||
|
|
@ -245,6 +273,26 @@ void log_frame::CreateAndConnectActions()
|
|||
m_tty->clear();
|
||||
});
|
||||
|
||||
m_show_disk_usage_act = new QAction(tr("Show Disk Usage"), this);
|
||||
connect(m_show_disk_usage_act, &QAction::triggered, [this]()
|
||||
{
|
||||
if (m_disk_usage_future.isRunning())
|
||||
{
|
||||
return; // Still running the last request
|
||||
}
|
||||
|
||||
m_disk_usage_future = QtConcurrent::run([this]()
|
||||
{
|
||||
const std::vector<std::pair<std::string, u64>> vfs_disk_usage = rpcs3::utils::get_vfs_disk_usage();
|
||||
const u64 cache_disk_usage = rpcs3::utils::get_cache_disk_usage();
|
||||
|
||||
Emu.CallFromMainThread([this, vfs_disk_usage, cache_disk_usage]()
|
||||
{
|
||||
show_disk_usage(vfs_disk_usage, cache_disk_usage);
|
||||
}, nullptr, false);
|
||||
});
|
||||
});
|
||||
|
||||
m_perform_goto_on_debugger = new QAction(tr("Go-To On The Debugger"), this);
|
||||
connect(m_perform_goto_on_debugger, &QAction::triggered, [this]()
|
||||
{
|
||||
|
|
@ -369,6 +417,9 @@ void log_frame::CreateAndConnectActions()
|
|||
{
|
||||
QMenu* menu = m_log->createStandardContextMenu();
|
||||
menu->addAction(m_clear_act);
|
||||
menu->addSeparator();
|
||||
menu->addAction(m_show_disk_usage_act);
|
||||
menu->addSeparator();
|
||||
menu->addAction(m_perform_goto_on_debugger);
|
||||
menu->addAction(m_perform_goto_thread_on_debugger);
|
||||
menu->addAction(m_perform_show_in_mem_viewer);
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
#include <memory>
|
||||
|
||||
#include <QFuture>
|
||||
#include <QTabWidget>
|
||||
#include <QPlainTextEdit>
|
||||
#include <QActionGroup>
|
||||
|
|
@ -38,6 +39,7 @@ protected:
|
|||
private Q_SLOTS:
|
||||
void UpdateUI();
|
||||
private:
|
||||
void show_disk_usage(const std::vector<std::pair<std::string, u64>>& vfs_disk_usage, u64 cache_disk_usage);
|
||||
void SetLogLevel(logs::level lev) const;
|
||||
void SetTTYLogging(bool val) const;
|
||||
|
||||
|
|
@ -48,6 +50,7 @@ private:
|
|||
std::unique_ptr<find_dialog> m_find_dialog;
|
||||
|
||||
QTimer* m_timer = nullptr;
|
||||
QFuture<void> m_disk_usage_future;
|
||||
|
||||
std::vector<QColor> m_color;
|
||||
QColor m_color_stack;
|
||||
|
|
@ -72,6 +75,7 @@ private:
|
|||
|
||||
QAction* m_clear_act = nullptr;
|
||||
QAction* m_clear_tty_act = nullptr;
|
||||
QAction* m_show_disk_usage_act = nullptr;
|
||||
QAction* m_perform_goto_on_debugger = nullptr;
|
||||
QAction* m_perform_goto_thread_on_debugger = nullptr;
|
||||
QAction* m_perform_show_in_mem_viewer = nullptr;
|
||||
|
|
|
|||
|
|
@ -1187,7 +1187,13 @@ bool main_window::HandlePackageInstallation(QStringList file_paths, bool from_bo
|
|||
}
|
||||
}
|
||||
|
||||
ShowOptionalGamePreparations(tr("Success!"), tr("Successfully installed software from package(s)!"), std::move(paths));
|
||||
// Executes after PrecompileCachesFromInstalledPackages
|
||||
m_notify_batch_game_action_cb = [this, paths]() mutable
|
||||
{
|
||||
ShowOptionalGamePreparations(tr("Success!"), tr("Successfully installed software from package(s)!"), std::move(paths));
|
||||
};
|
||||
|
||||
PrecompileCachesFromInstalledPackages(paths);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -2368,8 +2374,7 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri
|
|||
#else
|
||||
QCheckBox* quick_check = new QCheckBox(tr("Add launcher shortcut(s)"));
|
||||
#endif
|
||||
QCheckBox* precompile_check = new QCheckBox(tr("Precompile caches"));
|
||||
QLabel* label = new QLabel(tr("%1\nWould you like to install shortcuts to the installed software and precompile caches? (%2 new software detected)\n\n").arg(message).arg(bootable_paths.size()), dlg);
|
||||
QLabel* label = new QLabel(tr("%1\nWould you like to install shortcuts to the installed software? (%2 new software detected)\n\n").arg(message).arg(bootable_paths.size()), dlg);
|
||||
|
||||
vlayout->addWidget(label);
|
||||
vlayout->addStretch(10);
|
||||
|
|
@ -2377,10 +2382,6 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri
|
|||
vlayout->addStretch(3);
|
||||
vlayout->addWidget(quick_check);
|
||||
vlayout->addStretch(3);
|
||||
vlayout->addWidget(precompile_check);
|
||||
vlayout->addStretch(3);
|
||||
|
||||
precompile_check->setToolTip(tr("Spend time building data needed for game boot now instead of at launch."));
|
||||
|
||||
QDialogButtonBox* btn_box = new QDialogButtonBox(QDialogButtonBox::Ok);
|
||||
|
||||
|
|
@ -2391,7 +2392,6 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri
|
|||
{
|
||||
const bool create_desktop_shortcuts = desk_check->isChecked();
|
||||
const bool create_app_shortcut = quick_check->isChecked();
|
||||
const bool create_caches = precompile_check->isChecked();
|
||||
|
||||
dlg->hide();
|
||||
dlg->accept();
|
||||
|
|
@ -2411,12 +2411,11 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri
|
|||
locations.insert(gui::utils::shortcut_location::applications);
|
||||
}
|
||||
|
||||
if (locations.empty() && !create_caches)
|
||||
if (locations.empty())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<game_info> game_data;
|
||||
std::vector<game_info> game_data_shortcuts;
|
||||
|
||||
for (const auto& [boot_path, title_id] : paths)
|
||||
|
|
@ -2431,11 +2430,6 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri
|
|||
{
|
||||
game_data_shortcuts.push_back(gameinfo);
|
||||
}
|
||||
|
||||
if (create_caches)
|
||||
{
|
||||
game_data.push_back(gameinfo);
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
|
|
@ -2447,17 +2441,39 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri
|
|||
{
|
||||
m_game_list_frame->CreateShortcuts(game_data_shortcuts, locations);
|
||||
}
|
||||
|
||||
if (!game_data.empty())
|
||||
{
|
||||
m_game_list_frame->BatchCreateCPUCaches(game_data);
|
||||
}
|
||||
});
|
||||
|
||||
dlg->setAttribute(Qt::WA_DeleteOnClose);
|
||||
dlg->open();
|
||||
}
|
||||
|
||||
|
||||
void main_window::PrecompileCachesFromInstalledPackages(const std::map<std::string, QString>& bootable_paths)
|
||||
{
|
||||
std::vector<game_info> game_data;
|
||||
|
||||
for (const auto& [boot_path, title_id] : bootable_paths)
|
||||
{
|
||||
for (const game_info& gameinfo : m_game_list_frame->GetGameInfo())
|
||||
{
|
||||
if (gameinfo && gameinfo->info.serial == title_id.toStdString())
|
||||
{
|
||||
if (Emu.IsPathInsideDir(boot_path, gameinfo->info.path))
|
||||
{
|
||||
game_data.push_back(gameinfo);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!game_data.empty())
|
||||
{
|
||||
m_game_list_frame->BatchCreateCPUCaches(game_data, true);
|
||||
}
|
||||
}
|
||||
|
||||
void main_window::CreateActions()
|
||||
{
|
||||
ui->exitAct->setShortcuts(QKeySequence::Quit);
|
||||
|
|
@ -3401,6 +3417,15 @@ void main_window::CreateConnects()
|
|||
connect(ui->mw_searchbar, &QLineEdit::textChanged, m_game_list_frame, &game_list_frame::SetSearchText);
|
||||
connect(ui->mw_searchbar, &QLineEdit::returnPressed, m_game_list_frame, &game_list_frame::FocusAndSelectFirstEntryIfNoneIs);
|
||||
connect(m_game_list_frame, &game_list_frame::FocusToSearchBar, this, [this]() { ui->mw_searchbar->setFocus(); });
|
||||
|
||||
connect(m_game_list_frame, &game_list_frame::NotifyBatchedGameActionFinished, this, [this]() mutable
|
||||
{
|
||||
if (m_notify_batch_game_action_cb)
|
||||
{
|
||||
m_notify_batch_game_action_cb();
|
||||
m_notify_batch_game_action_cb = {};
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void main_window::CreateDockWindows()
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ class main_window : public QMainWindow
|
|||
bool m_save_slider_pos = false;
|
||||
bool m_requested_show_logs_on_exit = false;
|
||||
int m_other_slider_pos = 0;
|
||||
std::function<void()> m_notify_batch_game_action_cb;
|
||||
|
||||
QIcon m_app_icon;
|
||||
QIcon m_icon_play;
|
||||
|
|
@ -141,6 +142,7 @@ private:
|
|||
void CreateDockWindows();
|
||||
void EnableMenus(bool enabled) const;
|
||||
void ShowTitleBars(bool show) const;
|
||||
void PrecompileCachesFromInstalledPackages(const std::map<std::string, QString>& bootable_paths);
|
||||
void ShowOptionalGamePreparations(const QString& title, const QString& message, std::map<std::string, QString> game_path);
|
||||
|
||||
static bool InstallFileInExData(const std::string& extension, const QString& path, const std::string& filename);
|
||||
|
|
|
|||
|
|
@ -91,7 +91,7 @@ public:
|
|||
const QString xfloat = tr("Control accuracy to SPU float vectors processing.\nFixes bugs in various games at the cost of performance.\nThis setting is only applied when SPU Decoder is set to Dynamic or LLVM.");
|
||||
const QString enable_thread_scheduler = tr("Control how RPCS3 utilizes the threads of your system.\nEach option heavily depends on the game and on your CPU. It's recommended to try each option to find out which performs the best.\nChanging the thread scheduler is not supported on CPUs with less than 12 threads.");
|
||||
const QString spu_loop_detection = tr("Try to detect loop conditions in SPU kernels and use them as scheduling hints.\nImproves performance and reduces CPU usage.\nMay cause severe audio stuttering in rare cases.");
|
||||
const QString spu_block_size = tr("This option controls the SPU analyser, particularly the size of compiled units. The Mega and Giga modes may improve performance by tying smaller units together, decreasing the number of compiled units but increasing their size.\nUse the Safe mode for maximum compatibility.");
|
||||
const QString spu_block_size = tr("This option controls the SPU analyser, particularly the size of compiled units. The Mega and Giga modes may improve performance by tying smaller units together, decreasing the number of compiled units but increasing their size.\nUse the Safe mode for maximum compatibility at the cost of lower performance.");
|
||||
const QString preferred_spu_threads = tr("Some SPU stages are sensitive to race conditions and allowing a limited number at a time helps alleviate performance stalls.\nSetting this to a smaller value might improve performance and reduce stuttering in some games.\nLeave this on auto if performance is negatively affected when setting a small value.");
|
||||
const QString max_cpu_preempt = tr("Reduces CPU usage and power consumption, improving battery life on mobile devices. (0 means disabled)\nHigher values cause a more pronounced effect, but may cause audio or performance issues. A value of 50 or less is recommended.\nThis option forces an FPS limit because it's active when framerate is stable.\nThe lighter the game is on the hardware, the more power is saved by it. (until the preemption count barrier is reached)");
|
||||
|
||||
|
|
|
|||
|
|
@ -88,6 +88,7 @@
|
|||
<ItemGroup>
|
||||
<ClCompile Include="test.cpp" />
|
||||
<ClCompile Include="test_fmt.cpp" />
|
||||
<ClCompile Include="test_rsx_cfg.cpp" />
|
||||
<ClCompile Include="test_simple_array.cpp" />
|
||||
<ClCompile Include="test_address_range.cpp" />
|
||||
<ClCompile Include="test_tuple.cpp" />
|
||||
|
|
|
|||
|
|
@ -3,44 +3,47 @@
|
|||
#include "util/types.hpp"
|
||||
#include "util/pair.hpp"
|
||||
|
||||
struct some_struct
|
||||
namespace utils
|
||||
{
|
||||
u64 v {};
|
||||
char s[12] = "Hello World";
|
||||
|
||||
bool operator == (const some_struct& r) const
|
||||
struct some_struct
|
||||
{
|
||||
return v == r.v && std::memcmp(s, r.s, sizeof(s)) == 0;
|
||||
u64 v{};
|
||||
char s[12] = "Hello World";
|
||||
|
||||
bool operator == (const some_struct& r) const
|
||||
{
|
||||
return v == r.v && std::memcmp(s, r.s, sizeof(s)) == 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Exercises utils::pair: default construction, brace assignment,
// copy construction (from const and non-const sources) and move construction.
TEST(Pair, General)
{
	using pair_t = utils::pair<int, some_struct>;

	some_struct sample{};
	sample.v = 1234;

	// Default construction value-initializes both members.
	pair_t original;
	EXPECT_EQ(sizeof(original), 32);
	EXPECT_EQ(original.first, 0);
	EXPECT_EQ(original.second, some_struct{});

	// Brace assignment
	original = { 666, sample };
	EXPECT_EQ(original.first, 666);
	EXPECT_EQ(original.second, sample);

	// Copy into a const pair; the source must be left untouched.
	const pair_t const_copy = original;
	EXPECT_EQ(original.first, 666);
	EXPECT_EQ(original.second, sample);
	EXPECT_EQ(const_copy.first, 666);
	EXPECT_EQ(const_copy.second, sample);

	// Copy from a const pair
	pair_t second_copy = const_copy;
	EXPECT_EQ(const_copy.first, 666);
	EXPECT_EQ(const_copy.second, sample);
	EXPECT_EQ(second_copy.first, 666);
	EXPECT_EQ(second_copy.second, sample);

	// Move construction
	pair_t moved_to = std::move(original);
	EXPECT_EQ(moved_to.first, 666);
	EXPECT_EQ(moved_to.second, sample);
}
|
||||
};
|
||||
|
||||
// Exercises utils::pair: default construction, brace assignment,
// copy construction (from const and non-const sources) and move construction.
TEST(Utils, Pair)
{
	using pair_t = utils::pair<int, some_struct>;

	some_struct sample{};
	sample.v = 1234;

	// Default construction value-initializes both members.
	pair_t original;
	EXPECT_EQ(sizeof(original), 32);
	EXPECT_EQ(original.first, 0);
	EXPECT_EQ(original.second, some_struct{});

	// Brace assignment
	original = { 666, sample };
	EXPECT_EQ(original.first, 666);
	EXPECT_EQ(original.second, sample);

	// Copy into a const pair; the source must be left untouched.
	const pair_t const_copy = original;
	EXPECT_EQ(original.first, 666);
	EXPECT_EQ(original.second, sample);
	EXPECT_EQ(const_copy.first, 666);
	EXPECT_EQ(const_copy.second, sample);

	// Copy from a const pair
	pair_t second_copy = const_copy;
	EXPECT_EQ(const_copy.first, 666);
	EXPECT_EQ(const_copy.second, sample);
	EXPECT_EQ(second_copy.first, 666);
	EXPECT_EQ(second_copy.second, sample);

	// Move construction
	pair_t moved_to = std::move(original);
	EXPECT_EQ(moved_to.first, 666);
	EXPECT_EQ(moved_to.second, sample);
}
|
||||
|
|
|
|||
239
rpcs3/tests/test_rsx_cfg.cpp
Normal file
239
rpcs3/tests/test_rsx_cfg.cpp
Normal file
|
|
@ -0,0 +1,239 @@
|
|||
#include <gtest/gtest.h>
|
||||
|
||||
#include "Emu/RSX/Common/simple_array.hpp"
|
||||
#include "Emu/RSX/Program/Assembler/CFG.h"
|
||||
#include "Emu/RSX/Program/RSXFragmentProgram.h"
|
||||
|
||||
#include <util/v128.hpp>
|
||||
|
||||
namespace rsx::assembler
|
||||
{
|
||||
// Swaps the two bytes inside each 16-bit half of a 32-bit word,
// e.g. 0xAABBCCDD -> 0xBBAADDCC. Lazy encode, but good enough for what we need here.
//
// Implemented with masks and shifts rather than by writing one union member
// and reading another, which is undefined behavior in C++.
auto swap_bytes16 = [](u32 dword) -> u32
{
	constexpr u32 low_bytes = 0x00FF00FFu;

	// Low byte of each half moves up, high byte of each half moves down.
	return ((dword & low_bytes) << 8) | ((dword >> 8) & low_bytes);
};
|
||||
|
||||
// Instruction mocks because we don't have a working assember (yet)
|
||||
auto encode_instruction = [](u32 opcode, bool end = false) -> v128
|
||||
{
|
||||
OPDEST dst{};
|
||||
dst.opcode = opcode;
|
||||
|
||||
if (end)
|
||||
{
|
||||
dst.end = 1;
|
||||
}
|
||||
|
||||
return v128::from32(swap_bytes16(dst.HEX), 0, 0, 0);
|
||||
};
|
||||
|
||||
auto create_if(u32 end, u32 _else = 0)
|
||||
{
|
||||
OPDEST dst{};
|
||||
dst.opcode = RSX_FP_OPCODE_IFE & 0x3Fu;
|
||||
|
||||
SRC1 src1{};
|
||||
src1.else_offset = (_else ? _else : end) << 2;
|
||||
src1.opcode_is_branch = 1;
|
||||
|
||||
SRC2 src2{};
|
||||
src2.end_offset = end << 2;
|
||||
|
||||
return v128::from32(swap_bytes16(dst.HEX), 0, swap_bytes16(src1.HEX), swap_bytes16(src2.HEX));
|
||||
};
|
||||
|
||||
// A program of two instructions without branches must decode into a single
// block holding both instructions.
TEST(CFG, FpToCFG_Basic)
{
	rsx::simple_array<v128> buffer = {
		encode_instruction(RSX_FP_OPCODE_ADD),
		encode_instruction(RSX_FP_OPCODE_MOV, true)
	};

	RSXFragmentProgram program{};
	program.data = buffer.data();

	FlowGraph graph = deconstruct_fragment_program(program);

	// Fatal checks: front() and the indexed accesses below would be out of
	// bounds if either size were wrong.
	ASSERT_EQ(graph.blocks.size(), 1);
	ASSERT_EQ(graph.blocks.front().instructions.size(), 2);

	EXPECT_EQ(graph.blocks.front().instructions.front().length, 4);
	EXPECT_EQ(graph.blocks.front().instructions[0].addr, 0);
	EXPECT_EQ(graph.blocks.front().instructions[1].addr, 16);
}
|
||||
|
||||
// A single IF without ELSE must split the program into head, branch and merge
// blocks, connected by IF / ENDIF edges.
TEST(CFG, FpToCFG_IF)
{
	rsx::simple_array<v128> buffer = {
		encode_instruction(RSX_FP_OPCODE_ADD),       // 0
		encode_instruction(RSX_FP_OPCODE_MOV),       // 1
		create_if(4),                                // 2 (BR, 4)
		encode_instruction(RSX_FP_OPCODE_ADD),       // 3
		encode_instruction(RSX_FP_OPCODE_MOV, true), // 4 (Merge block)
	};

	// { block id, instruction count }
	const std::pair<int, size_t> expected_block_data[3] = {
		{ 0, 3 }, // Head
		{ 3, 1 }, // Branch
		{ 4, 1 }, // Merge
	};

	RSXFragmentProgram program{};
	program.data = buffer.data();

	FlowGraph graph = deconstruct_fragment_program(program);

	ASSERT_EQ(graph.blocks.size(), 3);

	int i = 0;
	for (auto it = graph.blocks.begin(); it != graph.blocks.end(); ++it)
	{
		const auto& expected = expected_block_data[i++];
		EXPECT_EQ(it->id, expected.first);
		EXPECT_EQ(it->instructions.size(), expected.second);
	}

	// Check edges. The block ids above are only checked non-fatally, so make
	// sure every lookup succeeded and has an edge before dereferencing it.
	const auto head = std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0));
	const auto branch = std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 3));
	const auto merge = std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 4));

	ASSERT_TRUE(head != graph.blocks.end());
	ASSERT_TRUE(branch != graph.blocks.end());
	ASSERT_TRUE(merge != graph.blocks.end());

	ASSERT_TRUE(branch->pred.size() > 0);
	ASSERT_TRUE(head->succ.size() > 0);
	ASSERT_TRUE(merge->pred.size() > 0);

	EXPECT_EQ(branch->pred[0].type, EdgeType::IF);
	EXPECT_EQ(head->succ[0].type, EdgeType::IF);
	EXPECT_EQ(merge->pred[0].type, EdgeType::ENDIF);
}
|
||||
|
||||
// Two nested IFs with distinct merge points must produce five blocks.
TEST(CFG, FpToCFG_NestedIF)
{
	rsx::simple_array<v128> buffer = {
		encode_instruction(RSX_FP_OPCODE_ADD),       // 0
		encode_instruction(RSX_FP_OPCODE_MOV),       // 1
		create_if(8),                                // 2 (BR, 8)
		encode_instruction(RSX_FP_OPCODE_ADD),       // 3
		create_if(6),                                // 4 (BR, 6)
		encode_instruction(RSX_FP_OPCODE_MOV),       // 5
		encode_instruction(RSX_FP_OPCODE_MOV),       // 6 (merge block 1)
		encode_instruction(RSX_FP_OPCODE_ADD),       // 7
		encode_instruction(RSX_FP_OPCODE_MOV, true)  // 8 (merge block 2)
	};

	// { block id, instruction count }, in the order the blocks are stored
	const std::pair<int, size_t> expected_block_data[5] = {
		{ 0, 3 }, // Head
		{ 3, 2 }, // Branch 1
		{ 5, 1 }, // Branch 2
		{ 6, 2 }, // Merge 1
		{ 8, 1 }, // Merge 2
	};

	RSXFragmentProgram program{};
	program.data = buffer.data();

	FlowGraph graph = deconstruct_fragment_program(program);

	ASSERT_EQ(graph.blocks.size(), 5);

	size_t index = 0;
	for (const auto& block : graph.blocks)
	{
		const auto& expected = expected_block_data[index];
		++index;

		EXPECT_EQ(block.id, expected.first);
		EXPECT_EQ(block.instructions.size(), expected.second);
	}
}
|
||||
|
||||
// Two nested IFs that end at the same instruction: the shared merge block has
// two predecessors and the head block has two successors.
TEST(CFG, FpToCFG_NestedIF_MultiplePred)
{
	rsx::simple_array<v128> buffer = {
		encode_instruction(RSX_FP_OPCODE_ADD),       // 0
		encode_instruction(RSX_FP_OPCODE_MOV),       // 1
		create_if(6),                                // 2 (BR, 6)
		encode_instruction(RSX_FP_OPCODE_ADD),       // 3
		create_if(6),                                // 4 (BR, 6)
		encode_instruction(RSX_FP_OPCODE_MOV),       // 5
		encode_instruction(RSX_FP_OPCODE_MOV),       // 6 (merge block)
		encode_instruction(RSX_FP_OPCODE_ADD),       // 7
		encode_instruction(RSX_FP_OPCODE_MOV, true)  // 8
	};

	// { block id, instruction count }, in the order the blocks are stored
	const std::pair<int, size_t> expected_block_data[4] = {
		{ 0, 3 }, // Head
		{ 3, 2 }, // Branch 1
		{ 5, 1 }, // Branch 2
		{ 6, 3 }, // Merge
	};

	RSXFragmentProgram program{};
	program.data = buffer.data();

	FlowGraph graph = deconstruct_fragment_program(program);

	ASSERT_EQ(graph.blocks.size(), 4);

	size_t index = 0;
	for (const auto& block : graph.blocks)
	{
		const auto& expected = expected_block_data[index];
		++index;

		EXPECT_EQ(block.id, expected.first);
		EXPECT_EQ(block.instructions.size(), expected.second);
	}

	// Predecessors must be ordered, closest first
	const auto merge = std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6));
	ASSERT_EQ(merge->pred.size(), 2);
	EXPECT_EQ(merge->pred[0].type, EdgeType::ENDIF);
	EXPECT_EQ(merge->pred[0].from->id, 3);
	EXPECT_EQ(merge->pred[1].type, EdgeType::ENDIF);
	EXPECT_EQ(merge->pred[1].from->id, 0);

	// Successors must also be ordered, closest first
	const auto head = std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0));
	ASSERT_EQ(head->succ.size(), 2);
	EXPECT_EQ(head->succ[0].type, EdgeType::IF);
	EXPECT_EQ(head->succ[0].to->id, 3);
	EXPECT_EQ(head->succ[1].type, EdgeType::ENDIF);
	EXPECT_EQ(head->succ[1].to->id, 6);
}
|
||||
|
||||
// An IF with an ELSE target must produce four blocks: head, positive branch,
// negative branch and merge.
TEST(CFG, FpToCFG_IF_ELSE)
{
	rsx::simple_array<v128> buffer = {
		encode_instruction(RSX_FP_OPCODE_ADD),       // 0
		encode_instruction(RSX_FP_OPCODE_MOV),       // 1
		create_if(6, 4),                             // 2 (BR, 6)
		encode_instruction(RSX_FP_OPCODE_ADD),       // 3
		encode_instruction(RSX_FP_OPCODE_MOV),       // 4 (Else)
		encode_instruction(RSX_FP_OPCODE_ADD),       // 5
		encode_instruction(RSX_FP_OPCODE_MOV, true), // 6 (Merge)
	};

	// { block id, instruction count }, in the order the blocks are stored
	const std::pair<int, size_t> expected_block_data[4] = {
		{ 0, 3 }, // Head
		{ 3, 1 }, // Branch positive
		{ 4, 2 }, // Branch negative
		{ 6, 1 }, // Merge
	};

	RSXFragmentProgram program{};
	program.data = buffer.data();

	FlowGraph graph = deconstruct_fragment_program(program);

	ASSERT_EQ(graph.blocks.size(), 4);

	size_t index = 0;
	for (const auto& block : graph.blocks)
	{
		const auto& expected = expected_block_data[index];
		++index;

		EXPECT_EQ(block.id, expected.first);
		EXPECT_EQ(block.instructions.size(), expected.second);
	}
}
|
||||
}
|
||||
|
|
@ -267,4 +267,90 @@ namespace rsx
|
|||
EXPECT_EQ(std::memcmp(arr[i].second.s, "Hello World", sizeof(arr[i].second.s)), 0);
|
||||
}
|
||||
}
|
||||
|
||||
// The storage of a 2-element array of a 16-byte aligned type must start on a
// 16-byte boundary.
TEST(SimpleArray, DataAlignment_SmallVector)
{
	struct alignas(16) some_struct
	{
		char data[16];
	};

	rsx::simple_array<some_struct> elements(2);

	const auto address = reinterpret_cast<uintptr_t>(elements.data());
	const auto misaligned_bits = address & 15;

	EXPECT_EQ(misaligned_bits, 0);
}
|
||||
|
||||
// The storage of a 128-element array of a 16-byte aligned type must start on
// a 16-byte boundary.
TEST(SimpleArray, DataAlignment_HeapAlloc)
{
	struct alignas(16) some_struct
	{
		char data[16];
	};

	rsx::simple_array<some_struct> elements(128);

	const auto address = reinterpret_cast<uintptr_t>(elements.data());
	const auto misaligned_bits = address & 15;

	EXPECT_EQ(misaligned_bits, 0);
}
|
||||
|
||||
// The second template argument of simple_array must be honoured as the
// alignment of the data pointer (checked for 16 and 128).
TEST(SimpleArray, DataAlignment_Overrides)
{
	rsx::simple_array<std::byte, 16> aligned_16(4);
	rsx::simple_array<std::byte, 128> aligned_128(4);

	const auto address_16 = reinterpret_cast<uintptr_t>(aligned_16.data());
	const auto address_128 = reinterpret_cast<uintptr_t>(aligned_128.data());

	EXPECT_EQ(address_16 & 15, 0);
	EXPECT_EQ(address_128 & 127, 0);
}
|
||||
|
||||
// find() must return a pointer to the matching element, or nullptr when the
// value is not present.
TEST(SimpleArray, Find)
{
	const rsx::simple_array<int> arr{
		0, 1, 2, 3, 4, 5, 6, 7, 8, 9
	};

	// Fatal check first: dereferencing a null result would crash the test
	// binary instead of reporting a failure.
	const auto found = arr.find(8);
	ASSERT_NE(found, nullptr);
	EXPECT_EQ(*found, 8);

	EXPECT_EQ(arr.find(99), nullptr);
}
|
||||
|
||||
// find_if() must return a pointer to the first element satisfying the
// predicate, or nullptr when no element matches.
TEST(SimpleArray, FindIf)
{
	const rsx::simple_array<int> arr{
		0, 1, 2, 3, 4, 5, 6, 7, 8, 9
	};

	// Fatal check first: dereferencing a null result would crash the test
	// binary instead of reporting a failure.
	const auto found = arr.find_if(FN(x == 8));
	ASSERT_NE(found, nullptr);
	EXPECT_EQ(*found, 8);

	EXPECT_EQ(arr.find_if(FN(x == 99)), nullptr);
}
|
||||
|
||||
// malloc<256> must return a non-null pointer aligned to 256 bytes.
TEST(AlignedAllocator, Alloc)
{
	auto allocation = rsx::aligned_allocator::malloc<256>(16);

	// Record the address before releasing the memory; only the integer value
	// is inspected afterwards.
	const auto address = reinterpret_cast<uintptr_t>(allocation);
	rsx::aligned_allocator::free(allocation);

	EXPECT_NE(address, 0);
	EXPECT_EQ(address % 256, 0);
}
|
||||
|
||||
// Growing an allocation with realloc<256> must yield a non-null pointer that
// is still aligned to 256 bytes.
TEST(AlignedAllocator, Realloc)
{
	auto initial = rsx::aligned_allocator::malloc<256>(16);
	auto grown = rsx::aligned_allocator::realloc<256>(initial, 16, 32);

	// Record the address before releasing the memory; only the integer value
	// is inspected afterwards.
	const auto address = reinterpret_cast<uintptr_t>(grown);
	rsx::aligned_allocator::free(grown);

	EXPECT_NE(address, 0);
	EXPECT_EQ(address % 256, 0);
}
|
||||
|
||||
// Shrinking an allocation with realloc<256> must hand back the same address.
TEST(AlignedAllocator, Realloc_ReturnsPreviousPointerIfFits)
{
	auto ptr = rsx::aligned_allocator::malloc<256>(16);
	// Capture the addresses as integers while the pointers are still valid;
	// comparing the pointers themselves after free() would use invalid
	// pointer values. This mirrors the other AlignedAllocator tests.
	const auto address_before = reinterpret_cast<uintptr_t>(ptr);

	auto ptr2 = rsx::aligned_allocator::realloc<256>(ptr, 16, 8);
	const auto address_after = reinterpret_cast<uintptr_t>(ptr2);

	rsx::aligned_allocator::free(ptr2);

	EXPECT_EQ(address_before, address_after);
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,113 +2,116 @@
|
|||
|
||||
#include "util/tuple.hpp"
|
||||
|
||||
struct some_struct
|
||||
namespace utils
|
||||
{
|
||||
u64 v {};
|
||||
char s[12] = "Hello World";
|
||||
|
||||
bool operator == (const some_struct& r) const
|
||||
struct some_struct
|
||||
{
|
||||
return v == r.v && std::memcmp(s, r.s, sizeof(s)) == 0;
|
||||
u64 v{};
|
||||
char s[12] = "Hello World";
|
||||
|
||||
bool operator == (const some_struct& r) const
|
||||
{
|
||||
return v == r.v && std::memcmp(s, r.s, sizeof(s)) == 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Exercises utils::tuple with 0 to 3 elements: size, layout, element access
// types, reset via `= {}`, const access, and copy / move assignment.
TEST(Tuple, General)
{
	using tuple1_t = utils::tuple<int>;
	using tuple2_t = utils::tuple<int, some_struct>;
	using tuple3_t = utils::tuple<int, some_struct, double>;

	some_struct sample{};
	sample.v = 1234;

	// Empty tuple
	utils::tuple empty = {};
	EXPECT_EQ(empty.size(), 0);

	// One element, default-initialized
	tuple1_t single;
	EXPECT_EQ(sizeof(single), sizeof(int));
	EXPECT_TRUE((std::is_same_v<decltype(single.get<0>()), int&>));
	EXPECT_EQ(single.size(), 1);
	EXPECT_EQ(single.get<0>(), 0);

	// One element, initialized from a value and then reset
	tuple1_t single_init = 2;
	EXPECT_EQ(sizeof(single_init), sizeof(int));
	EXPECT_TRUE((std::is_same_v<decltype(single_init.get<0>()), int&>));
	EXPECT_EQ(single_init.size(), 1);
	EXPECT_EQ(single_init.get<0>(), 2);
	single_init = {};
	EXPECT_EQ(single_init.size(), 1);
	EXPECT_EQ(single_init.get<0>(), 0);

	// Two elements
	tuple2_t duo = { 2, sample };
	EXPECT_EQ(sizeof(duo), 32);
	EXPECT_EQ(duo.size(), 2);
	EXPECT_TRUE((std::is_same_v<decltype(duo.get<0>()), int&>));
	EXPECT_TRUE((std::is_same_v<decltype(duo.get<1>()), some_struct&>));
	EXPECT_EQ(duo.get<0>(), 2);
	EXPECT_EQ(duo.get<1>(), sample);
	duo = {};
	EXPECT_EQ(duo.size(), 2);
	EXPECT_EQ(duo.get<0>(), 0);
	EXPECT_EQ(duo.get<1>(), some_struct{});

	duo.get<0>() = 666;
	duo.get<1>() = sample;
	EXPECT_EQ(duo.get<0>(), 666);
	EXPECT_EQ(duo.get<1>(), sample);

	// Three elements
	tuple3_t trio = { 2, sample, 1234.0 };
	EXPECT_EQ(sizeof(trio), 40);
	EXPECT_EQ(trio.size(), 3);
	EXPECT_TRUE((std::is_same_v<decltype(trio.get<0>()), int&>));
	EXPECT_TRUE((std::is_same_v<decltype(trio.get<1>()), some_struct&>));
	EXPECT_TRUE((std::is_same_v<decltype(trio.get<2>()), double&>));
	EXPECT_EQ(trio.get<0>(), 2);
	EXPECT_EQ(trio.get<1>(), sample);
	EXPECT_EQ(trio.get<2>(), 1234.0);
	trio = {};
	EXPECT_EQ(trio.size(), 3);
	EXPECT_EQ(trio.get<0>(), 0);
	EXPECT_EQ(trio.get<1>(), some_struct{});
	EXPECT_EQ(trio.get<2>(), 0.0);

	trio.get<0>() = 666;
	trio.get<1>() = sample;
	trio.get<2>() = 7.0;
	EXPECT_EQ(trio.get<0>(), 666);
	EXPECT_EQ(trio.get<1>(), sample);
	EXPECT_EQ(trio.get<2>(), 7.0);

	// const
	const tuple2_t const_duo = { 2, sample };
	EXPECT_EQ(const_duo.size(), 2);
	EXPECT_TRUE((std::is_same_v<decltype(const_duo.get<0>()), const int&>));
	EXPECT_TRUE((std::is_same_v<decltype(const_duo.get<1>()), const some_struct&>));
	EXPECT_EQ(const_duo.get<0>(), 2);
	EXPECT_EQ(const_duo.get<1>(), sample);

	// assignment
	const tuple2_t const_source = { 2, sample };
	tuple2_t target = const_source;
	EXPECT_EQ(target.size(), 2);
	EXPECT_TRUE((std::is_same_v<decltype(target.get<0>()), int&>));
	EXPECT_TRUE((std::is_same_v<decltype(target.get<1>()), some_struct&>));
	EXPECT_EQ(target.get<0>(), 2);
	EXPECT_EQ(target.get<1>(), sample);

	// Copy assignment must leave the source intact.
	tuple2_t source = { 2, sample };
	target = source;
	EXPECT_EQ(target.size(), 2);
	EXPECT_TRUE((std::is_same_v<decltype(target.get<0>()), int&>));
	EXPECT_TRUE((std::is_same_v<decltype(target.get<1>()), some_struct&>));
	EXPECT_EQ(target.get<0>(), 2);
	EXPECT_EQ(target.get<1>(), sample);
	EXPECT_EQ(source.size(), 2);
	EXPECT_TRUE((std::is_same_v<decltype(source.get<0>()), int&>));
	EXPECT_TRUE((std::is_same_v<decltype(source.get<1>()), some_struct&>));
	EXPECT_EQ(source.get<0>(), 2);
	EXPECT_EQ(source.get<1>(), sample);

	// Move assignment
	target = std::move(source);
	EXPECT_EQ(target.size(), 2);
	EXPECT_TRUE((std::is_same_v<decltype(target.get<0>()), int&>));
	EXPECT_TRUE((std::is_same_v<decltype(target.get<1>()), some_struct&>));
	EXPECT_EQ(target.get<0>(), 2);
	EXPECT_EQ(target.get<1>(), sample);
}
|
||||
};
|
||||
|
||||
// Exercises utils::tuple with 0 to 3 elements: size, layout, element access
// types, reset via `= {}`, const access, and copy / move assignment.
TEST(Utils, Tuple)
{
	using tuple1_t = utils::tuple<int>;
	using tuple2_t = utils::tuple<int, some_struct>;
	using tuple3_t = utils::tuple<int, some_struct, double>;

	some_struct sample{};
	sample.v = 1234;

	// Empty tuple
	utils::tuple empty = {};
	EXPECT_EQ(empty.size(), 0);

	// One element, default-initialized
	tuple1_t single;
	EXPECT_EQ(sizeof(single), sizeof(int));
	EXPECT_TRUE((std::is_same_v<decltype(single.get<0>()), int&>));
	EXPECT_EQ(single.size(), 1);
	EXPECT_EQ(single.get<0>(), 0);

	// One element, initialized from a value and then reset
	tuple1_t single_init = 2;
	EXPECT_EQ(sizeof(single_init), sizeof(int));
	EXPECT_TRUE((std::is_same_v<decltype(single_init.get<0>()), int&>));
	EXPECT_EQ(single_init.size(), 1);
	EXPECT_EQ(single_init.get<0>(), 2);
	single_init = {};
	EXPECT_EQ(single_init.size(), 1);
	EXPECT_EQ(single_init.get<0>(), 0);

	// Two elements
	tuple2_t duo = { 2, sample };
	EXPECT_EQ(sizeof(duo), 32);
	EXPECT_EQ(duo.size(), 2);
	EXPECT_TRUE((std::is_same_v<decltype(duo.get<0>()), int&>));
	EXPECT_TRUE((std::is_same_v<decltype(duo.get<1>()), some_struct&>));
	EXPECT_EQ(duo.get<0>(), 2);
	EXPECT_EQ(duo.get<1>(), sample);
	duo = {};
	EXPECT_EQ(duo.size(), 2);
	EXPECT_EQ(duo.get<0>(), 0);
	EXPECT_EQ(duo.get<1>(), some_struct{});

	duo.get<0>() = 666;
	duo.get<1>() = sample;
	EXPECT_EQ(duo.get<0>(), 666);
	EXPECT_EQ(duo.get<1>(), sample);

	// Three elements
	tuple3_t trio = { 2, sample, 1234.0 };
	EXPECT_EQ(sizeof(trio), 40);
	EXPECT_EQ(trio.size(), 3);
	EXPECT_TRUE((std::is_same_v<decltype(trio.get<0>()), int&>));
	EXPECT_TRUE((std::is_same_v<decltype(trio.get<1>()), some_struct&>));
	EXPECT_TRUE((std::is_same_v<decltype(trio.get<2>()), double&>));
	EXPECT_EQ(trio.get<0>(), 2);
	EXPECT_EQ(trio.get<1>(), sample);
	EXPECT_EQ(trio.get<2>(), 1234.0);
	trio = {};
	EXPECT_EQ(trio.size(), 3);
	EXPECT_EQ(trio.get<0>(), 0);
	EXPECT_EQ(trio.get<1>(), some_struct{});
	EXPECT_EQ(trio.get<2>(), 0.0);

	trio.get<0>() = 666;
	trio.get<1>() = sample;
	trio.get<2>() = 7.0;
	EXPECT_EQ(trio.get<0>(), 666);
	EXPECT_EQ(trio.get<1>(), sample);
	EXPECT_EQ(trio.get<2>(), 7.0);

	// const
	const tuple2_t const_duo = { 2, sample };
	EXPECT_EQ(const_duo.size(), 2);
	EXPECT_TRUE((std::is_same_v<decltype(const_duo.get<0>()), const int&>));
	EXPECT_TRUE((std::is_same_v<decltype(const_duo.get<1>()), const some_struct&>));
	EXPECT_EQ(const_duo.get<0>(), 2);
	EXPECT_EQ(const_duo.get<1>(), sample);

	// assignment
	const tuple2_t const_source = { 2, sample };
	tuple2_t target = const_source;
	EXPECT_EQ(target.size(), 2);
	EXPECT_TRUE((std::is_same_v<decltype(target.get<0>()), int&>));
	EXPECT_TRUE((std::is_same_v<decltype(target.get<1>()), some_struct&>));
	EXPECT_EQ(target.get<0>(), 2);
	EXPECT_EQ(target.get<1>(), sample);

	// Copy assignment must leave the source intact.
	tuple2_t source = { 2, sample };
	target = source;
	EXPECT_EQ(target.size(), 2);
	EXPECT_TRUE((std::is_same_v<decltype(target.get<0>()), int&>));
	EXPECT_TRUE((std::is_same_v<decltype(target.get<1>()), some_struct&>));
	EXPECT_EQ(target.get<0>(), 2);
	EXPECT_EQ(target.get<1>(), sample);
	EXPECT_EQ(source.size(), 2);
	EXPECT_TRUE((std::is_same_v<decltype(source.get<0>()), int&>));
	EXPECT_TRUE((std::is_same_v<decltype(source.get<1>()), some_struct&>));
	EXPECT_EQ(source.get<0>(), 2);
	EXPECT_EQ(source.get<1>(), sample);

	// Move assignment
	target = std::move(source);
	EXPECT_EQ(target.size(), 2);
	EXPECT_TRUE((std::is_same_v<decltype(target.get<0>()), int&>));
	EXPECT_TRUE((std::is_same_v<decltype(target.get<1>()), some_struct&>));
	EXPECT_EQ(target.get<0>(), 2);
	EXPECT_EQ(target.get<1>(), sample);
}
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ namespace utils
|
|||
#if defined(ARCH_X64)
|
||||
return _m_prefetchw(const_cast<void*>(ptr));
|
||||
#else
|
||||
return __builtin_prefetch(ptr, 1, 0);
|
||||
return __builtin_prefetch(ptr, 1, 3);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,10 +1,12 @@
|
|||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
namespace utils
|
||||
{
|
||||
// Hack: pointer-cast utility to work around UB. Use with extreme care.
|
||||
template <typename T, typename U>
|
||||
[[nodiscard]] T* bless(U* ptr)
|
||||
template <typename T, typename U> requires (std::is_pointer_v<std::remove_reference_t<U>>)
|
||||
[[nodiscard]] inline T* bless(const U& ptr)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
return (T*)ptr;
|
||||
|
|
@ -21,3 +23,4 @@ namespace utils
|
|||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -999,17 +999,18 @@ template <typename To, typename From> requires (std::is_integral_v<decltype(std:
|
|||
constexpr bool is_from_signed = std::is_signed_v<CommonFrom>;
|
||||
constexpr bool is_to_signed = std::is_signed_v<CommonTo>;
|
||||
|
||||
constexpr auto from_mask = (is_from_signed && !is_to_signed) ? UnFrom{umax} >> 1 : UnFrom{umax};
|
||||
// For unsigned/signed mismatch, create an "unsigned" compatible mask
|
||||
constexpr auto from_mask = (is_from_signed && !is_to_signed && sizeof(CommonFrom) <= sizeof(CommonTo)) ? UnFrom{umax} >> 1 : UnFrom{umax};
|
||||
constexpr auto to_mask = (is_to_signed && !is_from_signed) ? UnTo{umax} >> 1 : UnTo{umax};
|
||||
|
||||
constexpr auto mask = ~(from_mask & to_mask);
|
||||
constexpr auto mask = static_cast<UnFrom>(~(from_mask & to_mask));
|
||||
|
||||
// Signed to unsigned always require test
|
||||
// Otherwise, this is bit-wise narrowing or conversion between types of different signedness of the same size
|
||||
if constexpr ((is_from_signed && !is_to_signed) || to_mask < from_mask)
|
||||
// If destination ("unsigned" compatible) mask is smaller than source ("unsigned" compatible) mask
|
||||
// It requires narrowing.
|
||||
if constexpr (!!mask)
|
||||
{
|
||||
// Try to optimize test if both are of the same signedness
|
||||
if (is_from_signed != is_to_signed ? !!(value & mask) : static_cast<CommonTo>(value) != value) [[unlikely]]
|
||||
if (is_from_signed != is_to_signed ? !!(value & mask) : static_cast<CommonFrom>(static_cast<CommonTo>(value)) != value) [[unlikely]]
|
||||
{
|
||||
fmt::raw_verify_error(src_loc, u8"Narrowing error", +value);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue