diff --git a/Utilities/StrFmt.h b/Utilities/StrFmt.h index 5eba199e26..6d9ea05c9e 100644 --- a/Utilities/StrFmt.h +++ b/Utilities/StrFmt.h @@ -394,7 +394,7 @@ namespace fmt } #if !defined(_MSC_VER) || defined(__clang__) - [[noreturn]] ~throw_exception(); + [[noreturn]] ~throw_exception() = default; #endif }; diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt index 3217cc38f5..796351e16c 100644 --- a/rpcs3/CMakeLists.txt +++ b/rpcs3/CMakeLists.txt @@ -192,6 +192,7 @@ if(BUILD_RPCS3_TESTS) tests/test_tuple.cpp tests/test_simple_array.cpp tests/test_address_range.cpp + tests/test_rsx_cfg.cpp ) target_link_libraries(rpcs3_test diff --git a/rpcs3/Crypto/utils.cpp b/rpcs3/Crypto/utils.cpp index 7432acbf62..8d2fd4e9aa 100644 --- a/rpcs3/Crypto/utils.cpp +++ b/rpcs3/Crypto/utils.cpp @@ -157,7 +157,7 @@ std::array sc_combine_laid_paid(s64 laid, s64 paid) { const std::string paid_laid = fmt::format("%016llx%016llx", laid, paid); std::array out{}; - hex_to_bytes(out.data(), paid_laid.c_str(), PASSPHRASE_KEY_LEN * 2); + hex_to_bytes(out.data(), paid_laid, PASSPHRASE_KEY_LEN * 2); return out; } diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt index 139688947d..1a902b46df 100644 --- a/rpcs3/Emu/CMakeLists.txt +++ b/rpcs3/Emu/CMakeLists.txt @@ -516,6 +516,7 @@ target_sources(rpcs3_emu PRIVATE RSX/Overlays/overlay_video.cpp RSX/Overlays/Shaders/shader_loading_dialog.cpp RSX/Overlays/Shaders/shader_loading_dialog_native.cpp + RSX/Program/Assembler/FPToCFG.cpp RSX/Program/CgBinaryProgram.cpp RSX/Program/CgBinaryFragmentProgram.cpp RSX/Program/CgBinaryVertexProgram.cpp diff --git a/rpcs3/Emu/Cell/Modules/sceNpTrophy.h b/rpcs3/Emu/Cell/Modules/sceNpTrophy.h index 0c48623fda..6f7d88c148 100644 --- a/rpcs3/Emu/Cell/Modules/sceNpTrophy.h +++ b/rpcs3/Emu/Cell/Modules/sceNpTrophy.h @@ -3,7 +3,6 @@ #include "util/types.hpp" #include "Emu/Memory/vm_ptr.h" #include "Emu/Cell/ErrorCodes.h" -#include #include #include diff --git a/rpcs3/Emu/Cell/PPUThread.cpp 
b/rpcs3/Emu/Cell/PPUThread.cpp index 95536540f5..0982ed79e4 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -3718,7 +3718,7 @@ extern void ppu_finalize(const ppu_module& info, bool force_mem_release #endif } -extern void ppu_precompile(std::vector& dir_queue, std::vector*>* loaded_modules) +extern void ppu_precompile(std::vector& dir_queue, std::vector*>* loaded_modules, bool is_fast_compilation) { if (g_cfg.core.ppu_decoder != ppu_decoder_type::llvm) { @@ -4166,6 +4166,12 @@ extern void ppu_precompile(std::vector& dir_queue, std::vector& info) assert(ptr_inst->getResultElementType() == m_ir->getPtrTy()); const auto faddr = m_ir->CreateLoad(ptr_inst->getResultElementType(), ptr_inst); - const auto faddr_int = m_ir->CreatePtrToInt(faddr, get_type()); const auto pos_32 = m_reloc ? m_ir->CreateAdd(func_pc, m_seg0) : func_pc; const auto pos = m_ir->CreateShl(pos_32, 1); const auto ptr = m_ir->CreatePtrAdd(m_exec, pos); @@ -427,7 +426,7 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info) const auto seg_val = m_ir->CreateTrunc(m_ir->CreateLShr(m_seg0, 13), get_type()); // Store to jumptable - m_ir->CreateStore(faddr_int, ptr); + m_ir->CreateStore(faddr, ptr); m_ir->CreateStore(seg_val, seg_ptr); // Increment index and branch back to loop @@ -596,7 +595,7 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect) if (g_cfg.core.ppu_prof) { - m_ir->CreateStore(m_ir->getInt32(target_last), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast(&m_cia - m_locals))); + m_ir->CreateStore(GetAddr(target_last - m_addr), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast(&m_cia - m_locals))); } } } diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index 11eb124eae..2a792f677e 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -3069,6 +3069,39 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s 
values[op.rt] = pos + 4; } + const u32 pos_next = wa; + + bool is_no_return = false; + + if (pos_next >= lsa && pos_next < limit) + { + const u32 data_next = ls[pos_next / 4]; + const auto type_next = g_spu_itype.decode(data_next); + const auto flag_next = g_spu_iflag.decode(data_next); + const auto op_next = spu_opcode_t{data_next}; + + if (!(type_next & spu_itype::zregmod) && !(type_next & spu_itype::branch)) + { + if (auto iflags = g_spu_iflag.decode(data_next)) + { + if (+flag_next & +spu_iflag::use_ra) + { + is_no_return = is_no_return || (op_next.ra >= 4 && op_next.ra < 10); + } + + if (+flag_next & +spu_iflag::use_rb) + { + is_no_return = is_no_return || (op_next.rb >= 4 && op_next.rb < 10); + } + + if (type_next & spu_itype::_quadrop && +iflags & +spu_iflag::use_rc) + { + is_no_return = is_no_return || (op_next.rc >= 4 && op_next.rc < 10); /* use_rc: range-check rc itself, same as the ra/rb checks */ + } + } + } + } + if (af & vf::is_const) { const u32 target = spu_branch_target(av); @@ -3105,7 +3138,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s limit = std::min(limit, target); } - if (sl && g_cfg.core.spu_block_size != spu_block_size_type::safe) + if (!is_no_return && sl && g_cfg.core.spu_block_size != spu_block_size_type::safe) { m_ret_info[pos / 4 + 1] = true; m_entry_info[pos / 4 + 1] = true; @@ -3122,7 +3155,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s u64 dabs = 0; u64 drel = 0; - for (u32 i = start; i < limit; i += 4) + for (u32 i = start, abs_fail = 0, rel_fail = 0; i < limit; i += 4) { const u32 target = ls[i / 4]; @@ -3132,16 +3165,39 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s break; } + if (target >= SPU_LS_SIZE && target <= 0u - SPU_LS_SIZE) + { + if (g_spu_itype.decode(target) != spu_itype::UNK) + { + // End of jumptable: valid instruction + break; + } + } + if (target >= lsa && target < SPU_LS_SIZE) { // Possible jump table entry (absolute) - jt_abs.push_back(target); + if (!abs_fail) + { + 
jt_abs.push_back(target); + } + } + else + { + abs_fail++; } if (target + start >= lsa && target + start < SPU_LS_SIZE) { // Possible jump table entry (relative) - jt_rel.push_back(target + start); + if (!rel_fail) + { + jt_rel.push_back(target + start); + } + } + else + { + rel_fail++; } if (std::max(jt_abs.size(), jt_rel.size()) * 4 + start <= i) @@ -3153,6 +3209,35 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s } } + for (usz i = 0; i < jt_abs.size(); i++) + { + if (jt_abs[i] == start + jt_abs.size() * 4) + { + // If jumptable contains absolute address of code start after the jumptable itself + // It is likely an absolute-type jumptable + + bool is_good_conclusion = true; + + // For verification: make sure there is none like this in relative table + + for (u32 target : jt_rel) + { + if (target == start + jt_rel.size() * 4) + { + is_good_conclusion = false; + break; + } + } + + if (is_good_conclusion) + { + jt_rel.clear(); + } + + break; + } + } + // Choose position after the jt as an anchor and compute the average distance for (u32 target : jt_abs) { @@ -3251,9 +3336,9 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s spu_log.notice("[0x%x] At 0x%x: ignoring indirect branch (SYNC)", entry_point, pos); } - if (type == spu_itype::BI || sl) + if (type == spu_itype::BI || sl || is_no_return) { - if (type == spu_itype::BI || g_cfg.core.spu_block_size == spu_block_size_type::safe) + if (type == spu_itype::BI || g_cfg.core.spu_block_size == spu_block_size_type::safe || is_no_return) { m_targets[pos]; } @@ -3290,9 +3375,42 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s break; } + const u32 pos_next = wa; + + bool is_no_return = false; + + if (pos_next >= lsa && pos_next < limit) + { + const u32 data_next = ls[pos_next / 4]; + const auto type_next = g_spu_itype.decode(data_next); + const auto flag_next = g_spu_iflag.decode(data_next); + const auto op_next = 
spu_opcode_t{data_next}; + + if (!(type_next & spu_itype::zregmod) && !(type_next & spu_itype::branch)) + { + if (auto iflags = g_spu_iflag.decode(data_next)) + { + if (+flag_next & +spu_iflag::use_ra) + { + is_no_return = is_no_return || (op_next.ra >= 4 && op_next.ra < 10); + } + + if (+flag_next & +spu_iflag::use_rb) + { + is_no_return = is_no_return || (op_next.rb >= 4 && op_next.rb < 10); + } + + if (type_next & spu_itype::_quadrop && +iflags & +spu_iflag::use_rc) + { + is_no_return = is_no_return || (op_next.rc >= 4 && op_next.rc < 10); + } + } + } + } + m_targets[pos].push_back(target); - if (g_cfg.core.spu_block_size != spu_block_size_type::safe) + if (!is_no_return && g_cfg.core.spu_block_size != spu_block_size_type::safe) { m_ret_info[pos / 4 + 1] = true; m_entry_info[pos / 4 + 1] = true; @@ -3300,7 +3418,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s add_block(pos + 4); } - if (g_cfg.core.spu_block_size == spu_block_size_type::giga && !sync) + if (!is_no_return && g_cfg.core.spu_block_size == spu_block_size_type::giga && !sync) { m_entry_info[target / 4] = true; add_block(target); @@ -4860,20 +4978,27 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s bool select_16_or_0_at_runtime = false; bool put_active = false; // PUTLLC happened bool get_rdatomic = false; // True if MFC_RdAtomicStat was read after GETLLAR + u32 required_pc = SPU_LS_SIZE; // Require program to be location specific for this optimization (SPU_LS_SIZE - no requirement) u32 mem_count = 0; + u32 break_cause = 100; + u32 break_pc = SPU_LS_SIZE; // Return old state for error reporting atomic16_t discard() { const u32 pc = lsa_pc; const u32 last_pc = lsa_last_pc; + const u32 cause = break_cause; + const u32 break_pos = break_pc; const atomic16_t old = *this; *this = atomic16_t{}; // Keep some members - lsa_pc = pc; - lsa_last_pc = last_pc; + this->lsa_pc = pc; + this->lsa_last_pc = last_pc; + this->break_cause = cause; + 
this->break_pc = break_pos; return old; } @@ -5080,15 +5205,17 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s { if (previous.active && likely_putllc_loop && getllar_starts.contains(previous.lsa_pc)) { - const bool is_first = !std::exchange(getllar_starts[previous.lsa_pc], true); + had_putllc_evaluation = true; - if (!is_first) + if (cause != 24) { + atomic16->break_cause = cause; + atomic16->break_pc = pos; return; } - had_putllc_evaluation = true; - + cause = atomic16->break_cause; + getllar_starts[previous.lsa_pc] = true; g_fxo->get().breaking_reason[cause]++; if (!spu_log.notice) @@ -5096,7 +5223,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s return; } - std::string break_error = fmt::format("PUTLLC pattern breakage [%x mem=%d lsa_const=%d cause=%u] (lsa_pc=0x%x)", pos, previous.mem_count, u32{!previous.ls_offs.is_const()} * 2 + previous.lsa.is_const(), cause, previous.lsa_pc); + std::string break_error = fmt::format("PUTLLC pattern breakage [%x mem=%d lsa_const=%d cause=%u] (lsa_pc=0x%x)", atomic16->break_pc, previous.mem_count, u32{!previous.ls_offs.is_const()} * 2 + previous.lsa.is_const(), cause, previous.lsa_pc); const auto values = sort_breakig_reasons(g_fxo->get().breaking_reason); @@ -6258,6 +6385,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s existing.ls_invalid |= atomic16->ls_invalid; existing.ls_access |= atomic16->ls_access; existing.mem_count = std::max(existing.mem_count, atomic16->mem_count); + existing.required_pc = std::min(existing.required_pc, atomic16->required_pc); existing.select_16_or_0_at_runtime |= atomic16->select_16_or_0_at_runtime; } @@ -6272,6 +6400,24 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s invalidate = false; } } + else if (atomic16->break_cause != 100 && atomic16->lsa_pc != SPU_LS_SIZE) + { + const auto it = atomic16_all.find(pos); + + if (it == atomic16_all.end()) + { + // Ensure future 
failure + atomic16_all.emplace(pos, *atomic16); + break_putllc16(24, FN(x.active = true, x)(as_rvalue(*atomic16))); + } + else if (it->second.active && atomic16->break_cause != 100) + { + it->second = *atomic16; + break_putllc16(24, FN(x.active = true, x)(as_rvalue(*atomic16))); + } + + atomic16->break_cause = 100; + } break; } @@ -6342,6 +6488,10 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // Do not clear lower 16 bytes addressing because the program can move on 4-byte basis const u32 offs = spu_branch_target(pos - result.lower_bound, op.si16); + const u32 true_offs = spu_branch_target(pos, op.si16); + + // Make this optimization depend on the location of the program + atomic16->required_pc = result.lower_bound; if (atomic16->lsa.is_const() && [&]() { @@ -6366,6 +6516,10 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s { // Ignore memory access in this case } + else if (atomic16->lsa.is_const() && !atomic16->lsa.compare_with_mask_indifference(true_offs, SPU_LS_MASK_128)) + { + // Same + } else if (atomic16->ls_invalid && is_store) { break_putllc16(35, atomic16->set_invalid_ls(is_store)); @@ -7119,27 +7273,33 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s continue; } + union putllc16_or_0_info + { + u64 data; + bf_t required_pc; + bf_t type; + bf_t runtime16_select; + bf_t no_notify; + bf_t reg; + bf_t off18; + bf_t reg2; + } value{}; + auto& stats = g_fxo->get(); had_putllc_evaluation = true; if (!pattern.ls_write) { + if (pattern.required_pc != SPU_LS_SIZE) + { + value.required_pc = pattern.required_pc; + } + spu_log.success("PUTLLC0 Pattern Detected! 
(put_pc=0x%x, %s) (putllc0=%d, putllc16+0=%d, all=%d)", pattern.put_pc, func_hash, ++stats.nowrite, ++stats.single, +stats.all); - add_pattern(false, inst_attr::putllc0, pattern.put_pc - lsa); + add_pattern(false, inst_attr::putllc0, pattern.put_pc - lsa, value.data); continue; } - union putllc16_info - { - u32 data; - bf_t type; - bf_t runtime16_select; - bf_t no_notify; - bf_t reg; - bf_t off18; - bf_t reg2; - } value{}; - enum : u32 { v_const = 0, @@ -7170,6 +7330,11 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s value.runtime16_select = pattern.select_16_or_0_at_runtime; value.reg = s_reg_max; + if (pattern.required_pc != SPU_LS_SIZE) + { + value.required_pc = pattern.required_pc; + } + if (pattern.ls.is_const()) { ensure(pattern.reg == s_reg_max && pattern.reg2 == s_reg_max && pattern.ls_offs.is_const(), "Unexpected register usage"); @@ -7201,7 +7366,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (g_cfg.core.spu_accurate_reservations) { // Because enabling it is a hack, as it turns out - continue; + // continue; } add_pattern(false, inst_attr::putllc16, pattern.put_pc - result.entry_point, value.data); @@ -7225,7 +7390,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (inst_attr attr = m_inst_attrs[(read_pc - entry_point) / 4]; attr == inst_attr::none) { - add_pattern(false, inst_attr::rchcnt_loop, read_pc - result.entry_point); + add_pattern(false, inst_attr::rchcnt_loop, read_pc - result.entry_point, 0); spu_log.error("Channel Loop Pattern Detected! Report to developers! 
(read_pc=0x%x, branch_pc=0x%x, branch_target=0x%x, 0x%x-%s)", read_pc, pattern.branch_pc, pattern.branch_target, entry_point, func_hash); } @@ -7241,6 +7406,26 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // Blocks starting from 0x0 or invalid instruction won't be compiled, may need special interpreter fallback } + if (!m_patterns.empty()) + { + std::string out_dump; + dump(result, out_dump); + spu_log.notice("Dump SPU Function with pattern(s):\n%s", out_dump); + } + + for (u32 i = 0; i < result.data.size(); i++) + { + const be_t ls_val = ls[result.lower_bound / 4 + i]; + + if (result.data[i] && std::bit_cast(ls_val) != result.data[i]) + { + std::string out_dump; + dump(result, out_dump); + spu_log.error("SPU Function Dump:\n%s", out_dump); + fmt::throw_exception("SPU Analyzer failed: Instruction mismatch at 0x%x [read: 0x%x vs LS: 0x%x] (i=0x%x)", result.lower_bound + i * 4, std::bit_cast>(result.data[i]), ls_val, i); + } + } + return result; } @@ -8290,19 +8475,10 @@ std::array& block_reg_info::evaluate_start_state(const s return walkby_state; } -void spu_recompiler_base::add_pattern(bool fill_all, inst_attr attr, u32 start, u32 end) +void spu_recompiler_base::add_pattern(bool fill_all, inst_attr attr, u32 start, u64 info) { - if (end == umax) - { - end = start; - } - - m_patterns[start] = pattern_info{utils::address_range32::start_end(start, end)}; - - for (u32 i = start; i <= (fill_all ? 
end : start); i += 4) - { - m_inst_attrs[i / 4] = attr; - } + m_patterns[start] = pattern_info{info}; + m_inst_attrs[start / 4] = attr; } extern std::string format_spu_func_info(u32 addr, cpu_thread* spu) diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp index 8b820ca600..989ba2e84f 100644 --- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp @@ -1080,7 +1080,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator m_ir->SetInsertPoint(_body); } - void putllc16_pattern(const spu_program& /*prog*/, utils::address_range32 range) + void putllc16_pattern(const spu_program& /*prog*/, u64 pattern_info) { // Prevent store elimination m_block->store_context_ctr[s_reg_mfc_eal]++; @@ -1109,16 +1109,17 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator } }; - const union putllc16_info + const union putllc16_or_0_info { - u32 data; - bf_t type; - bf_t runtime16_select; - bf_t no_notify; - bf_t reg; - bf_t off18; - bf_t reg2; - } info = std::bit_cast(range.end); + u64 data; + bf_t required_pc; + bf_t type; + bf_t runtime16_select; + bf_t no_notify; + bf_t reg; + bf_t off18; + bf_t reg2; + } info = std::bit_cast(pattern_info); enum : u32 { @@ -1150,8 +1151,10 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator value_t eal_val; eal_val.value = _eal; - auto get_reg32 = [&](u32 reg) + auto get_reg32 = [&](u64 reg_) { + const u32 reg = static_cast(reg_); + if (get_reg_type(reg) != get_type()) { return get_reg_fixed(reg, get_type()); @@ -1170,6 +1173,19 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator } else if (info.type == v_relative) { + if (info.required_pc && info.required_pc != SPU_LS_SIZE) + { + const auto short_op = llvm::BasicBlock::Create(m_context, "__putllc16_short_op", m_function); + const auto heavy_op = llvm::BasicBlock::Create(m_context, "__putllc16_heavy_op", m_function); + + 
m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->getInt32(info.required_pc), m_base_pc), heavy_op, short_op); + m_ir->SetInsertPoint(heavy_op); + update_pc(); + call("spu_exec_mfc_cmd", &exec_mfc_cmd, m_thread); + m_ir->CreateBr(_final); + m_ir->SetInsertPoint(short_op); + } + dest = m_ir->CreateAnd(get_pc(spu_branch_target(info.off18 + m_base)), 0x3fff0); } else if (info.type == v_reg_offs) @@ -1268,17 +1284,18 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator const auto _new = m_ir->CreateAlignedLoad(get_type(), _ptr(m_lsptr, dest), llvm::MaybeAlign{16}); const auto _rdata = m_ir->CreateAlignedLoad(get_type(), _ptr(spu_ptr(&spu_thread::rdata), m_ir->CreateAnd(diff, 0x70)), llvm::MaybeAlign{16}); - const bool is_accurate_op = !!g_cfg.core.spu_accurate_reservations; + const bool is_accurate_op = true || !!g_cfg.core.spu_accurate_reservations; - const auto compare_data_change_res = is_accurate_op ? m_ir->getTrue() : m_ir->CreateICmpNE(_new, _rdata); + const auto compare_data_change_res = m_ir->CreateICmpNE(_new, _rdata); + const auto second_test_for_complete_op = is_accurate_op ? 
m_ir->getTrue() : compare_data_change_res; if (info.runtime16_select) { - m_ir->CreateCondBr(m_ir->CreateAnd(m_ir->CreateICmpULT(diff, m_ir->getInt64(128)), compare_data_change_res), _begin_op, _inc_res, m_md_likely); + m_ir->CreateCondBr(m_ir->CreateAnd(m_ir->CreateICmpULT(diff, m_ir->getInt64(128)), second_test_for_complete_op), _begin_op, _inc_res, m_md_likely); } else { - m_ir->CreateCondBr(compare_data_change_res, _begin_op, _inc_res, m_md_unlikely); + m_ir->CreateCondBr(second_test_for_complete_op, _begin_op, _inc_res, m_md_unlikely); } m_ir->SetInsertPoint(_begin_op); @@ -1323,7 +1340,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator if (!info.no_notify) { + const auto notify_block = llvm::BasicBlock::Create(m_context, "__putllc16_block_notify", m_function); + const auto notify_next = llvm::BasicBlock::Create(m_context, "__putllc16_block_notify_next", m_function); + + m_ir->CreateCondBr(compare_data_change_res, notify_block, notify_next); + m_ir->SetInsertPoint(notify_block); call("atomic_wait_engine::notify_all", static_cast(atomic_wait_engine::notify_all), rptr); + m_ir->CreateBr(notify_next); + m_ir->SetInsertPoint(notify_next); } m_ir->CreateBr(_success); @@ -1373,7 +1397,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator m_ir->SetInsertPoint(_final); } - void putllc0_pattern(const spu_program& /*prog*/, utils::address_range32 /*range*/) + void putllc0_pattern(const spu_program& /*prog*/, u64 pattern_info) { // Prevent store elimination m_block->store_context_ctr[s_reg_mfc_eal]++; @@ -1401,6 +1425,18 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator } }; + const union putllc16_or_0_info + { + u64 data; + bf_t required_pc; + bf_t type; + bf_t runtime16_select; + bf_t no_notify; + bf_t reg; + bf_t off18; + bf_t reg2; + } info = std::bit_cast(pattern_info); + const auto _next = llvm::BasicBlock::Create(m_context, "", m_function); const auto _next0 = 
llvm::BasicBlock::Create(m_context, "", m_function); const auto _fail = llvm::BasicBlock::Create(m_context, "", m_function); @@ -1409,6 +1445,19 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator const auto _eal = (get_reg_fixed(s_reg_mfc_eal) & -128).eval(m_ir); const auto _raddr = m_ir->CreateLoad(get_type(), spu_ptr(&spu_thread::raddr)); + if (info.required_pc && info.required_pc != SPU_LS_SIZE) + { + const auto short_op = llvm::BasicBlock::Create(m_context, "__putllc0_short_op", m_function); + const auto heavy_op = llvm::BasicBlock::Create(m_context, "__putllc0_heavy_op", m_function); + + m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->getInt32(info.required_pc), m_base_pc), heavy_op, short_op); + m_ir->SetInsertPoint(heavy_op); + update_pc(); + call("spu_exec_mfc_cmd", &exec_mfc_cmd, m_thread); + m_ir->CreateBr(_final); + m_ir->SetInsertPoint(short_op); + } + m_ir->CreateCondBr(m_ir->CreateAnd(m_ir->CreateICmpEQ(_eal, _raddr), m_ir->CreateIsNotNull(_raddr)), _next, _fail, m_md_likely); m_ir->SetInsertPoint(_next); @@ -2143,12 +2192,12 @@ public: { case inst_attr::putllc0: { - putllc0_pattern(func, m_patterns.at(m_pos - start).range); + putllc0_pattern(func, m_patterns.at(m_pos - start).info); continue; } case inst_attr::putllc16: { - putllc16_pattern(func, m_patterns.at(m_pos - start).range); + putllc16_pattern(func, m_patterns.at(m_pos - start).info); continue; } case inst_attr::omit: diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index ddee888b1e..6bddb5a035 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -397,12 +397,12 @@ protected: struct pattern_info { - utils::address_range32 range; + u64 info; }; std::unordered_map m_patterns; - void add_pattern(bool fill_all, inst_attr attr, u32 start, u32 end = -1); + void add_pattern(bool fill_all, inst_attr attr, u32 start, u64 info); private: // For private use diff --git a/rpcs3/Emu/Cell/lv2/sys_fs.cpp 
b/rpcs3/Emu/Cell/lv2/sys_fs.cpp index 1f76bb7090..5bb74808be 100644 --- a/rpcs3/Emu/Cell/lv2/sys_fs.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_fs.cpp @@ -1036,7 +1036,6 @@ lv2_file::open_result_t lv2_file::open(std::string_view vpath, s32 flags, s32 mo error_code sys_fs_open(ppu_thread& ppu, vm::cptr path, s32 flags, vm::ptr fd, s32 mode, vm::cptr arg, u64 size) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_open(path=%s, flags=%#o, fd=*0x%x, mode=%#o, arg=*0x%x, size=0x%llx)", path, flags, fd, mode, arg, size); @@ -1085,7 +1084,6 @@ error_code sys_fs_open(ppu_thread& ppu, vm::cptr path, s32 flags, vm::ptr< error_code sys_fs_read(ppu_thread& ppu, u32 fd, vm::ptr buf, u64 nbytes, vm::ptr nread) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.trace("sys_fs_read(fd=%d, buf=*0x%x, nbytes=0x%llx, nread=*0x%x)", fd, buf, nbytes, nread); @@ -1122,6 +1120,11 @@ error_code sys_fs_read(ppu_thread& ppu, u32 fd, vm::ptr buf, u64 nbytes, v return CELL_OK; } + if (nbytes >= 0x100000 && file->type != lv2_file_type::regular) + { + lv2_obj::sleep(ppu); + } + std::unique_lock lock(file->mp->mutex); if (!file->file) @@ -1154,7 +1157,6 @@ error_code sys_fs_read(ppu_thread& ppu, u32 fd, vm::ptr buf, u64 nbytes, v error_code sys_fs_write(ppu_thread& ppu, u32 fd, vm::cptr buf, u64 nbytes, vm::ptr nwrite) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.trace("sys_fs_write(fd=%d, buf=*0x%x, nbytes=0x%llx, nwrite=*0x%x)", fd, buf, nbytes, nwrite); @@ -1237,7 +1239,6 @@ error_code sys_fs_write(ppu_thread& ppu, u32 fd, vm::cptr buf, u64 nbytes, error_code sys_fs_close(ppu_thread& ppu, u32 fd) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); const auto file = idm::get_unlocked(fd); @@ -1314,7 +1315,6 @@ error_code sys_fs_close(ppu_thread& ppu, u32 fd) error_code sys_fs_opendir(ppu_thread& ppu, vm::cptr path, vm::ptr fd) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_opendir(path=%s, fd=*0x%x)", path, fd); @@ 
-1491,7 +1491,6 @@ error_code sys_fs_readdir(ppu_thread& ppu, u32 fd, vm::ptr dir, vm error_code sys_fs_closedir(ppu_thread& ppu, u32 fd) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_closedir(fd=%d)", fd); @@ -1506,7 +1505,6 @@ error_code sys_fs_closedir(ppu_thread& ppu, u32 fd) error_code sys_fs_stat(ppu_thread& ppu, vm::cptr path, vm::ptr sb) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_stat(path=%s, sb=*0x%x)", path, sb); @@ -1610,7 +1608,6 @@ error_code sys_fs_stat(ppu_thread& ppu, vm::cptr path, vm::ptr error_code sys_fs_fstat(ppu_thread& ppu, u32 fd, vm::ptr sb) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_fstat(fd=%d, sb=*0x%x)", fd, sb); @@ -1666,7 +1663,6 @@ error_code sys_fs_link(ppu_thread&, vm::cptr from, vm::cptr to) error_code sys_fs_mkdir(ppu_thread& ppu, vm::cptr path, s32 mode) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_mkdir(path=%s, mode=%#o)", path, mode); @@ -1728,7 +1724,6 @@ error_code sys_fs_mkdir(ppu_thread& ppu, vm::cptr path, s32 mode) error_code sys_fs_rename(ppu_thread& ppu, vm::cptr from, vm::cptr to) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_rename(from=%s, to=%s)", from, to); @@ -1794,7 +1789,6 @@ error_code sys_fs_rename(ppu_thread& ppu, vm::cptr from, vm::cptr to error_code sys_fs_rmdir(ppu_thread& ppu, vm::cptr path) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_rmdir(path=%s)", path); @@ -1850,7 +1844,6 @@ error_code sys_fs_rmdir(ppu_thread& ppu, vm::cptr path) error_code sys_fs_unlink(ppu_thread& ppu, vm::cptr path) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_unlink(path=%s)", path); @@ -1951,8 +1944,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 case 0x8000000a: // cellFsReadWithOffset case 0x8000000b: // cellFsWriteWithOffset { - lv2_obj::sleep(ppu); - const auto arg = 
vm::static_ptr_cast(_arg); if (_size < arg.size()) @@ -1992,6 +1983,11 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 sys_fs.error("%s type: Writing %u bytes to FD=%d (path=%s)", file->type, arg->size, file->name.data()); } + if (op == 0x8000000a && file->type != lv2_file_type::regular && arg->size >= 0x100000) + { + lv2_obj::sleep(ppu); + } + std::unique_lock wlock(file->mp->mutex, std::defer_lock); std::shared_lock rlock(file->mp->mutex, std::defer_lock); @@ -2047,8 +2043,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 case 0x80000009: // cellFsSdataOpenByFd { - lv2_obj::sleep(ppu); - const auto arg = vm::static_ptr_cast(_arg); if (_size < arg.size()) @@ -2102,8 +2096,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 case 0xc0000002: // cellFsGetFreeSize (TODO) { - lv2_obj::sleep(ppu); - const auto arg = vm::static_ptr_cast(_arg); const auto& mp = g_fxo->get().lookup("/dev_hdd0"); @@ -2418,8 +2410,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 case 0xe0000012: // cellFsGetDirectoryEntries { - lv2_obj::sleep(ppu); - const auto arg = vm::static_ptr_cast(_arg); if (_size < arg.size()) @@ -2434,8 +2424,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 return CELL_EBADF; } - ppu.check_state(); - u32 read_count = 0; // NOTE: This function is actually capable of reading only one entry at a time @@ -2593,7 +2581,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 error_code sys_fs_lseek(ppu_thread& ppu, u32 fd, s64 offset, s32 whence, vm::ptr pos) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.trace("sys_fs_lseek(fd=%d, offset=0x%llx, whence=0x%x, pos=*0x%x)", fd, offset, whence, pos); @@ -2639,7 +2626,6 @@ error_code sys_fs_lseek(ppu_thread& ppu, u32 fd, s64 offset, s32 whence, vm::ptr error_code sys_fs_fdatasync(ppu_thread& ppu, u32 fd) { ppu.state += cpu_flag::wait; - 
lv2_obj::sleep(ppu); sys_fs.trace("sys_fs_fdadasync(fd=%d)", fd); @@ -2650,6 +2636,8 @@ error_code sys_fs_fdatasync(ppu_thread& ppu, u32 fd) return CELL_EBADF; } + lv2_obj::sleep(ppu); + std::lock_guard lock(file->mp->mutex); if (!file->file) @@ -2664,7 +2652,6 @@ error_code sys_fs_fdatasync(ppu_thread& ppu, u32 fd) error_code sys_fs_fsync(ppu_thread& ppu, u32 fd) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.trace("sys_fs_fsync(fd=%d)", fd); @@ -2675,6 +2662,8 @@ error_code sys_fs_fsync(ppu_thread& ppu, u32 fd) return CELL_EBADF; } + lv2_obj::sleep(ppu); + std::lock_guard lock(file->mp->mutex); if (!file->file) @@ -2763,7 +2752,6 @@ error_code sys_fs_get_block_size(ppu_thread& ppu, vm::cptr path, vm::ptr path, u64 size) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_truncate(path=%s, size=0x%llx)", path, size); @@ -2815,7 +2803,6 @@ error_code sys_fs_truncate(ppu_thread& ppu, vm::cptr path, u64 size) error_code sys_fs_ftruncate(ppu_thread& ppu, u32 fd, u64 size) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_ftruncate(fd=%d, size=0x%llx)", fd, size); @@ -3021,7 +3008,6 @@ error_code sys_fs_disk_free(ppu_thread& ppu, vm::cptr path, vm::ptr t error_code sys_fs_utime(ppu_thread& ppu, vm::cptr path, vm::cptr timep) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_utime(path=%s, timep=*0x%x)", path, timep); sys_fs.warning("** actime=%u, modtime=%u", timep->actime, timep->modtime); diff --git a/rpcs3/Emu/RSX/Common/io_buffer.h b/rpcs3/Emu/RSX/Common/io_buffer.h index 29397d1136..64f95a5e61 100644 --- a/rpcs3/Emu/RSX/Common/io_buffer.h +++ b/rpcs3/Emu/RSX/Common/io_buffer.h @@ -80,6 +80,7 @@ namespace rsx std::span as_span() const { auto bytes = data(); + ensure((reinterpret_cast(bytes) & (sizeof(T) - 1)) == 0, "IO buffer span cast requires naturally aligned pointers."); return { utils::bless(bytes), m_size / sizeof(T) }; } diff --git 
a/rpcs3/Emu/RSX/Common/simple_array.hpp b/rpcs3/Emu/RSX/Common/simple_array.hpp index 090c51d674..4f8d2a5100 100644 --- a/rpcs3/Emu/RSX/Common/simple_array.hpp +++ b/rpcs3/Emu/RSX/Common/simple_array.hpp @@ -3,11 +3,53 @@ #include #include #include +#include #include "reverse_ptr.hpp" namespace rsx { + namespace aligned_allocator + { + template + void* malloc(size_t size) + { +#ifdef _WIN32 + return _aligned_malloc(size, Align); +#else + return std::aligned_alloc(Align, size); +#endif + } + + template + void* realloc(void* prev_ptr, [[maybe_unused]] size_t prev_size, size_t new_size) + { + if (prev_size >= new_size) + { + return prev_ptr; + } + + ensure(reinterpret_cast(prev_ptr) % Align == 0, "Pointer not aligned to Align"); +#ifdef _WIN32 + return _aligned_realloc(prev_ptr, new_size, Align); +#else + void* ret = std::aligned_alloc(Align, new_size); + std::memcpy(ret, prev_ptr, std::min(prev_size, new_size)); + std::free(prev_ptr); + return ret; +#endif + } + + static inline void free(void* ptr) + { +#ifdef _WIN32 + _aligned_free(ptr); +#else + std::free(ptr); +#endif + } + } + template concept span_like = requires(C& c) { @@ -15,7 +57,13 @@ namespace rsx { c.size() } -> std::integral; }; - template + template + concept is_trivially_comparable_v = + requires (T t1, U t2) { + { t1 == t2 } -> std::same_as; + }; + + template requires std::is_trivially_destructible_v && std::is_trivially_copyable_v struct simple_array { @@ -28,7 +76,7 @@ namespace rsx private: static constexpr u32 _local_capacity = std::max(64u / sizeof(Ty), 1u); - char _local_storage[_local_capacity * sizeof(Ty)]; + alignas(Align) char _local_storage[_local_capacity * sizeof(Ty)]; u32 _capacity = _local_capacity; Ty* _data = _local_capacity ? 
reinterpret_cast(_local_storage) : nullptr; @@ -128,7 +176,7 @@ namespace rsx { if (!is_local_storage()) { - free(_data); + aligned_allocator::free(_data); } _data = nullptr; @@ -196,13 +244,13 @@ namespace rsx if (is_local_storage()) { // Switch to heap storage - ensure(_data = static_cast(std::malloc(sizeof(Ty) * size))); + ensure(_data = static_cast(aligned_allocator::malloc(sizeof(Ty) * size))); std::memcpy(static_cast(_data), _local_storage, size_bytes()); } else { // Extend heap storage - ensure(_data = static_cast(std::realloc(_data, sizeof(Ty) * size))); // "realloc() failed!" + ensure(_data = static_cast(aligned_allocator::realloc(_data, size_bytes(), sizeof(Ty) * size))); // "realloc() failed!" } _capacity = size; @@ -457,6 +505,50 @@ namespace rsx return false; } + /** + * Note that find and find_if return pointers to objects and not iterators for simplified usage. + * It is functionally equivalent to retrieve a nullptr meaning empty object stored and nullptr meaning not found for all practical uses of this container. 
+ */ + template + requires is_trivially_comparable_v + Ty* find(const T& value) + { + for (auto it = begin(); it != end(); ++it) + { + if (*it == value) + { + return &(*it); + } + } + return nullptr; + } + + // Remove when we switch to C++23 + template + requires is_trivially_comparable_v + const Ty* find(const T& value) const + { + return const_cast*>(this)->find(value); + } + + Ty* find_if(std::predicate auto predicate) + { + for (auto it = begin(); it != end(); ++it) + { + if (std::invoke(predicate, *it)) + { + return &(*it); + } + } + return nullptr; + } + + // Remove with C++23 + const Ty* find_if(std::predicate auto predicate) const + { + return const_cast*>(this)->find_if(predicate); + } + bool erase_if(std::predicate auto predicate) { if (!_size) diff --git a/rpcs3/Emu/RSX/GL/GLCompute.h b/rpcs3/Emu/RSX/GL/GLCompute.h index 442d8e4a0d..91210497c7 100644 --- a/rpcs3/Emu/RSX/GL/GLCompute.h +++ b/rpcs3/Emu/RSX/GL/GLCompute.h @@ -338,10 +338,10 @@ namespace gl params.logd = rsx::ceil_log2(depth); set_parameters(cmd); - const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size); - const u32 texels_per_dword = std::max(4u / sizeof(_BlockType), 1u); // For block sizes less than 4 bytes wide - const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation) / texels_per_dword; - compute_task::run(cmd, linear_invocations); + const u32 word_count_per_invocation = std::max(sizeof(_BlockType) / 4u, 1u); + const u32 num_bytes_per_invocation = (word_count_per_invocation * 4u * optimal_group_size); + const u32 workgroup_invocations = utils::aligned_div(data_length, num_bytes_per_invocation); + compute_task::run(cmd, workgroup_invocations); } }; diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 6fd04020ec..7b43cfc0a7 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -590,7 +590,7 @@ namespace gl void fill_texture(gl::command_context& cmd, texture* dst, int format, 
const std::vector &input_layouts, - bool is_swizzled, GLenum gl_format, GLenum gl_type, rsx::simple_array& staging_buffer) + bool is_swizzled, GLenum gl_format, GLenum gl_type, std::span staging_buffer) { const auto& driver_caps = gl::get_driver_caps(); rsx::texture_uploader_capabilities caps @@ -841,7 +841,7 @@ namespace gl void upload_texture(gl::command_context& cmd, texture* dst, u32 gcm_format, bool is_swizzled, const std::vector& subresources_layout) { // Calculate staging buffer size - rsx::simple_array data_upload_buf; + rsx::simple_array data_upload_buf; rsx::texture_uploader_capabilities caps { .supports_dxt = gl::get_driver_caps().EXT_texture_compression_s3tc_supported }; if (rsx::is_compressed_host_format(caps, gcm_format)) diff --git a/rpcs3/Emu/RSX/Program/Assembler/CFG.h b/rpcs3/Emu/RSX/Program/Assembler/CFG.h new file mode 100644 index 0000000000..9bc44a22d1 --- /dev/null +++ b/rpcs3/Emu/RSX/Program/Assembler/CFG.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include "IR.h" + +#include + +struct RSXFragmentProgram; + +namespace rsx::assembler +{ + struct FlowGraph + { + std::list blocks; + + BasicBlock* push(BasicBlock* parent = nullptr, u32 pc = 0, EdgeType edge_type = EdgeType::NONE) + { + if (!parent && !blocks.empty()) + { + parent = &blocks.back(); + } + + blocks.push_back({}); + BasicBlock* new_block = &blocks.back(); + + if (parent) + { + parent->insert_succ(new_block, edge_type); + new_block->insert_pred(parent, edge_type); + } + + new_block->id = pc; + return new_block; + } + }; + + FlowGraph deconstruct_fragment_program(const RSXFragmentProgram& prog); +} + diff --git a/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp b/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp new file mode 100644 index 0000000000..d8de4eda0b --- /dev/null +++ b/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp @@ -0,0 +1,193 @@ +#include "stdafx.h" + +#include "CFG.h" + +#include "Emu/RSX/Common/simple_array.hpp" +#include "Emu/RSX/Program/RSXFragmentProgram.h" + +#include 
+#include +#include + +#if defined(ARCH_ARM64) +#if !defined(_MSC_VER) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#pragma GCC diagnostic ignored "-Wold-style-cast" +#endif +#undef FORCE_INLINE +#include "Emu/CPU/sse2neon.h" +#if !defined(_MSC_VER) +#pragma GCC diagnostic pop +#endif +#endif + +namespace rsx::assembler +{ + inline v128 decode_instruction(const v128& raw_inst) + { + // Fixup of RSX's weird half-word shuffle for FP instructions + // Convert input stream into LE u16 array + __m128i _mask0 = _mm_set1_epi32(0xff00ff00); + __m128i _mask1 = _mm_set1_epi32(0x00ff00ff); + __m128i a = _mm_slli_epi32(static_cast<__m128i>(raw_inst), 8); + __m128i b = _mm_srli_epi32(static_cast<__m128i>(raw_inst), 8); + __m128i ret = _mm_or_si128( + _mm_and_si128(_mask0, a), + _mm_and_si128(_mask1, b) + ); + return v128::loadu(&ret); + } + + FlowGraph deconstruct_fragment_program(const RSXFragmentProgram& prog) + { + // For a flowgraph, we don't care at all about the actual contents, just flow control instructions. 
+ OPDEST dst{}; + SRC0 src0{}; + SRC1 src1{}; + SRC2 src2{}; + + u32 pc = 0; // Program counter + bool end = false; + + // Flow control data + rsx::simple_array end_blocks; + rsx::simple_array else_blocks; + + // Data block + u32* data = static_cast(prog.get_data()); + + // Output + FlowGraph graph{}; + BasicBlock* bb = graph.push(); + + auto find_block_for_pc = [&](u32 id) -> BasicBlock* + { + auto found = std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == id)); + if (found != graph.blocks.end()) + { + return &(*found); + } + return nullptr; + }; + + auto safe_insert_block = [&](BasicBlock* parent, u32 id, EdgeType edge_type) -> BasicBlock* + { + if (auto found = find_block_for_pc(id)) + { + parent->insert_succ(found, edge_type); + found->insert_pred(parent, edge_type); + return found; + } + + return graph.push(parent, id, edge_type); + }; + + auto includes_literal_constant = [&]() + { + return src0.reg_type == RSX_FP_REGISTER_TYPE_CONSTANT || + src1.reg_type == RSX_FP_REGISTER_TYPE_CONSTANT || + src2.reg_type == RSX_FP_REGISTER_TYPE_CONSTANT; + }; + + while (!end) + { + BasicBlock** found = end_blocks.find_if(FN(x->id == pc)); + + if (!found) + { + found = else_blocks.find_if(FN(x->id == pc)); + } + + if (found) + { + bb = *found; + } + + const v128 raw_inst = v128::loadu(data, pc); + v128 decoded = decode_instruction(raw_inst); + + dst.HEX = decoded._u32[0]; + src0.HEX = decoded._u32[1]; + src1.HEX = decoded._u32[2]; + src2.HEX = decoded._u32[3]; + + end = !!dst.end; + const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6); + + if (opcode == RSX_FP_OPCODE_NOP) + { + pc++; + continue; + } + + bb->instructions.push_back({}); + auto& ir_inst = bb->instructions.back(); + std::memcpy(ir_inst.bytecode, &decoded._u32[0], 16); + ir_inst.length = 4; + ir_inst.addr = pc * 16; + + switch (opcode) + { + case RSX_FP_OPCODE_BRK: + break; + case RSX_FP_OPCODE_CAL: + // Unimplemented. 
Also unused by the RSX compiler + fmt::throw_exception("Unimplemented FP CAL instruction."); + break; + case RSX_FP_OPCODE_FENCT: + break; + case RSX_FP_OPCODE_FENCB: + break; + case RSX_FP_OPCODE_RET: + // Outside a subroutine, this doesn't mean much. The main block can conditionally return to stop execution early. + // This will not alter flow control. + break; + case RSX_FP_OPCODE_IFE: + { + // Inserts if and else and end blocks + auto parent = bb; + bb = safe_insert_block(parent, pc + 1, EdgeType::IF); + if (src2.end_offset != src1.else_offset) + { + else_blocks.push_back(safe_insert_block(parent, src1.else_offset >> 2, EdgeType::ELSE)); + } + end_blocks.push_back(safe_insert_block(parent, src2.end_offset >> 2, EdgeType::ENDIF)); + break; + } + case RSX_FP_OPCODE_LOOP: + case RSX_FP_OPCODE_REP: + { + // Inserts for and end blocks + auto parent = bb; + bb = safe_insert_block(parent, pc + 1, EdgeType::LOOP); + end_blocks.push_back(safe_insert_block(parent, src2.end_offset >> 2, EdgeType::ENDLOOP)); + break; + } + default: + if (includes_literal_constant()) + { + const v128 constant_literal = v128::loadu(data, pc); + v128 decoded_literal = decode_instruction(constant_literal); + + std::memcpy(ir_inst.bytecode + 4, &decoded_literal._u32[0], 16); + ir_inst.length += 4; + pc++; + } + } + + pc++; + } + + // Sort edges for each block by distance + for (auto& block : graph.blocks) + { + std::sort(block.pred.begin(), block.pred.end(), FN(x.from->id > y.from->id)); + std::sort(block.succ.begin(), block.succ.end(), FN(x.to->id < y.to->id)); + } + + // Sort block nodes by distance + graph.blocks.sort(FN(x.id < y.id)); + return graph; + } +} diff --git a/rpcs3/Emu/RSX/Program/Assembler/IR.h b/rpcs3/Emu/RSX/Program/Assembler/IR.h new file mode 100644 index 0000000000..65960f3d99 --- /dev/null +++ b/rpcs3/Emu/RSX/Program/Assembler/IR.h @@ -0,0 +1,95 @@ +#pragma once + +#include + +namespace rsx::assembler +{ + struct BasicBlock; + + struct Register + { + int id = 0; + bool f16 
= false; + }; + + struct RegisterRef + { + Register reg{}; + + // Vector information + union + { + u32 mask; + + struct + { + bool x : 1; + bool y : 1; + bool z : 1; + bool w : 1; + }; + }; + }; + + struct Instruction + { + // Raw data. Every instruction is max 128 bits. + // Each instruction can also have 128 bits of literal/embedded data. + u32 bytecode[8]{ {} }; + u32 addr = 0; + + // Decoded + u32 opcode = 0; + u8 length = 4; // Length in dwords + + // Padding + u8 reserved0 = 0; + u16 reserved1 = 0; + + // References + std::vector srcs; + std::vector dsts; + }; + + enum class EdgeType + { + NONE, + IF, + ELSE, + ENDIF, + LOOP, + ENDLOOP, + }; + + struct FlowEdge + { + EdgeType type = EdgeType::NONE; + BasicBlock* from = nullptr; + BasicBlock* to = nullptr; + }; + + struct BasicBlock + { + u32 id = 0; + std::vector instructions; // Program instructions for the RSX processor + std::vector succ; // Forward edges. Sorted closest first. + std::vector pred; // Back edges. Sorted closest first. 
+ + std::vector prologue; // Prologue, created by passes + std::vector epilogue; // Epilogue, created by passes + + FlowEdge* insert_succ(BasicBlock* b, EdgeType type = EdgeType::NONE) + { + FlowEdge e{ .type = type, .from = this, .to = b }; + succ.push_back(e); + return &succ.back(); + } + + FlowEdge* insert_pred(BasicBlock* b, EdgeType type = EdgeType::NONE) + { + FlowEdge e{ .type = type, .from = b, .to = this }; + pred.push_back(e); + return &pred.back(); + } + }; +} diff --git a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp index e5742fffda..2ebfd7d8d7 100644 --- a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp @@ -234,7 +234,8 @@ std::string FragmentProgramDecompiler::AddCond() std::string FragmentProgramDecompiler::AddConst() { - const u32 constant_id = m_size + (4 * sizeof(u32)); + ensure(m_instruction->length == 8); + const u32 constant_id = m_instruction->addr + 16; u32 index = umax; if (auto found = m_constant_offsets.find(constant_id); @@ -249,9 +250,6 @@ std::string FragmentProgramDecompiler::AddConst() m_constant_offsets[constant_id] = index; } - // Skip next instruction, its just a literal - m_offset = 2 * 4 * sizeof(u32); - // Return the next offset index return "_fetch_constant(" + std::to_string(index) + ")"; } @@ -1297,7 +1295,7 @@ bool FragmentProgramDecompiler::handle_tex_srb(u32 opcode) std::string FragmentProgramDecompiler::Decompile() { - auto data = static_cast*>(m_prog.get_data()); + const auto graph = rsx::assembler::deconstruct_fragment_program(m_prog); m_size = 0; m_location = 0; m_loop_count = 0; @@ -1314,141 +1312,130 @@ std::string FragmentProgramDecompiler::Decompile() int forced_unit = FORCE_NONE; - while (true) + for (const auto &block : graph.blocks) { - for (auto found = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size); - found != m_end_offsets.end(); - found = std::find(m_end_offsets.begin(), 
m_end_offsets.end(), m_size)) + // TODO: Handle block prologue if any + if (!block.pred.empty()) { - m_end_offsets.erase(found); - m_code_level--; - AddCode("}"); - m_loop_count--; + // CFG guarantees predecessors are sorted, closest one first + for (const auto& pred : block.pred) + { + switch (pred.type) + { + case rsx::assembler::EdgeType::ENDLOOP: + m_loop_count--; + [[ fallthrough ]]; + case rsx::assembler::EdgeType::ENDIF: + m_code_level--; + AddCode("}"); + break; + case rsx::assembler::EdgeType::LOOP: + m_loop_count++; + [[ fallthrough ]]; + case rsx::assembler::EdgeType::IF: + // Instruction will be inserted by the SIP decoder + AddCode("{"); + m_code_level++; + break; + case rsx::assembler::EdgeType::ELSE: + // This one needs more testing + m_code_level--; + AddCode("}"); + AddCode("else"); + AddCode("{"); + m_code_level++; + break; + default: + // Start a new block anyway + fmt::throw_exception("Unexpected block found"); + } + } } - for (auto found = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size); - found != m_else_offsets.end(); - found = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size)) + for (const auto& inst : block.instructions) { - m_else_offsets.erase(found); - m_code_level--; - AddCode("}"); - AddCode("else"); - AddCode("{"); - m_code_level++; - } + m_instruction = &inst; - dst.HEX = GetData(data[0]); - src0.HEX = GetData(data[1]); - src1.HEX = GetData(data[2]); - src2.HEX = GetData(data[3]); + dst.HEX = inst.bytecode[0]; + src0.HEX = inst.bytecode[1]; + src1.HEX = inst.bytecode[2]; + src2.HEX = inst.bytecode[3]; - m_offset = 4 * sizeof(u32); - opflags = 0; + opflags = 0; - const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6); + const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6); + + auto SIP = [&]() + { + switch (opcode) + { + case RSX_FP_OPCODE_BRK: + if (m_loop_count) AddFlowOp("break"); + else rsx_log.error("BRK opcode found outside of a loop"); + break; + case RSX_FP_OPCODE_CAL: + 
rsx_log.error("Unimplemented SIP instruction: CAL"); + break; + case RSX_FP_OPCODE_FENCT: + AddCode("//FENCT"); + forced_unit = FORCE_SCT; + break; + case RSX_FP_OPCODE_FENCB: + AddCode("//FENCB"); + forced_unit = FORCE_SCB; + break; + case RSX_FP_OPCODE_IFE: + AddCode("if($cond)"); + break; + case RSX_FP_OPCODE_LOOP: + AddCode(fmt::format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) //LOOP", + m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); + break; + case RSX_FP_OPCODE_REP: + AddCode(fmt::format("if($cond) for(int i%u = %u; i%u < %u; i%u += %u) //REP", + m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); + break; + case RSX_FP_OPCODE_RET: + AddFlowOp("return"); + break; + + default: + return false; + } + + return true; + }; - auto SIP = [&]() - { switch (opcode) { - case RSX_FP_OPCODE_BRK: - if (m_loop_count) AddFlowOp("break"); - else rsx_log.error("BRK opcode found outside of a loop"); + case RSX_FP_OPCODE_NOP: break; - case RSX_FP_OPCODE_CAL: - rsx_log.error("Unimplemented SIP instruction: CAL"); + case RSX_FP_OPCODE_KIL: + properties.has_discard_op = true; + AddFlowOp("_kill()"); break; - case RSX_FP_OPCODE_FENCT: - AddCode("//FENCT"); - forced_unit = FORCE_SCT; - break; - case RSX_FP_OPCODE_FENCB: - AddCode("//FENCB"); - forced_unit = FORCE_SCB; - break; - case RSX_FP_OPCODE_IFE: - AddCode("if($cond)"); - if (src2.end_offset != src1.else_offset) - m_else_offsets.push_back(src1.else_offset << 2); - m_end_offsets.push_back(src2.end_offset << 2); - AddCode("{"); - m_code_level++; - break; - case RSX_FP_OPCODE_LOOP: - if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) - { - AddCode(fmt::format("//$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //LOOP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); - } - else - { - AddCode(fmt::format("$ifcond for(int i%u = %u; i%u < %u; i%u += 
%u) //LOOP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); - m_loop_count++; - m_end_offsets.push_back(src2.end_offset << 2); - AddCode("{"); - m_code_level++; - } - break; - case RSX_FP_OPCODE_REP: - if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) - { - AddCode(fmt::format("//$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //REP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); - } - else - { - AddCode(fmt::format("if($cond) for(int i%u = %u; i%u < %u; i%u += %u) //REP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); - m_loop_count++; - m_end_offsets.push_back(src2.end_offset << 2); - AddCode("{"); - m_code_level++; - } - break; - case RSX_FP_OPCODE_RET: - AddFlowOp("return"); - break; - default: - return false; + int prev_force_unit = forced_unit; + + // Some instructions do not respect forced unit + // Tested with Tales of Vesperia + if (SIP()) break; + if (handle_tex_srb(opcode)) break; + + // FENCT/FENCB do not actually reject instructions if they dont match the forced unit + // Looks like they are optimization hints and not hard-coded forced paths + if (handle_sct_scb(opcode)) break; + forced_unit = FORCE_NONE; + + rsx_log.error("Unknown/illegal instruction: 0x%x (forced unit %d)", opcode, prev_force_unit); + break; } - return true; - }; - - switch (opcode) - { - case RSX_FP_OPCODE_NOP: - break; - case RSX_FP_OPCODE_KIL: - properties.has_discard_op = true; - AddFlowOp("_kill()"); - break; - default: - int prev_force_unit = forced_unit; - - // Some instructions do not respect forced unit - // Tested with Tales of Vesperia - if (SIP()) break; - if (handle_tex_srb(opcode)) break; - - // FENCT/FENCB do not actually reject instructions if they dont match the forced unit - // Looks like they are optimization hints and not hard-coded forced paths - if 
(handle_sct_scb(opcode)) break; - forced_unit = FORCE_NONE; - - rsx_log.error("Unknown/illegal instruction: 0x%x (forced unit %d)", opcode, prev_force_unit); - break; + m_size += m_instruction->length * 4; + if (dst.end) break; } - m_size += m_offset; - - if (dst.end) break; - - ensure(m_offset % sizeof(u32) == 0); - data += m_offset / sizeof(u32); + // TODO: Handle block epilogue if needed } while (m_code_level > 1) diff --git a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h index 467c6f3ac7..b68750bdfc 100644 --- a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h +++ b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h @@ -3,6 +3,8 @@ #include "FragmentProgramRegister.h" #include "RSXFragmentProgram.h" +#include "Assembler/CFG.h" + #include #include @@ -39,17 +41,16 @@ class FragmentProgramDecompiler SRC2 src2; u32 opflags; + const rsx::assembler::Instruction* m_instruction; + std::string main; u32& m_size; u32 m_const_index = 0; - u32 m_offset; u32 m_location = 0; bool m_is_valid_ucode = true; u32 m_loop_count; int m_code_level; - std::vector m_end_offsets; - std::vector m_else_offsets; std::unordered_map m_constant_offsets; std::array temp_registers; diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl index 708f703983..1e0b66c36c 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl @@ -103,34 +103,50 @@ uint get_z_index(const in uint x_, const in uint y_, const in uint z_) #if USE_16BIT_ADDRESSING -void write16(inout uint accumulator, const in uint subword, const in uint src_id, const in uint dst_id) +void decode_16b(const in uint texel_id, in uint x, const in uint y, const in uint z) { - const uint masks[] = { 0x0000FFFF, 0xFFFF0000 }; - accumulator |= data_in[src_id / 2] & masks[subword]; + uint accumulator = 0; - if (subword == 1) + const uint subword_count = 
min(invocation.size.x, 2); + for (uint subword = 0; subword < subword_count; ++subword, ++x) { - data_out[dst_id / 2] = %f(accumulator); + uint src_texel_id = get_z_index(x, y, z); + uint src_id = (src_texel_id + invocation.data_offset); + int src_bit_offset = int(src_id % 2) << 4; + uint src_value = bitfieldExtract(data_in[src_id / 2], src_bit_offset, 16); + accumulator = bitfieldInsert(accumulator, src_value, int(subword << 4), 16); } + + data_out[texel_id / 2] = %f(accumulator); } #elif USE_8BIT_ADDRESSING -void write8(inout uint accumulator, const in uint subword, const in uint src_id, const in uint dst_id) +void decode_8b(const in uint texel_id, in uint x, const in uint y, const in uint z) { - const uint masks[] = { 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000 }; - accumulator |= data_in[src_id / 4] & masks[subword]; + uint accumulator = 0; - if (subword == 3) + const uint subword_count = min(invocation.size.x, 4); + for (uint subword = 0; subword < subword_count; ++subword, ++x) { - data_out[dst_id / 4] = accumulator; + uint src_texel_id = get_z_index(x, y, z); + uint src_id = (src_texel_id + invocation.data_offset); + int src_bit_offset = int(src_id % 4) << 3; + uint src_value = bitfieldExtract(data_in[src_id / 4], src_bit_offset, 8); + accumulator = bitfieldInsert(accumulator, src_value, int(subword << 3), 8); } + + data_out[texel_id / 4] = accumulator; } #else -void write32(const in uint word_count, in uint src_id, in uint dst_id) +void decode_32b(const in uint texel_id, const in uint word_count, const in uint x, const in uint y, const in uint z) { + uint src_texel_id = get_z_index(x, y, z); + uint dst_id = (texel_id * word_count); + uint src_id = (src_texel_id + invocation.data_offset) * word_count; + for (uint i = 0; i < word_count; ++i) { uint value = data_in[src_id++]; @@ -165,23 +181,11 @@ void main() uint x = (slice_offset % row_length); #if USE_8BIT_ADDRESSING - for (uint subword = 0, accumulator = 0; subword < 4; ++subword, ++x) { + 
decode_8b(texel_id, x, y, z); #elif USE_16BIT_ADDRESSING - for (uint subword = 0, accumulator = 0; subword < 2; ++subword, ++x) { -#endif - - uint src_texel_id = get_z_index(x, y, z); - uint dst_id = (texel_id * word_count); - uint src_id = (src_texel_id + invocation.data_offset) * word_count; - -#if USE_8BIT_ADDRESSING - write8(accumulator, subword, src_id, dst_id); - } -#elif USE_16BIT_ADDRESSING - write16(accumulator, subword, src_id, dst_id); - } + decode_16b(texel_id, x, y, z); #else - write32(word_count, src_id, dst_id); + decode_32b(texel_id, word_count, x, y, z); #endif } diff --git a/rpcs3/Emu/RSX/VK/VKCompute.h b/rpcs3/Emu/RSX/VK/VKCompute.h index a62d93ec74..81f8d6a165 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.h +++ b/rpcs3/Emu/RSX/VK/VKCompute.h @@ -475,10 +475,10 @@ namespace vk params.logh = rsx::ceil_log2(height); params.logd = rsx::ceil_log2(depth); - const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size); - const u32 texels_per_dword = std::max(4u / sizeof(_BlockType), 1u); // For block sizes less than 4 bytes wide - const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation) / texels_per_dword; - compute_task::run(cmd, linear_invocations); + const u32 word_count_per_invocation = std::max(sizeof(_BlockType) / 4u, 1u); + const u32 num_bytes_per_invocation = (word_count_per_invocation * 4u * optimal_group_size); + const u32 workgroup_invocations = utils::aligned_div(data_length, num_bytes_per_invocation); + compute_task::run(cmd, workgroup_invocations); } }; diff --git a/rpcs3/Emu/RSX/VK/VKQueryPool.h b/rpcs3/Emu/RSX/VK/VKQueryPool.h index 009afca379..ee2480b8fd 100644 --- a/rpcs3/Emu/RSX/VK/VKQueryPool.h +++ b/rpcs3/Emu/RSX/VK/VKQueryPool.h @@ -69,8 +69,9 @@ namespace vk void on_query_pool_released(std::unique_ptr& pool); - template class _List> - void free_queries(vk::command_buffer& cmd, _List& list) + template + requires std::ranges::range && std::same_as, u32> // List of u32 + void 
free_queries(vk::command_buffer& cmd, T& list) { for (const auto index : list) { diff --git a/rpcs3/Emu/RSX/VK/vkutils/buffer_object.cpp b/rpcs3/Emu/RSX/VK/vkutils/buffer_object.cpp index 4d7c5237cc..daf60ad03c 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/buffer_object.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/buffer_object.cpp @@ -39,11 +39,20 @@ namespace vk return false; } - buffer::buffer(const vk::render_device& dev, u64 size, const memory_type_info& memory_type, u32 access_flags, VkBufferUsageFlags usage, VkBufferCreateFlags flags, vmm_allocation_pool allocation_pool) + buffer::buffer( + const vk::render_device& dev, + u64 size, + const memory_type_info& memory_type, + u32 access_flags, + VkBufferUsageFlags usage, + VkBufferCreateFlags flags, + vmm_allocation_pool allocation_pool) : m_device(dev) { + const bool nullable = !!(flags & VK_BUFFER_CREATE_ALLOW_NULL_RPCS3); + info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - info.flags = flags; + info.flags = flags & ~VK_BUFFER_CREATE_SPECIAL_FLAGS_RPCS3; info.size = size; info.usage = usage; info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; @@ -60,8 +69,18 @@ namespace vk fmt::throw_exception("No compatible memory type was found!"); } - memory = std::make_unique(m_device, memory_reqs.size, memory_reqs.alignment, allocation_type_info, allocation_pool); - vkBindBufferMemory(dev, value, memory->get_vk_device_memory(), memory->get_vk_device_memory_offset()); + memory = std::make_unique(m_device, memory_reqs.size, memory_reqs.alignment, allocation_type_info, allocation_pool, nullable); + if (auto device_memory = memory->get_vk_device_memory(); + device_memory != VK_NULL_HANDLE) + { + vkBindBufferMemory(dev, value, device_memory, memory->get_vk_device_memory_offset()); + } + else + { + ensure(nullable); + vkDestroyBuffer(m_device, value, nullptr); + value = VK_NULL_HANDLE; + } } buffer::buffer(const vk::render_device& dev, VkBufferUsageFlags usage, void* host_pointer, u64 size) diff --git a/rpcs3/Emu/RSX/VK/vkutils/buffer_object.h 
b/rpcs3/Emu/RSX/VK/vkutils/buffer_object.h index c74cb1aaa5..ba5309749a 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/buffer_object.h +++ b/rpcs3/Emu/RSX/VK/vkutils/buffer_object.h @@ -7,6 +7,13 @@ namespace vk { + enum : u32 + { + VK_BUFFER_CREATE_ALLOW_NULL_RPCS3 = 0x80000000, + + VK_BUFFER_CREATE_SPECIAL_FLAGS_RPCS3 = (VK_BUFFER_CREATE_ALLOW_NULL_RPCS3) + }; + struct buffer_view : public unique_resource { VkBufferView value; @@ -30,8 +37,21 @@ namespace vk VkBufferCreateInfo info = {}; std::unique_ptr memory; - buffer(const vk::render_device& dev, u64 size, const memory_type_info& memory_type, u32 access_flags, VkBufferUsageFlags usage, VkBufferCreateFlags flags, vmm_allocation_pool allocation_pool); - buffer(const vk::render_device& dev, VkBufferUsageFlags usage, void* host_pointer, u64 size); + buffer( + const vk::render_device& dev, + u64 size, + const memory_type_info& memory_type, + u32 access_flags, + VkBufferUsageFlags usage, + VkBufferCreateFlags flags, + vmm_allocation_pool allocation_pool); + + buffer( + const vk::render_device& dev, + VkBufferUsageFlags usage, + void* host_pointer, + u64 size); + ~buffer(); void* map(u64 offset, u64 size); diff --git a/rpcs3/Emu/RSX/VK/vkutils/data_heap.cpp b/rpcs3/Emu/RSX/VK/vkutils/data_heap.cpp index ba1b4e79c1..7fa6a46a81 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/data_heap.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/data_heap.cpp @@ -47,9 +47,28 @@ namespace vk usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; memory_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; memory_index = memory_map.device_local; + m_prefer_writethrough = false; } - heap = std::make_unique(*g_render_device, size, memory_index, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM); + VkFlags create_flags = 0; + if (m_prefer_writethrough) + { + create_flags |= VK_BUFFER_CREATE_ALLOW_NULL_RPCS3; + } + + heap = std::make_unique(*g_render_device, size, memory_index, memory_flags, usage, create_flags, VMM_ALLOCATION_POOL_SYSTEM); + + if (!heap->value) + { + 
rsx_log.warning("Could not place heap '%s' into Re-BAR memory. Will attempt to use regular host-visible memory.", m_name); + ensure(m_prefer_writethrough); + + // We failed to place the buffer in rebar memory. Try again in host-visible. + m_prefer_writethrough = false; + auto gc = get_resource_manager(); + gc->dispose(heap); + heap = std::make_unique(*g_render_device, size, memory_map.host_visible_coherent, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM); + } initial_size = size; notify_on_grow = bool(notify); @@ -112,6 +131,7 @@ namespace vk auto gc = get_resource_manager(); if (shadow) { + ensure(!m_prefer_writethrough); rsx_log.warning("Buffer usage %u is not heap-compatible using this driver, explicit staging buffer in use", usage); gc->dispose(shadow); @@ -122,7 +142,25 @@ namespace vk } gc->dispose(heap); - heap = std::make_unique(*g_render_device, aligned_new_size, memory_index, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM); + + VkFlags create_flags = 0; + if (m_prefer_writethrough) + { + create_flags |= VK_BUFFER_CREATE_ALLOW_NULL_RPCS3; + } + + heap = std::make_unique(*g_render_device, aligned_new_size, memory_index, memory_flags, usage, create_flags, VMM_ALLOCATION_POOL_SYSTEM); + + if (!heap->value) + { + rsx_log.warning("Could not place heap '%s' into Re-BAR memory. Will attempt to use regular host-visible memory.", m_name); + ensure(m_prefer_writethrough); + + // We failed to place the buffer in rebar memory. Try again in host-visible. 
+ m_prefer_writethrough = false; + gc->dispose(heap); + heap = std::make_unique(*g_render_device, aligned_new_size, memory_map.host_visible_coherent, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM); + } if (notify_on_grow) { diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index 6a4d941ecc..c738474499 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -75,7 +75,7 @@ atomic_t g_watchdog_hold_ctr{0}; extern bool ppu_load_exec(const ppu_exec_object&, bool virtual_load, const std::string&, utils::serial* = nullptr); extern void spu_load_exec(const spu_exec_object&); extern void spu_load_rel_exec(const spu_rel_object&); -extern void ppu_precompile(std::vector& dir_queue, std::vector*>* loaded_prx); +extern void ppu_precompile(std::vector& dir_queue, std::vector*>* loaded_prx, bool is_fast_compilation); extern bool ppu_initialize(const ppu_module&, bool check_only = false, u64 file_size = 0); extern void ppu_finalize(const ppu_module&); extern void ppu_unload_prx(const lv2_prx&); @@ -1684,7 +1684,7 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch, } } - g_fxo->init("SPRX Loader"sv, [this, dir_queue]() mutable + g_fxo->init("SPRX Loader"sv, [this, dir_queue, is_fast = m_precompilation_option.is_fast]() mutable { std::vector*> mod_list; @@ -1705,7 +1705,7 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch, return; } - ppu_precompile(dir_queue, mod_list.empty() ? nullptr : &mod_list); + ppu_precompile(dir_queue, mod_list.empty() ? 
nullptr : &mod_list, is_fast); if (Emu.IsStopped()) { @@ -3230,6 +3230,7 @@ void Emulator::Kill(bool allow_autoexit, bool savestate, savestate_stage* save_s read_used_savestate_versions(); m_savestate_extension_flags1 = {}; m_emu_state_close_pending = false; + m_precompilation_option = {}; // Enable logging rpcs3::utils::configure_logs(true); @@ -3824,6 +3825,7 @@ void Emulator::Kill(bool allow_autoexit, bool savestate, savestate_stage* save_s read_used_savestate_versions(); m_savestate_extension_flags1 = {}; m_emu_state_close_pending = false; + m_precompilation_option = {}; initialize_timebased_time(0, true); diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h index 0c26d09a4b..954a041e9e 100644 --- a/rpcs3/Emu/System.h +++ b/rpcs3/Emu/System.h @@ -120,6 +120,11 @@ namespace utils struct serial; }; +struct emu_precompilation_option_t +{ + bool is_fast = false; +}; + class Emulator final { atomic_t m_state{system_state::stopped}; @@ -188,6 +193,7 @@ class Emulator final }; bs_t m_savestate_extension_flags1{}; + emu_precompilation_option_t m_precompilation_option{}; public: static constexpr std::string_view game_id_boot_prefix = "%RPCS3_GAMEID%:"; @@ -245,6 +251,11 @@ public: m_state = system_state::running; } + void SetPrecompileCacheOption(emu_precompilation_option_t option) + { + m_precompilation_option = option; + } + void Init(); std::vector argv; diff --git a/rpcs3/Emu/system_config.h b/rpcs3/Emu/system_config.h index c7b5e8e0fb..18f11da896 100644 --- a/rpcs3/Emu/system_config.h +++ b/rpcs3/Emu/system_config.h @@ -36,7 +36,7 @@ struct cfg_root : cfg::node cfg::_int<0, 16> spu_delay_penalty{ this, "SPU delay penalty", 3 }; // Number of milliseconds to block a thread if a virtual 'core' isn't free cfg::_bool spu_loop_detection{ this, "SPU loop detection", false }; // Try to detect wait loops and trigger thread yield cfg::_int<1, 6> max_spurs_threads{ this, "Max SPURS Threads", 6, true }; // HACK. 
If less then 6, max number of running SPURS threads in each thread group. - cfg::_enum spu_block_size{ this, "SPU Block Size", spu_block_size_type::safe }; + cfg::_enum spu_block_size{ this, "SPU Analyzer Block Size", spu_block_size_type::mega }; cfg::_bool spu_accurate_dma{ this, "Accurate SPU DMA", false }; cfg::_bool spu_accurate_reservations{ this, "Accurate SPU Reservations", true }; cfg::_bool accurate_cache_line_stores{ this, "Accurate Cache Line Stores", false }; diff --git a/rpcs3/Emu/system_utils.cpp b/rpcs3/Emu/system_utils.cpp index ba98a44795..e840887bac 100644 --- a/rpcs3/Emu/system_utils.cpp +++ b/rpcs3/Emu/system_utils.cpp @@ -101,6 +101,48 @@ namespace rpcs3::utils return worker(); } + std::vector> get_vfs_disk_usage() + { + std::vector> disk_usage; + + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_hdd0_dir(), 1); data_size != umax) + { + disk_usage.push_back({"dev_hdd0", data_size}); + } + + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_hdd1_dir(), 1); data_size != umax) + { + disk_usage.push_back({"dev_hdd1", data_size}); + } + + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_flash_dir(), 1); data_size != umax) + { + disk_usage.push_back({"dev_flash", data_size}); + } + + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_flash2_dir(), 1); data_size != umax) + { + disk_usage.push_back({"dev_flash2", data_size}); + } + + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_flash3_dir(), 1); data_size != umax) + { + disk_usage.push_back({"dev_flash3", data_size}); + } + + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_bdvd_dir(), 1); data_size != umax) + { + disk_usage.push_back({"dev_bdvd", data_size}); + } + + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_games_dir(), 1); data_size != umax) + { + disk_usage.push_back({"games", data_size}); + } + + return disk_usage; + } + std::string get_emu_dir() { const std::string& emu_dir_ = g_cfg_vfs.emulator_dir; @@ 
-122,6 +164,36 @@ namespace rpcs3::utils return g_cfg_vfs.get(g_cfg_vfs.dev_hdd1, get_emu_dir()); } + std::string get_flash_dir() + { + return g_cfg_vfs.get(g_cfg_vfs.dev_flash, get_emu_dir()); + } + + std::string get_flash2_dir() + { + return g_cfg_vfs.get(g_cfg_vfs.dev_flash2, get_emu_dir()); + } + + std::string get_flash3_dir() + { + return g_cfg_vfs.get(g_cfg_vfs.dev_flash3, get_emu_dir()); + } + + std::string get_bdvd_dir() + { + return g_cfg_vfs.get(g_cfg_vfs.dev_bdvd, get_emu_dir()); + } + + u64 get_cache_disk_usage() + { + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_cache_dir(), 1); data_size != umax) + { + return data_size; + } + + return 0; + } + std::string get_cache_dir() { return fs::get_cache_dir() + "cache/"; diff --git a/rpcs3/Emu/system_utils.hpp b/rpcs3/Emu/system_utils.hpp index 30ccb0add0..b4142dacb9 100644 --- a/rpcs3/Emu/system_utils.hpp +++ b/rpcs3/Emu/system_utils.hpp @@ -23,10 +23,19 @@ namespace rpcs3::utils bool install_pkg(const std::string& path); + // VFS directories and disk usage + std::vector> get_vfs_disk_usage(); std::string get_emu_dir(); std::string get_games_dir(); std::string get_hdd0_dir(); std::string get_hdd1_dir(); + std::string get_flash_dir(); + std::string get_flash2_dir(); + std::string get_flash3_dir(); + std::string get_bdvd_dir(); + + // Cache directories and disk usage + u64 get_cache_disk_usage(); std::string get_cache_dir(); std::string get_cache_dir(std::string_view module_path); diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 0bbea41832..1b5716f01b 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -156,6 +156,7 @@ + @@ -699,6 +700,8 @@ + + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 65cd509f85..23b7ef174d 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -133,6 +133,9 @@ {ce6d6b90-8313-4273-b46c-d92bd450c002} + + {d99df916-8a99-428b-869a-9f14ac0ab411} + @@ -1372,6 +1375,9 @@ Emu\Io + + 
Emu\GPU\RSX\Program\Assembler + @@ -2764,6 +2770,12 @@ Utilities + + Emu\GPU\RSX\Program\Assembler + + + Emu\GPU\RSX\Program\Assembler + diff --git a/rpcs3/rpcs3qt/emu_settings.cpp b/rpcs3/rpcs3qt/emu_settings.cpp index 0c99cfc119..bba3b0b235 100644 --- a/rpcs3/rpcs3qt/emu_settings.cpp +++ b/rpcs3/rpcs3qt/emu_settings.cpp @@ -975,9 +975,9 @@ QString emu_settings::GetLocalizedSetting(const QString& original, emu_settings_ case emu_settings_type::SPUBlockSize: switch (static_cast(index)) { - case spu_block_size_type::safe: return tr("Safe", "SPU block size"); - case spu_block_size_type::mega: return tr("Mega", "SPU block size"); - case spu_block_size_type::giga: return tr("Giga", "SPU block size"); + case spu_block_size_type::safe: return tr("Safe", "SPU Analyzer Block Size"); + case spu_block_size_type::mega: return tr("Mega", "SPU Analyzer Block Size"); + case spu_block_size_type::giga: return tr("Giga", "SPU Analyzer Block Size"); } break; case emu_settings_type::ThreadSchedulerMode: diff --git a/rpcs3/rpcs3qt/emu_settings_type.h b/rpcs3/rpcs3qt/emu_settings_type.h index c398b07a28..d90aa7c862 100644 --- a/rpcs3/rpcs3qt/emu_settings_type.h +++ b/rpcs3/rpcs3qt/emu_settings_type.h @@ -239,7 +239,7 @@ inline static const std::map settings_location { emu_settings_type::XFloatAccuracy, { "Core", "XFloat Accuracy"}}, { emu_settings_type::MFCCommandsShuffling, { "Core", "MFC Commands Shuffling Limit"}}, { emu_settings_type::SetDAZandFTZ, { "Core", "Set DAZ and FTZ"}}, - { emu_settings_type::SPUBlockSize, { "Core", "SPU Block Size"}}, + { emu_settings_type::SPUBlockSize, { "Core", "SPU Analyzer Block Size"}}, { emu_settings_type::SPUCache, { "Core", "SPU Cache"}}, { emu_settings_type::DebugConsoleMode, { "Core", "Debug Console Mode"}}, { emu_settings_type::MaxSPURSThreads, { "Core", "Max SPURS Threads"}}, diff --git a/rpcs3/rpcs3qt/game_list_frame.cpp b/rpcs3/rpcs3qt/game_list_frame.cpp index 313e043613..162b8cb0f6 100644 --- a/rpcs3/rpcs3qt/game_list_frame.cpp +++ 
b/rpcs3/rpcs3qt/game_list_frame.cpp @@ -2011,10 +2011,11 @@ void game_list_frame::ShowContextMenu(const QPoint &pos) menu.exec(global_pos); } -bool game_list_frame::CreateCPUCaches(const std::string& path, const std::string& serial) +bool game_list_frame::CreateCPUCaches(const std::string& path, const std::string& serial, bool is_fast_compilation) { Emu.GracefulShutdown(false); Emu.SetForceBoot(true); + Emu.SetPrecompileCacheOption(emu_precompilation_option_t{.is_fast = is_fast_compilation}); if (const auto error = Emu.BootGame(fs::is_file(path) ? fs::get_parent_dir(path) : path, serial, true); error != game_boot_result::no_errors) { @@ -2026,9 +2027,9 @@ bool game_list_frame::CreateCPUCaches(const std::string& path, const std::string return true; } -bool game_list_frame::CreateCPUCaches(const game_info& game) +bool game_list_frame::CreateCPUCaches(const game_info& game, bool is_fast_compilation) { - return game && CreateCPUCaches(game->info.path, game->info.serial); + return game && CreateCPUCaches(game->info.path, game->info.serial, is_fast_compilation); } bool game_list_frame::RemoveCustomConfiguration(const std::string& title_id, const game_info& game, bool is_interactive) @@ -2404,6 +2405,9 @@ void game_list_frame::BatchActionBySerials(progress_dialog* pdlg, const std::set connect(pdlg, &progress_dialog::canceled, this, [pdlg](){ pdlg->deleteLater(); }); QApplication::beep(); + // Signal termination back to the callback + action(""); + if (refresh_on_finish && index) { Refresh(true); @@ -2414,7 +2418,7 @@ void game_list_frame::BatchActionBySerials(progress_dialog* pdlg, const std::set QTimer::singleShot(1, this, *periodic_func); } -void game_list_frame::BatchCreateCPUCaches(const std::vector& game_data) +void game_list_frame::BatchCreateCPUCaches(const std::vector& game_data, bool is_fast_compilation) { std::set serials; @@ -2433,11 +2437,13 @@ void game_list_frame::BatchCreateCPUCaches(const std::vector& game_da if (total == 0) { 
QMessageBox::information(this, tr("LLVM Cache Batch Creation"), tr("No titles found"), QMessageBox::Ok); + Q_EMIT NotifyBatchedGameActionFinished(); return; } if (!m_gui_settings->GetBootConfirmation(this)) { + Q_EMIT NotifyBatchedGameActionFinished(); return; } @@ -2459,13 +2465,19 @@ void game_list_frame::BatchCreateCPUCaches(const std::vector& game_da BatchActionBySerials(pdlg, serials, tr("%0\nProgress: %1/%2 caches compiled").arg(main_label), [&, game_data](const std::string& serial) { + if (serial.empty()) + { + Q_EMIT NotifyBatchedGameActionFinished(); + return false; + } + if (Emu.IsStopped(true)) { const auto it = std::find_if(m_game_data.begin(), m_game_data.end(), FN(x->info.serial == serial)); if (it != m_game_data.end()) { - return CreateCPUCaches((*it)->info.path, serial); + return CreateCPUCaches((*it)->info.path, serial, is_fast_compilation); } } @@ -2512,7 +2524,7 @@ void game_list_frame::BatchRemovePPUCaches() BatchActionBySerials(pdlg, serials, tr("%0/%1 caches cleared"), [this](const std::string& serial) { - return Emu.IsStopped(true) && RemovePPUCache(GetCacheDirBySerial(serial)); + return !serial.empty() &&Emu.IsStopped(true) && RemovePPUCache(GetCacheDirBySerial(serial)); }, [this](u32, u32) { @@ -2551,7 +2563,7 @@ void game_list_frame::BatchRemoveSPUCaches() BatchActionBySerials(pdlg, serials, tr("%0/%1 caches cleared"), [this](const std::string& serial) { - return Emu.IsStopped(true) && RemoveSPUCache(GetCacheDirBySerial(serial)); + return !serial.empty() && Emu.IsStopped(true) && RemoveSPUCache(GetCacheDirBySerial(serial)); }, [this](u32 removed, u32 total) { @@ -2586,7 +2598,7 @@ void game_list_frame::BatchRemoveCustomConfigurations() BatchActionBySerials(pdlg, serials, tr("%0/%1 custom configurations cleared"), [this](const std::string& serial) { - return Emu.IsStopped(true) && RemoveCustomConfiguration(serial); + return !serial.empty() && Emu.IsStopped(true) && RemoveCustomConfiguration(serial); }, [this](u32 removed, u32 total) { @@ 
-2620,7 +2632,7 @@ void game_list_frame::BatchRemoveCustomPadConfigurations() BatchActionBySerials(pdlg, serials, tr("%0/%1 custom pad configurations cleared"), [this](const std::string& serial) { - return Emu.IsStopped(true) && RemoveCustomPadConfiguration(serial); + return !serial.empty() && Emu.IsStopped(true) && RemoveCustomPadConfiguration(serial); }, [this](u32 removed, u32 total) { @@ -2659,7 +2671,7 @@ void game_list_frame::BatchRemoveShaderCaches() BatchActionBySerials(pdlg, serials, tr("%0/%1 shader caches cleared"), [this](const std::string& serial) { - return Emu.IsStopped(true) && RemoveShadersCache(GetCacheDirBySerial(serial)); + return !serial.empty() && Emu.IsStopped(true) && RemoveShadersCache(GetCacheDirBySerial(serial)); }, [this](u32 removed, u32 total) { diff --git a/rpcs3/rpcs3qt/game_list_frame.h b/rpcs3/rpcs3qt/game_list_frame.h index 0252cbf84f..fb366c933c 100644 --- a/rpcs3/rpcs3qt/game_list_frame.h +++ b/rpcs3/rpcs3qt/game_list_frame.h @@ -64,7 +64,7 @@ public: bool IsEntryVisible(const game_info& game, bool search_fallback = false) const; public Q_SLOTS: - void BatchCreateCPUCaches(const std::vector& game_data = {}); + void BatchCreateCPUCaches(const std::vector& game_data = {}, bool is_fast_compilation = false); void BatchRemovePPUCaches(); void BatchRemoveSPUCaches(); void BatchRemoveCustomConfigurations(); @@ -96,6 +96,7 @@ Q_SIGNALS: void FocusToSearchBar(); void Refreshed(); void RequestSaveStateManager(const game_info& game); + void NotifyBatchedGameActionFinished(); public: template @@ -135,8 +136,8 @@ private: bool RemovePPUCache(const std::string& base_dir, bool is_interactive = false); bool RemoveSPUCache(const std::string& base_dir, bool is_interactive = false); void RemoveHDD1Cache(const std::string& base_dir, const std::string& title_id, bool is_interactive = false); - static bool CreateCPUCaches(const std::string& path, const std::string& serial = {}); - static bool CreateCPUCaches(const game_info& game); + static bool 
CreateCPUCaches(const std::string& path, const std::string& serial = {}, bool is_fast_compilation = false); + static bool CreateCPUCaches(const game_info& game, bool is_fast_compilation = false); static bool RemoveContentPath(const std::string& path, const std::string& desc); static u32 RemoveContentPathList(const std::vector& path_list, const std::string& desc); diff --git a/rpcs3/rpcs3qt/log_frame.cpp b/rpcs3/rpcs3qt/log_frame.cpp index 4dd664a99e..a155cf215d 100644 --- a/rpcs3/rpcs3qt/log_frame.cpp +++ b/rpcs3/rpcs3qt/log_frame.cpp @@ -4,10 +4,14 @@ #include "hex_validator.h" #include "memory_viewer_panel.h" +#include "Emu/System.h" +#include "Emu/system_utils.hpp" #include "Utilities/lockless.h" #include "util/asm.hpp" +#include #include +#include #include #include #include @@ -17,6 +21,8 @@ #include #include +LOG_CHANNEL(sys_log, "SYS"); + extern fs::file g_tty; extern atomic_t g_tty_size; extern std::array, 16> g_tty_input; @@ -165,6 +171,28 @@ log_frame::log_frame(std::shared_ptr _gui_settings, QWidget* paren connect(m_timer, &QTimer::timeout, this, &log_frame::UpdateUI); } +void log_frame::show_disk_usage(const std::vector>& vfs_disk_usage, u64 cache_disk_usage) +{ + QString text; + u64 tot_data_size = 0; + + for (const auto& [dev, data_size] : vfs_disk_usage) + { + text += tr("\n %0: %1").arg(QString::fromStdString(dev)).arg(gui::utils::format_byte_size(data_size)); + tot_data_size += data_size; + } + + if (!text.isEmpty()) + { + text = tr("\n VFS disk usage: %0%1").arg(gui::utils::format_byte_size(tot_data_size)).arg(text); + } + + text += tr("\n Cache disk usage: %0").arg(gui::utils::format_byte_size(cache_disk_usage)); + + sys_log.success("%s", text); + QMessageBox::information(this, tr("Disk usage"), text); +} + void log_frame::SetLogLevel(logs::level lev) const { switch (lev) @@ -245,6 +273,26 @@ void log_frame::CreateAndConnectActions() m_tty->clear(); }); + m_show_disk_usage_act = new QAction(tr("Show Disk Usage"), this); + 
connect(m_show_disk_usage_act, &QAction::triggered, [this]() + { + if (m_disk_usage_future.isRunning()) + { + return; // Still running the last request + } + + m_disk_usage_future = QtConcurrent::run([this]() + { + const std::vector> vfs_disk_usage = rpcs3::utils::get_vfs_disk_usage(); + const u64 cache_disk_usage = rpcs3::utils::get_cache_disk_usage(); + + Emu.CallFromMainThread([this, vfs_disk_usage, cache_disk_usage]() + { + show_disk_usage(vfs_disk_usage, cache_disk_usage); + }, nullptr, false); + }); + }); + m_perform_goto_on_debugger = new QAction(tr("Go-To On The Debugger"), this); connect(m_perform_goto_on_debugger, &QAction::triggered, [this]() { @@ -369,6 +417,9 @@ void log_frame::CreateAndConnectActions() { QMenu* menu = m_log->createStandardContextMenu(); menu->addAction(m_clear_act); + menu->addSeparator(); + menu->addAction(m_show_disk_usage_act); + menu->addSeparator(); menu->addAction(m_perform_goto_on_debugger); menu->addAction(m_perform_goto_thread_on_debugger); menu->addAction(m_perform_show_in_mem_viewer); diff --git a/rpcs3/rpcs3qt/log_frame.h b/rpcs3/rpcs3qt/log_frame.h index 0de081863c..159fdd38aa 100644 --- a/rpcs3/rpcs3qt/log_frame.h +++ b/rpcs3/rpcs3qt/log_frame.h @@ -8,6 +8,7 @@ #include +#include #include #include #include @@ -38,6 +39,7 @@ protected: private Q_SLOTS: void UpdateUI(); private: + void show_disk_usage(const std::vector>& vfs_disk_usage, u64 cache_disk_usage); void SetLogLevel(logs::level lev) const; void SetTTYLogging(bool val) const; @@ -48,6 +50,7 @@ private: std::unique_ptr m_find_dialog; QTimer* m_timer = nullptr; + QFuture m_disk_usage_future; std::vector m_color; QColor m_color_stack; @@ -72,6 +75,7 @@ private: QAction* m_clear_act = nullptr; QAction* m_clear_tty_act = nullptr; + QAction* m_show_disk_usage_act = nullptr; QAction* m_perform_goto_on_debugger = nullptr; QAction* m_perform_goto_thread_on_debugger = nullptr; QAction* m_perform_show_in_mem_viewer = nullptr; diff --git a/rpcs3/rpcs3qt/main_window.cpp 
b/rpcs3/rpcs3qt/main_window.cpp index 38767ceabb..34154d846d 100644 --- a/rpcs3/rpcs3qt/main_window.cpp +++ b/rpcs3/rpcs3qt/main_window.cpp @@ -1187,7 +1187,13 @@ bool main_window::HandlePackageInstallation(QStringList file_paths, bool from_bo } } - ShowOptionalGamePreparations(tr("Success!"), tr("Successfully installed software from package(s)!"), std::move(paths)); + // Executes after PrecompileCachesFromInstalledPackages + m_notify_batch_game_action_cb = [this, paths]() mutable + { + ShowOptionalGamePreparations(tr("Success!"), tr("Successfully installed software from package(s)!"), std::move(paths)); + }; + + PrecompileCachesFromInstalledPackages(paths); }); } @@ -2368,8 +2374,7 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri #else QCheckBox* quick_check = new QCheckBox(tr("Add launcher shortcut(s)")); #endif - QCheckBox* precompile_check = new QCheckBox(tr("Precompile caches")); - QLabel* label = new QLabel(tr("%1\nWould you like to install shortcuts to the installed software and precompile caches? (%2 new software detected)\n\n").arg(message).arg(bootable_paths.size()), dlg); + QLabel* label = new QLabel(tr("%1\nWould you like to install shortcuts to the installed software? 
(%2 new software detected)\n\n").arg(message).arg(bootable_paths.size()), dlg); vlayout->addWidget(label); vlayout->addStretch(10); @@ -2377,10 +2382,6 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri vlayout->addStretch(3); vlayout->addWidget(quick_check); vlayout->addStretch(3); - vlayout->addWidget(precompile_check); - vlayout->addStretch(3); - - precompile_check->setToolTip(tr("Spend time building data needed for game boot now instead of at launch.")); QDialogButtonBox* btn_box = new QDialogButtonBox(QDialogButtonBox::Ok); @@ -2391,7 +2392,6 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri { const bool create_desktop_shortcuts = desk_check->isChecked(); const bool create_app_shortcut = quick_check->isChecked(); - const bool create_caches = precompile_check->isChecked(); dlg->hide(); dlg->accept(); @@ -2411,12 +2411,11 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri locations.insert(gui::utils::shortcut_location::applications); } - if (locations.empty() && !create_caches) + if (locations.empty()) { return; } - std::vector game_data; std::vector game_data_shortcuts; for (const auto& [boot_path, title_id] : paths) @@ -2431,11 +2430,6 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri { game_data_shortcuts.push_back(gameinfo); } - - if (create_caches) - { - game_data.push_back(gameinfo); - } } break; @@ -2447,17 +2441,39 @@ void main_window::ShowOptionalGamePreparations(const QString& title, const QStri { m_game_list_frame->CreateShortcuts(game_data_shortcuts, locations); } - - if (!game_data.empty()) - { - m_game_list_frame->BatchCreateCPUCaches(game_data); - } }); dlg->setAttribute(Qt::WA_DeleteOnClose); dlg->open(); } + +void main_window::PrecompileCachesFromInstalledPackages(const std::map& bootable_paths) +{ + std::vector game_data; + + for (const auto& [boot_path, title_id] : bootable_paths) + { + for (const game_info& 
gameinfo : m_game_list_frame->GetGameInfo()) + { + if (gameinfo && gameinfo->info.serial == title_id.toStdString()) + { + if (Emu.IsPathInsideDir(boot_path, gameinfo->info.path)) + { + game_data.push_back(gameinfo); + } + + break; + } + } + } + + if (!game_data.empty()) + { + m_game_list_frame->BatchCreateCPUCaches(game_data, true); + } +} + void main_window::CreateActions() { ui->exitAct->setShortcuts(QKeySequence::Quit); @@ -3401,6 +3417,15 @@ void main_window::CreateConnects() connect(ui->mw_searchbar, &QLineEdit::textChanged, m_game_list_frame, &game_list_frame::SetSearchText); connect(ui->mw_searchbar, &QLineEdit::returnPressed, m_game_list_frame, &game_list_frame::FocusAndSelectFirstEntryIfNoneIs); connect(m_game_list_frame, &game_list_frame::FocusToSearchBar, this, [this]() { ui->mw_searchbar->setFocus(); }); + + connect(m_game_list_frame, &game_list_frame::NotifyBatchedGameActionFinished, this, [this]() mutable + { + if (m_notify_batch_game_action_cb) + { + m_notify_batch_game_action_cb(); + m_notify_batch_game_action_cb = {}; + } + }); } void main_window::CreateDockWindows() diff --git a/rpcs3/rpcs3qt/main_window.h b/rpcs3/rpcs3qt/main_window.h index c712d01fd3..4e5b498587 100644 --- a/rpcs3/rpcs3qt/main_window.h +++ b/rpcs3/rpcs3qt/main_window.h @@ -48,6 +48,7 @@ class main_window : public QMainWindow bool m_save_slider_pos = false; bool m_requested_show_logs_on_exit = false; int m_other_slider_pos = 0; + std::function m_notify_batch_game_action_cb; QIcon m_app_icon; QIcon m_icon_play; @@ -141,6 +142,7 @@ private: void CreateDockWindows(); void EnableMenus(bool enabled) const; void ShowTitleBars(bool show) const; + void PrecompileCachesFromInstalledPackages(const std::map& bootable_paths); void ShowOptionalGamePreparations(const QString& title, const QString& message, std::map game_path); static bool InstallFileInExData(const std::string& extension, const QString& path, const std::string& filename); diff --git a/rpcs3/rpcs3qt/tooltips.h 
b/rpcs3/rpcs3qt/tooltips.h index f7ec927332..3b84a66270 100644 --- a/rpcs3/rpcs3qt/tooltips.h +++ b/rpcs3/rpcs3qt/tooltips.h @@ -91,7 +91,7 @@ public: const QString xfloat = tr("Control accuracy to SPU float vectors processing.\nFixes bugs in various games at the cost of performance.\nThis setting is only applied when SPU Decoder is set to Dynamic or LLVM."); const QString enable_thread_scheduler = tr("Control how RPCS3 utilizes the threads of your system.\nEach option heavily depends on the game and on your CPU. It's recommended to try each option to find out which performs the best.\nChanging the thread scheduler is not supported on CPUs with less than 12 threads."); const QString spu_loop_detection = tr("Try to detect loop conditions in SPU kernels and use them as scheduling hints.\nImproves performance and reduces CPU usage.\nMay cause severe audio stuttering in rare cases."); - const QString spu_block_size = tr("This option controls the SPU analyser, particularly the size of compiled units. The Mega and Giga modes may improve performance by tying smaller units together, decreasing the number of compiled units but increasing their size.\nUse the Safe mode for maximum compatibility."); + const QString spu_block_size = tr("This option controls the SPU analyser, particularly the size of compiled units. 
The Mega and Giga modes may improve performance by tying smaller units together, decreasing the number of compiled units but increasing their size.\nUse the Safe mode for maximum compatibility at the cost of lower performance."); const QString preferred_spu_threads = tr("Some SPU stages are sensitive to race conditions and allowing a limited number at a time helps alleviate performance stalls.\nSetting this to a smaller value might improve performance and reduce stuttering in some games.\nLeave this on auto if performance is negatively affected when setting a small value."); const QString max_cpu_preempt = tr("Reduces CPU usage and power consumption, improving battery life on mobile devices. (0 means disabled)\nHigher values cause a more pronounced effect, but may cause audio or performance issues. A value of 50 or less is recommended.\nThis option forces an FPS limit because it's active when framerate is stable.\nThe lighter the game is on the hardware, the more power is saved by it. (until the preemption count barrier is reached)"); diff --git a/rpcs3/tests/rpcs3_test.vcxproj b/rpcs3/tests/rpcs3_test.vcxproj index 4f0d136a9a..22992e6a07 100644 --- a/rpcs3/tests/rpcs3_test.vcxproj +++ b/rpcs3/tests/rpcs3_test.vcxproj @@ -88,6 +88,7 @@ + diff --git a/rpcs3/tests/test_pair.cpp b/rpcs3/tests/test_pair.cpp index 086f7102b6..5df152f054 100644 --- a/rpcs3/tests/test_pair.cpp +++ b/rpcs3/tests/test_pair.cpp @@ -3,44 +3,47 @@ #include "util/types.hpp" #include "util/pair.hpp" -struct some_struct +namespace utils { - u64 v {}; - char s[12] = "Hello World"; - - bool operator == (const some_struct& r) const + struct some_struct { - return v == r.v && std::memcmp(s, r.s, sizeof(s)) == 0; + u64 v{}; + char s[12] = "Hello World"; + + bool operator == (const some_struct& r) const + { + return v == r.v && std::memcmp(s, r.s, sizeof(s)) == 0; + } + }; + + TEST(Pair, General) + { + some_struct s{}; + s.v = 1234; + + utils::pair p; + EXPECT_EQ(sizeof(p), 32); + EXPECT_EQ(p.first, 
0); + EXPECT_EQ(p.second, some_struct{}); + + p = { 666, s }; + EXPECT_EQ(p.first, 666); + EXPECT_EQ(p.second, s); + + const utils::pair p1 = p; + EXPECT_EQ(p.first, 666); + EXPECT_EQ(p.second, s); + EXPECT_EQ(p1.first, 666); + EXPECT_EQ(p1.second, s); + + utils::pair p2 = p1; + EXPECT_EQ(p1.first, 666); + EXPECT_EQ(p1.second, s); + EXPECT_EQ(p2.first, 666); + EXPECT_EQ(p2.second, s); + + utils::pair p3 = std::move(p); + EXPECT_EQ(p3.first, 666); + EXPECT_EQ(p3.second, s); } -}; - -TEST(Utils, Pair) -{ - some_struct s {}; - s.v = 1234; - - utils::pair p; - EXPECT_EQ(sizeof(p), 32); - EXPECT_EQ(p.first, 0); - EXPECT_EQ(p.second, some_struct{}); - - p = { 666, s }; - EXPECT_EQ(p.first, 666); - EXPECT_EQ(p.second, s); - - const utils::pair p1 = p; - EXPECT_EQ(p.first, 666); - EXPECT_EQ(p.second, s); - EXPECT_EQ(p1.first, 666); - EXPECT_EQ(p1.second, s); - - utils::pair p2 = p1; - EXPECT_EQ(p1.first, 666); - EXPECT_EQ(p1.second, s); - EXPECT_EQ(p2.first, 666); - EXPECT_EQ(p2.second, s); - - utils::pair p3 = std::move(p); - EXPECT_EQ(p3.first, 666); - EXPECT_EQ(p3.second, s); } diff --git a/rpcs3/tests/test_rsx_cfg.cpp b/rpcs3/tests/test_rsx_cfg.cpp new file mode 100644 index 0000000000..1708774d76 --- /dev/null +++ b/rpcs3/tests/test_rsx_cfg.cpp @@ -0,0 +1,239 @@ +#include + +#include "Emu/RSX/Common/simple_array.hpp" +#include "Emu/RSX/Program/Assembler/CFG.h" +#include "Emu/RSX/Program/RSXFragmentProgram.h" + +#include + +namespace rsx::assembler +{ + auto swap_bytes16 = [](u32 dword) -> u32 + { + // Lazy encode, but good enough for what we need here. 
+ union v32 + { + u32 HEX; + u8 _v[4]; + }; + + u8* src_bytes = reinterpret_cast(&dword); + v32 dst_bytes; + + dst_bytes._v[0] = src_bytes[1]; + dst_bytes._v[1] = src_bytes[0]; + dst_bytes._v[2] = src_bytes[3]; + dst_bytes._v[3] = src_bytes[2]; + + return dst_bytes.HEX; + }; + + // Instruction mocks because we don't have a working assember (yet) + auto encode_instruction = [](u32 opcode, bool end = false) -> v128 + { + OPDEST dst{}; + dst.opcode = opcode; + + if (end) + { + dst.end = 1; + } + + return v128::from32(swap_bytes16(dst.HEX), 0, 0, 0); + }; + + auto create_if(u32 end, u32 _else = 0) + { + OPDEST dst{}; + dst.opcode = RSX_FP_OPCODE_IFE & 0x3Fu; + + SRC1 src1{}; + src1.else_offset = (_else ? _else : end) << 2; + src1.opcode_is_branch = 1; + + SRC2 src2{}; + src2.end_offset = end << 2; + + return v128::from32(swap_bytes16(dst.HEX), 0, swap_bytes16(src1.HEX), swap_bytes16(src2.HEX)); + }; + + TEST(CFG, FpToCFG_Basic) + { + rsx::simple_array buffer = { + encode_instruction(RSX_FP_OPCODE_ADD), + encode_instruction(RSX_FP_OPCODE_MOV, true) + }; + + RSXFragmentProgram program{}; + program.data = buffer.data(); + + FlowGraph graph = deconstruct_fragment_program(program); + + EXPECT_EQ(graph.blocks.size(), 1); + EXPECT_EQ(graph.blocks.front().instructions.size(), 2); + EXPECT_EQ(graph.blocks.front().instructions.front().length, 4); + EXPECT_EQ(graph.blocks.front().instructions[0].addr, 0); + EXPECT_EQ(graph.blocks.front().instructions[1].addr, 16); + } + + TEST(CFG, FpToCFG_IF) + { + rsx::simple_array buffer = { + encode_instruction(RSX_FP_OPCODE_ADD), // 0 + encode_instruction(RSX_FP_OPCODE_MOV), // 1 + create_if(4), // 2 (BR, 4) + encode_instruction(RSX_FP_OPCODE_ADD), // 3 + encode_instruction(RSX_FP_OPCODE_MOV, true), // 4 (Merge block) + }; + + const std::pair expected_block_data[3] = { + { 0, 3 }, // Head + { 3, 1 }, // Branch + { 4, 1 }, // Merge + }; + + RSXFragmentProgram program{}; + program.data = buffer.data(); + + FlowGraph graph = 
deconstruct_fragment_program(program); + + ASSERT_EQ(graph.blocks.size(), 3); + + int i = 0; + for (auto it = graph.blocks.begin(); it != graph.blocks.end(); ++it) + { + const auto& expected = expected_block_data[i++]; + EXPECT_EQ(it->id, expected.first); + EXPECT_EQ(it->instructions.size(), expected.second); + } + + // Check edges + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 3))->pred[0].type, EdgeType::IF); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ[0].type, EdgeType::IF); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 4))->pred[0].type, EdgeType::ENDIF); + } + + TEST(CFG, FpToCFG_NestedIF) + { + rsx::simple_array buffer = { + encode_instruction(RSX_FP_OPCODE_ADD), // 0 + encode_instruction(RSX_FP_OPCODE_MOV), // 1 + create_if(8), // 2 (BR, 8) + encode_instruction(RSX_FP_OPCODE_ADD), // 3 + create_if(6), // 4 (BR, 6) + encode_instruction(RSX_FP_OPCODE_MOV), // 5 + encode_instruction(RSX_FP_OPCODE_MOV), // 6 (merge block 1) + encode_instruction(RSX_FP_OPCODE_ADD), // 7 + encode_instruction(RSX_FP_OPCODE_MOV, true) // 8 (merge block 2 + }; + + const std::pair expected_block_data[5] = { + { 0, 3 }, // Head + { 3, 2 }, // Branch 1 + { 5, 1 }, // Branch 2 + { 6, 2 }, // Merge 1 + { 8, 1 }, // Merge 2 + }; + + RSXFragmentProgram program{}; + program.data = buffer.data(); + + FlowGraph graph = deconstruct_fragment_program(program); + + ASSERT_EQ(graph.blocks.size(), 5); + + int i = 0; + for (auto it = graph.blocks.begin(); it != graph.blocks.end(); ++it) + { + const auto& expected = expected_block_data[i++]; + EXPECT_EQ(it->id, expected.first); + EXPECT_EQ(it->instructions.size(), expected.second); + } + } + + TEST(CFG, FpToCFG_NestedIF_MultiplePred) + { + rsx::simple_array buffer = { + encode_instruction(RSX_FP_OPCODE_ADD), // 0 + encode_instruction(RSX_FP_OPCODE_MOV), // 1 + create_if(6), // 2 (BR, 6) + encode_instruction(RSX_FP_OPCODE_ADD), // 3 + 
create_if(6), // 4 (BR, 6) + encode_instruction(RSX_FP_OPCODE_MOV), // 5 + encode_instruction(RSX_FP_OPCODE_MOV), // 6 (merge block) + encode_instruction(RSX_FP_OPCODE_ADD), // 7 + encode_instruction(RSX_FP_OPCODE_MOV, true) // 8 + }; + + const std::pair expected_block_data[4] = { + { 0, 3 }, // Head + { 3, 2 }, // Branch 1 + { 5, 1 }, // Branch 2 + { 6, 3 }, // Merge + }; + + RSXFragmentProgram program{}; + program.data = buffer.data(); + + FlowGraph graph = deconstruct_fragment_program(program); + + ASSERT_EQ(graph.blocks.size(), 4); + + int i = 0; + for (auto it = graph.blocks.begin(); it != graph.blocks.end(); ++it) + { + const auto& expected = expected_block_data[i++]; + EXPECT_EQ(it->id, expected.first); + EXPECT_EQ(it->instructions.size(), expected.second); + } + + // Predecessors must be ordered, closest first + ASSERT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred.size(), 2); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred[0].type, EdgeType::ENDIF); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred[0].from->id, 3); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred[1].type, EdgeType::ENDIF); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred[1].from->id, 0); + + // Successors must also be ordered, closest first + ASSERT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ.size(), 2); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ[0].type, EdgeType::IF); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ[0].to->id, 3); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ[1].type, EdgeType::ENDIF); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ[1].to->id, 6); + } + + TEST(CFG, FpToCFG_IF_ELSE) + { + 
rsx::simple_array buffer = { + encode_instruction(RSX_FP_OPCODE_ADD), // 0 + encode_instruction(RSX_FP_OPCODE_MOV), // 1 + create_if(6, 4), // 2 (BR, 6) + encode_instruction(RSX_FP_OPCODE_ADD), // 3 + encode_instruction(RSX_FP_OPCODE_MOV), // 4 (Else) + encode_instruction(RSX_FP_OPCODE_ADD), // 5 + encode_instruction(RSX_FP_OPCODE_MOV, true), // 6 (Merge) + }; + + const std::pair expected_block_data[4] = { + { 0, 3 }, // Head + { 3, 1 }, // Branch positive + { 4, 2 }, // Branch negative + { 6, 1 }, // Merge + }; + + RSXFragmentProgram program{}; + program.data = buffer.data(); + + FlowGraph graph = deconstruct_fragment_program(program); + + ASSERT_EQ(graph.blocks.size(), 4); + + int i = 0; + for (auto it = graph.blocks.begin(); it != graph.blocks.end(); ++it) + { + const auto& expected = expected_block_data[i++]; + EXPECT_EQ(it->id, expected.first); + EXPECT_EQ(it->instructions.size(), expected.second); + } + } +} diff --git a/rpcs3/tests/test_simple_array.cpp b/rpcs3/tests/test_simple_array.cpp index c581ab2277..8d64599b96 100644 --- a/rpcs3/tests/test_simple_array.cpp +++ b/rpcs3/tests/test_simple_array.cpp @@ -267,4 +267,90 @@ namespace rsx EXPECT_EQ(std::memcmp(arr[i].second.s, "Hello World", sizeof(arr[i].second.s)), 0); } } + + TEST(SimpleArray, DataAlignment_SmallVector) + { + struct alignas(16) some_struct { + char data[16]; + }; + + rsx::simple_array arr(2); + const auto data_ptr = reinterpret_cast(arr.data()); + + EXPECT_EQ(data_ptr & 15, 0); + } + + TEST(SimpleArray, DataAlignment_HeapAlloc) + { + struct alignas(16) some_struct { + char data[16]; + }; + + rsx::simple_array arr(128); + const auto data_ptr = reinterpret_cast(arr.data()); + + EXPECT_EQ(data_ptr & 15, 0); + } + + TEST(SimpleArray, DataAlignment_Overrides) + { + rsx::simple_array arr(4); + rsx::simple_array arr2(4); + + const auto data_ptr1 = reinterpret_cast(arr.data()); + const auto data_ptr2 = reinterpret_cast(arr2.data()); + + EXPECT_EQ(data_ptr1 & 15, 0); + EXPECT_EQ(data_ptr2 & 127, 0); 
+ } + + TEST(SimpleArray, Find) + { + const rsx::simple_array arr{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 + }; + + EXPECT_EQ(*arr.find(8), 8); + EXPECT_EQ(arr.find(99), nullptr); + } + + TEST(SimpleArray, FindIf) + { + const rsx::simple_array arr{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 + }; + + EXPECT_EQ(*arr.find_if(FN(x == 8)), 8); + EXPECT_EQ(arr.find_if(FN(x == 99)), nullptr); + } + + TEST(AlignedAllocator, Alloc) + { + auto ptr = rsx::aligned_allocator::malloc<256>(16); + const auto ptr_value = reinterpret_cast(ptr); + rsx::aligned_allocator::free(ptr); + + EXPECT_NE(ptr_value, 0); + EXPECT_EQ(ptr_value % 256, 0); + } + + TEST(AlignedAllocator, Realloc) + { + auto ptr = rsx::aligned_allocator::malloc<256>(16); + auto ptr2 = rsx::aligned_allocator::realloc<256>(ptr, 16, 32); + const auto ptr_value = reinterpret_cast(ptr2); + rsx::aligned_allocator::free(ptr2); + + EXPECT_NE(ptr_value, 0); + EXPECT_EQ(ptr_value % 256, 0); + } + + TEST(AlignedAllocator, Realloc_ReturnsPreviousPointerIfFits) + { + auto ptr = rsx::aligned_allocator::malloc<256>(16); + auto ptr2 = rsx::aligned_allocator::realloc<256>(ptr, 16, 8); + rsx::aligned_allocator::free(ptr2); + + EXPECT_EQ(ptr, ptr2); + } } diff --git a/rpcs3/tests/test_tuple.cpp b/rpcs3/tests/test_tuple.cpp index 2a174d85d3..831c0aac51 100644 --- a/rpcs3/tests/test_tuple.cpp +++ b/rpcs3/tests/test_tuple.cpp @@ -2,113 +2,116 @@ #include "util/tuple.hpp" -struct some_struct +namespace utils { - u64 v {}; - char s[12] = "Hello World"; - - bool operator == (const some_struct& r) const + struct some_struct { - return v == r.v && std::memcmp(s, r.s, sizeof(s)) == 0; + u64 v{}; + char s[12] = "Hello World"; + + bool operator == (const some_struct& r) const + { + return v == r.v && std::memcmp(s, r.s, sizeof(s)) == 0; + } + }; + + TEST(Tuple, General) + { + some_struct s{}; + s.v = 1234; + + utils::tuple t0 = {}; + EXPECT_EQ(t0.size(), 0); + + utils::tuple t; + EXPECT_EQ(sizeof(t), sizeof(int)); + EXPECT_TRUE((std::is_same_v()), int&>)); + 
EXPECT_EQ(t.size(), 1); + EXPECT_EQ(t.get<0>(), 0); + + utils::tuple t1 = 2; + EXPECT_EQ(sizeof(t1), sizeof(int)); + EXPECT_TRUE((std::is_same_v()), int&>)); + EXPECT_EQ(t1.size(), 1); + EXPECT_EQ(t1.get<0>(), 2); + t1 = {}; + EXPECT_EQ(t1.size(), 1); + EXPECT_EQ(t1.get<0>(), 0); + + utils::tuple t2 = { 2, s }; + EXPECT_EQ(sizeof(t2), 32); + EXPECT_EQ(t2.size(), 2); + EXPECT_TRUE((std::is_same_v()), int&>)); + EXPECT_TRUE((std::is_same_v()), some_struct&>)); + EXPECT_EQ(t2.get<0>(), 2); + EXPECT_EQ(t2.get<1>(), s); + t2 = {}; + EXPECT_EQ(t2.size(), 2); + EXPECT_EQ(t2.get<0>(), 0); + EXPECT_EQ(t2.get<1>(), some_struct{}); + + t2.get<0>() = 666; + t2.get<1>() = s; + EXPECT_EQ(t2.get<0>(), 666); + EXPECT_EQ(t2.get<1>(), s); + + utils::tuple t3 = { 2, s, 1234.0 }; + EXPECT_EQ(sizeof(t3), 40); + EXPECT_EQ(t3.size(), 3); + EXPECT_TRUE((std::is_same_v()), int&>)); + EXPECT_TRUE((std::is_same_v()), some_struct&>)); + EXPECT_TRUE((std::is_same_v()), double&>)); + EXPECT_EQ(t3.get<0>(), 2); + EXPECT_EQ(t3.get<1>(), s); + EXPECT_EQ(t3.get<2>(), 1234.0); + t3 = {}; + EXPECT_EQ(t3.size(), 3); + EXPECT_EQ(t3.get<0>(), 0); + EXPECT_EQ(t3.get<1>(), some_struct{}); + EXPECT_EQ(t3.get<2>(), 0.0); + + t3.get<0>() = 666; + t3.get<1>() = s; + t3.get<2>() = 7.0; + EXPECT_EQ(t3.get<0>(), 666); + EXPECT_EQ(t3.get<1>(), s); + EXPECT_EQ(t3.get<2>(), 7.0); + + // const + const utils::tuple tc = { 2, s }; + EXPECT_EQ(tc.size(), 2); + EXPECT_TRUE((std::is_same_v()), const int&>)); + EXPECT_TRUE((std::is_same_v()), const some_struct&>)); + EXPECT_EQ(tc.get<0>(), 2); + EXPECT_EQ(tc.get<1>(), s); + + // assignment + const utils::tuple ta1 = { 2, s }; + utils::tuple ta = ta1; + EXPECT_EQ(ta.size(), 2); + EXPECT_TRUE((std::is_same_v()), int&>)); + EXPECT_TRUE((std::is_same_v()), some_struct&>)); + EXPECT_EQ(ta.get<0>(), 2); + EXPECT_EQ(ta.get<1>(), s); + + utils::tuple ta2 = { 2, s }; + ta = ta2; + EXPECT_EQ(ta.size(), 2); + EXPECT_TRUE((std::is_same_v()), int&>)); + EXPECT_TRUE((std::is_same_v()), 
some_struct&>)); + EXPECT_EQ(ta.get<0>(), 2); + EXPECT_EQ(ta.get<1>(), s); + EXPECT_EQ(ta2.size(), 2); + EXPECT_TRUE((std::is_same_v()), int&>)); + EXPECT_TRUE((std::is_same_v()), some_struct&>)); + EXPECT_EQ(ta2.get<0>(), 2); + EXPECT_EQ(ta2.get<1>(), s); + + ta = std::move(ta2); + EXPECT_EQ(ta.size(), 2); + EXPECT_TRUE((std::is_same_v()), int&>)); + EXPECT_TRUE((std::is_same_v()), some_struct&>)); + EXPECT_EQ(ta.get<0>(), 2); + EXPECT_EQ(ta.get<1>(), s); } -}; - -TEST(Utils, Tuple) -{ - some_struct s {}; - s.v = 1234; - - utils::tuple t0 = {}; - EXPECT_EQ(t0.size(), 0); - - utils::tuple t; - EXPECT_EQ(sizeof(t), sizeof(int)); - EXPECT_TRUE((std::is_same_v()), int&>)); - EXPECT_EQ(t.size(), 1); - EXPECT_EQ(t.get<0>(), 0); - - utils::tuple t1 = 2; - EXPECT_EQ(sizeof(t1), sizeof(int)); - EXPECT_TRUE((std::is_same_v()), int&>)); - EXPECT_EQ(t1.size(), 1); - EXPECT_EQ(t1.get<0>(), 2); - t1 = {}; - EXPECT_EQ(t1.size(), 1); - EXPECT_EQ(t1.get<0>(), 0); - - utils::tuple t2 = { 2, s }; - EXPECT_EQ(sizeof(t2), 32); - EXPECT_EQ(t2.size(), 2); - EXPECT_TRUE((std::is_same_v()), int&>)); - EXPECT_TRUE((std::is_same_v()), some_struct&>)); - EXPECT_EQ(t2.get<0>(), 2); - EXPECT_EQ(t2.get<1>(), s); - t2 = {}; - EXPECT_EQ(t2.size(), 2); - EXPECT_EQ(t2.get<0>(), 0); - EXPECT_EQ(t2.get<1>(), some_struct{}); - - t2.get<0>() = 666; - t2.get<1>() = s; - EXPECT_EQ(t2.get<0>(), 666); - EXPECT_EQ(t2.get<1>(), s); - - utils::tuple t3 = { 2, s, 1234.0 }; - EXPECT_EQ(sizeof(t3), 40); - EXPECT_EQ(t3.size(), 3); - EXPECT_TRUE((std::is_same_v()), int&>)); - EXPECT_TRUE((std::is_same_v()), some_struct&>)); - EXPECT_TRUE((std::is_same_v()), double&>)); - EXPECT_EQ(t3.get<0>(), 2); - EXPECT_EQ(t3.get<1>(), s); - EXPECT_EQ(t3.get<2>(), 1234.0); - t3 = {}; - EXPECT_EQ(t3.size(), 3); - EXPECT_EQ(t3.get<0>(), 0); - EXPECT_EQ(t3.get<1>(), some_struct{}); - EXPECT_EQ(t3.get<2>(), 0.0); - - t3.get<0>() = 666; - t3.get<1>() = s; - t3.get<2>() = 7.0; - EXPECT_EQ(t3.get<0>(), 666); - EXPECT_EQ(t3.get<1>(), 
s); - EXPECT_EQ(t3.get<2>(), 7.0); - - // const - const utils::tuple tc = { 2, s }; - EXPECT_EQ(tc.size(), 2); - EXPECT_TRUE((std::is_same_v()), const int&>)); - EXPECT_TRUE((std::is_same_v()), const some_struct&>)); - EXPECT_EQ(tc.get<0>(), 2); - EXPECT_EQ(tc.get<1>(), s); - - // assignment - const utils::tuple ta1 = { 2, s }; - utils::tuple ta = ta1; - EXPECT_EQ(ta.size(), 2); - EXPECT_TRUE((std::is_same_v()), int&>)); - EXPECT_TRUE((std::is_same_v()), some_struct&>)); - EXPECT_EQ(ta.get<0>(), 2); - EXPECT_EQ(ta.get<1>(), s); - - utils::tuple ta2 = { 2, s }; - ta = ta2; - EXPECT_EQ(ta.size(), 2); - EXPECT_TRUE((std::is_same_v()), int&>)); - EXPECT_TRUE((std::is_same_v()), some_struct&>)); - EXPECT_EQ(ta.get<0>(), 2); - EXPECT_EQ(ta.get<1>(), s); - EXPECT_EQ(ta2.size(), 2); - EXPECT_TRUE((std::is_same_v()), int&>)); - EXPECT_TRUE((std::is_same_v()), some_struct&>)); - EXPECT_EQ(ta2.get<0>(), 2); - EXPECT_EQ(ta2.get<1>(), s); - - ta = std::move(ta2); - EXPECT_EQ(ta.size(), 2); - EXPECT_TRUE((std::is_same_v()), int&>)); - EXPECT_TRUE((std::is_same_v()), some_struct&>)); - EXPECT_EQ(ta.get<0>(), 2); - EXPECT_EQ(ta.get<1>(), s); } diff --git a/rpcs3/util/asm.hpp b/rpcs3/util/asm.hpp index deca38b413..8942dc2a09 100644 --- a/rpcs3/util/asm.hpp +++ b/rpcs3/util/asm.hpp @@ -60,7 +60,7 @@ namespace utils #if defined(ARCH_X64) return _m_prefetchw(const_cast(ptr)); #else - return __builtin_prefetch(ptr, 1, 0); + return __builtin_prefetch(ptr, 1, 3); #endif } diff --git a/rpcs3/util/bless.hpp b/rpcs3/util/bless.hpp index af2f8d32f3..6a097a2c64 100644 --- a/rpcs3/util/bless.hpp +++ b/rpcs3/util/bless.hpp @@ -1,10 +1,12 @@ #pragma once +#include + namespace utils { // Hack. Pointer cast util to workaround UB. Use with extreme care. 
- template - [[nodiscard]] T* bless(U* ptr) + template requires (std::is_pointer_v>) + [[nodiscard]] inline T* bless(const U& ptr) { #ifdef _MSC_VER return (T*)ptr; @@ -21,3 +23,4 @@ namespace utils #endif } } + diff --git a/rpcs3/util/types.hpp b/rpcs3/util/types.hpp index 690f51c0e7..4a2ef5baea 100644 --- a/rpcs3/util/types.hpp +++ b/rpcs3/util/types.hpp @@ -999,17 +999,18 @@ template requires (std::is_integral_v; constexpr bool is_to_signed = std::is_signed_v; - constexpr auto from_mask = (is_from_signed && !is_to_signed) ? UnFrom{umax} >> 1 : UnFrom{umax}; + // For unsigned/signed mismatch, create an "unsigned" compatible mask + constexpr auto from_mask = (is_from_signed && !is_to_signed && sizeof(CommonFrom) <= sizeof(CommonTo)) ? UnFrom{umax} >> 1 : UnFrom{umax}; constexpr auto to_mask = (is_to_signed && !is_from_signed) ? UnTo{umax} >> 1 : UnTo{umax}; - constexpr auto mask = ~(from_mask & to_mask); + constexpr auto mask = static_cast(~(from_mask & to_mask)); - // Signed to unsigned always require test - // Otherwise, this is bit-wise narrowing or conversion between types of different signedness of the same size - if constexpr ((is_from_signed && !is_to_signed) || to_mask < from_mask) + // If destination ("unsigned" compatible) mask is smaller than source ("unsigned" compatible) mask + // It requires narrowing. + if constexpr (!!mask) { // Try to optimize test if both are of the same signedness - if (is_from_signed != is_to_signed ? !!(value & mask) : static_cast(value) != value) [[unlikely]] + if (is_from_signed != is_to_signed ? !!(value & mask) : static_cast(static_cast(value)) != value) [[unlikely]] { fmt::raw_verify_error(src_loc, u8"Narrowing error", +value); }