diff --git a/Utilities/StrFmt.h b/Utilities/StrFmt.h index 5eba199e26..6d9ea05c9e 100644 --- a/Utilities/StrFmt.h +++ b/Utilities/StrFmt.h @@ -394,7 +394,7 @@ namespace fmt } #if !defined(_MSC_VER) || defined(__clang__) - [[noreturn]] ~throw_exception(); + [[noreturn]] ~throw_exception() = default; #endif }; diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt index e8ccc21d22..74d579870f 100644 --- a/rpcs3/CMakeLists.txt +++ b/rpcs3/CMakeLists.txt @@ -197,6 +197,7 @@ if(BUILD_RPCS3_TESTS) tests/test_tuple.cpp tests/test_simple_array.cpp tests/test_address_range.cpp + tests/test_rsx_cfg.cpp ) target_link_libraries(rpcs3_test diff --git a/rpcs3/Crypto/utils.cpp b/rpcs3/Crypto/utils.cpp index 7432acbf62..8d2fd4e9aa 100644 --- a/rpcs3/Crypto/utils.cpp +++ b/rpcs3/Crypto/utils.cpp @@ -157,7 +157,7 @@ std::array sc_combine_laid_paid(s64 laid, s64 paid) { const std::string paid_laid = fmt::format("%016llx%016llx", laid, paid); std::array out{}; - hex_to_bytes(out.data(), paid_laid.c_str(), PASSPHRASE_KEY_LEN * 2); + hex_to_bytes(out.data(), paid_laid, PASSPHRASE_KEY_LEN * 2); return out; } diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt index 294241d07e..aef3321208 100644 --- a/rpcs3/Emu/CMakeLists.txt +++ b/rpcs3/Emu/CMakeLists.txt @@ -517,6 +517,7 @@ target_sources(rpcs3_emu PRIVATE RSX/Overlays/overlay_video.cpp RSX/Overlays/Shaders/shader_loading_dialog.cpp RSX/Overlays/Shaders/shader_loading_dialog_native.cpp + RSX/Program/Assembler/FPToCFG.cpp RSX/Program/CgBinaryProgram.cpp RSX/Program/CgBinaryFragmentProgram.cpp RSX/Program/CgBinaryVertexProgram.cpp diff --git a/rpcs3/Emu/Cell/Modules/sceNpTrophy.h b/rpcs3/Emu/Cell/Modules/sceNpTrophy.h index 0c48623fda..6f7d88c148 100644 --- a/rpcs3/Emu/Cell/Modules/sceNpTrophy.h +++ b/rpcs3/Emu/Cell/Modules/sceNpTrophy.h @@ -3,7 +3,6 @@ #include "util/types.hpp" #include "Emu/Memory/vm_ptr.h" #include "Emu/Cell/ErrorCodes.h" -#include #include #include diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp 
b/rpcs3/Emu/Cell/PPUTranslator.cpp index 629aae88ae..e59f14892b 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -416,7 +416,6 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info) assert(ptr_inst->getResultElementType() == m_ir->getPtrTy()); const auto faddr = m_ir->CreateLoad(ptr_inst->getResultElementType(), ptr_inst); - const auto faddr_int = m_ir->CreatePtrToInt(faddr, get_type()); const auto pos_32 = m_reloc ? m_ir->CreateAdd(func_pc, m_seg0) : func_pc; const auto pos = m_ir->CreateShl(pos_32, 1); const auto ptr = m_ir->CreatePtrAdd(m_exec, pos); @@ -427,7 +426,7 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info) const auto seg_val = m_ir->CreateTrunc(m_ir->CreateLShr(m_seg0, 13), get_type()); // Store to jumptable - m_ir->CreateStore(faddr_int, ptr); + m_ir->CreateStore(faddr, ptr); m_ir->CreateStore(seg_val, seg_ptr); // Increment index and branch back to loop diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index 11eb124eae..9a192989e0 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -3122,7 +3122,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s u64 dabs = 0; u64 drel = 0; - for (u32 i = start; i < limit; i += 4) + for (u32 i = start, abs_fail = 0, rel_fail = 0; i < limit; i += 4) { const u32 target = ls[i / 4]; @@ -3135,13 +3135,27 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (target >= lsa && target < SPU_LS_SIZE) { // Possible jump table entry (absolute) - jt_abs.push_back(target); + if (!abs_fail) + { + jt_abs.push_back(target); + } + } + else + { + abs_fail++; } if (target + start >= lsa && target + start < SPU_LS_SIZE) { // Possible jump table entry (relative) - jt_rel.push_back(target + start); + if (!rel_fail) + { + jt_rel.push_back(target + start); + } + } + else + { + rel_fail++; } if (std::max(jt_abs.size(), 
jt_rel.size()) * 4 + start <= i) @@ -3153,6 +3167,35 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s } } + for (usz i = 0; i < jt_abs.size(); i++) + { + if (jt_abs[i] == start + jt_abs.size() * 4) + { + // If jumptable contains absolute address of code start after the jumptable itself + // It is likely an absolute-type jumptable + + bool is_good_conclusion = true; + + // For verification: make sure there is none like this in relative table + + for (u32 target : jt_rel) + { + if (target == start + jt_rel.size() * 4) + { + is_good_conclusion = false; + break; + } + } + + if (is_good_conclusion) + { + jt_rel.clear(); + } + + break; + } + } + // Choose position after the jt as an anchor and compute the average distance for (u32 target : jt_abs) { @@ -7241,6 +7284,19 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // Blocks starting from 0x0 or invalid instruction won't be compiled, may need special interpreter fallback } + for (u32 i = 0; i < result.data.size(); i++) + { + const be_t ls_val = ls[result.lower_bound / 4 + i]; + + if (result.data[i] && std::bit_cast(ls_val) != result.data[i]) + { + std::string out_dump; + dump(result, out_dump); + spu_log.error("SPU Function Dump:\n%s", out_dump); + fmt::throw_exception("SPU Analyzer failed: Instruction mismatch at 0x%x [read: 0x%x vs LS: 0x%x] (i=0x%x)", result.lower_bound + i * 4, std::bit_cast>(result.data[i]), ls_val, i); + } + } + return result; } diff --git a/rpcs3/Emu/Cell/lv2/sys_fs.cpp b/rpcs3/Emu/Cell/lv2/sys_fs.cpp index 1f76bb7090..5bb74808be 100644 --- a/rpcs3/Emu/Cell/lv2/sys_fs.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_fs.cpp @@ -1036,7 +1036,6 @@ lv2_file::open_result_t lv2_file::open(std::string_view vpath, s32 flags, s32 mo error_code sys_fs_open(ppu_thread& ppu, vm::cptr path, s32 flags, vm::ptr fd, s32 mode, vm::cptr arg, u64 size) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_open(path=%s, flags=%#o, fd=*0x%x, 
mode=%#o, arg=*0x%x, size=0x%llx)", path, flags, fd, mode, arg, size); @@ -1085,7 +1084,6 @@ error_code sys_fs_open(ppu_thread& ppu, vm::cptr path, s32 flags, vm::ptr< error_code sys_fs_read(ppu_thread& ppu, u32 fd, vm::ptr buf, u64 nbytes, vm::ptr nread) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.trace("sys_fs_read(fd=%d, buf=*0x%x, nbytes=0x%llx, nread=*0x%x)", fd, buf, nbytes, nread); @@ -1122,6 +1120,11 @@ error_code sys_fs_read(ppu_thread& ppu, u32 fd, vm::ptr buf, u64 nbytes, v return CELL_OK; } + if (nbytes >= 0x100000 && file->type != lv2_file_type::regular) + { + lv2_obj::sleep(ppu); + } + std::unique_lock lock(file->mp->mutex); if (!file->file) @@ -1154,7 +1157,6 @@ error_code sys_fs_read(ppu_thread& ppu, u32 fd, vm::ptr buf, u64 nbytes, v error_code sys_fs_write(ppu_thread& ppu, u32 fd, vm::cptr buf, u64 nbytes, vm::ptr nwrite) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.trace("sys_fs_write(fd=%d, buf=*0x%x, nbytes=0x%llx, nwrite=*0x%x)", fd, buf, nbytes, nwrite); @@ -1237,7 +1239,6 @@ error_code sys_fs_write(ppu_thread& ppu, u32 fd, vm::cptr buf, u64 nbytes, error_code sys_fs_close(ppu_thread& ppu, u32 fd) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); const auto file = idm::get_unlocked(fd); @@ -1314,7 +1315,6 @@ error_code sys_fs_close(ppu_thread& ppu, u32 fd) error_code sys_fs_opendir(ppu_thread& ppu, vm::cptr path, vm::ptr fd) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_opendir(path=%s, fd=*0x%x)", path, fd); @@ -1491,7 +1491,6 @@ error_code sys_fs_readdir(ppu_thread& ppu, u32 fd, vm::ptr dir, vm error_code sys_fs_closedir(ppu_thread& ppu, u32 fd) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_closedir(fd=%d)", fd); @@ -1506,7 +1505,6 @@ error_code sys_fs_closedir(ppu_thread& ppu, u32 fd) error_code sys_fs_stat(ppu_thread& ppu, vm::cptr path, vm::ptr sb) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_stat(path=%s, 
sb=*0x%x)", path, sb); @@ -1610,7 +1608,6 @@ error_code sys_fs_stat(ppu_thread& ppu, vm::cptr path, vm::ptr error_code sys_fs_fstat(ppu_thread& ppu, u32 fd, vm::ptr sb) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_fstat(fd=%d, sb=*0x%x)", fd, sb); @@ -1666,7 +1663,6 @@ error_code sys_fs_link(ppu_thread&, vm::cptr from, vm::cptr to) error_code sys_fs_mkdir(ppu_thread& ppu, vm::cptr path, s32 mode) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_mkdir(path=%s, mode=%#o)", path, mode); @@ -1728,7 +1724,6 @@ error_code sys_fs_mkdir(ppu_thread& ppu, vm::cptr path, s32 mode) error_code sys_fs_rename(ppu_thread& ppu, vm::cptr from, vm::cptr to) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_rename(from=%s, to=%s)", from, to); @@ -1794,7 +1789,6 @@ error_code sys_fs_rename(ppu_thread& ppu, vm::cptr from, vm::cptr to error_code sys_fs_rmdir(ppu_thread& ppu, vm::cptr path) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_rmdir(path=%s)", path); @@ -1850,7 +1844,6 @@ error_code sys_fs_rmdir(ppu_thread& ppu, vm::cptr path) error_code sys_fs_unlink(ppu_thread& ppu, vm::cptr path) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_unlink(path=%s)", path); @@ -1951,8 +1944,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 case 0x8000000a: // cellFsReadWithOffset case 0x8000000b: // cellFsWriteWithOffset { - lv2_obj::sleep(ppu); - const auto arg = vm::static_ptr_cast(_arg); if (_size < arg.size()) @@ -1992,6 +1983,11 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 sys_fs.error("%s type: Writing %u bytes to FD=%d (path=%s)", file->type, arg->size, file->name.data()); } + if (op == 0x8000000a && file->type != lv2_file_type::regular && arg->size >= 0x100000) + { + lv2_obj::sleep(ppu); + } + std::unique_lock wlock(file->mp->mutex, std::defer_lock); std::shared_lock 
rlock(file->mp->mutex, std::defer_lock); @@ -2047,8 +2043,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 case 0x80000009: // cellFsSdataOpenByFd { - lv2_obj::sleep(ppu); - const auto arg = vm::static_ptr_cast(_arg); if (_size < arg.size()) @@ -2102,8 +2096,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 case 0xc0000002: // cellFsGetFreeSize (TODO) { - lv2_obj::sleep(ppu); - const auto arg = vm::static_ptr_cast(_arg); const auto& mp = g_fxo->get().lookup("/dev_hdd0"); @@ -2418,8 +2410,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 case 0xe0000012: // cellFsGetDirectoryEntries { - lv2_obj::sleep(ppu); - const auto arg = vm::static_ptr_cast(_arg); if (_size < arg.size()) @@ -2434,8 +2424,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 return CELL_EBADF; } - ppu.check_state(); - u32 read_count = 0; // NOTE: This function is actually capable of reading only one entry at a time @@ -2593,7 +2581,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr _arg, u32 error_code sys_fs_lseek(ppu_thread& ppu, u32 fd, s64 offset, s32 whence, vm::ptr pos) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.trace("sys_fs_lseek(fd=%d, offset=0x%llx, whence=0x%x, pos=*0x%x)", fd, offset, whence, pos); @@ -2639,7 +2626,6 @@ error_code sys_fs_lseek(ppu_thread& ppu, u32 fd, s64 offset, s32 whence, vm::ptr error_code sys_fs_fdatasync(ppu_thread& ppu, u32 fd) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.trace("sys_fs_fdadasync(fd=%d)", fd); @@ -2650,6 +2636,8 @@ error_code sys_fs_fdatasync(ppu_thread& ppu, u32 fd) return CELL_EBADF; } + lv2_obj::sleep(ppu); + std::lock_guard lock(file->mp->mutex); if (!file->file) @@ -2664,7 +2652,6 @@ error_code sys_fs_fdatasync(ppu_thread& ppu, u32 fd) error_code sys_fs_fsync(ppu_thread& ppu, u32 fd) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.trace("sys_fs_fsync(fd=%d)", 
fd); @@ -2675,6 +2662,8 @@ error_code sys_fs_fsync(ppu_thread& ppu, u32 fd) return CELL_EBADF; } + lv2_obj::sleep(ppu); + std::lock_guard lock(file->mp->mutex); if (!file->file) @@ -2763,7 +2752,6 @@ error_code sys_fs_get_block_size(ppu_thread& ppu, vm::cptr path, vm::ptr path, u64 size) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_truncate(path=%s, size=0x%llx)", path, size); @@ -2815,7 +2803,6 @@ error_code sys_fs_truncate(ppu_thread& ppu, vm::cptr path, u64 size) error_code sys_fs_ftruncate(ppu_thread& ppu, u32 fd, u64 size) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_ftruncate(fd=%d, size=0x%llx)", fd, size); @@ -3021,7 +3008,6 @@ error_code sys_fs_disk_free(ppu_thread& ppu, vm::cptr path, vm::ptr t error_code sys_fs_utime(ppu_thread& ppu, vm::cptr path, vm::cptr timep) { ppu.state += cpu_flag::wait; - lv2_obj::sleep(ppu); sys_fs.warning("sys_fs_utime(path=%s, timep=*0x%x)", path, timep); sys_fs.warning("** actime=%u, modtime=%u", timep->actime, timep->modtime); diff --git a/rpcs3/Emu/RSX/Common/simple_array.hpp b/rpcs3/Emu/RSX/Common/simple_array.hpp index 4b5ceac877..69397291f9 100644 --- a/rpcs3/Emu/RSX/Common/simple_array.hpp +++ b/rpcs3/Emu/RSX/Common/simple_array.hpp @@ -50,6 +50,12 @@ namespace rsx { c.size() } -> std::integral; }; + template + concept is_trivially_comparable_v = + requires (T t1, U t2) { + { t1 == t2 } -> std::same_as; + }; + template requires std::is_trivially_destructible_v && std::is_trivially_copyable_v struct simple_array @@ -492,6 +498,50 @@ namespace rsx return false; } + /** + * Note that find and find_if return pointers to objects and not iterators for simplified usage. + * It is functionally equivalent to retrieve a nullptr meaning empty object stored and nullptr meaning not found for all practical uses of this container. 
+ */ + template + requires is_trivially_comparable_v + Ty* find(const T& value) + { + for (auto it = begin(); it != end(); ++it) + { + if (*it == value) + { + return &(*it); + } + } + return nullptr; + } + + // Remove when we switch to C++23 + template + requires is_trivially_comparable_v + const Ty* find(const T& value) const + { + return const_cast*>(this)->find(value); + } + + Ty* find_if(std::predicate auto predicate) + { + for (auto it = begin(); it != end(); ++it) + { + if (std::invoke(predicate, *it)) + { + return &(*it); + } + } + return nullptr; + } + + // Remove with C++23 + const Ty* find_if(std::predicate auto predicate) const + { + return const_cast*>(this)->find_if(predicate); + } + bool erase_if(std::predicate auto predicate) { if (!_size) diff --git a/rpcs3/Emu/RSX/Program/Assembler/CFG.h b/rpcs3/Emu/RSX/Program/Assembler/CFG.h new file mode 100644 index 0000000000..9bc44a22d1 --- /dev/null +++ b/rpcs3/Emu/RSX/Program/Assembler/CFG.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include "IR.h" + +#include + +struct RSXFragmentProgram; + +namespace rsx::assembler +{ + struct FlowGraph + { + std::list blocks; + + BasicBlock* push(BasicBlock* parent = nullptr, u32 pc = 0, EdgeType edge_type = EdgeType::NONE) + { + if (!parent && !blocks.empty()) + { + parent = &blocks.back(); + } + + blocks.push_back({}); + BasicBlock* new_block = &blocks.back(); + + if (parent) + { + parent->insert_succ(new_block, edge_type); + new_block->insert_pred(parent, edge_type); + } + + new_block->id = pc; + return new_block; + } + }; + + FlowGraph deconstruct_fragment_program(const RSXFragmentProgram& prog); +} + diff --git a/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp b/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp new file mode 100644 index 0000000000..d8de4eda0b --- /dev/null +++ b/rpcs3/Emu/RSX/Program/Assembler/FPToCFG.cpp @@ -0,0 +1,193 @@ +#include "stdafx.h" + +#include "CFG.h" + +#include "Emu/RSX/Common/simple_array.hpp" +#include "Emu/RSX/Program/RSXFragmentProgram.h" 
+ +#include +#include +#include + +#if defined(ARCH_ARM64) +#if !defined(_MSC_VER) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#pragma GCC diagnostic ignored "-Wold-style-cast" +#endif +#undef FORCE_INLINE +#include "Emu/CPU/sse2neon.h" +#if !defined(_MSC_VER) +#pragma GCC diagnostic pop +#endif +#endif + +namespace rsx::assembler +{ + inline v128 decode_instruction(const v128& raw_inst) + { + // Fixup of RSX's weird half-word shuffle for FP instructions + // Convert input stream into LE u16 array + __m128i _mask0 = _mm_set1_epi32(0xff00ff00); + __m128i _mask1 = _mm_set1_epi32(0x00ff00ff); + __m128i a = _mm_slli_epi32(static_cast<__m128i>(raw_inst), 8); + __m128i b = _mm_srli_epi32(static_cast<__m128i>(raw_inst), 8); + __m128i ret = _mm_or_si128( + _mm_and_si128(_mask0, a), + _mm_and_si128(_mask1, b) + ); + return v128::loadu(&ret); + } + + FlowGraph deconstruct_fragment_program(const RSXFragmentProgram& prog) + { + // For a flowgraph, we don't care at all about the actual contents, just flow control instructions. 
+ OPDEST dst{}; + SRC0 src0{}; + SRC1 src1{}; + SRC2 src2{}; + + u32 pc = 0; // Program counter + bool end = false; + + // Flow control data + rsx::simple_array end_blocks; + rsx::simple_array else_blocks; + + // Data block + u32* data = static_cast(prog.get_data()); + + // Output + FlowGraph graph{}; + BasicBlock* bb = graph.push(); + + auto find_block_for_pc = [&](u32 id) -> BasicBlock* + { + auto found = std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == id)); + if (found != graph.blocks.end()) + { + return &(*found); + } + return nullptr; + }; + + auto safe_insert_block = [&](BasicBlock* parent, u32 id, EdgeType edge_type) -> BasicBlock* + { + if (auto found = find_block_for_pc(id)) + { + parent->insert_succ(found, edge_type); + found->insert_pred(parent, edge_type); + return found; + } + + return graph.push(parent, id, edge_type); + }; + + auto includes_literal_constant = [&]() + { + return src0.reg_type == RSX_FP_REGISTER_TYPE_CONSTANT || + src1.reg_type == RSX_FP_REGISTER_TYPE_CONSTANT || + src2.reg_type == RSX_FP_REGISTER_TYPE_CONSTANT; + }; + + while (!end) + { + BasicBlock** found = end_blocks.find_if(FN(x->id == pc)); + + if (!found) + { + found = else_blocks.find_if(FN(x->id == pc)); + } + + if (found) + { + bb = *found; + } + + const v128 raw_inst = v128::loadu(data, pc); + v128 decoded = decode_instruction(raw_inst); + + dst.HEX = decoded._u32[0]; + src0.HEX = decoded._u32[1]; + src1.HEX = decoded._u32[2]; + src2.HEX = decoded._u32[3]; + + end = !!dst.end; + const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6); + + if (opcode == RSX_FP_OPCODE_NOP) + { + pc++; + continue; + } + + bb->instructions.push_back({}); + auto& ir_inst = bb->instructions.back(); + std::memcpy(ir_inst.bytecode, &decoded._u32[0], 16); + ir_inst.length = 4; + ir_inst.addr = pc * 16; + + switch (opcode) + { + case RSX_FP_OPCODE_BRK: + break; + case RSX_FP_OPCODE_CAL: + // Unimplemented. 
Also unused by the RSX compiler + fmt::throw_exception("Unimplemented FP CAL instruction."); + break; + case RSX_FP_OPCODE_FENCT: + break; + case RSX_FP_OPCODE_FENCB: + break; + case RSX_FP_OPCODE_RET: + // Outside a subroutine, this doesn't mean much. The main block can conditionally return to stop execution early. + // This will not alter flow control. + break; + case RSX_FP_OPCODE_IFE: + { + // Inserts if and else and end blocks + auto parent = bb; + bb = safe_insert_block(parent, pc + 1, EdgeType::IF); + if (src2.end_offset != src1.else_offset) + { + else_blocks.push_back(safe_insert_block(parent, src1.else_offset >> 2, EdgeType::ELSE)); + } + end_blocks.push_back(safe_insert_block(parent, src2.end_offset >> 2, EdgeType::ENDIF)); + break; + } + case RSX_FP_OPCODE_LOOP: + case RSX_FP_OPCODE_REP: + { + // Inserts for and end blocks + auto parent = bb; + bb = safe_insert_block(parent, pc + 1, EdgeType::LOOP); + end_blocks.push_back(safe_insert_block(parent, src2.end_offset >> 2, EdgeType::ENDLOOP)); + break; + } + default: + if (includes_literal_constant()) + { + const v128 constant_literal = v128::loadu(data, pc + 1); // The literal lives in the 16-byte slot after the instruction word (the slot skipped by the pc++ below) + v128 decoded_literal = decode_instruction(constant_literal); + + std::memcpy(ir_inst.bytecode + 4, &decoded_literal._u32[0], 16); + ir_inst.length += 4; + pc++; + } + } + + pc++; + } + + // Sort edges for each block by distance + for (auto& block : graph.blocks) + { + std::sort(block.pred.begin(), block.pred.end(), FN(x.from->id > y.from->id)); + std::sort(block.succ.begin(), block.succ.end(), FN(x.to->id < y.to->id)); + } + + // Sort block nodes by distance + graph.blocks.sort(FN(x.id < y.id)); + return graph; + } +} diff --git a/rpcs3/Emu/RSX/Program/Assembler/IR.h b/rpcs3/Emu/RSX/Program/Assembler/IR.h new file mode 100644 index 0000000000..65960f3d99 --- /dev/null +++ b/rpcs3/Emu/RSX/Program/Assembler/IR.h @@ -0,0 +1,95 @@ +#pragma once + +#include + +namespace rsx::assembler +{ + struct BasicBlock; + + struct Register + { + int id = 0; + bool f16 
= false; + }; + + struct RegisterRef + { + Register reg{}; + + // Vector information + union + { + u32 mask; + + struct + { + bool x : 1; + bool y : 1; + bool z : 1; + bool w : 1; + }; + }; + }; + + struct Instruction + { + // Raw data. Every instruction is max 128 bits. + // Each instruction can also have 128 bits of literal/embedded data. + u32 bytecode[8]{ {} }; + u32 addr = 0; + + // Decoded + u32 opcode = 0; + u8 length = 4; // Length in dwords + + // Padding + u8 reserved0 = 0; + u16 reserved1 = 0; + + // References + std::vector srcs; + std::vector dsts; + }; + + enum class EdgeType + { + NONE, + IF, + ELSE, + ENDIF, + LOOP, + ENDLOOP, + }; + + struct FlowEdge + { + EdgeType type = EdgeType::NONE; + BasicBlock* from = nullptr; + BasicBlock* to = nullptr; + }; + + struct BasicBlock + { + u32 id = 0; + std::vector instructions; // Program instructions for the RSX processor + std::vector succ; // Forward edges. Sorted closest first. + std::vector pred; // Back edges. Sorted closest first. 
+ + std::vector prologue; // Prologue, created by passes + std::vector epilogue; // Epilogue, created by passes + + FlowEdge* insert_succ(BasicBlock* b, EdgeType type = EdgeType::NONE) + { + FlowEdge e{ .type = type, .from = this, .to = b }; + succ.push_back(e); + return &succ.back(); + } + + FlowEdge* insert_pred(BasicBlock* b, EdgeType type = EdgeType::NONE) + { + FlowEdge e{ .type = type, .from = b, .to = this }; + pred.push_back(e); + return &pred.back(); + } + }; +} diff --git a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp index e5742fffda..2ebfd7d8d7 100644 --- a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp @@ -234,7 +234,8 @@ std::string FragmentProgramDecompiler::AddCond() std::string FragmentProgramDecompiler::AddConst() { - const u32 constant_id = m_size + (4 * sizeof(u32)); + ensure(m_instruction->length == 8); + const u32 constant_id = m_instruction->addr + 16; u32 index = umax; if (auto found = m_constant_offsets.find(constant_id); @@ -249,9 +250,6 @@ std::string FragmentProgramDecompiler::AddConst() m_constant_offsets[constant_id] = index; } - // Skip next instruction, its just a literal - m_offset = 2 * 4 * sizeof(u32); - // Return the next offset index return "_fetch_constant(" + std::to_string(index) + ")"; } @@ -1297,7 +1295,7 @@ bool FragmentProgramDecompiler::handle_tex_srb(u32 opcode) std::string FragmentProgramDecompiler::Decompile() { - auto data = static_cast*>(m_prog.get_data()); + const auto graph = rsx::assembler::deconstruct_fragment_program(m_prog); m_size = 0; m_location = 0; m_loop_count = 0; @@ -1314,141 +1312,130 @@ std::string FragmentProgramDecompiler::Decompile() int forced_unit = FORCE_NONE; - while (true) + for (const auto &block : graph.blocks) { - for (auto found = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size); - found != m_end_offsets.end(); - found = std::find(m_end_offsets.begin(), 
m_end_offsets.end(), m_size)) + // TODO: Handle block prologue if any + if (!block.pred.empty()) { - m_end_offsets.erase(found); - m_code_level--; - AddCode("}"); - m_loop_count--; + // CFG guarantees predecessors are sorted, closest one first + for (const auto& pred : block.pred) + { + switch (pred.type) + { + case rsx::assembler::EdgeType::ENDLOOP: + m_loop_count--; + [[ fallthrough ]]; + case rsx::assembler::EdgeType::ENDIF: + m_code_level--; + AddCode("}"); + break; + case rsx::assembler::EdgeType::LOOP: + m_loop_count++; + [[ fallthrough ]]; + case rsx::assembler::EdgeType::IF: + // Instruction will be inserted by the SIP decoder + AddCode("{"); + m_code_level++; + break; + case rsx::assembler::EdgeType::ELSE: + // This one needs more testing + m_code_level--; + AddCode("}"); + AddCode("else"); + AddCode("{"); + m_code_level++; + break; + default: + // Start a new block anyway + fmt::throw_exception("Unexpected block found"); + } + } } - for (auto found = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size); - found != m_else_offsets.end(); - found = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size)) + for (const auto& inst : block.instructions) { - m_else_offsets.erase(found); - m_code_level--; - AddCode("}"); - AddCode("else"); - AddCode("{"); - m_code_level++; - } + m_instruction = &inst; - dst.HEX = GetData(data[0]); - src0.HEX = GetData(data[1]); - src1.HEX = GetData(data[2]); - src2.HEX = GetData(data[3]); + dst.HEX = inst.bytecode[0]; + src0.HEX = inst.bytecode[1]; + src1.HEX = inst.bytecode[2]; + src2.HEX = inst.bytecode[3]; - m_offset = 4 * sizeof(u32); - opflags = 0; + opflags = 0; - const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6); + const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6); + + auto SIP = [&]() + { + switch (opcode) + { + case RSX_FP_OPCODE_BRK: + if (m_loop_count) AddFlowOp("break"); + else rsx_log.error("BRK opcode found outside of a loop"); + break; + case RSX_FP_OPCODE_CAL: + 
rsx_log.error("Unimplemented SIP instruction: CAL"); + break; + case RSX_FP_OPCODE_FENCT: + AddCode("//FENCT"); + forced_unit = FORCE_SCT; + break; + case RSX_FP_OPCODE_FENCB: + AddCode("//FENCB"); + forced_unit = FORCE_SCB; + break; + case RSX_FP_OPCODE_IFE: + AddCode("if($cond)"); + break; + case RSX_FP_OPCODE_LOOP: + AddCode(fmt::format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) //LOOP", + m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); + break; + case RSX_FP_OPCODE_REP: + AddCode(fmt::format("if($cond) for(int i%u = %u; i%u < %u; i%u += %u) //REP", + m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); + break; + case RSX_FP_OPCODE_RET: + AddFlowOp("return"); + break; + + default: + return false; + } + + return true; + }; - auto SIP = [&]() - { switch (opcode) { - case RSX_FP_OPCODE_BRK: - if (m_loop_count) AddFlowOp("break"); - else rsx_log.error("BRK opcode found outside of a loop"); + case RSX_FP_OPCODE_NOP: break; - case RSX_FP_OPCODE_CAL: - rsx_log.error("Unimplemented SIP instruction: CAL"); + case RSX_FP_OPCODE_KIL: + properties.has_discard_op = true; + AddFlowOp("_kill()"); break; - case RSX_FP_OPCODE_FENCT: - AddCode("//FENCT"); - forced_unit = FORCE_SCT; - break; - case RSX_FP_OPCODE_FENCB: - AddCode("//FENCB"); - forced_unit = FORCE_SCB; - break; - case RSX_FP_OPCODE_IFE: - AddCode("if($cond)"); - if (src2.end_offset != src1.else_offset) - m_else_offsets.push_back(src1.else_offset << 2); - m_end_offsets.push_back(src2.end_offset << 2); - AddCode("{"); - m_code_level++; - break; - case RSX_FP_OPCODE_LOOP: - if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) - { - AddCode(fmt::format("//$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //LOOP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); - } - else - { - AddCode(fmt::format("$ifcond for(int i%u = %u; i%u < %u; i%u += 
%u) //LOOP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); - m_loop_count++; - m_end_offsets.push_back(src2.end_offset << 2); - AddCode("{"); - m_code_level++; - } - break; - case RSX_FP_OPCODE_REP: - if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt) - { - AddCode(fmt::format("//$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //REP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset)); - } - else - { - AddCode(fmt::format("if($cond) for(int i%u = %u; i%u < %u; i%u += %u) //REP", - m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment)); - m_loop_count++; - m_end_offsets.push_back(src2.end_offset << 2); - AddCode("{"); - m_code_level++; - } - break; - case RSX_FP_OPCODE_RET: - AddFlowOp("return"); - break; - default: - return false; + int prev_force_unit = forced_unit; + + // Some instructions do not respect forced unit + // Tested with Tales of Vesperia + if (SIP()) break; + if (handle_tex_srb(opcode)) break; + + // FENCT/FENCB do not actually reject instructions if they dont match the forced unit + // Looks like they are optimization hints and not hard-coded forced paths + if (handle_sct_scb(opcode)) break; + forced_unit = FORCE_NONE; + + rsx_log.error("Unknown/illegal instruction: 0x%x (forced unit %d)", opcode, prev_force_unit); + break; } - return true; - }; - - switch (opcode) - { - case RSX_FP_OPCODE_NOP: - break; - case RSX_FP_OPCODE_KIL: - properties.has_discard_op = true; - AddFlowOp("_kill()"); - break; - default: - int prev_force_unit = forced_unit; - - // Some instructions do not respect forced unit - // Tested with Tales of Vesperia - if (SIP()) break; - if (handle_tex_srb(opcode)) break; - - // FENCT/FENCB do not actually reject instructions if they dont match the forced unit - // Looks like they are optimization hints and not hard-coded forced paths - if 
(handle_sct_scb(opcode)) break; - forced_unit = FORCE_NONE; - - rsx_log.error("Unknown/illegal instruction: 0x%x (forced unit %d)", opcode, prev_force_unit); - break; + m_size += m_instruction->length * 4; + if (dst.end) break; } - m_size += m_offset; - - if (dst.end) break; - - ensure(m_offset % sizeof(u32) == 0); - data += m_offset / sizeof(u32); + // TODO: Handle block epilogue if needed } while (m_code_level > 1) diff --git a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h index 467c6f3ac7..b68750bdfc 100644 --- a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h +++ b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.h @@ -3,6 +3,8 @@ #include "FragmentProgramRegister.h" #include "RSXFragmentProgram.h" +#include "Assembler/CFG.h" + #include #include @@ -39,17 +41,16 @@ class FragmentProgramDecompiler SRC2 src2; u32 opflags; + const rsx::assembler::Instruction* m_instruction; + std::string main; u32& m_size; u32 m_const_index = 0; - u32 m_offset; u32 m_location = 0; bool m_is_valid_ucode = true; u32 m_loop_count; int m_code_level; - std::vector m_end_offsets; - std::vector m_else_offsets; std::unordered_map m_constant_offsets; std::array temp_registers; diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl index bdb432d7e5..1e0b66c36c 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl @@ -105,7 +105,6 @@ uint get_z_index(const in uint x_, const in uint y_, const in uint z_) void decode_16b(const in uint texel_id, in uint x, const in uint y, const in uint z) { - const uint masks[] = { 0x0000FFFF, 0xFFFF0000 }; uint accumulator = 0; const uint subword_count = min(invocation.size.x, 2); @@ -113,7 +112,9 @@ void decode_16b(const in uint texel_id, in uint x, const in uint y, const in uin { uint src_texel_id = get_z_index(x, y, z); uint src_id = (src_texel_id + 
invocation.data_offset); - accumulator |= data_in[src_id / 2] & masks[subword]; + int src_bit_offset = int(src_id % 2) << 4; + uint src_value = bitfieldExtract(data_in[src_id / 2], src_bit_offset, 16); + accumulator = bitfieldInsert(accumulator, src_value, int(subword << 4), 16); } data_out[texel_id / 2] = %f(accumulator); @@ -123,7 +124,6 @@ void decode_16b(const in uint texel_id, in uint x, const in uint y, const in uin void decode_8b(const in uint texel_id, in uint x, const in uint y, const in uint z) { - const uint masks[] = { 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000 }; uint accumulator = 0; const uint subword_count = min(invocation.size.x, 4); @@ -131,7 +131,9 @@ void decode_8b(const in uint texel_id, in uint x, const in uint y, const in uint { uint src_texel_id = get_z_index(x, y, z); uint src_id = (src_texel_id + invocation.data_offset); - accumulator |= data_in[src_id / 4] & masks[subword]; + int src_bit_offset = int(src_id % 4) << 3; + uint src_value = bitfieldExtract(data_in[src_id / 4], src_bit_offset, 8); + accumulator = bitfieldInsert(accumulator, src_value, int(subword << 3), 8); } data_out[texel_id / 4] = accumulator; diff --git a/rpcs3/Emu/RSX/VK/vkutils/buffer_object.cpp b/rpcs3/Emu/RSX/VK/vkutils/buffer_object.cpp index 4d7c5237cc..daf60ad03c 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/buffer_object.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/buffer_object.cpp @@ -39,11 +39,20 @@ namespace vk return false; } - buffer::buffer(const vk::render_device& dev, u64 size, const memory_type_info& memory_type, u32 access_flags, VkBufferUsageFlags usage, VkBufferCreateFlags flags, vmm_allocation_pool allocation_pool) + buffer::buffer( + const vk::render_device& dev, + u64 size, + const memory_type_info& memory_type, + u32 access_flags, + VkBufferUsageFlags usage, + VkBufferCreateFlags flags, + vmm_allocation_pool allocation_pool) : m_device(dev) { + const bool nullable = !!(flags & VK_BUFFER_CREATE_ALLOW_NULL_RPCS3); + info.sType = 
VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - info.flags = flags; + info.flags = flags & ~VK_BUFFER_CREATE_SPECIAL_FLAGS_RPCS3; info.size = size; info.usage = usage; info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; @@ -60,8 +69,18 @@ namespace vk fmt::throw_exception("No compatible memory type was found!"); } - memory = std::make_unique(m_device, memory_reqs.size, memory_reqs.alignment, allocation_type_info, allocation_pool); - vkBindBufferMemory(dev, value, memory->get_vk_device_memory(), memory->get_vk_device_memory_offset()); + memory = std::make_unique(m_device, memory_reqs.size, memory_reqs.alignment, allocation_type_info, allocation_pool, nullable); + if (auto device_memory = memory->get_vk_device_memory(); + device_memory != VK_NULL_HANDLE) + { + vkBindBufferMemory(dev, value, device_memory, memory->get_vk_device_memory_offset()); + } + else + { + ensure(nullable); + vkDestroyBuffer(m_device, value, nullptr); + value = VK_NULL_HANDLE; + } } buffer::buffer(const vk::render_device& dev, VkBufferUsageFlags usage, void* host_pointer, u64 size) diff --git a/rpcs3/Emu/RSX/VK/vkutils/buffer_object.h b/rpcs3/Emu/RSX/VK/vkutils/buffer_object.h index c74cb1aaa5..ba5309749a 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/buffer_object.h +++ b/rpcs3/Emu/RSX/VK/vkutils/buffer_object.h @@ -7,6 +7,13 @@ namespace vk { + enum : u32 + { + VK_BUFFER_CREATE_ALLOW_NULL_RPCS3 = 0x80000000, + + VK_BUFFER_CREATE_SPECIAL_FLAGS_RPCS3 = (VK_BUFFER_CREATE_ALLOW_NULL_RPCS3) + }; + struct buffer_view : public unique_resource { VkBufferView value; @@ -30,8 +37,21 @@ namespace vk VkBufferCreateInfo info = {}; std::unique_ptr memory; - buffer(const vk::render_device& dev, u64 size, const memory_type_info& memory_type, u32 access_flags, VkBufferUsageFlags usage, VkBufferCreateFlags flags, vmm_allocation_pool allocation_pool); - buffer(const vk::render_device& dev, VkBufferUsageFlags usage, void* host_pointer, u64 size); + buffer( + const vk::render_device& dev, + u64 size, + const memory_type_info& 
memory_type, + u32 access_flags, + VkBufferUsageFlags usage, + VkBufferCreateFlags flags, + vmm_allocation_pool allocation_pool); + + buffer( + const vk::render_device& dev, + VkBufferUsageFlags usage, + void* host_pointer, + u64 size); + ~buffer(); void* map(u64 offset, u64 size); diff --git a/rpcs3/Emu/RSX/VK/vkutils/data_heap.cpp b/rpcs3/Emu/RSX/VK/vkutils/data_heap.cpp index ba1b4e79c1..7fa6a46a81 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/data_heap.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/data_heap.cpp @@ -47,9 +47,28 @@ namespace vk usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; memory_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; memory_index = memory_map.device_local; + m_prefer_writethrough = false; } - heap = std::make_unique(*g_render_device, size, memory_index, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM); + VkFlags create_flags = 0; + if (m_prefer_writethrough) + { + create_flags |= VK_BUFFER_CREATE_ALLOW_NULL_RPCS3; + } + + heap = std::make_unique(*g_render_device, size, memory_index, memory_flags, usage, create_flags, VMM_ALLOCATION_POOL_SYSTEM); + + if (!heap->value) + { + rsx_log.warning("Could not place heap '%s' into Re-BAR memory. Will attempt to use regular host-visible memory.", m_name); + ensure(m_prefer_writethrough); + + // We failed to place the buffer in rebar memory. Try again in host-visible. 
+ m_prefer_writethrough = false; + auto gc = get_resource_manager(); + gc->dispose(heap); + heap = std::make_unique(*g_render_device, size, memory_map.host_visible_coherent, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM); + } initial_size = size; notify_on_grow = bool(notify); @@ -112,6 +131,7 @@ namespace vk auto gc = get_resource_manager(); if (shadow) { + ensure(!m_prefer_writethrough); rsx_log.warning("Buffer usage %u is not heap-compatible using this driver, explicit staging buffer in use", usage); gc->dispose(shadow); @@ -122,7 +142,25 @@ namespace vk } gc->dispose(heap); - heap = std::make_unique(*g_render_device, aligned_new_size, memory_index, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM); + + VkFlags create_flags = 0; + if (m_prefer_writethrough) + { + create_flags |= VK_BUFFER_CREATE_ALLOW_NULL_RPCS3; + } + + heap = std::make_unique(*g_render_device, aligned_new_size, memory_index, memory_flags, usage, create_flags, VMM_ALLOCATION_POOL_SYSTEM); + + if (!heap->value) + { + rsx_log.warning("Could not place heap '%s' into Re-BAR memory. Will attempt to use regular host-visible memory.", m_name); + ensure(m_prefer_writethrough); + + // We failed to place the buffer in rebar memory. Try again in host-visible. 
+ m_prefer_writethrough = false; + gc->dispose(heap); + heap = std::make_unique(*g_render_device, aligned_new_size, memory_map.host_visible_coherent, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM); + } if (notify_on_grow) { diff --git a/rpcs3/Emu/system_utils.cpp b/rpcs3/Emu/system_utils.cpp index ba98a44795..e840887bac 100644 --- a/rpcs3/Emu/system_utils.cpp +++ b/rpcs3/Emu/system_utils.cpp @@ -101,6 +101,48 @@ namespace rpcs3::utils return worker(); } + std::vector> get_vfs_disk_usage() + { + std::vector> disk_usage; + + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_hdd0_dir(), 1); data_size != umax) + { + disk_usage.push_back({"dev_hdd0", data_size}); + } + + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_hdd1_dir(), 1); data_size != umax) + { + disk_usage.push_back({"dev_hdd1", data_size}); + } + + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_flash_dir(), 1); data_size != umax) + { + disk_usage.push_back({"dev_flash", data_size}); + } + + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_flash2_dir(), 1); data_size != umax) + { + disk_usage.push_back({"dev_flash2", data_size}); + } + + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_flash3_dir(), 1); data_size != umax) + { + disk_usage.push_back({"dev_flash3", data_size}); + } + + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_bdvd_dir(), 1); data_size != umax) + { + disk_usage.push_back({"dev_bdvd", data_size}); + } + + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_games_dir(), 1); data_size != umax) + { + disk_usage.push_back({"games", data_size}); + } + + return disk_usage; + } + std::string get_emu_dir() { const std::string& emu_dir_ = g_cfg_vfs.emulator_dir; @@ -122,6 +164,36 @@ namespace rpcs3::utils return g_cfg_vfs.get(g_cfg_vfs.dev_hdd1, get_emu_dir()); } + std::string get_flash_dir() + { + return g_cfg_vfs.get(g_cfg_vfs.dev_flash, get_emu_dir()); + } + + std::string get_flash2_dir() + { + return 
g_cfg_vfs.get(g_cfg_vfs.dev_flash2, get_emu_dir()); + } + + std::string get_flash3_dir() + { + return g_cfg_vfs.get(g_cfg_vfs.dev_flash3, get_emu_dir()); + } + + std::string get_bdvd_dir() + { + return g_cfg_vfs.get(g_cfg_vfs.dev_bdvd, get_emu_dir()); + } + + u64 get_cache_disk_usage() + { + if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_cache_dir(), 1); data_size != umax) + { + return data_size; + } + + return 0; + } + std::string get_cache_dir() { return fs::get_cache_dir() + "cache/"; diff --git a/rpcs3/Emu/system_utils.hpp b/rpcs3/Emu/system_utils.hpp index 30ccb0add0..b4142dacb9 100644 --- a/rpcs3/Emu/system_utils.hpp +++ b/rpcs3/Emu/system_utils.hpp @@ -23,10 +23,19 @@ namespace rpcs3::utils bool install_pkg(const std::string& path); + // VFS directories and disk usage + std::vector> get_vfs_disk_usage(); std::string get_emu_dir(); std::string get_games_dir(); std::string get_hdd0_dir(); std::string get_hdd1_dir(); + std::string get_flash_dir(); + std::string get_flash2_dir(); + std::string get_flash3_dir(); + std::string get_bdvd_dir(); + + // Cache directories and disk usage + u64 get_cache_disk_usage(); std::string get_cache_dir(); std::string get_cache_dir(std::string_view module_path); diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 0bbea41832..1b5716f01b 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -156,6 +156,7 @@ + @@ -699,6 +700,8 @@ + + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 65cd509f85..23b7ef174d 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -133,6 +133,9 @@ {ce6d6b90-8313-4273-b46c-d92bd450c002} + + {d99df916-8a99-428b-869a-9f14ac0ab411} + @@ -1372,6 +1375,9 @@ Emu\Io + + Emu\GPU\RSX\Program\Assembler + @@ -2764,6 +2770,12 @@ Utilities + + Emu\GPU\RSX\Program\Assembler + + + Emu\GPU\RSX\Program\Assembler + diff --git a/rpcs3/rpcs3qt/log_frame.cpp b/rpcs3/rpcs3qt/log_frame.cpp index 4dd664a99e..a155cf215d 100644 --- 
a/rpcs3/rpcs3qt/log_frame.cpp +++ b/rpcs3/rpcs3qt/log_frame.cpp @@ -4,10 +4,14 @@ #include "hex_validator.h" #include "memory_viewer_panel.h" +#include "Emu/System.h" +#include "Emu/system_utils.hpp" #include "Utilities/lockless.h" #include "util/asm.hpp" +#include #include +#include #include #include #include @@ -17,6 +21,8 @@ #include #include +LOG_CHANNEL(sys_log, "SYS"); + extern fs::file g_tty; extern atomic_t g_tty_size; extern std::array, 16> g_tty_input; @@ -165,6 +171,28 @@ log_frame::log_frame(std::shared_ptr _gui_settings, QWidget* paren connect(m_timer, &QTimer::timeout, this, &log_frame::UpdateUI); } +void log_frame::show_disk_usage(const std::vector>& vfs_disk_usage, u64 cache_disk_usage) +{ + QString text; + u64 tot_data_size = 0; + + for (const auto& [dev, data_size] : vfs_disk_usage) + { + text += tr("\n %0: %1").arg(QString::fromStdString(dev)).arg(gui::utils::format_byte_size(data_size)); + tot_data_size += data_size; + } + + if (!text.isEmpty()) + { + text = tr("\n VFS disk usage: %0%1").arg(gui::utils::format_byte_size(tot_data_size)).arg(text); + } + + text += tr("\n Cache disk usage: %0").arg(gui::utils::format_byte_size(cache_disk_usage)); + + sys_log.success("%s", text); + QMessageBox::information(this, tr("Disk usage"), text); +} + void log_frame::SetLogLevel(logs::level lev) const { switch (lev) @@ -245,6 +273,26 @@ void log_frame::CreateAndConnectActions() m_tty->clear(); }); + m_show_disk_usage_act = new QAction(tr("Show Disk Usage"), this); + connect(m_show_disk_usage_act, &QAction::triggered, [this]() + { + if (m_disk_usage_future.isRunning()) + { + return; // Still running the last request + } + + m_disk_usage_future = QtConcurrent::run([this]() + { + const std::vector> vfs_disk_usage = rpcs3::utils::get_vfs_disk_usage(); + const u64 cache_disk_usage = rpcs3::utils::get_cache_disk_usage(); + + Emu.CallFromMainThread([this, vfs_disk_usage, cache_disk_usage]() + { + show_disk_usage(vfs_disk_usage, cache_disk_usage); + }, nullptr, 
false); + }); + }); + m_perform_goto_on_debugger = new QAction(tr("Go-To On The Debugger"), this); connect(m_perform_goto_on_debugger, &QAction::triggered, [this]() { @@ -369,6 +417,9 @@ void log_frame::CreateAndConnectActions() { QMenu* menu = m_log->createStandardContextMenu(); menu->addAction(m_clear_act); + menu->addSeparator(); + menu->addAction(m_show_disk_usage_act); + menu->addSeparator(); menu->addAction(m_perform_goto_on_debugger); menu->addAction(m_perform_goto_thread_on_debugger); menu->addAction(m_perform_show_in_mem_viewer); diff --git a/rpcs3/rpcs3qt/log_frame.h b/rpcs3/rpcs3qt/log_frame.h index 0de081863c..159fdd38aa 100644 --- a/rpcs3/rpcs3qt/log_frame.h +++ b/rpcs3/rpcs3qt/log_frame.h @@ -8,6 +8,7 @@ #include +#include #include #include #include @@ -38,6 +39,7 @@ protected: private Q_SLOTS: void UpdateUI(); private: + void show_disk_usage(const std::vector>& vfs_disk_usage, u64 cache_disk_usage); void SetLogLevel(logs::level lev) const; void SetTTYLogging(bool val) const; @@ -48,6 +50,7 @@ private: std::unique_ptr m_find_dialog; QTimer* m_timer = nullptr; + QFuture m_disk_usage_future; std::vector m_color; QColor m_color_stack; @@ -72,6 +75,7 @@ private: QAction* m_clear_act = nullptr; QAction* m_clear_tty_act = nullptr; + QAction* m_show_disk_usage_act = nullptr; QAction* m_perform_goto_on_debugger = nullptr; QAction* m_perform_goto_thread_on_debugger = nullptr; QAction* m_perform_show_in_mem_viewer = nullptr; diff --git a/rpcs3/tests/rpcs3_test.vcxproj b/rpcs3/tests/rpcs3_test.vcxproj index 4f0d136a9a..22992e6a07 100644 --- a/rpcs3/tests/rpcs3_test.vcxproj +++ b/rpcs3/tests/rpcs3_test.vcxproj @@ -88,6 +88,7 @@ + diff --git a/rpcs3/tests/test_rsx_cfg.cpp b/rpcs3/tests/test_rsx_cfg.cpp new file mode 100644 index 0000000000..1708774d76 --- /dev/null +++ b/rpcs3/tests/test_rsx_cfg.cpp @@ -0,0 +1,239 @@ +#include + +#include "Emu/RSX/Common/simple_array.hpp" +#include "Emu/RSX/Program/Assembler/CFG.h" +#include "Emu/RSX/Program/RSXFragmentProgram.h" 
+ +#include + +namespace rsx::assembler +{ + auto swap_bytes16 = [](u32 dword) -> u32 + { + // Lazy encode, but good enough for what we need here. + union v32 + { + u32 HEX; + u8 _v[4]; + }; + + u8* src_bytes = reinterpret_cast(&dword); + v32 dst_bytes; + + dst_bytes._v[0] = src_bytes[1]; + dst_bytes._v[1] = src_bytes[0]; + dst_bytes._v[2] = src_bytes[3]; + dst_bytes._v[3] = src_bytes[2]; + + return dst_bytes.HEX; + }; + + // Instruction mocks because we don't have a working assember (yet) + auto encode_instruction = [](u32 opcode, bool end = false) -> v128 + { + OPDEST dst{}; + dst.opcode = opcode; + + if (end) + { + dst.end = 1; + } + + return v128::from32(swap_bytes16(dst.HEX), 0, 0, 0); + }; + + auto create_if(u32 end, u32 _else = 0) + { + OPDEST dst{}; + dst.opcode = RSX_FP_OPCODE_IFE & 0x3Fu; + + SRC1 src1{}; + src1.else_offset = (_else ? _else : end) << 2; + src1.opcode_is_branch = 1; + + SRC2 src2{}; + src2.end_offset = end << 2; + + return v128::from32(swap_bytes16(dst.HEX), 0, swap_bytes16(src1.HEX), swap_bytes16(src2.HEX)); + }; + + TEST(CFG, FpToCFG_Basic) + { + rsx::simple_array buffer = { + encode_instruction(RSX_FP_OPCODE_ADD), + encode_instruction(RSX_FP_OPCODE_MOV, true) + }; + + RSXFragmentProgram program{}; + program.data = buffer.data(); + + FlowGraph graph = deconstruct_fragment_program(program); + + EXPECT_EQ(graph.blocks.size(), 1); + EXPECT_EQ(graph.blocks.front().instructions.size(), 2); + EXPECT_EQ(graph.blocks.front().instructions.front().length, 4); + EXPECT_EQ(graph.blocks.front().instructions[0].addr, 0); + EXPECT_EQ(graph.blocks.front().instructions[1].addr, 16); + } + + TEST(CFG, FpToCFG_IF) + { + rsx::simple_array buffer = { + encode_instruction(RSX_FP_OPCODE_ADD), // 0 + encode_instruction(RSX_FP_OPCODE_MOV), // 1 + create_if(4), // 2 (BR, 4) + encode_instruction(RSX_FP_OPCODE_ADD), // 3 + encode_instruction(RSX_FP_OPCODE_MOV, true), // 4 (Merge block) + }; + + const std::pair expected_block_data[3] = { + { 0, 3 }, // Head + { 3, 1 
}, // Branch + { 4, 1 }, // Merge + }; + + RSXFragmentProgram program{}; + program.data = buffer.data(); + + FlowGraph graph = deconstruct_fragment_program(program); + + ASSERT_EQ(graph.blocks.size(), 3); + + int i = 0; + for (auto it = graph.blocks.begin(); it != graph.blocks.end(); ++it) + { + const auto& expected = expected_block_data[i++]; + EXPECT_EQ(it->id, expected.first); + EXPECT_EQ(it->instructions.size(), expected.second); + } + + // Check edges + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 3))->pred[0].type, EdgeType::IF); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ[0].type, EdgeType::IF); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 4))->pred[0].type, EdgeType::ENDIF); + } + + TEST(CFG, FpToCFG_NestedIF) + { + rsx::simple_array buffer = { + encode_instruction(RSX_FP_OPCODE_ADD), // 0 + encode_instruction(RSX_FP_OPCODE_MOV), // 1 + create_if(8), // 2 (BR, 8) + encode_instruction(RSX_FP_OPCODE_ADD), // 3 + create_if(6), // 4 (BR, 6) + encode_instruction(RSX_FP_OPCODE_MOV), // 5 + encode_instruction(RSX_FP_OPCODE_MOV), // 6 (merge block 1) + encode_instruction(RSX_FP_OPCODE_ADD), // 7 + encode_instruction(RSX_FP_OPCODE_MOV, true) // 8 (merge block 2 + }; + + const std::pair expected_block_data[5] = { + { 0, 3 }, // Head + { 3, 2 }, // Branch 1 + { 5, 1 }, // Branch 2 + { 6, 2 }, // Merge 1 + { 8, 1 }, // Merge 2 + }; + + RSXFragmentProgram program{}; + program.data = buffer.data(); + + FlowGraph graph = deconstruct_fragment_program(program); + + ASSERT_EQ(graph.blocks.size(), 5); + + int i = 0; + for (auto it = graph.blocks.begin(); it != graph.blocks.end(); ++it) + { + const auto& expected = expected_block_data[i++]; + EXPECT_EQ(it->id, expected.first); + EXPECT_EQ(it->instructions.size(), expected.second); + } + } + + TEST(CFG, FpToCFG_NestedIF_MultiplePred) + { + rsx::simple_array buffer = { + encode_instruction(RSX_FP_OPCODE_ADD), // 0 + 
encode_instruction(RSX_FP_OPCODE_MOV), // 1 + create_if(6), // 2 (BR, 6) + encode_instruction(RSX_FP_OPCODE_ADD), // 3 + create_if(6), // 4 (BR, 6) + encode_instruction(RSX_FP_OPCODE_MOV), // 5 + encode_instruction(RSX_FP_OPCODE_MOV), // 6 (merge block) + encode_instruction(RSX_FP_OPCODE_ADD), // 7 + encode_instruction(RSX_FP_OPCODE_MOV, true) // 8 + }; + + const std::pair expected_block_data[4] = { + { 0, 3 }, // Head + { 3, 2 }, // Branch 1 + { 5, 1 }, // Branch 2 + { 6, 3 }, // Merge + }; + + RSXFragmentProgram program{}; + program.data = buffer.data(); + + FlowGraph graph = deconstruct_fragment_program(program); + + ASSERT_EQ(graph.blocks.size(), 4); + + int i = 0; + for (auto it = graph.blocks.begin(); it != graph.blocks.end(); ++it) + { + const auto& expected = expected_block_data[i++]; + EXPECT_EQ(it->id, expected.first); + EXPECT_EQ(it->instructions.size(), expected.second); + } + + // Predecessors must be ordered, closest first + ASSERT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred.size(), 2); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred[0].type, EdgeType::ENDIF); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred[0].from->id, 3); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred[1].type, EdgeType::ENDIF); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6))->pred[1].from->id, 0); + + // Successors must also be ordered, closest first + ASSERT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ.size(), 2); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ[0].type, EdgeType::IF); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ[0].to->id, 3); + EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ[1].type, EdgeType::ENDIF); + 
EXPECT_EQ(std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0))->succ[1].to->id, 6); + } + + TEST(CFG, FpToCFG_IF_ELSE) + { + rsx::simple_array buffer = { + encode_instruction(RSX_FP_OPCODE_ADD), // 0 + encode_instruction(RSX_FP_OPCODE_MOV), // 1 + create_if(6, 4), // 2 (BR, 6) + encode_instruction(RSX_FP_OPCODE_ADD), // 3 + encode_instruction(RSX_FP_OPCODE_MOV), // 4 (Else) + encode_instruction(RSX_FP_OPCODE_ADD), // 5 + encode_instruction(RSX_FP_OPCODE_MOV, true), // 6 (Merge) + }; + + const std::pair expected_block_data[4] = { + { 0, 3 }, // Head + { 3, 1 }, // Branch positive + { 4, 2 }, // Branch negative + { 6, 1 }, // Merge + }; + + RSXFragmentProgram program{}; + program.data = buffer.data(); + + FlowGraph graph = deconstruct_fragment_program(program); + + ASSERT_EQ(graph.blocks.size(), 4); + + int i = 0; + for (auto it = graph.blocks.begin(); it != graph.blocks.end(); ++it) + { + const auto& expected = expected_block_data[i++]; + EXPECT_EQ(it->id, expected.first); + EXPECT_EQ(it->instructions.size(), expected.second); + } + } +} diff --git a/rpcs3/tests/test_simple_array.cpp b/rpcs3/tests/test_simple_array.cpp index 90a0e73575..0627c1d4d1 100644 --- a/rpcs3/tests/test_simple_array.cpp +++ b/rpcs3/tests/test_simple_array.cpp @@ -303,4 +303,24 @@ namespace rsx EXPECT_EQ(data_ptr1 & 15, 0); EXPECT_EQ(data_ptr2 & 127, 0); } + + TEST(SimpleArray, Find) + { + const rsx::simple_array arr{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 + }; + + EXPECT_EQ(*arr.find(8), 8); + EXPECT_EQ(arr.find(99), nullptr); + } + + TEST(SimpleArray, FindIf) + { + const rsx::simple_array arr{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 + }; + + EXPECT_EQ(*arr.find_if(FN(x == 8)), 8); + EXPECT_EQ(arr.find_if(FN(x == 99)), nullptr); + } } diff --git a/rpcs3/util/asm.hpp b/rpcs3/util/asm.hpp index 6441a00f6b..5fa4ba58d2 100644 --- a/rpcs3/util/asm.hpp +++ b/rpcs3/util/asm.hpp @@ -60,7 +60,7 @@ namespace utils #if defined(ARCH_X64) return _m_prefetchw(const_cast(ptr)); #else - return 
__builtin_prefetch(ptr, 1, 0); + return __builtin_prefetch(ptr, 1, 3); #endif } diff --git a/rpcs3/util/bless.hpp b/rpcs3/util/bless.hpp index af2f8d32f3..6a097a2c64 100644 --- a/rpcs3/util/bless.hpp +++ b/rpcs3/util/bless.hpp @@ -1,10 +1,12 @@ #pragma once +#include + namespace utils { // Hack. Pointer cast util to workaround UB. Use with extreme care. - template - [[nodiscard]] T* bless(U* ptr) + template requires (std::is_pointer_v>) + [[nodiscard]] inline T* bless(const U& ptr) { #ifdef _MSC_VER return (T*)ptr; @@ -21,3 +23,4 @@ namespace utils #endif } } + diff --git a/rpcs3/util/types.hpp b/rpcs3/util/types.hpp index 2ef140e941..9be3118f4d 100644 --- a/rpcs3/util/types.hpp +++ b/rpcs3/util/types.hpp @@ -999,17 +999,18 @@ template requires (std::is_integral_v; constexpr bool is_to_signed = std::is_signed_v; - constexpr auto from_mask = (is_from_signed && !is_to_signed) ? UnFrom{umax} >> 1 : UnFrom{umax}; + // For unsigned/signed mismatch, create an "unsigned" compatible mask + constexpr auto from_mask = (is_from_signed && !is_to_signed && sizeof(CommonFrom) <= sizeof(CommonTo)) ? UnFrom{umax} >> 1 : UnFrom{umax}; constexpr auto to_mask = (is_to_signed && !is_from_signed) ? UnTo{umax} >> 1 : UnTo{umax}; - constexpr auto mask = ~(from_mask & to_mask); + constexpr auto mask = static_cast(~(from_mask & to_mask)); - // Signed to unsigned always require test - // Otherwise, this is bit-wise narrowing or conversion between types of different signedness of the same size - if constexpr ((is_from_signed && !is_to_signed) || to_mask < from_mask) + // If destination ("unsigned" compatible) mask is smaller than source ("unsigned" compatible) mask + // It requires narrowing. + if constexpr (!!mask) { // Try to optimize test if both are of the same signedness - if (is_from_signed != is_to_signed ? !!(value & mask) : static_cast(value) != value) [[unlikely]] + if (is_from_signed != is_to_signed ? 
!!(value & mask) : static_cast(static_cast(value)) != value) [[unlikely]] { fmt::raw_verify_error(src_loc, u8"Narrowing error", +value); }