Merge branch 'master' into windows-clang

This commit is contained in:
Live session user 2025-11-27 05:53:38 -08:00
commit ddf1a098c7
30 changed files with 1083 additions and 182 deletions

View file

@ -394,7 +394,7 @@ namespace fmt
} }
#if !defined(_MSC_VER) || defined(__clang__) #if !defined(_MSC_VER) || defined(__clang__)
[[noreturn]] ~throw_exception(); [[noreturn]] ~throw_exception() = default;
#endif #endif
}; };

View file

@ -197,6 +197,7 @@ if(BUILD_RPCS3_TESTS)
tests/test_tuple.cpp tests/test_tuple.cpp
tests/test_simple_array.cpp tests/test_simple_array.cpp
tests/test_address_range.cpp tests/test_address_range.cpp
tests/test_rsx_cfg.cpp
) )
target_link_libraries(rpcs3_test target_link_libraries(rpcs3_test

View file

@ -157,7 +157,7 @@ std::array<u8, PASSPHRASE_KEY_LEN> sc_combine_laid_paid(s64 laid, s64 paid)
{ {
const std::string paid_laid = fmt::format("%016llx%016llx", laid, paid); const std::string paid_laid = fmt::format("%016llx%016llx", laid, paid);
std::array<u8, PASSPHRASE_KEY_LEN> out{}; std::array<u8, PASSPHRASE_KEY_LEN> out{};
hex_to_bytes(out.data(), paid_laid.c_str(), PASSPHRASE_KEY_LEN * 2); hex_to_bytes(out.data(), paid_laid, PASSPHRASE_KEY_LEN * 2);
return out; return out;
} }

View file

@ -517,6 +517,7 @@ target_sources(rpcs3_emu PRIVATE
RSX/Overlays/overlay_video.cpp RSX/Overlays/overlay_video.cpp
RSX/Overlays/Shaders/shader_loading_dialog.cpp RSX/Overlays/Shaders/shader_loading_dialog.cpp
RSX/Overlays/Shaders/shader_loading_dialog_native.cpp RSX/Overlays/Shaders/shader_loading_dialog_native.cpp
RSX/Program/Assembler/FPToCFG.cpp
RSX/Program/CgBinaryProgram.cpp RSX/Program/CgBinaryProgram.cpp
RSX/Program/CgBinaryFragmentProgram.cpp RSX/Program/CgBinaryFragmentProgram.cpp
RSX/Program/CgBinaryVertexProgram.cpp RSX/Program/CgBinaryVertexProgram.cpp

View file

@ -3,7 +3,6 @@
#include "util/types.hpp" #include "util/types.hpp"
#include "Emu/Memory/vm_ptr.h" #include "Emu/Memory/vm_ptr.h"
#include "Emu/Cell/ErrorCodes.h" #include "Emu/Cell/ErrorCodes.h"
#include <mutex>
#include <vector> #include <vector>
#include <mutex> #include <mutex>

View file

@ -416,7 +416,6 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module<lv2_obj>& info)
assert(ptr_inst->getResultElementType() == m_ir->getPtrTy()); assert(ptr_inst->getResultElementType() == m_ir->getPtrTy());
const auto faddr = m_ir->CreateLoad(ptr_inst->getResultElementType(), ptr_inst); const auto faddr = m_ir->CreateLoad(ptr_inst->getResultElementType(), ptr_inst);
const auto faddr_int = m_ir->CreatePtrToInt(faddr, get_type<uptr>());
const auto pos_32 = m_reloc ? m_ir->CreateAdd(func_pc, m_seg0) : func_pc; const auto pos_32 = m_reloc ? m_ir->CreateAdd(func_pc, m_seg0) : func_pc;
const auto pos = m_ir->CreateShl(pos_32, 1); const auto pos = m_ir->CreateShl(pos_32, 1);
const auto ptr = m_ir->CreatePtrAdd(m_exec, pos); const auto ptr = m_ir->CreatePtrAdd(m_exec, pos);
@ -427,7 +426,7 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module<lv2_obj>& info)
const auto seg_val = m_ir->CreateTrunc(m_ir->CreateLShr(m_seg0, 13), get_type<u16>()); const auto seg_val = m_ir->CreateTrunc(m_ir->CreateLShr(m_seg0, 13), get_type<u16>());
// Store to jumptable // Store to jumptable
m_ir->CreateStore(faddr_int, ptr); m_ir->CreateStore(faddr, ptr);
m_ir->CreateStore(seg_val, seg_ptr); m_ir->CreateStore(seg_val, seg_ptr);
// Increment index and branch back to loop // Increment index and branch back to loop

View file

@ -3122,7 +3122,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
u64 dabs = 0; u64 dabs = 0;
u64 drel = 0; u64 drel = 0;
for (u32 i = start; i < limit; i += 4) for (u32 i = start, abs_fail = 0, rel_fail = 0; i < limit; i += 4)
{ {
const u32 target = ls[i / 4]; const u32 target = ls[i / 4];
@ -3135,13 +3135,27 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
if (target >= lsa && target < SPU_LS_SIZE) if (target >= lsa && target < SPU_LS_SIZE)
{ {
// Possible jump table entry (absolute) // Possible jump table entry (absolute)
jt_abs.push_back(target); if (!abs_fail)
{
jt_abs.push_back(target);
}
}
else
{
abs_fail++;
} }
if (target + start >= lsa && target + start < SPU_LS_SIZE) if (target + start >= lsa && target + start < SPU_LS_SIZE)
{ {
// Possible jump table entry (relative) // Possible jump table entry (relative)
jt_rel.push_back(target + start); if (!rel_fail)
{
jt_rel.push_back(target + start);
}
}
else
{
rel_fail++;
} }
if (std::max(jt_abs.size(), jt_rel.size()) * 4 + start <= i) if (std::max(jt_abs.size(), jt_rel.size()) * 4 + start <= i)
@ -3153,6 +3167,35 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
} }
} }
for (usz i = 0; i < jt_abs.size(); i++)
{
if (jt_abs[i] == start + jt_abs.size() * 4)
{
// If jumptable contains absolute address of code start after the jumptable itself
// It is likely an absolute-type jumptable
bool is_good_conclusion = true;
// For verification: make sure there is none like this in relative table
for (u32 target : jt_rel)
{
if (target == start + jt_rel.size() * 4)
{
is_good_conclusion = false;
break;
}
}
if (is_good_conclusion)
{
jt_rel.clear();
}
break;
}
}
// Choose position after the jt as an anchor and compute the average distance // Choose position after the jt as an anchor and compute the average distance
for (u32 target : jt_abs) for (u32 target : jt_abs)
{ {
@ -7241,6 +7284,19 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
// Blocks starting from 0x0 or invalid instruction won't be compiled, may need special interpreter fallback // Blocks starting from 0x0 or invalid instruction won't be compiled, may need special interpreter fallback
} }
for (u32 i = 0; i < result.data.size(); i++)
{
const be_t<u32> ls_val = ls[result.lower_bound / 4 + i];
if (result.data[i] && std::bit_cast<u32>(ls_val) != result.data[i])
{
std::string out_dump;
dump(result, out_dump);
spu_log.error("SPU Function Dump:\n%s", out_dump);
fmt::throw_exception("SPU Analyzer failed: Instruction mismatch at 0x%x [read: 0x%x vs LS: 0x%x] (i=0x%x)", result.lower_bound + i * 4, std::bit_cast<be_t<u32>>(result.data[i]), ls_val, i);
}
}
return result; return result;
} }

View file

@ -1036,7 +1036,6 @@ lv2_file::open_result_t lv2_file::open(std::string_view vpath, s32 flags, s32 mo
error_code sys_fs_open(ppu_thread& ppu, vm::cptr<char> path, s32 flags, vm::ptr<u32> fd, s32 mode, vm::cptr<void> arg, u64 size) error_code sys_fs_open(ppu_thread& ppu, vm::cptr<char> path, s32 flags, vm::ptr<u32> fd, s32 mode, vm::cptr<void> arg, u64 size)
{ {
ppu.state += cpu_flag::wait; ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_open(path=%s, flags=%#o, fd=*0x%x, mode=%#o, arg=*0x%x, size=0x%llx)", path, flags, fd, mode, arg, size); sys_fs.warning("sys_fs_open(path=%s, flags=%#o, fd=*0x%x, mode=%#o, arg=*0x%x, size=0x%llx)", path, flags, fd, mode, arg, size);
@ -1085,7 +1084,6 @@ error_code sys_fs_open(ppu_thread& ppu, vm::cptr<char> path, s32 flags, vm::ptr<
error_code sys_fs_read(ppu_thread& ppu, u32 fd, vm::ptr<void> buf, u64 nbytes, vm::ptr<u64> nread) error_code sys_fs_read(ppu_thread& ppu, u32 fd, vm::ptr<void> buf, u64 nbytes, vm::ptr<u64> nread)
{ {
ppu.state += cpu_flag::wait; ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.trace("sys_fs_read(fd=%d, buf=*0x%x, nbytes=0x%llx, nread=*0x%x)", fd, buf, nbytes, nread); sys_fs.trace("sys_fs_read(fd=%d, buf=*0x%x, nbytes=0x%llx, nread=*0x%x)", fd, buf, nbytes, nread);
@ -1122,6 +1120,11 @@ error_code sys_fs_read(ppu_thread& ppu, u32 fd, vm::ptr<void> buf, u64 nbytes, v
return CELL_OK; return CELL_OK;
} }
if (nbytes >= 0x100000 && file->type != lv2_file_type::regular)
{
lv2_obj::sleep(ppu);
}
std::unique_lock lock(file->mp->mutex); std::unique_lock lock(file->mp->mutex);
if (!file->file) if (!file->file)
@ -1154,7 +1157,6 @@ error_code sys_fs_read(ppu_thread& ppu, u32 fd, vm::ptr<void> buf, u64 nbytes, v
error_code sys_fs_write(ppu_thread& ppu, u32 fd, vm::cptr<void> buf, u64 nbytes, vm::ptr<u64> nwrite) error_code sys_fs_write(ppu_thread& ppu, u32 fd, vm::cptr<void> buf, u64 nbytes, vm::ptr<u64> nwrite)
{ {
ppu.state += cpu_flag::wait; ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.trace("sys_fs_write(fd=%d, buf=*0x%x, nbytes=0x%llx, nwrite=*0x%x)", fd, buf, nbytes, nwrite); sys_fs.trace("sys_fs_write(fd=%d, buf=*0x%x, nbytes=0x%llx, nwrite=*0x%x)", fd, buf, nbytes, nwrite);
@ -1237,7 +1239,6 @@ error_code sys_fs_write(ppu_thread& ppu, u32 fd, vm::cptr<void> buf, u64 nbytes,
error_code sys_fs_close(ppu_thread& ppu, u32 fd) error_code sys_fs_close(ppu_thread& ppu, u32 fd)
{ {
ppu.state += cpu_flag::wait; ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
const auto file = idm::get_unlocked<lv2_fs_object, lv2_file>(fd); const auto file = idm::get_unlocked<lv2_fs_object, lv2_file>(fd);
@ -1314,7 +1315,6 @@ error_code sys_fs_close(ppu_thread& ppu, u32 fd)
error_code sys_fs_opendir(ppu_thread& ppu, vm::cptr<char> path, vm::ptr<u32> fd) error_code sys_fs_opendir(ppu_thread& ppu, vm::cptr<char> path, vm::ptr<u32> fd)
{ {
ppu.state += cpu_flag::wait; ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_opendir(path=%s, fd=*0x%x)", path, fd); sys_fs.warning("sys_fs_opendir(path=%s, fd=*0x%x)", path, fd);
@ -1491,7 +1491,6 @@ error_code sys_fs_readdir(ppu_thread& ppu, u32 fd, vm::ptr<CellFsDirent> dir, vm
error_code sys_fs_closedir(ppu_thread& ppu, u32 fd) error_code sys_fs_closedir(ppu_thread& ppu, u32 fd)
{ {
ppu.state += cpu_flag::wait; ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_closedir(fd=%d)", fd); sys_fs.warning("sys_fs_closedir(fd=%d)", fd);
@ -1506,7 +1505,6 @@ error_code sys_fs_closedir(ppu_thread& ppu, u32 fd)
error_code sys_fs_stat(ppu_thread& ppu, vm::cptr<char> path, vm::ptr<CellFsStat> sb) error_code sys_fs_stat(ppu_thread& ppu, vm::cptr<char> path, vm::ptr<CellFsStat> sb)
{ {
ppu.state += cpu_flag::wait; ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_stat(path=%s, sb=*0x%x)", path, sb); sys_fs.warning("sys_fs_stat(path=%s, sb=*0x%x)", path, sb);
@ -1610,7 +1608,6 @@ error_code sys_fs_stat(ppu_thread& ppu, vm::cptr<char> path, vm::ptr<CellFsStat>
error_code sys_fs_fstat(ppu_thread& ppu, u32 fd, vm::ptr<CellFsStat> sb) error_code sys_fs_fstat(ppu_thread& ppu, u32 fd, vm::ptr<CellFsStat> sb)
{ {
ppu.state += cpu_flag::wait; ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_fstat(fd=%d, sb=*0x%x)", fd, sb); sys_fs.warning("sys_fs_fstat(fd=%d, sb=*0x%x)", fd, sb);
@ -1666,7 +1663,6 @@ error_code sys_fs_link(ppu_thread&, vm::cptr<char> from, vm::cptr<char> to)
error_code sys_fs_mkdir(ppu_thread& ppu, vm::cptr<char> path, s32 mode) error_code sys_fs_mkdir(ppu_thread& ppu, vm::cptr<char> path, s32 mode)
{ {
ppu.state += cpu_flag::wait; ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_mkdir(path=%s, mode=%#o)", path, mode); sys_fs.warning("sys_fs_mkdir(path=%s, mode=%#o)", path, mode);
@ -1728,7 +1724,6 @@ error_code sys_fs_mkdir(ppu_thread& ppu, vm::cptr<char> path, s32 mode)
error_code sys_fs_rename(ppu_thread& ppu, vm::cptr<char> from, vm::cptr<char> to) error_code sys_fs_rename(ppu_thread& ppu, vm::cptr<char> from, vm::cptr<char> to)
{ {
ppu.state += cpu_flag::wait; ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_rename(from=%s, to=%s)", from, to); sys_fs.warning("sys_fs_rename(from=%s, to=%s)", from, to);
@ -1794,7 +1789,6 @@ error_code sys_fs_rename(ppu_thread& ppu, vm::cptr<char> from, vm::cptr<char> to
error_code sys_fs_rmdir(ppu_thread& ppu, vm::cptr<char> path) error_code sys_fs_rmdir(ppu_thread& ppu, vm::cptr<char> path)
{ {
ppu.state += cpu_flag::wait; ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_rmdir(path=%s)", path); sys_fs.warning("sys_fs_rmdir(path=%s)", path);
@ -1850,7 +1844,6 @@ error_code sys_fs_rmdir(ppu_thread& ppu, vm::cptr<char> path)
error_code sys_fs_unlink(ppu_thread& ppu, vm::cptr<char> path) error_code sys_fs_unlink(ppu_thread& ppu, vm::cptr<char> path)
{ {
ppu.state += cpu_flag::wait; ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_unlink(path=%s)", path); sys_fs.warning("sys_fs_unlink(path=%s)", path);
@ -1951,8 +1944,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr<void> _arg, u32
case 0x8000000a: // cellFsReadWithOffset case 0x8000000a: // cellFsReadWithOffset
case 0x8000000b: // cellFsWriteWithOffset case 0x8000000b: // cellFsWriteWithOffset
{ {
lv2_obj::sleep(ppu);
const auto arg = vm::static_ptr_cast<lv2_file_op_rw>(_arg); const auto arg = vm::static_ptr_cast<lv2_file_op_rw>(_arg);
if (_size < arg.size()) if (_size < arg.size())
@ -1992,6 +1983,11 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr<void> _arg, u32
sys_fs.error("%s type: Writing %u bytes to FD=%d (path=%s)", file->type, arg->size, file->name.data()); sys_fs.error("%s type: Writing %u bytes to FD=%d (path=%s)", file->type, arg->size, file->name.data());
} }
if (op == 0x8000000a && file->type != lv2_file_type::regular && arg->size >= 0x100000)
{
lv2_obj::sleep(ppu);
}
std::unique_lock wlock(file->mp->mutex, std::defer_lock); std::unique_lock wlock(file->mp->mutex, std::defer_lock);
std::shared_lock rlock(file->mp->mutex, std::defer_lock); std::shared_lock rlock(file->mp->mutex, std::defer_lock);
@ -2047,8 +2043,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr<void> _arg, u32
case 0x80000009: // cellFsSdataOpenByFd case 0x80000009: // cellFsSdataOpenByFd
{ {
lv2_obj::sleep(ppu);
const auto arg = vm::static_ptr_cast<lv2_file_op_09>(_arg); const auto arg = vm::static_ptr_cast<lv2_file_op_09>(_arg);
if (_size < arg.size()) if (_size < arg.size())
@ -2102,8 +2096,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr<void> _arg, u32
case 0xc0000002: // cellFsGetFreeSize (TODO) case 0xc0000002: // cellFsGetFreeSize (TODO)
{ {
lv2_obj::sleep(ppu);
const auto arg = vm::static_ptr_cast<lv2_file_c0000002>(_arg); const auto arg = vm::static_ptr_cast<lv2_file_c0000002>(_arg);
const auto& mp = g_fxo->get<lv2_fs_mount_info_map>().lookup("/dev_hdd0"); const auto& mp = g_fxo->get<lv2_fs_mount_info_map>().lookup("/dev_hdd0");
@ -2418,8 +2410,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr<void> _arg, u32
case 0xe0000012: // cellFsGetDirectoryEntries case 0xe0000012: // cellFsGetDirectoryEntries
{ {
lv2_obj::sleep(ppu);
const auto arg = vm::static_ptr_cast<lv2_file_op_dir::dir_info>(_arg); const auto arg = vm::static_ptr_cast<lv2_file_op_dir::dir_info>(_arg);
if (_size < arg.size()) if (_size < arg.size())
@ -2434,8 +2424,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr<void> _arg, u32
return CELL_EBADF; return CELL_EBADF;
} }
ppu.check_state();
u32 read_count = 0; u32 read_count = 0;
// NOTE: This function is actually capable of reading only one entry at a time // NOTE: This function is actually capable of reading only one entry at a time
@ -2593,7 +2581,6 @@ error_code sys_fs_fcntl(ppu_thread& ppu, u32 fd, u32 op, vm::ptr<void> _arg, u32
error_code sys_fs_lseek(ppu_thread& ppu, u32 fd, s64 offset, s32 whence, vm::ptr<u64> pos) error_code sys_fs_lseek(ppu_thread& ppu, u32 fd, s64 offset, s32 whence, vm::ptr<u64> pos)
{ {
ppu.state += cpu_flag::wait; ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.trace("sys_fs_lseek(fd=%d, offset=0x%llx, whence=0x%x, pos=*0x%x)", fd, offset, whence, pos); sys_fs.trace("sys_fs_lseek(fd=%d, offset=0x%llx, whence=0x%x, pos=*0x%x)", fd, offset, whence, pos);
@ -2639,7 +2626,6 @@ error_code sys_fs_lseek(ppu_thread& ppu, u32 fd, s64 offset, s32 whence, vm::ptr
error_code sys_fs_fdatasync(ppu_thread& ppu, u32 fd) error_code sys_fs_fdatasync(ppu_thread& ppu, u32 fd)
{ {
ppu.state += cpu_flag::wait; ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.trace("sys_fs_fdadasync(fd=%d)", fd); sys_fs.trace("sys_fs_fdadasync(fd=%d)", fd);
@ -2650,6 +2636,8 @@ error_code sys_fs_fdatasync(ppu_thread& ppu, u32 fd)
return CELL_EBADF; return CELL_EBADF;
} }
lv2_obj::sleep(ppu);
std::lock_guard lock(file->mp->mutex); std::lock_guard lock(file->mp->mutex);
if (!file->file) if (!file->file)
@ -2664,7 +2652,6 @@ error_code sys_fs_fdatasync(ppu_thread& ppu, u32 fd)
error_code sys_fs_fsync(ppu_thread& ppu, u32 fd) error_code sys_fs_fsync(ppu_thread& ppu, u32 fd)
{ {
ppu.state += cpu_flag::wait; ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.trace("sys_fs_fsync(fd=%d)", fd); sys_fs.trace("sys_fs_fsync(fd=%d)", fd);
@ -2675,6 +2662,8 @@ error_code sys_fs_fsync(ppu_thread& ppu, u32 fd)
return CELL_EBADF; return CELL_EBADF;
} }
lv2_obj::sleep(ppu);
std::lock_guard lock(file->mp->mutex); std::lock_guard lock(file->mp->mutex);
if (!file->file) if (!file->file)
@ -2763,7 +2752,6 @@ error_code sys_fs_get_block_size(ppu_thread& ppu, vm::cptr<char> path, vm::ptr<u
error_code sys_fs_truncate(ppu_thread& ppu, vm::cptr<char> path, u64 size) error_code sys_fs_truncate(ppu_thread& ppu, vm::cptr<char> path, u64 size)
{ {
ppu.state += cpu_flag::wait; ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_truncate(path=%s, size=0x%llx)", path, size); sys_fs.warning("sys_fs_truncate(path=%s, size=0x%llx)", path, size);
@ -2815,7 +2803,6 @@ error_code sys_fs_truncate(ppu_thread& ppu, vm::cptr<char> path, u64 size)
error_code sys_fs_ftruncate(ppu_thread& ppu, u32 fd, u64 size) error_code sys_fs_ftruncate(ppu_thread& ppu, u32 fd, u64 size)
{ {
ppu.state += cpu_flag::wait; ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_ftruncate(fd=%d, size=0x%llx)", fd, size); sys_fs.warning("sys_fs_ftruncate(fd=%d, size=0x%llx)", fd, size);
@ -3021,7 +3008,6 @@ error_code sys_fs_disk_free(ppu_thread& ppu, vm::cptr<char> path, vm::ptr<u64> t
error_code sys_fs_utime(ppu_thread& ppu, vm::cptr<char> path, vm::cptr<CellFsUtimbuf> timep) error_code sys_fs_utime(ppu_thread& ppu, vm::cptr<char> path, vm::cptr<CellFsUtimbuf> timep)
{ {
ppu.state += cpu_flag::wait; ppu.state += cpu_flag::wait;
lv2_obj::sleep(ppu);
sys_fs.warning("sys_fs_utime(path=%s, timep=*0x%x)", path, timep); sys_fs.warning("sys_fs_utime(path=%s, timep=*0x%x)", path, timep);
sys_fs.warning("** actime=%u, modtime=%u", timep->actime, timep->modtime); sys_fs.warning("** actime=%u, modtime=%u", timep->actime, timep->modtime);

View file

@ -50,6 +50,12 @@ namespace rsx
{ c.size() } -> std::integral; { c.size() } -> std::integral;
}; };
// Satisfied when an object of type T can be compared against an object of type U
// with operator== and the comparison expression has type exactly bool.
// NOTE(review): despite the name, nothing here tests for triviality; only the
// well-formedness and result type of `t1 == t2` are checked.
template <typename T, typename U>
concept is_trivially_comparable_v =
requires (T t1, U t2) {
{ t1 == t2 } -> std::same_as<bool>;
};
template <typename Ty, size_t Align=alignof(Ty)> template <typename Ty, size_t Align=alignof(Ty)>
requires std::is_trivially_destructible_v<Ty> && std::is_trivially_copyable_v<Ty> requires std::is_trivially_destructible_v<Ty> && std::is_trivially_copyable_v<Ty>
struct simple_array struct simple_array
@ -492,6 +498,50 @@ namespace rsx
return false; return false;
} }
/**
 * Linear search for the first stored element that compares equal to `value`.
 *
 * find and find_if hand back a plain pointer to the element instead of an iterator
 * to keep call sites simple. A nullptr result means "not found"; for every practical
 * use of this container that is interchangeable with "an empty object was stored".
 */
template <typename T = Ty>
	requires is_trivially_comparable_v<Ty, T>
Ty* find(const T& value)
{
	for (auto& entry : *this)
	{
		if (entry == value)
		{
			return &entry;
		}
	}

	// Walked the whole range without a match
	return nullptr;
}
// Const overload of find. It reuses the mutable implementation (which never writes
// to the container) and re-applies constness through the return type.
// Remove when we switch to C++23
template <typename T = Ty>
	requires is_trivially_comparable_v<Ty, T>
const Ty* find(const T& value) const
{
	auto* mutable_self = const_cast<simple_array<Ty, Align>*>(this);
	return mutable_self->find(value);
}
// Returns a pointer to the first element for which `predicate` evaluates to true,
// or nullptr when no element satisfies it.
Ty* find_if(std::predicate<const Ty&> auto predicate)
{
	for (auto& entry : *this)
	{
		if (!std::invoke(predicate, entry))
		{
			continue;
		}

		return &entry;
	}

	return nullptr;
}
// Const overload of find_if. Delegates to the mutable implementation, which only
// reads the elements, and exposes the result as a pointer-to-const.
// Remove with C++23
const Ty* find_if(std::predicate<const Ty&> auto predicate) const
{
	auto* mutable_self = const_cast<simple_array<Ty, Align>*>(this);
	return mutable_self->find_if(predicate);
}
bool erase_if(std::predicate<const Ty&> auto predicate) bool erase_if(std::predicate<const Ty&> auto predicate)
{ {
if (!_size) if (!_size)

View file

@ -0,0 +1,39 @@
#pragma once
#include <util/asm.hpp>
#include "IR.h"
#include <list>
struct RSXFragmentProgram;
namespace rsx::assembler
{
// Owner of every BasicBlock in a control-flow graph.
// Blocks are handed out as raw pointers, so they live in a std::list: appending
// a new block never relocates the existing ones.
struct FlowGraph
{
	std::list<BasicBlock> blocks;

	// Appends a new block and returns a pointer to it.
	//  parent    - block to link from; when null, the most recently added block
	//              (if any) becomes the parent.
	//  pc        - stored as the new block's id.
	//  edge_type - label recorded on both the parent's successor edge and the
	//              new block's predecessor edge.
	// The very first block pushed into an empty graph gets no edges.
	BasicBlock* push(BasicBlock* parent = nullptr, u32 pc = 0, EdgeType edge_type = EdgeType::NONE)
	{
		// Resolve the implicit parent before the list grows, otherwise back() would be the new block
		BasicBlock* const predecessor = (parent || blocks.empty()) ? parent : &blocks.back();

		BasicBlock& fresh = blocks.emplace_back();
		fresh.id = pc;

		if (predecessor)
		{
			predecessor->insert_succ(&fresh, edge_type);
			fresh.insert_pred(predecessor, edge_type);
		}

		return &fresh;
	}
};
FlowGraph deconstruct_fragment_program(const RSXFragmentProgram& prog);
}

View file

@ -0,0 +1,193 @@
#include "stdafx.h"
#include "CFG.h"
#include "Emu/RSX/Common/simple_array.hpp"
#include "Emu/RSX/Program/RSXFragmentProgram.h"
#include <util/asm.hpp>
#include <util/v128.hpp>
#include <span>
#if defined(ARCH_ARM64)
#if !defined(_MSC_VER)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#pragma GCC diagnostic ignored "-Wold-style-cast"
#endif
#undef FORCE_INLINE
#include "Emu/CPU/sse2neon.h"
#if !defined(_MSC_VER)
#pragma GCC diagnostic pop
#endif
#endif
namespace rsx::assembler
{
// Undoes the RSX half-word shuffle applied to fragment program instructions.
// Within every 16-bit half of each 32-bit lane the two bytes are exchanged:
// (x << 8) keeps the bytes that land in the high position of a half-word,
// (x >> 8) keeps the ones that land in the low position, and the two are OR-ed.
inline v128 decode_instruction(const v128& raw_inst)
{
	const __m128i input = static_cast<__m128i>(raw_inst);

	const __m128i to_high = _mm_and_si128(_mm_set1_epi32(0xff00ff00), _mm_slli_epi32(input, 8));
	const __m128i to_low = _mm_and_si128(_mm_set1_epi32(0x00ff00ff), _mm_srli_epi32(input, 8));

	__m128i swapped = _mm_or_si128(to_high, to_low);
	return v128::loadu(&swapped);
}
// Splits a fragment program into basic blocks linked by typed control-flow edges.
//
// The program is walked one 128-bit slot at a time (`pc` is the slot index, so the
// byte address of an instruction is pc * 16) until an instruction with the `end`
// bit set has been consumed. Every non-NOP instruction is appended, decoded, to the
// current block. IFE, LOOP and REP open new blocks:
//  - the slot right after the branch becomes an IF / LOOP block,
//  - the else target (only when it differs from the end target) becomes an ELSE block,
//  - the end target becomes an ENDIF / ENDLOOP block.
// When the walk reaches the id of a previously registered end/else block, that block
// becomes the current one.
//
// An instruction that reads a literal constant is followed by one slot of literal
// data. That slot is decoded into the upper half of Instruction::bytecode, the
// instruction length becomes 8 dwords, and the slot is skipped.
//
// Throws (fmt::throw_exception) on a CAL instruction, which is unimplemented.
// NOTE(review): the walk relies on the program containing an `end` marker; there is
// no size bound available here to stop a malformed program from being over-read.
FlowGraph deconstruct_fragment_program(const RSXFragmentProgram& prog)
{
	// For a flowgraph, we don't care at all about the actual contents, just flow control instructions.
	OPDEST dst{};
	SRC0 src0{};
	SRC1 src1{};
	SRC2 src2{};

	u32 pc = 0; // Program counter
	bool end = false;

	// Flow control data
	rsx::simple_array<BasicBlock*> end_blocks;
	rsx::simple_array<BasicBlock*> else_blocks;

	// Data block
	u32* data = static_cast<u32*>(prog.get_data());

	// Output
	FlowGraph graph{};
	BasicBlock* bb = graph.push();

	// Looks up an already created block by its id (the pc it starts at)
	auto find_block_for_pc = [&](u32 id) -> BasicBlock*
	{
		auto found = std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == id));
		if (found != graph.blocks.end())
		{
			return &(*found);
		}
		return nullptr;
	};

	// Links parent to the block starting at id, creating that block only if it does not exist yet
	auto safe_insert_block = [&](BasicBlock* parent, u32 id, EdgeType edge_type) -> BasicBlock*
	{
		if (auto found = find_block_for_pc(id))
		{
			parent->insert_succ(found, edge_type);
			found->insert_pred(parent, edge_type);
			return found;
		}

		return graph.push(parent, id, edge_type);
	};

	// True when any source operand of the instruction currently held in src0..src2 is a constant
	auto includes_literal_constant = [&]()
	{
		return src0.reg_type == RSX_FP_REGISTER_TYPE_CONSTANT ||
			src1.reg_type == RSX_FP_REGISTER_TYPE_CONSTANT ||
			src2.reg_type == RSX_FP_REGISTER_TYPE_CONSTANT;
	};

	while (!end)
	{
		// Switch to a pending end/else block once its start address is reached
		BasicBlock** found = end_blocks.find_if(FN(x->id == pc));

		if (!found)
		{
			found = else_blocks.find_if(FN(x->id == pc));
		}

		if (found)
		{
			bb = *found;
		}

		const v128 raw_inst = v128::loadu(data, pc);
		v128 decoded = decode_instruction(raw_inst);

		dst.HEX = decoded._u32[0];
		src0.HEX = decoded._u32[1];
		src1.HEX = decoded._u32[2];
		src2.HEX = decoded._u32[3];

		end = !!dst.end;
		const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6);

		if (opcode == RSX_FP_OPCODE_NOP)
		{
			pc++;
			continue;
		}

		bb->instructions.push_back({});
		auto& ir_inst = bb->instructions.back();
		std::memcpy(ir_inst.bytecode, &decoded._u32[0], 16);
		ir_inst.length = 4;
		ir_inst.addr = pc * 16;

		switch (opcode)
		{
		case RSX_FP_OPCODE_BRK:
			break;
		case RSX_FP_OPCODE_CAL:
			// Unimplemented. Also unused by the RSX compiler
			fmt::throw_exception("Unimplemented FP CAL instruction.");
			break;
		case RSX_FP_OPCODE_FENCT:
			break;
		case RSX_FP_OPCODE_FENCB:
			break;
		case RSX_FP_OPCODE_RET:
			// Outside a subroutine, this doesn't mean much. The main block can conditionally return to stop execution early.
			// This will not alter flow control.
			break;
		case RSX_FP_OPCODE_IFE:
		{
			// Inserts if and else and end blocks
			auto parent = bb;
			bb = safe_insert_block(parent, pc + 1, EdgeType::IF);
			if (src2.end_offset != src1.else_offset)
			{
				else_blocks.push_back(safe_insert_block(parent, src1.else_offset >> 2, EdgeType::ELSE));
			}
			end_blocks.push_back(safe_insert_block(parent, src2.end_offset >> 2, EdgeType::ENDIF));
			break;
		}
		case RSX_FP_OPCODE_LOOP:
		case RSX_FP_OPCODE_REP:
		{
			// Inserts for and end blocks
			auto parent = bb;
			bb = safe_insert_block(parent, pc + 1, EdgeType::LOOP);
			end_blocks.push_back(safe_insert_block(parent, src2.end_offset >> 2, EdgeType::ENDLOOP));
			break;
		}
		default:
			if (includes_literal_constant())
			{
				// The literal occupies the slot directly after the instruction.
				// Reading slot `pc` here would just copy the instruction a second time.
				const v128 constant_literal = v128::loadu(data, pc + 1);
				v128 decoded_literal = decode_instruction(constant_literal);

				std::memcpy(ir_inst.bytecode + 4, &decoded_literal._u32[0], 16);
				ir_inst.length += 4;

				// Step over the literal slot; the shared increment below steps over the instruction
				pc++;
			}
		}

		pc++;
	}

	// Sort edges for each block by distance
	for (auto& block : graph.blocks)
	{
		std::sort(block.pred.begin(), block.pred.end(), FN(x.from->id > y.from->id));
		std::sort(block.succ.begin(), block.succ.end(), FN(x.to->id < y.to->id));
	}

	// Sort block nodes by distance
	graph.blocks.sort(FN(x.id < y.id));
	return graph;
}
}

View file

@ -0,0 +1,95 @@
#pragma once
#include <util/asm.hpp>
namespace rsx::assembler
{
struct BasicBlock;
// Identifies one register operand.
struct Register
{
// Register number; defaults to 0
int id = 0;
// Flag marking the register as f16; defaults to false.
// NOTE(review): presumably "half precision" - confirm against the users of this type.
bool f16 = false;
};
// A reference to a register together with the vector components it touches.
struct RegisterRef
{
	Register reg{};

	// Vector information
	// `mask` and the x/y/z/w bit-fields share storage, so the components can be
	// addressed either all at once or one by one.
	// NOTE(review): the anonymous struct and the exact bit layout behind `mask`
	// are compiler-dependent.
	union
	{
		// Zero by default so that a default-initialized RegisterRef references no
		// components instead of carrying an indeterminate value.
		u32 mask = 0;

		struct
		{
			bool x : 1;
			bool y : 1;
			bool z : 1;
			bool w : 1;
		};
	};
};
// One program instruction in intermediate form: the raw encoding plus bookkeeping.
struct Instruction
{
// Raw data. Every instruction is max 128 bits.
// Each instruction can also have 128 bits of literal/embedded data.
// Layout: dwords 0-3 hold the instruction, dwords 4-7 hold the literal (if any).
// The `{ {} }` initializer zeroes all eight dwords.
u32 bytecode[8]{ {} };
// Address of the instruction; defaults to 0
u32 addr = 0;
// Decoded
u32 opcode = 0;
u8 length = 4; // Length in dwords
// Padding
u8 reserved0 = 0;
u16 reserved1 = 0;
// References
// Source and destination register references of this instruction
std::vector<RegisterRef> srcs;
std::vector<RegisterRef> dsts;
};
// Label carried by a FlowEdge.
// NOTE(review): the enumerator names suggest if / else / end-if and loop / end-loop
// transitions; the exact meaning is defined by the code that builds the graph.
enum class EdgeType
{
// Default label used when no edge type is supplied
NONE,
IF,
ELSE,
ENDIF,
LOOP,
ENDLOOP,
};
// A directed, labelled connection between two basic blocks.
// The blocks are referenced through non-owning raw pointers.
struct FlowEdge
{
// Label of the edge; NONE unless specified
EdgeType type = EdgeType::NONE;
// Block the edge leaves
BasicBlock* from = nullptr;
// Block the edge enters
BasicBlock* to = nullptr;
};
// A node of the control-flow graph: a run of instructions plus its incoming and
// outgoing edges.
struct BasicBlock
{
	u32 id = 0;

	std::vector<Instruction> instructions; // Program instructions for the RSX processor

	std::vector<FlowEdge> succ;            // Forward edges. Sorted closest first.
	std::vector<FlowEdge> pred;            // Back edges. Sorted closest first.

	std::vector<Instruction> prologue;     // Prologue, created by passes
	std::vector<Instruction> epilogue;     // Epilogue, created by passes

	// Records an outgoing edge this -> b and returns a pointer to the stored edge.
	// The pointer refers to an element of `succ`, so it stops being valid once
	// `succ` grows again.
	FlowEdge* insert_succ(BasicBlock* b, EdgeType type = EdgeType::NONE)
	{
		return &succ.emplace_back(FlowEdge{ .type = type, .from = this, .to = b });
	}

	// Records an incoming edge b -> this and returns a pointer to the stored edge.
	// The pointer refers to an element of `pred`, so it stops being valid once
	// `pred` grows again.
	FlowEdge* insert_pred(BasicBlock* b, EdgeType type = EdgeType::NONE)
	{
		return &pred.emplace_back(FlowEdge{ .type = type, .from = b, .to = this });
	}
};
}

View file

@ -234,7 +234,8 @@ std::string FragmentProgramDecompiler::AddCond()
std::string FragmentProgramDecompiler::AddConst() std::string FragmentProgramDecompiler::AddConst()
{ {
const u32 constant_id = m_size + (4 * sizeof(u32)); ensure(m_instruction->length == 8);
const u32 constant_id = m_instruction->addr + 16;
u32 index = umax; u32 index = umax;
if (auto found = m_constant_offsets.find(constant_id); if (auto found = m_constant_offsets.find(constant_id);
@ -249,9 +250,6 @@ std::string FragmentProgramDecompiler::AddConst()
m_constant_offsets[constant_id] = index; m_constant_offsets[constant_id] = index;
} }
// Skip next instruction, its just a literal
m_offset = 2 * 4 * sizeof(u32);
// Return the next offset index // Return the next offset index
return "_fetch_constant(" + std::to_string(index) + ")"; return "_fetch_constant(" + std::to_string(index) + ")";
} }
@ -1297,7 +1295,7 @@ bool FragmentProgramDecompiler::handle_tex_srb(u32 opcode)
std::string FragmentProgramDecompiler::Decompile() std::string FragmentProgramDecompiler::Decompile()
{ {
auto data = static_cast<be_t<u32>*>(m_prog.get_data()); const auto graph = rsx::assembler::deconstruct_fragment_program(m_prog);
m_size = 0; m_size = 0;
m_location = 0; m_location = 0;
m_loop_count = 0; m_loop_count = 0;
@ -1314,141 +1312,130 @@ std::string FragmentProgramDecompiler::Decompile()
int forced_unit = FORCE_NONE; int forced_unit = FORCE_NONE;
while (true) for (const auto &block : graph.blocks)
{ {
for (auto found = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size); // TODO: Handle block prologue if any
found != m_end_offsets.end(); if (!block.pred.empty())
found = std::find(m_end_offsets.begin(), m_end_offsets.end(), m_size))
{ {
m_end_offsets.erase(found); // CFG guarantees predecessors are sorted, closest one first
m_code_level--; for (const auto& pred : block.pred)
AddCode("}"); {
m_loop_count--; switch (pred.type)
{
case rsx::assembler::EdgeType::ENDLOOP:
m_loop_count--;
[[ fallthrough ]];
case rsx::assembler::EdgeType::ENDIF:
m_code_level--;
AddCode("}");
break;
case rsx::assembler::EdgeType::LOOP:
m_loop_count++;
[[ fallthrough ]];
case rsx::assembler::EdgeType::IF:
// Instruction will be inserted by the SIP decoder
AddCode("{");
m_code_level++;
break;
case rsx::assembler::EdgeType::ELSE:
// This one needs more testing
m_code_level--;
AddCode("}");
AddCode("else");
AddCode("{");
m_code_level++;
break;
default:
// Start a new block anyway
fmt::throw_exception("Unexpected block found");
}
}
} }
for (auto found = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size); for (const auto& inst : block.instructions)
found != m_else_offsets.end();
found = std::find(m_else_offsets.begin(), m_else_offsets.end(), m_size))
{ {
m_else_offsets.erase(found); m_instruction = &inst;
m_code_level--;
AddCode("}");
AddCode("else");
AddCode("{");
m_code_level++;
}
dst.HEX = GetData(data[0]); dst.HEX = inst.bytecode[0];
src0.HEX = GetData(data[1]); src0.HEX = inst.bytecode[1];
src1.HEX = GetData(data[2]); src1.HEX = inst.bytecode[2];
src2.HEX = GetData(data[3]); src2.HEX = inst.bytecode[3];
m_offset = 4 * sizeof(u32); opflags = 0;
opflags = 0;
const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6); const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6);
auto SIP = [&]()
{
switch (opcode)
{
case RSX_FP_OPCODE_BRK:
if (m_loop_count) AddFlowOp("break");
else rsx_log.error("BRK opcode found outside of a loop");
break;
case RSX_FP_OPCODE_CAL:
rsx_log.error("Unimplemented SIP instruction: CAL");
break;
case RSX_FP_OPCODE_FENCT:
AddCode("//FENCT");
forced_unit = FORCE_SCT;
break;
case RSX_FP_OPCODE_FENCB:
AddCode("//FENCB");
forced_unit = FORCE_SCB;
break;
case RSX_FP_OPCODE_IFE:
AddCode("if($cond)");
break;
case RSX_FP_OPCODE_LOOP:
AddCode(fmt::format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) //LOOP",
m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment));
break;
case RSX_FP_OPCODE_REP:
AddCode(fmt::format("if($cond) for(int i%u = %u; i%u < %u; i%u += %u) //REP",
m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment));
break;
case RSX_FP_OPCODE_RET:
AddFlowOp("return");
break;
default:
return false;
}
return true;
};
auto SIP = [&]()
{
switch (opcode) switch (opcode)
{ {
case RSX_FP_OPCODE_BRK: case RSX_FP_OPCODE_NOP:
if (m_loop_count) AddFlowOp("break");
else rsx_log.error("BRK opcode found outside of a loop");
break; break;
case RSX_FP_OPCODE_CAL: case RSX_FP_OPCODE_KIL:
rsx_log.error("Unimplemented SIP instruction: CAL"); properties.has_discard_op = true;
AddFlowOp("_kill()");
break; break;
case RSX_FP_OPCODE_FENCT:
AddCode("//FENCT");
forced_unit = FORCE_SCT;
break;
case RSX_FP_OPCODE_FENCB:
AddCode("//FENCB");
forced_unit = FORCE_SCB;
break;
case RSX_FP_OPCODE_IFE:
AddCode("if($cond)");
if (src2.end_offset != src1.else_offset)
m_else_offsets.push_back(src1.else_offset << 2);
m_end_offsets.push_back(src2.end_offset << 2);
AddCode("{");
m_code_level++;
break;
case RSX_FP_OPCODE_LOOP:
if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt)
{
AddCode(fmt::format("//$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //LOOP",
m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset));
}
else
{
AddCode(fmt::format("$ifcond for(int i%u = %u; i%u < %u; i%u += %u) //LOOP",
m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment));
m_loop_count++;
m_end_offsets.push_back(src2.end_offset << 2);
AddCode("{");
m_code_level++;
}
break;
case RSX_FP_OPCODE_REP:
if (!src0.exec_if_eq && !src0.exec_if_gr && !src0.exec_if_lt)
{
AddCode(fmt::format("//$ifcond for(int i%u = %u; i%u < %u; i%u += %u) {} //-> %u //REP",
m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment, src2.end_offset));
}
else
{
AddCode(fmt::format("if($cond) for(int i%u = %u; i%u < %u; i%u += %u) //REP",
m_loop_count, src1.init_counter, m_loop_count, src1.end_counter, m_loop_count, src1.increment));
m_loop_count++;
m_end_offsets.push_back(src2.end_offset << 2);
AddCode("{");
m_code_level++;
}
break;
case RSX_FP_OPCODE_RET:
AddFlowOp("return");
break;
default: default:
return false; int prev_force_unit = forced_unit;
// Some instructions do not respect forced unit
// Tested with Tales of Vesperia
if (SIP()) break;
if (handle_tex_srb(opcode)) break;
// FENCT/FENCB do not actually reject instructions if they dont match the forced unit
// Looks like they are optimization hints and not hard-coded forced paths
if (handle_sct_scb(opcode)) break;
forced_unit = FORCE_NONE;
rsx_log.error("Unknown/illegal instruction: 0x%x (forced unit %d)", opcode, prev_force_unit);
break;
} }
return true; m_size += m_instruction->length * 4;
}; if (dst.end) break;
switch (opcode)
{
case RSX_FP_OPCODE_NOP:
break;
case RSX_FP_OPCODE_KIL:
properties.has_discard_op = true;
AddFlowOp("_kill()");
break;
default:
int prev_force_unit = forced_unit;
// Some instructions do not respect forced unit
// Tested with Tales of Vesperia
if (SIP()) break;
if (handle_tex_srb(opcode)) break;
// FENCT/FENCB do not actually reject instructions if they dont match the forced unit
// Looks like they are optimization hints and not hard-coded forced paths
if (handle_sct_scb(opcode)) break;
forced_unit = FORCE_NONE;
rsx_log.error("Unknown/illegal instruction: 0x%x (forced unit %d)", opcode, prev_force_unit);
break;
} }
m_size += m_offset; // TODO: Handle block epilogue if needed
if (dst.end) break;
ensure(m_offset % sizeof(u32) == 0);
data += m_offset / sizeof(u32);
} }
while (m_code_level > 1) while (m_code_level > 1)

View file

@ -3,6 +3,8 @@
#include "FragmentProgramRegister.h" #include "FragmentProgramRegister.h"
#include "RSXFragmentProgram.h" #include "RSXFragmentProgram.h"
#include "Assembler/CFG.h"
#include <sstream> #include <sstream>
#include <unordered_map> #include <unordered_map>
@ -39,17 +41,16 @@ class FragmentProgramDecompiler
SRC2 src2; SRC2 src2;
u32 opflags; u32 opflags;
const rsx::assembler::Instruction* m_instruction;
std::string main; std::string main;
u32& m_size; u32& m_size;
u32 m_const_index = 0; u32 m_const_index = 0;
u32 m_offset;
u32 m_location = 0; u32 m_location = 0;
bool m_is_valid_ucode = true; bool m_is_valid_ucode = true;
u32 m_loop_count; u32 m_loop_count;
int m_code_level; int m_code_level;
std::vector<u32> m_end_offsets;
std::vector<u32> m_else_offsets;
std::unordered_map<u32, u32> m_constant_offsets; std::unordered_map<u32, u32> m_constant_offsets;
std::array<rsx::MixedPrecisionRegister, 64> temp_registers; std::array<rsx::MixedPrecisionRegister, 64> temp_registers;

View file

@ -105,7 +105,6 @@ uint get_z_index(const in uint x_, const in uint y_, const in uint z_)
void decode_16b(const in uint texel_id, in uint x, const in uint y, const in uint z) void decode_16b(const in uint texel_id, in uint x, const in uint y, const in uint z)
{ {
const uint masks[] = { 0x0000FFFF, 0xFFFF0000 };
uint accumulator = 0; uint accumulator = 0;
const uint subword_count = min(invocation.size.x, 2); const uint subword_count = min(invocation.size.x, 2);
@ -113,7 +112,9 @@ void decode_16b(const in uint texel_id, in uint x, const in uint y, const in uin
{ {
uint src_texel_id = get_z_index(x, y, z); uint src_texel_id = get_z_index(x, y, z);
uint src_id = (src_texel_id + invocation.data_offset); uint src_id = (src_texel_id + invocation.data_offset);
accumulator |= data_in[src_id / 2] & masks[subword]; int src_bit_offset = int(src_id % 2) << 4;
uint src_value = bitfieldExtract(data_in[src_id / 2], src_bit_offset, 16);
accumulator = bitfieldInsert(accumulator, src_value, int(subword << 4), 16);
} }
data_out[texel_id / 2] = %f(accumulator); data_out[texel_id / 2] = %f(accumulator);
@ -123,7 +124,6 @@ void decode_16b(const in uint texel_id, in uint x, const in uint y, const in uin
void decode_8b(const in uint texel_id, in uint x, const in uint y, const in uint z) void decode_8b(const in uint texel_id, in uint x, const in uint y, const in uint z)
{ {
const uint masks[] = { 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000 };
uint accumulator = 0; uint accumulator = 0;
const uint subword_count = min(invocation.size.x, 4); const uint subword_count = min(invocation.size.x, 4);
@ -131,7 +131,9 @@ void decode_8b(const in uint texel_id, in uint x, const in uint y, const in uint
{ {
uint src_texel_id = get_z_index(x, y, z); uint src_texel_id = get_z_index(x, y, z);
uint src_id = (src_texel_id + invocation.data_offset); uint src_id = (src_texel_id + invocation.data_offset);
accumulator |= data_in[src_id / 4] & masks[subword]; int src_bit_offset = int(src_id % 4) << 3;
uint src_value = bitfieldExtract(data_in[src_id / 4], src_bit_offset, 8);
accumulator = bitfieldInsert(accumulator, src_value, int(subword << 3), 8);
} }
data_out[texel_id / 4] = accumulator; data_out[texel_id / 4] = accumulator;

View file

@ -39,11 +39,20 @@ namespace vk
return false; return false;
} }
buffer::buffer(const vk::render_device& dev, u64 size, const memory_type_info& memory_type, u32 access_flags, VkBufferUsageFlags usage, VkBufferCreateFlags flags, vmm_allocation_pool allocation_pool) buffer::buffer(
const vk::render_device& dev,
u64 size,
const memory_type_info& memory_type,
u32 access_flags,
VkBufferUsageFlags usage,
VkBufferCreateFlags flags,
vmm_allocation_pool allocation_pool)
: m_device(dev) : m_device(dev)
{ {
const bool nullable = !!(flags & VK_BUFFER_CREATE_ALLOW_NULL_RPCS3);
info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
info.flags = flags; info.flags = flags & ~VK_BUFFER_CREATE_SPECIAL_FLAGS_RPCS3;
info.size = size; info.size = size;
info.usage = usage; info.usage = usage;
info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
@ -60,8 +69,18 @@ namespace vk
fmt::throw_exception("No compatible memory type was found!"); fmt::throw_exception("No compatible memory type was found!");
} }
memory = std::make_unique<memory_block>(m_device, memory_reqs.size, memory_reqs.alignment, allocation_type_info, allocation_pool); memory = std::make_unique<memory_block>(m_device, memory_reqs.size, memory_reqs.alignment, allocation_type_info, allocation_pool, nullable);
vkBindBufferMemory(dev, value, memory->get_vk_device_memory(), memory->get_vk_device_memory_offset()); if (auto device_memory = memory->get_vk_device_memory();
device_memory != VK_NULL_HANDLE)
{
vkBindBufferMemory(dev, value, device_memory, memory->get_vk_device_memory_offset());
}
else
{
ensure(nullable);
vkDestroyBuffer(m_device, value, nullptr);
value = VK_NULL_HANDLE;
}
} }
buffer::buffer(const vk::render_device& dev, VkBufferUsageFlags usage, void* host_pointer, u64 size) buffer::buffer(const vk::render_device& dev, VkBufferUsageFlags usage, void* host_pointer, u64 size)

View file

@ -7,6 +7,13 @@
namespace vk namespace vk
{ {
// RPCS3-private buffer creation flags. The vk::buffer constructor masks
// VK_BUFFER_CREATE_SPECIAL_FLAGS_RPCS3 out of the flags before storing them in
// VkBufferCreateInfo, so these bits are never handed to the driver.
enum : u32
{
	// The caller accepts a buffer without backing memory: when no device memory
	// is obtained, the constructor destroys the VkBuffer and leaves `value` null.
	VK_BUFFER_CREATE_ALLOW_NULL_RPCS3 = 1u << 31,

	// Union of every RPCS3-private flag (currently only ALLOW_NULL).
	VK_BUFFER_CREATE_SPECIAL_FLAGS_RPCS3 = VK_BUFFER_CREATE_ALLOW_NULL_RPCS3
};
struct buffer_view : public unique_resource struct buffer_view : public unique_resource
{ {
VkBufferView value; VkBufferView value;
@ -30,8 +37,21 @@ namespace vk
VkBufferCreateInfo info = {}; VkBufferCreateInfo info = {};
std::unique_ptr<vk::memory_block> memory; std::unique_ptr<vk::memory_block> memory;
buffer(const vk::render_device& dev, u64 size, const memory_type_info& memory_type, u32 access_flags, VkBufferUsageFlags usage, VkBufferCreateFlags flags, vmm_allocation_pool allocation_pool); buffer(
buffer(const vk::render_device& dev, VkBufferUsageFlags usage, void* host_pointer, u64 size); const vk::render_device& dev,
u64 size,
const memory_type_info& memory_type,
u32 access_flags,
VkBufferUsageFlags usage,
VkBufferCreateFlags flags,
vmm_allocation_pool allocation_pool);
buffer(
const vk::render_device& dev,
VkBufferUsageFlags usage,
void* host_pointer,
u64 size);
~buffer(); ~buffer();
void* map(u64 offset, u64 size); void* map(u64 offset, u64 size);

View file

@ -47,9 +47,28 @@ namespace vk
usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
memory_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; memory_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
memory_index = memory_map.device_local; memory_index = memory_map.device_local;
m_prefer_writethrough = false;
} }
heap = std::make_unique<buffer>(*g_render_device, size, memory_index, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM); VkFlags create_flags = 0;
if (m_prefer_writethrough)
{
create_flags |= VK_BUFFER_CREATE_ALLOW_NULL_RPCS3;
}
heap = std::make_unique<buffer>(*g_render_device, size, memory_index, memory_flags, usage, create_flags, VMM_ALLOCATION_POOL_SYSTEM);
if (!heap->value)
{
rsx_log.warning("Could not place heap '%s' into Re-BAR memory. Will attempt to use regular host-visible memory.", m_name);
ensure(m_prefer_writethrough);
// We failed to place the buffer in rebar memory. Try again in host-visible.
m_prefer_writethrough = false;
auto gc = get_resource_manager();
gc->dispose(heap);
heap = std::make_unique<buffer>(*g_render_device, size, memory_map.host_visible_coherent, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM);
}
initial_size = size; initial_size = size;
notify_on_grow = bool(notify); notify_on_grow = bool(notify);
@ -112,6 +131,7 @@ namespace vk
auto gc = get_resource_manager(); auto gc = get_resource_manager();
if (shadow) if (shadow)
{ {
ensure(!m_prefer_writethrough);
rsx_log.warning("Buffer usage %u is not heap-compatible using this driver, explicit staging buffer in use", usage); rsx_log.warning("Buffer usage %u is not heap-compatible using this driver, explicit staging buffer in use", usage);
gc->dispose(shadow); gc->dispose(shadow);
@ -122,7 +142,25 @@ namespace vk
} }
gc->dispose(heap); gc->dispose(heap);
heap = std::make_unique<buffer>(*g_render_device, aligned_new_size, memory_index, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM);
VkFlags create_flags = 0;
if (m_prefer_writethrough)
{
create_flags |= VK_BUFFER_CREATE_ALLOW_NULL_RPCS3;
}
heap = std::make_unique<buffer>(*g_render_device, aligned_new_size, memory_index, memory_flags, usage, create_flags, VMM_ALLOCATION_POOL_SYSTEM);
if (!heap->value)
{
rsx_log.warning("Could not place heap '%s' into Re-BAR memory. Will attempt to use regular host-visible memory.", m_name);
ensure(m_prefer_writethrough);
// We failed to place the buffer in rebar memory. Try again in host-visible.
m_prefer_writethrough = false;
gc->dispose(heap);
heap = std::make_unique<buffer>(*g_render_device, aligned_new_size, memory_map.host_visible_coherent, memory_flags, usage, 0, VMM_ALLOCATION_POOL_SYSTEM);
}
if (notify_on_grow) if (notify_on_grow)
{ {

View file

@ -101,6 +101,48 @@ namespace rpcs3::utils
return worker(); return worker();
} }
std::vector<std::pair<std::string, u64>> get_vfs_disk_usage()
{
std::vector<std::pair<std::string, u64>> disk_usage;
if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_hdd0_dir(), 1); data_size != umax)
{
disk_usage.push_back({"dev_hdd0", data_size});
}
if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_hdd1_dir(), 1); data_size != umax)
{
disk_usage.push_back({"dev_hdd1", data_size});
}
if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_flash_dir(), 1); data_size != umax)
{
disk_usage.push_back({"dev_flash", data_size});
}
if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_flash2_dir(), 1); data_size != umax)
{
disk_usage.push_back({"dev_flash2", data_size});
}
if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_flash3_dir(), 1); data_size != umax)
{
disk_usage.push_back({"dev_flash3", data_size});
}
if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_bdvd_dir(), 1); data_size != umax)
{
disk_usage.push_back({"dev_bdvd", data_size});
}
if (const u64 data_size = fs::get_dir_size(rpcs3::utils::get_games_dir(), 1); data_size != umax)
{
disk_usage.push_back({"games", data_size});
}
return disk_usage;
}
std::string get_emu_dir() std::string get_emu_dir()
{ {
const std::string& emu_dir_ = g_cfg_vfs.emulator_dir; const std::string& emu_dir_ = g_cfg_vfs.emulator_dir;
@ -122,6 +164,36 @@ namespace rpcs3::utils
return g_cfg_vfs.get(g_cfg_vfs.dev_hdd1, get_emu_dir()); return g_cfg_vfs.get(g_cfg_vfs.dev_hdd1, get_emu_dir());
} }
// Returns the VFS config lookup for dev_flash, with the emulator directory
// passed as the second argument to g_cfg_vfs.get().
std::string get_flash_dir()
{
	const std::string emu_root = get_emu_dir();
	return g_cfg_vfs.get(g_cfg_vfs.dev_flash, emu_root);
}
// Returns the VFS config lookup for dev_flash2, with the emulator directory
// passed as the second argument to g_cfg_vfs.get().
std::string get_flash2_dir()
{
	const std::string emu_root = get_emu_dir();
	return g_cfg_vfs.get(g_cfg_vfs.dev_flash2, emu_root);
}
// Returns the VFS config lookup for dev_flash3, with the emulator directory
// passed as the second argument to g_cfg_vfs.get().
std::string get_flash3_dir()
{
	const std::string emu_root = get_emu_dir();
	return g_cfg_vfs.get(g_cfg_vfs.dev_flash3, emu_root);
}
// Returns the VFS config lookup for dev_bdvd, with the emulator directory
// passed as the second argument to g_cfg_vfs.get().
std::string get_bdvd_dir()
{
	const std::string emu_root = get_emu_dir();
	return g_cfg_vfs.get(g_cfg_vfs.dev_bdvd, emu_root);
}
// Returns the size reported by fs::get_dir_size for the cache directory,
// or 0 when that size is umax.
u64 get_cache_disk_usage()
{
	const u64 cache_bytes = fs::get_dir_size(rpcs3::utils::get_cache_dir(), 1);
	return cache_bytes != umax ? cache_bytes : 0;
}
std::string get_cache_dir() std::string get_cache_dir()
{ {
return fs::get_cache_dir() + "cache/"; return fs::get_cache_dir() + "cache/";

View file

@ -23,10 +23,19 @@ namespace rpcs3::utils
bool install_pkg(const std::string& path); bool install_pkg(const std::string& path);
// VFS directories and disk usage
std::vector<std::pair<std::string, u64>> get_vfs_disk_usage();
std::string get_emu_dir(); std::string get_emu_dir();
std::string get_games_dir(); std::string get_games_dir();
std::string get_hdd0_dir(); std::string get_hdd0_dir();
std::string get_hdd1_dir(); std::string get_hdd1_dir();
std::string get_flash_dir();
std::string get_flash2_dir();
std::string get_flash3_dir();
std::string get_bdvd_dir();
// Cache directories and disk usage
u64 get_cache_disk_usage();
std::string get_cache_dir(); std::string get_cache_dir();
std::string get_cache_dir(std::string_view module_path); std::string get_cache_dir(std::string_view module_path);

View file

@ -156,6 +156,7 @@
<ClCompile Include="Emu\RSX\Overlays\Shaders\shader_loading_dialog.cpp" /> <ClCompile Include="Emu\RSX\Overlays\Shaders\shader_loading_dialog.cpp" />
<ClCompile Include="Emu\RSX\Overlays\Shaders\shader_loading_dialog_native.cpp" /> <ClCompile Include="Emu\RSX\Overlays\Shaders\shader_loading_dialog_native.cpp" />
<ClCompile Include="Emu\RSX\Overlays\Trophies\overlay_trophy_list_dialog.cpp" /> <ClCompile Include="Emu\RSX\Overlays\Trophies\overlay_trophy_list_dialog.cpp" />
<ClCompile Include="Emu\RSX\Program\Assembler\FPToCFG.cpp" />
<ClCompile Include="Emu\RSX\Program\FragmentProgramRegister.cpp" /> <ClCompile Include="Emu\RSX\Program\FragmentProgramRegister.cpp" />
<ClCompile Include="Emu\RSX\Program\ProgramStateCache.cpp" /> <ClCompile Include="Emu\RSX\Program\ProgramStateCache.cpp" />
<ClCompile Include="Emu\RSX\Program\program_util.cpp" /> <ClCompile Include="Emu\RSX\Program\program_util.cpp" />
@ -699,6 +700,8 @@
<ClInclude Include="Emu\RSX\Overlays\overlay_progress_bar.hpp" /> <ClInclude Include="Emu\RSX\Overlays\overlay_progress_bar.hpp" />
<ClInclude Include="Emu\RSX\Overlays\overlay_video.h" /> <ClInclude Include="Emu\RSX\Overlays\overlay_video.h" />
<ClInclude Include="Emu\RSX\Overlays\Trophies\overlay_trophy_list_dialog.h" /> <ClInclude Include="Emu\RSX\Overlays\Trophies\overlay_trophy_list_dialog.h" />
<ClInclude Include="Emu\RSX\Program\Assembler\CFG.h" />
<ClInclude Include="Emu\RSX\Program\Assembler\IR.h" />
<ClInclude Include="Emu\RSX\Program\FragmentProgramRegister.h" /> <ClInclude Include="Emu\RSX\Program\FragmentProgramRegister.h" />
<ClInclude Include="Emu\RSX\Program\GLSLTypes.h" /> <ClInclude Include="Emu\RSX\Program\GLSLTypes.h" />
<ClInclude Include="Emu\RSX\Program\ProgramStateCache.h" /> <ClInclude Include="Emu\RSX\Program\ProgramStateCache.h" />

View file

@ -133,6 +133,9 @@
<Filter Include="Emu\GPU\RSX\Program\MSAA"> <Filter Include="Emu\GPU\RSX\Program\MSAA">
<UniqueIdentifier>{ce6d6b90-8313-4273-b46c-d92bd450c002}</UniqueIdentifier> <UniqueIdentifier>{ce6d6b90-8313-4273-b46c-d92bd450c002}</UniqueIdentifier>
</Filter> </Filter>
<Filter Include="Emu\GPU\RSX\Program\Assembler">
<UniqueIdentifier>{d99df916-8a99-428b-869a-9f14ac0ab411}</UniqueIdentifier>
</Filter>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClCompile Include="Crypto\aes.cpp"> <ClCompile Include="Crypto\aes.cpp">
@ -1372,6 +1375,9 @@
<ClCompile Include="Emu\Io\evdev_gun_handler.cpp"> <ClCompile Include="Emu\Io\evdev_gun_handler.cpp">
<Filter>Emu\Io</Filter> <Filter>Emu\Io</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="Emu\RSX\Program\Assembler\FPToCFG.cpp">
<Filter>Emu\GPU\RSX\Program\Assembler</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="Crypto\aes.h"> <ClInclude Include="Crypto\aes.h">
@ -2764,6 +2770,12 @@
<ClInclude Include="util\pair.hpp"> <ClInclude Include="util\pair.hpp">
<Filter>Utilities</Filter> <Filter>Utilities</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="Emu\RSX\Program\Assembler\CFG.h">
<Filter>Emu\GPU\RSX\Program\Assembler</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Program\Assembler\IR.h">
<Filter>Emu\GPU\RSX\Program\Assembler</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl"> <None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl">

View file

@ -4,10 +4,14 @@
#include "hex_validator.h" #include "hex_validator.h"
#include "memory_viewer_panel.h" #include "memory_viewer_panel.h"
#include "Emu/System.h"
#include "Emu/system_utils.hpp"
#include "Utilities/lockless.h" #include "Utilities/lockless.h"
#include "util/asm.hpp" #include "util/asm.hpp"
#include <QtConcurrent>
#include <QMenu> #include <QMenu>
#include <QMessageBox>
#include <QActionGroup> #include <QActionGroup>
#include <QScrollBar> #include <QScrollBar>
#include <QVBoxLayout> #include <QVBoxLayout>
@ -17,6 +21,8 @@
#include <deque> #include <deque>
#include <mutex> #include <mutex>
LOG_CHANNEL(sys_log, "SYS");
extern fs::file g_tty; extern fs::file g_tty;
extern atomic_t<s64> g_tty_size; extern atomic_t<s64> g_tty_size;
extern std::array<std::deque<std::string>, 16> g_tty_input; extern std::array<std::deque<std::string>, 16> g_tty_input;
@ -165,6 +171,28 @@ log_frame::log_frame(std::shared_ptr<gui_settings> _gui_settings, QWidget* paren
connect(m_timer, &QTimer::timeout, this, &log_frame::UpdateUI); connect(m_timer, &QTimer::timeout, this, &log_frame::UpdateUI);
} }
void log_frame::show_disk_usage(const std::vector<std::pair<std::string, u64>>& vfs_disk_usage, u64 cache_disk_usage)
{
QString text;
u64 tot_data_size = 0;
for (const auto& [dev, data_size] : vfs_disk_usage)
{
text += tr("\n %0: %1").arg(QString::fromStdString(dev)).arg(gui::utils::format_byte_size(data_size));
tot_data_size += data_size;
}
if (!text.isEmpty())
{
text = tr("\n VFS disk usage: %0%1").arg(gui::utils::format_byte_size(tot_data_size)).arg(text);
}
text += tr("\n Cache disk usage: %0").arg(gui::utils::format_byte_size(cache_disk_usage));
sys_log.success("%s", text);
QMessageBox::information(this, tr("Disk usage"), text);
}
void log_frame::SetLogLevel(logs::level lev) const void log_frame::SetLogLevel(logs::level lev) const
{ {
switch (lev) switch (lev)
@ -245,6 +273,26 @@ void log_frame::CreateAndConnectActions()
m_tty->clear(); m_tty->clear();
}); });
m_show_disk_usage_act = new QAction(tr("Show Disk Usage"), this);
connect(m_show_disk_usage_act, &QAction::triggered, [this]()
{
if (m_disk_usage_future.isRunning())
{
return; // Still running the last request
}
m_disk_usage_future = QtConcurrent::run([this]()
{
const std::vector<std::pair<std::string, u64>> vfs_disk_usage = rpcs3::utils::get_vfs_disk_usage();
const u64 cache_disk_usage = rpcs3::utils::get_cache_disk_usage();
Emu.CallFromMainThread([this, vfs_disk_usage, cache_disk_usage]()
{
show_disk_usage(vfs_disk_usage, cache_disk_usage);
}, nullptr, false);
});
});
m_perform_goto_on_debugger = new QAction(tr("Go-To On The Debugger"), this); m_perform_goto_on_debugger = new QAction(tr("Go-To On The Debugger"), this);
connect(m_perform_goto_on_debugger, &QAction::triggered, [this]() connect(m_perform_goto_on_debugger, &QAction::triggered, [this]()
{ {
@ -369,6 +417,9 @@ void log_frame::CreateAndConnectActions()
{ {
QMenu* menu = m_log->createStandardContextMenu(); QMenu* menu = m_log->createStandardContextMenu();
menu->addAction(m_clear_act); menu->addAction(m_clear_act);
menu->addSeparator();
menu->addAction(m_show_disk_usage_act);
menu->addSeparator();
menu->addAction(m_perform_goto_on_debugger); menu->addAction(m_perform_goto_on_debugger);
menu->addAction(m_perform_goto_thread_on_debugger); menu->addAction(m_perform_goto_thread_on_debugger);
menu->addAction(m_perform_show_in_mem_viewer); menu->addAction(m_perform_show_in_mem_viewer);

View file

@ -8,6 +8,7 @@
#include <memory> #include <memory>
#include <QFuture>
#include <QTabWidget> #include <QTabWidget>
#include <QPlainTextEdit> #include <QPlainTextEdit>
#include <QActionGroup> #include <QActionGroup>
@ -38,6 +39,7 @@ protected:
private Q_SLOTS: private Q_SLOTS:
void UpdateUI(); void UpdateUI();
private: private:
void show_disk_usage(const std::vector<std::pair<std::string, u64>>& vfs_disk_usage, u64 cache_disk_usage);
void SetLogLevel(logs::level lev) const; void SetLogLevel(logs::level lev) const;
void SetTTYLogging(bool val) const; void SetTTYLogging(bool val) const;
@ -48,6 +50,7 @@ private:
std::unique_ptr<find_dialog> m_find_dialog; std::unique_ptr<find_dialog> m_find_dialog;
QTimer* m_timer = nullptr; QTimer* m_timer = nullptr;
QFuture<void> m_disk_usage_future;
std::vector<QColor> m_color; std::vector<QColor> m_color;
QColor m_color_stack; QColor m_color_stack;
@ -72,6 +75,7 @@ private:
QAction* m_clear_act = nullptr; QAction* m_clear_act = nullptr;
QAction* m_clear_tty_act = nullptr; QAction* m_clear_tty_act = nullptr;
QAction* m_show_disk_usage_act = nullptr;
QAction* m_perform_goto_on_debugger = nullptr; QAction* m_perform_goto_on_debugger = nullptr;
QAction* m_perform_goto_thread_on_debugger = nullptr; QAction* m_perform_goto_thread_on_debugger = nullptr;
QAction* m_perform_show_in_mem_viewer = nullptr; QAction* m_perform_show_in_mem_viewer = nullptr;

View file

@ -88,6 +88,7 @@
<ItemGroup> <ItemGroup>
<ClCompile Include="test.cpp" /> <ClCompile Include="test.cpp" />
<ClCompile Include="test_fmt.cpp" /> <ClCompile Include="test_fmt.cpp" />
<ClCompile Include="test_rsx_cfg.cpp" />
<ClCompile Include="test_simple_array.cpp" /> <ClCompile Include="test_simple_array.cpp" />
<ClCompile Include="test_address_range.cpp" /> <ClCompile Include="test_address_range.cpp" />
<ClCompile Include="test_tuple.cpp" /> <ClCompile Include="test_tuple.cpp" />

View file

@ -0,0 +1,239 @@
#include <gtest/gtest.h>
#include "Emu/RSX/Common/simple_array.hpp"
#include "Emu/RSX/Program/Assembler/CFG.h"
#include "Emu/RSX/Program/RSXFragmentProgram.h"
#include <util/v128.hpp>
namespace rsx::assembler
{
// Swaps the two bytes inside each 16-bit half of a 32-bit word:
// bytes {b0, b1, b2, b3} become {b1, b0, b3, b2}.
// Lazy encode, but good enough for what we need here.
auto swap_bytes16 = [](u32 dword) -> u32
{
	// Pure mask-and-shift: same result as shuffling the bytes in memory on any
	// endianness, without reading an inactive union member (undefined in C++).
	return ((dword & 0x00FF00FFu) << 8) | ((dword & 0xFF00FF00u) >> 8);
};
// Instruction mocks because we don't have a working assembler (yet).
// Builds a 128-bit instruction whose first word holds the given opcode (and the
// end bit when `end` is set), passed through swap_bytes16; the other three
// words are zero.
auto encode_instruction = [](u32 opcode, bool end = false) -> v128
{
	OPDEST encoded{};
	encoded.opcode = opcode;

	if (end)
	{
		encoded.end = 1;
	}

	const u32 word0 = swap_bytes16(encoded.HEX);
	return v128::from32(word0, 0, 0, 0);
};
auto create_if(u32 end, u32 _else = 0)
{
OPDEST dst{};
dst.opcode = RSX_FP_OPCODE_IFE & 0x3Fu;
SRC1 src1{};
src1.else_offset = (_else ? _else : end) << 2;
src1.opcode_is_branch = 1;
SRC2 src2{};
src2.end_offset = end << 2;
return v128::from32(swap_bytes16(dst.HEX), 0, swap_bytes16(src1.HEX), swap_bytes16(src2.HEX));
};
// A straight-line program (no branches) must decompose into a single block
// holding every instruction, with byte addresses advancing by 16.
TEST(CFG, FpToCFG_Basic)
{
	rsx::simple_array<v128> buffer = {
		encode_instruction(RSX_FP_OPCODE_ADD),
		encode_instruction(RSX_FP_OPCODE_MOV, true)
	};

	RSXFragmentProgram program{};
	program.data = buffer.data();

	FlowGraph graph = deconstruct_fragment_program(program);

	// Fatal assertions: the checks below call front() and index [1], which
	// would be invalid accesses if the sizes were wrong.
	ASSERT_EQ(graph.blocks.size(), 1);

	const auto& block = graph.blocks.front();
	ASSERT_EQ(block.instructions.size(), 2);

	EXPECT_EQ(block.instructions.front().length, 4);
	EXPECT_EQ(block.instructions[0].addr, 0);
	EXPECT_EQ(block.instructions[1].addr, 16);
}
// A single IF without ELSE must yield three blocks (head, branch, merge)
// connected by IF / ENDIF edges.
TEST(CFG, FpToCFG_IF)
{
	rsx::simple_array<v128> buffer = {
		encode_instruction(RSX_FP_OPCODE_ADD),       // 0
		encode_instruction(RSX_FP_OPCODE_MOV),       // 1
		create_if(4),                                // 2 (BR, 4)
		encode_instruction(RSX_FP_OPCODE_ADD),       // 3
		encode_instruction(RSX_FP_OPCODE_MOV, true), // 4 (Merge block)
	};

	// { block id, instruction count }
	const std::pair<int, size_t> expected_block_data[3] = {
		{ 0, 3 }, // Head
		{ 3, 1 }, // Branch
		{ 4, 1 }, // Merge
	};

	RSXFragmentProgram program{};
	program.data = buffer.data();

	FlowGraph graph = deconstruct_fragment_program(program);

	ASSERT_EQ(graph.blocks.size(), 3);

	int i = 0;
	for (auto it = graph.blocks.begin(); it != graph.blocks.end(); ++it)
	{
		const auto& expected = expected_block_data[i++];
		EXPECT_EQ(it->id, expected.first);
		EXPECT_EQ(it->instructions.size(), expected.second);
	}

	// Check edges. Each lookup is validated before it is dereferenced so that a
	// missing block or edge is reported as a failure instead of crashing the run.
	const auto head = std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0));
	const auto branch = std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 3));
	const auto merge = std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 4));

	ASSERT_TRUE(head != graph.blocks.end());
	ASSERT_TRUE(branch != graph.blocks.end());
	ASSERT_TRUE(merge != graph.blocks.end());

	ASSERT_GE(branch->pred.size(), 1u);
	ASSERT_GE(head->succ.size(), 1u);
	ASSERT_GE(merge->pred.size(), 1u);

	EXPECT_EQ(branch->pred[0].type, EdgeType::IF);
	EXPECT_EQ(head->succ[0].type, EdgeType::IF);
	EXPECT_EQ(merge->pred[0].type, EdgeType::ENDIF);
}
// An IF nested inside another IF, each with its own merge point, must yield
// five blocks with the ids and instruction counts listed below.
TEST(CFG, FpToCFG_NestedIF)
{
	rsx::simple_array<v128> buffer = {
		encode_instruction(RSX_FP_OPCODE_ADD),       // 0
		encode_instruction(RSX_FP_OPCODE_MOV),       // 1
		create_if(8),                                // 2 (BR, 8)
		encode_instruction(RSX_FP_OPCODE_ADD),       // 3
		create_if(6),                                // 4 (BR, 6)
		encode_instruction(RSX_FP_OPCODE_MOV),       // 5
		encode_instruction(RSX_FP_OPCODE_MOV),       // 6 (merge block 1)
		encode_instruction(RSX_FP_OPCODE_ADD),       // 7
		encode_instruction(RSX_FP_OPCODE_MOV, true)  // 8 (merge block 2)
	};

	// { block id, instruction count }
	const std::pair<int, size_t> expected_block_data[5] = {
		{ 0, 3 }, // Head
		{ 3, 2 }, // Branch 1
		{ 5, 1 }, // Branch 2
		{ 6, 2 }, // Merge 1
		{ 8, 1 }, // Merge 2
	};

	RSXFragmentProgram program{};
	program.data = buffer.data();

	FlowGraph graph = deconstruct_fragment_program(program);

	ASSERT_EQ(graph.blocks.size(), 5);

	// Walk the blocks and the expectation table in lockstep
	const auto* expected = expected_block_data;
	for (const auto& block : graph.blocks)
	{
		EXPECT_EQ(block.id, expected->first);
		EXPECT_EQ(block.instructions.size(), expected->second);
		++expected;
	}
}
// Two nested IFs that share one merge point: the merge block must have two
// ENDIF predecessors and the head block two successors, both ordered closest
// first.
TEST(CFG, FpToCFG_NestedIF_MultiplePred)
{
	rsx::simple_array<v128> buffer = {
		encode_instruction(RSX_FP_OPCODE_ADD),       // 0
		encode_instruction(RSX_FP_OPCODE_MOV),       // 1
		create_if(6),                                // 2 (BR, 6)
		encode_instruction(RSX_FP_OPCODE_ADD),       // 3
		create_if(6),                                // 4 (BR, 6)
		encode_instruction(RSX_FP_OPCODE_MOV),       // 5
		encode_instruction(RSX_FP_OPCODE_MOV),       // 6 (merge block)
		encode_instruction(RSX_FP_OPCODE_ADD),       // 7
		encode_instruction(RSX_FP_OPCODE_MOV, true)  // 8
	};

	// { block id, instruction count }
	const std::pair<int, size_t> expected_block_data[4] = {
		{ 0, 3 }, // Head
		{ 3, 2 }, // Branch 1
		{ 5, 1 }, // Branch 2
		{ 6, 3 }, // Merge
	};

	RSXFragmentProgram program{};
	program.data = buffer.data();

	FlowGraph graph = deconstruct_fragment_program(program);

	ASSERT_EQ(graph.blocks.size(), 4);

	int i = 0;
	for (auto it = graph.blocks.begin(); it != graph.blocks.end(); ++it)
	{
		const auto& expected = expected_block_data[i++];
		EXPECT_EQ(it->id, expected.first);
		EXPECT_EQ(it->instructions.size(), expected.second);
	}

	// Look each block up once and validate the iterator before dereferencing,
	// so a missing block fails the test instead of invoking undefined behavior.
	const auto head = std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 0));
	const auto merge = std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == 6));

	ASSERT_TRUE(head != graph.blocks.end());
	ASSERT_TRUE(merge != graph.blocks.end());

	// Predecessors must be ordered, closest first
	ASSERT_EQ(merge->pred.size(), 2);
	EXPECT_EQ(merge->pred[0].type, EdgeType::ENDIF);
	EXPECT_EQ(merge->pred[0].from->id, 3);
	EXPECT_EQ(merge->pred[1].type, EdgeType::ENDIF);
	EXPECT_EQ(merge->pred[1].from->id, 0);

	// Successors must also be ordered, closest first
	ASSERT_EQ(head->succ.size(), 2);
	EXPECT_EQ(head->succ[0].type, EdgeType::IF);
	EXPECT_EQ(head->succ[0].to->id, 3);
	EXPECT_EQ(head->succ[1].type, EdgeType::ENDIF);
	EXPECT_EQ(head->succ[1].to->id, 6);
}
// An IF with an ELSE branch must yield four blocks: head, positive branch,
// negative branch and merge.
TEST(CFG, FpToCFG_IF_ELSE)
{
	rsx::simple_array<v128> buffer = {
		encode_instruction(RSX_FP_OPCODE_ADD),       // 0
		encode_instruction(RSX_FP_OPCODE_MOV),       // 1
		create_if(6, 4),                             // 2 (BR, 6)
		encode_instruction(RSX_FP_OPCODE_ADD),       // 3
		encode_instruction(RSX_FP_OPCODE_MOV),       // 4 (Else)
		encode_instruction(RSX_FP_OPCODE_ADD),       // 5
		encode_instruction(RSX_FP_OPCODE_MOV, true), // 6 (Merge)
	};

	// { block id, instruction count }
	const std::pair<int, size_t> expected_block_data[4] = {
		{ 0, 3 }, // Head
		{ 3, 1 }, // Branch positive
		{ 4, 2 }, // Branch negative
		{ 6, 1 }, // Merge
	};

	RSXFragmentProgram program{};
	program.data = buffer.data();

	FlowGraph graph = deconstruct_fragment_program(program);

	ASSERT_EQ(graph.blocks.size(), 4);

	// Compare every block against its row in the expectation table
	int row = 0;
	for (const auto& block : graph.blocks)
	{
		const auto& [expected_id, expected_count] = expected_block_data[row];
		EXPECT_EQ(block.id, expected_id);
		EXPECT_EQ(block.instructions.size(), expected_count);
		++row;
	}
}
}

View file

@ -303,4 +303,24 @@ namespace rsx
EXPECT_EQ(data_ptr1 & 15, 0); EXPECT_EQ(data_ptr1 & 15, 0);
EXPECT_EQ(data_ptr2 & 127, 0); EXPECT_EQ(data_ptr2 & 127, 0);
} }
// find() must return a pointer to the matching element, or nullptr when the
// value is not present.
TEST(SimpleArray, Find)
{
	const rsx::simple_array<int> arr{
		0, 1, 2, 3, 4, 5, 6, 7, 8, 9
	};

	// Check for null before dereferencing so a regression is reported as a
	// test failure rather than a crash.
	const auto found = arr.find(8);
	ASSERT_NE(found, nullptr);
	EXPECT_EQ(*found, 8);

	EXPECT_EQ(arr.find(99), nullptr);
}
// find_if() must return a pointer to the first element satisfying the
// predicate, or nullptr when no element does.
TEST(SimpleArray, FindIf)
{
	const rsx::simple_array<int> arr{
		0, 1, 2, 3, 4, 5, 6, 7, 8, 9
	};

	// Check for null before dereferencing so a regression is reported as a
	// test failure rather than a crash.
	const auto found = arr.find_if(FN(x == 8));
	ASSERT_NE(found, nullptr);
	EXPECT_EQ(*found, 8);

	EXPECT_EQ(arr.find_if(FN(x == 99)), nullptr);
}
} }

View file

@ -60,7 +60,7 @@ namespace utils
#if defined(ARCH_X64) #if defined(ARCH_X64)
return _m_prefetchw(const_cast<void*>(ptr)); return _m_prefetchw(const_cast<void*>(ptr));
#else #else
return __builtin_prefetch(ptr, 1, 0); return __builtin_prefetch(ptr, 1, 3);
#endif #endif
} }

View file

@ -1,10 +1,12 @@
#pragma once #pragma once
#include <type_traits>
namespace utils namespace utils
{ {
// Hack. Pointer cast util to workaround UB. Use with extreme care. // Hack. Pointer cast util to workaround UB. Use with extreme care.
template <typename T, typename U> template <typename T, typename U> requires (std::is_pointer_v<std::remove_reference_t<U>>)
[[nodiscard]] T* bless(U* ptr) [[nodiscard]] inline T* bless(const U& ptr)
{ {
#ifdef _MSC_VER #ifdef _MSC_VER
return (T*)ptr; return (T*)ptr;
@ -21,3 +23,4 @@ namespace utils
#endif #endif
} }
} }

View file

@ -999,17 +999,18 @@ template <typename To, typename From> requires (std::is_integral_v<decltype(std:
constexpr bool is_from_signed = std::is_signed_v<CommonFrom>; constexpr bool is_from_signed = std::is_signed_v<CommonFrom>;
constexpr bool is_to_signed = std::is_signed_v<CommonTo>; constexpr bool is_to_signed = std::is_signed_v<CommonTo>;
constexpr auto from_mask = (is_from_signed && !is_to_signed) ? UnFrom{umax} >> 1 : UnFrom{umax}; // For unsigned/signed mismatch, create an "unsigned" compatible mask
constexpr auto from_mask = (is_from_signed && !is_to_signed && sizeof(CommonFrom) <= sizeof(CommonTo)) ? UnFrom{umax} >> 1 : UnFrom{umax};
constexpr auto to_mask = (is_to_signed && !is_from_signed) ? UnTo{umax} >> 1 : UnTo{umax}; constexpr auto to_mask = (is_to_signed && !is_from_signed) ? UnTo{umax} >> 1 : UnTo{umax};
constexpr auto mask = ~(from_mask & to_mask); constexpr auto mask = static_cast<UnFrom>(~(from_mask & to_mask));
// Signed to unsigned always require test // If destination ("unsigned" compatible) mask is smaller than source ("unsigned" compatible) mask
// Otherwise, this is bit-wise narrowing or conversion between types of different signedness of the same size // It requires narrowing.
if constexpr ((is_from_signed && !is_to_signed) || to_mask < from_mask) if constexpr (!!mask)
{ {
// Try to optimize test if both are of the same signedness // Try to optimize test if both are of the same signedness
if (is_from_signed != is_to_signed ? !!(value & mask) : static_cast<CommonTo>(value) != value) [[unlikely]] if (is_from_signed != is_to_signed ? !!(value & mask) : static_cast<CommonFrom>(static_cast<CommonTo>(value)) != value) [[unlikely]]
{ {
fmt::raw_verify_error(src_loc, u8"Narrowing error", +value); fmt::raw_verify_error(src_loc, u8"Narrowing error", +value);
} }