mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-12-06 07:12:28 +01:00
PPU Profiler
Some checks failed
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux-aarch64.sh, gcc, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux.sh, gcc, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (a1d35836e8d45bfc6f63c26f0a3e5d46ef622fe1, rpcs3/rpcs3-binaries-linux-arm64, /rpcs3/.ci/build-linux-aarch64.sh, clang, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (d812f1254a1157c80fd402f94446310560f54e5f, rpcs3/rpcs3-binaries-linux, /rpcs3/.ci/build-linux.sh, clang, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (51ae32f468089a8169aaf1567de355ff4a3e0842, rpcs3/rpcs3-binaries-mac, .ci/build-mac.sh, Intel) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (8e21bdbc40711a3fccd18fbf17b742348b0f4281, rpcs3/rpcs3-binaries-mac-arm64, .ci/build-mac-arm64.sh, Apple Silicon) (push) Waiting to run
Build RPCS3 / RPCS3 Windows (push) Waiting to run
Build RPCS3 / RPCS3 Windows Clang (win64, clang, clang64) (push) Waiting to run
Build RPCS3 / RPCS3 FreeBSD (push) Waiting to run
Generate Translation Template / Generate Translation Template (push) Has been cancelled
Some checks failed
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux-aarch64.sh, gcc, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux.sh, gcc, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (a1d35836e8d45bfc6f63c26f0a3e5d46ef622fe1, rpcs3/rpcs3-binaries-linux-arm64, /rpcs3/.ci/build-linux-aarch64.sh, clang, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (d812f1254a1157c80fd402f94446310560f54e5f, rpcs3/rpcs3-binaries-linux, /rpcs3/.ci/build-linux.sh, clang, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (51ae32f468089a8169aaf1567de355ff4a3e0842, rpcs3/rpcs3-binaries-mac, .ci/build-mac.sh, Intel) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (8e21bdbc40711a3fccd18fbf17b742348b0f4281, rpcs3/rpcs3-binaries-mac-arm64, .ci/build-mac-arm64.sh, Apple Silicon) (push) Waiting to run
Build RPCS3 / RPCS3 Windows (push) Waiting to run
Build RPCS3 / RPCS3 Windows Clang (win64, clang, clang64) (push) Waiting to run
Build RPCS3 / RPCS3 FreeBSD (push) Waiting to run
Generate Translation Template / Generate Translation Template (push) Has been cancelled
This commit is contained in:
parent
c669a0beb7
commit
107d9dc533
|
|
@ -190,7 +190,7 @@ struct cpu_prof
|
|||
reservation_samples = 0;
|
||||
}
|
||||
|
||||
static std::string format(const std::multimap<u64, u64, std::greater<u64>>& chart, u64 samples, u64 idle, bool extended_print = false)
|
||||
static std::string format(const std::multimap<u64, u64, std::greater<u64>>& chart, u64 samples, u64 idle, u32 type_id, bool extended_print = false)
|
||||
{
|
||||
// Print results
|
||||
std::string results;
|
||||
|
|
@ -204,11 +204,18 @@ struct cpu_prof
|
|||
const f64 _frac = count / busy / samples;
|
||||
|
||||
// Print only 7 hash characters out of 11 (which covers roughly 48 bits)
|
||||
if (type_id == 2)
|
||||
{
|
||||
fmt::append(results, "\n\t[%s", fmt::base57(be_t<u64>{name}));
|
||||
results.resize(results.size() - 4);
|
||||
|
||||
// Print chunk address from lowest 16 bits
|
||||
fmt::append(results, "...chunk-0x%05x]: %.4f%% (%u)", (name & 0xffff) * 4, _frac * 100., count);
|
||||
}
|
||||
else
|
||||
{
|
||||
fmt::append(results, "\n\t[0x%07x]: %.4f%% (%u)", name, _frac * 100., count);
|
||||
}
|
||||
|
||||
if (results.size() >= (extended_print ? 10000 : 5000))
|
||||
{
|
||||
|
|
@ -257,27 +264,37 @@ struct cpu_prof
|
|||
}
|
||||
|
||||
// Print results
|
||||
const std::string results = format(chart, samples, idle);
|
||||
const std::string results = format(chart, samples, idle, ptr->id_type());
|
||||
profiler.notice("Thread \"%s\" [0x%08x]: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%):\n%s", ptr->get_name(), ptr->id, samples, get_percent(idle, samples), new_samples, reservation_samples, get_percent(reservation_samples, samples - idle), results);
|
||||
|
||||
new_samples = 0;
|
||||
}
|
||||
|
||||
static void print_all(std::unordered_map<shared_ptr<cpu_thread>, sample_info>& threads, sample_info& all_info)
|
||||
static void print_all(std::unordered_map<shared_ptr<cpu_thread>, sample_info>& threads, sample_info& all_info, u32 type_id)
|
||||
{
|
||||
u64 new_samples = 0;
|
||||
|
||||
// Print all results and cleanup
|
||||
for (auto& [ptr, info] : threads)
|
||||
{
|
||||
if (ptr->id_type() != type_id)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
new_samples += info.new_samples;
|
||||
info.print(ptr);
|
||||
}
|
||||
|
||||
std::multimap<u64, u64, std::greater<u64>> chart;
|
||||
|
||||
for (auto& [_, info] : threads)
|
||||
for (auto& [ptr, info] : threads)
|
||||
{
|
||||
if (ptr->id_type() != type_id)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// This function collects thread information regardless of 'new_samples' member state
|
||||
for (auto& [name, count] : info.freq)
|
||||
{
|
||||
|
|
@ -301,7 +318,7 @@ struct cpu_prof
|
|||
|
||||
if (new_samples < min_print_all_samples && thread_ctrl::state() != thread_state::aborting)
|
||||
{
|
||||
profiler.notice("All Threads: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%): Not enough new samples have been collected since the last print.", samples, get_percent(idle, samples), new_samples, reservation, get_percent(reservation, samples - idle));
|
||||
profiler.notice("All %s Threads: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%): Not enough new samples have been collected since the last print.", type_id == 1 ? "PPU" : "SPU", samples, get_percent(idle, samples), new_samples, reservation, get_percent(reservation, samples - idle));
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -310,12 +327,13 @@ struct cpu_prof
|
|||
chart.emplace(count, name);
|
||||
}
|
||||
|
||||
const std::string results = format(chart, samples, idle, true);
|
||||
profiler.notice("All Threads: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%):%s", samples, get_percent(idle, samples), new_samples, reservation, get_percent(reservation, samples - idle), results);
|
||||
const std::string results = format(chart, samples, idle, type_id, true);
|
||||
profiler.notice("All %s Threads: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%):%s", type_id == 1 ? "PPU" : "SPU", samples, get_percent(idle, samples), new_samples, reservation, get_percent(reservation, samples - idle), results);
|
||||
}
|
||||
};
|
||||
|
||||
sample_info all_threads_info{};
|
||||
sample_info all_spu_threads_info{};
|
||||
sample_info all_ppu_threads_info{};
|
||||
|
||||
void operator()()
|
||||
{
|
||||
|
|
@ -376,8 +394,11 @@ struct cpu_prof
|
|||
{
|
||||
if (auto state = +ptr->state; cpu_flag::exit - state)
|
||||
{
|
||||
const auto spu = ptr->try_get<spu_thread>();
|
||||
const auto ppu = ptr->try_get<ppu_thread>();
|
||||
|
||||
// Get short function hash
|
||||
const u64 name = atomic_storage<u64>::load(ptr->block_hash);
|
||||
const u64 name = ppu ? atomic_storage<u32>::load(ppu->cia) : atomic_storage<u64>::load(ptr->block_hash);
|
||||
|
||||
// Append occurrence
|
||||
info.samples++;
|
||||
|
|
@ -387,18 +408,18 @@ struct cpu_prof
|
|||
info.freq[name]++;
|
||||
info.new_samples++;
|
||||
|
||||
if (auto spu = ptr->try_get<spu_thread>())
|
||||
if (spu)
|
||||
{
|
||||
if (spu->raddr)
|
||||
{
|
||||
info.reservation_samples++;
|
||||
}
|
||||
}
|
||||
|
||||
// Append verification time to fixed common name 0000000...chunk-0x3fffc
|
||||
if (name >> 16 && (name & 0xffff) == 0)
|
||||
info.freq[0xffff]++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (state & (cpu_flag::dbg_pause + cpu_flag::dbg_global_pause))
|
||||
|
|
@ -420,8 +441,10 @@ struct cpu_prof
|
|||
{
|
||||
profiler.success("Flushing profiling results...");
|
||||
|
||||
all_threads_info = {};
|
||||
sample_info::print_all(threads, all_threads_info);
|
||||
all_ppu_threads_info = {};
|
||||
all_spu_threads_info = {};
|
||||
sample_info::print_all(threads, all_ppu_threads_info, 1);
|
||||
sample_info::print_all(threads, all_spu_threads_info, 2);
|
||||
}
|
||||
|
||||
if (Emu.IsPaused())
|
||||
|
|
@ -442,7 +465,8 @@ struct cpu_prof
|
|||
}
|
||||
|
||||
// Print all remaining results
|
||||
sample_info::print_all(threads, all_threads_info);
|
||||
sample_info::print_all(threads, all_ppu_threads_info, 1);
|
||||
sample_info::print_all(threads, all_spu_threads_info, 2);
|
||||
}
|
||||
|
||||
static constexpr auto thread_name = "CPU Profiler"sv;
|
||||
|
|
@ -459,7 +483,7 @@ extern f64 get_cpu_program_usage_percent(u64 hash)
|
|||
{
|
||||
u64 total = 0;
|
||||
|
||||
for (auto [name, count] : prof->all_threads_info.freq)
|
||||
for (auto [name, count] : prof->all_spu_threads_info.freq)
|
||||
{
|
||||
if ((name & -65536) == hash)
|
||||
{
|
||||
|
|
@ -472,7 +496,7 @@ extern f64 get_cpu_program_usage_percent(u64 hash)
|
|||
return 0;
|
||||
}
|
||||
|
||||
return std::max<f64>(0.0001, static_cast<f64>(total) * 100 / (prof->all_threads_info.samples - prof->all_threads_info.idle));
|
||||
return std::max<f64>(0.0001, static_cast<f64>(total) * 100 / (prof->all_spu_threads_info.samples - prof->all_spu_threads_info.idle));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -639,22 +663,17 @@ void cpu_thread::operator()()
|
|||
thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(get_class()));
|
||||
}
|
||||
|
||||
while (!g_fxo->is_init<cpu_profiler>())
|
||||
{
|
||||
if (Emu.IsStopped())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Can we have a little race, right? First thread is started concurrently with g_fxo->init()
|
||||
thread_ctrl::wait_for(1000);
|
||||
}
|
||||
ensure(g_fxo->is_init<cpu_profiler>());
|
||||
|
||||
switch (get_class())
|
||||
{
|
||||
case thread_class::ppu:
|
||||
{
|
||||
//g_fxo->get<cpu_profiler>().registered.push(id);
|
||||
if (g_cfg.core.ppu_prof)
|
||||
{
|
||||
g_fxo->get<cpu_profiler>().registered.push(id);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case thread_class::spu:
|
||||
|
|
@ -1546,7 +1565,7 @@ void cpu_thread::flush_profilers() noexcept
|
|||
return;
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_prof || g_cfg.core.spu_debug)
|
||||
if (g_cfg.core.spu_prof || g_cfg.core.spu_debug || g_cfg.core.ppu_prof)
|
||||
{
|
||||
g_fxo->get<cpu_profiler>().registered.push(0);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -593,6 +593,11 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
|
|||
{
|
||||
callee = m_module->getOrInsertFunction(fmt::format("__0x%x", target_last - base), type);
|
||||
cast<Function>(callee.getCallee())->setCallingConv(CallingConv::GHC);
|
||||
|
||||
if (g_cfg.core.ppu_prof)
|
||||
{
|
||||
m_ir->CreateStore(m_ir->getInt32(target_last), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_cia - m_locals)));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ struct cfg_root : cfg::node
|
|||
cfg::_bool spu_verification{ this, "SPU Verification", true }; // Should be enabled
|
||||
cfg::_bool spu_cache{ this, "SPU Cache", true };
|
||||
cfg::_bool spu_prof{ this, "SPU Profiler", false };
|
||||
cfg::_bool ppu_prof{ this, "PPU Profiler", false };
|
||||
cfg::uint<0, 16> mfc_transfers_shuffling{ this, "MFC Commands Shuffling Limit", 0 };
|
||||
cfg::uint<0, 10000> mfc_transfers_timeout{ this, "MFC Commands Timeout", 0, true };
|
||||
cfg::_bool mfc_shuffling_in_steps{ this, "MFC Commands Shuffling In Steps", false, true };
|
||||
|
|
|
|||
Loading…
Reference in a new issue