mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-12-06 07:12:28 +01:00
PPU Profiler
Some checks failed
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux-aarch64.sh, gcc, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux.sh, gcc, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (a1d35836e8d45bfc6f63c26f0a3e5d46ef622fe1, rpcs3/rpcs3-binaries-linux-arm64, /rpcs3/.ci/build-linux-aarch64.sh, clang, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (d812f1254a1157c80fd402f94446310560f54e5f, rpcs3/rpcs3-binaries-linux, /rpcs3/.ci/build-linux.sh, clang, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (51ae32f468089a8169aaf1567de355ff4a3e0842, rpcs3/rpcs3-binaries-mac, .ci/build-mac.sh, Intel) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (8e21bdbc40711a3fccd18fbf17b742348b0f4281, rpcs3/rpcs3-binaries-mac-arm64, .ci/build-mac-arm64.sh, Apple Silicon) (push) Waiting to run
Build RPCS3 / RPCS3 Windows (push) Waiting to run
Build RPCS3 / RPCS3 Windows Clang (win64, clang, clang64) (push) Waiting to run
Build RPCS3 / RPCS3 FreeBSD (push) Waiting to run
Generate Translation Template / Generate Translation Template (push) Has been cancelled
Some checks failed
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux-aarch64.sh, gcc, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux.sh, gcc, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (a1d35836e8d45bfc6f63c26f0a3e5d46ef622fe1, rpcs3/rpcs3-binaries-linux-arm64, /rpcs3/.ci/build-linux-aarch64.sh, clang, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (d812f1254a1157c80fd402f94446310560f54e5f, rpcs3/rpcs3-binaries-linux, /rpcs3/.ci/build-linux.sh, clang, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (51ae32f468089a8169aaf1567de355ff4a3e0842, rpcs3/rpcs3-binaries-mac, .ci/build-mac.sh, Intel) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (8e21bdbc40711a3fccd18fbf17b742348b0f4281, rpcs3/rpcs3-binaries-mac-arm64, .ci/build-mac-arm64.sh, Apple Silicon) (push) Waiting to run
Build RPCS3 / RPCS3 Windows (push) Waiting to run
Build RPCS3 / RPCS3 Windows Clang (win64, clang, clang64) (push) Waiting to run
Build RPCS3 / RPCS3 FreeBSD (push) Waiting to run
Generate Translation Template / Generate Translation Template (push) Has been cancelled
This commit is contained in:
parent
c669a0beb7
commit
107d9dc533
|
|
@ -190,7 +190,7 @@ struct cpu_prof
|
||||||
reservation_samples = 0;
|
reservation_samples = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::string format(const std::multimap<u64, u64, std::greater<u64>>& chart, u64 samples, u64 idle, bool extended_print = false)
|
static std::string format(const std::multimap<u64, u64, std::greater<u64>>& chart, u64 samples, u64 idle, u32 type_id, bool extended_print = false)
|
||||||
{
|
{
|
||||||
// Print results
|
// Print results
|
||||||
std::string results;
|
std::string results;
|
||||||
|
|
@ -204,11 +204,18 @@ struct cpu_prof
|
||||||
const f64 _frac = count / busy / samples;
|
const f64 _frac = count / busy / samples;
|
||||||
|
|
||||||
// Print only 7 hash characters out of 11 (which covers roughly 48 bits)
|
// Print only 7 hash characters out of 11 (which covers roughly 48 bits)
|
||||||
fmt::append(results, "\n\t[%s", fmt::base57(be_t<u64>{name}));
|
if (type_id == 2)
|
||||||
results.resize(results.size() - 4);
|
{
|
||||||
|
fmt::append(results, "\n\t[%s", fmt::base57(be_t<u64>{name}));
|
||||||
|
results.resize(results.size() - 4);
|
||||||
|
|
||||||
// Print chunk address from lowest 16 bits
|
// Print chunk address from lowest 16 bits
|
||||||
fmt::append(results, "...chunk-0x%05x]: %.4f%% (%u)", (name & 0xffff) * 4, _frac * 100., count);
|
fmt::append(results, "...chunk-0x%05x]: %.4f%% (%u)", (name & 0xffff) * 4, _frac * 100., count);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fmt::append(results, "\n\t[0x%07x]: %.4f%% (%u)", name, _frac * 100., count);
|
||||||
|
}
|
||||||
|
|
||||||
if (results.size() >= (extended_print ? 10000 : 5000))
|
if (results.size() >= (extended_print ? 10000 : 5000))
|
||||||
{
|
{
|
||||||
|
|
@ -257,27 +264,37 @@ struct cpu_prof
|
||||||
}
|
}
|
||||||
|
|
||||||
// Print results
|
// Print results
|
||||||
const std::string results = format(chart, samples, idle);
|
const std::string results = format(chart, samples, idle, ptr->id_type());
|
||||||
profiler.notice("Thread \"%s\" [0x%08x]: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%):\n%s", ptr->get_name(), ptr->id, samples, get_percent(idle, samples), new_samples, reservation_samples, get_percent(reservation_samples, samples - idle), results);
|
profiler.notice("Thread \"%s\" [0x%08x]: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%):\n%s", ptr->get_name(), ptr->id, samples, get_percent(idle, samples), new_samples, reservation_samples, get_percent(reservation_samples, samples - idle), results);
|
||||||
|
|
||||||
new_samples = 0;
|
new_samples = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void print_all(std::unordered_map<shared_ptr<cpu_thread>, sample_info>& threads, sample_info& all_info)
|
static void print_all(std::unordered_map<shared_ptr<cpu_thread>, sample_info>& threads, sample_info& all_info, u32 type_id)
|
||||||
{
|
{
|
||||||
u64 new_samples = 0;
|
u64 new_samples = 0;
|
||||||
|
|
||||||
// Print all results and cleanup
|
// Print all results and cleanup
|
||||||
for (auto& [ptr, info] : threads)
|
for (auto& [ptr, info] : threads)
|
||||||
{
|
{
|
||||||
|
if (ptr->id_type() != type_id)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
new_samples += info.new_samples;
|
new_samples += info.new_samples;
|
||||||
info.print(ptr);
|
info.print(ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::multimap<u64, u64, std::greater<u64>> chart;
|
std::multimap<u64, u64, std::greater<u64>> chart;
|
||||||
|
|
||||||
for (auto& [_, info] : threads)
|
for (auto& [ptr, info] : threads)
|
||||||
{
|
{
|
||||||
|
if (ptr->id_type() != type_id)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// This function collects thread information regardless of 'new_samples' member state
|
// This function collects thread information regardless of 'new_samples' member state
|
||||||
for (auto& [name, count] : info.freq)
|
for (auto& [name, count] : info.freq)
|
||||||
{
|
{
|
||||||
|
|
@ -301,7 +318,7 @@ struct cpu_prof
|
||||||
|
|
||||||
if (new_samples < min_print_all_samples && thread_ctrl::state() != thread_state::aborting)
|
if (new_samples < min_print_all_samples && thread_ctrl::state() != thread_state::aborting)
|
||||||
{
|
{
|
||||||
profiler.notice("All Threads: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%): Not enough new samples have been collected since the last print.", samples, get_percent(idle, samples), new_samples, reservation, get_percent(reservation, samples - idle));
|
profiler.notice("All %s Threads: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%): Not enough new samples have been collected since the last print.", type_id == 1 ? "PPU" : "SPU", samples, get_percent(idle, samples), new_samples, reservation, get_percent(reservation, samples - idle));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -310,12 +327,13 @@ struct cpu_prof
|
||||||
chart.emplace(count, name);
|
chart.emplace(count, name);
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::string results = format(chart, samples, idle, true);
|
const std::string results = format(chart, samples, idle, type_id, true);
|
||||||
profiler.notice("All Threads: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%):%s", samples, get_percent(idle, samples), new_samples, reservation, get_percent(reservation, samples - idle), results);
|
profiler.notice("All %s Threads: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%):%s", type_id == 1 ? "PPU" : "SPU", samples, get_percent(idle, samples), new_samples, reservation, get_percent(reservation, samples - idle), results);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
sample_info all_threads_info{};
|
sample_info all_spu_threads_info{};
|
||||||
|
sample_info all_ppu_threads_info{};
|
||||||
|
|
||||||
void operator()()
|
void operator()()
|
||||||
{
|
{
|
||||||
|
|
@ -376,8 +394,11 @@ struct cpu_prof
|
||||||
{
|
{
|
||||||
if (auto state = +ptr->state; cpu_flag::exit - state)
|
if (auto state = +ptr->state; cpu_flag::exit - state)
|
||||||
{
|
{
|
||||||
|
const auto spu = ptr->try_get<spu_thread>();
|
||||||
|
const auto ppu = ptr->try_get<ppu_thread>();
|
||||||
|
|
||||||
// Get short function hash
|
// Get short function hash
|
||||||
const u64 name = atomic_storage<u64>::load(ptr->block_hash);
|
const u64 name = ppu ? atomic_storage<u32>::load(ppu->cia) : atomic_storage<u64>::load(ptr->block_hash);
|
||||||
|
|
||||||
// Append occurrence
|
// Append occurrence
|
||||||
info.samples++;
|
info.samples++;
|
||||||
|
|
@ -387,17 +408,17 @@ struct cpu_prof
|
||||||
info.freq[name]++;
|
info.freq[name]++;
|
||||||
info.new_samples++;
|
info.new_samples++;
|
||||||
|
|
||||||
if (auto spu = ptr->try_get<spu_thread>())
|
if (spu)
|
||||||
{
|
{
|
||||||
if (spu->raddr)
|
if (spu->raddr)
|
||||||
{
|
{
|
||||||
info.reservation_samples++;
|
info.reservation_samples++;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Append verification time to fixed common name 0000000...chunk-0x3fffc
|
// Append verification time to fixed common name 0000000...chunk-0x3fffc
|
||||||
if (name >> 16 && (name & 0xffff) == 0)
|
if (name >> 16 && (name & 0xffff) == 0)
|
||||||
info.freq[0xffff]++;
|
info.freq[0xffff]++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
@ -420,8 +441,10 @@ struct cpu_prof
|
||||||
{
|
{
|
||||||
profiler.success("Flushing profiling results...");
|
profiler.success("Flushing profiling results...");
|
||||||
|
|
||||||
all_threads_info = {};
|
all_ppu_threads_info = {};
|
||||||
sample_info::print_all(threads, all_threads_info);
|
all_spu_threads_info = {};
|
||||||
|
sample_info::print_all(threads, all_ppu_threads_info, 1);
|
||||||
|
sample_info::print_all(threads, all_spu_threads_info, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Emu.IsPaused())
|
if (Emu.IsPaused())
|
||||||
|
|
@ -442,7 +465,8 @@ struct cpu_prof
|
||||||
}
|
}
|
||||||
|
|
||||||
// Print all remaining results
|
// Print all remaining results
|
||||||
sample_info::print_all(threads, all_threads_info);
|
sample_info::print_all(threads, all_ppu_threads_info, 1);
|
||||||
|
sample_info::print_all(threads, all_spu_threads_info, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
static constexpr auto thread_name = "CPU Profiler"sv;
|
static constexpr auto thread_name = "CPU Profiler"sv;
|
||||||
|
|
@ -459,7 +483,7 @@ extern f64 get_cpu_program_usage_percent(u64 hash)
|
||||||
{
|
{
|
||||||
u64 total = 0;
|
u64 total = 0;
|
||||||
|
|
||||||
for (auto [name, count] : prof->all_threads_info.freq)
|
for (auto [name, count] : prof->all_spu_threads_info.freq)
|
||||||
{
|
{
|
||||||
if ((name & -65536) == hash)
|
if ((name & -65536) == hash)
|
||||||
{
|
{
|
||||||
|
|
@ -472,7 +496,7 @@ extern f64 get_cpu_program_usage_percent(u64 hash)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return std::max<f64>(0.0001, static_cast<f64>(total) * 100 / (prof->all_threads_info.samples - prof->all_threads_info.idle));
|
return std::max<f64>(0.0001, static_cast<f64>(total) * 100 / (prof->all_spu_threads_info.samples - prof->all_spu_threads_info.idle));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -639,22 +663,17 @@ void cpu_thread::operator()()
|
||||||
thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(get_class()));
|
thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(get_class()));
|
||||||
}
|
}
|
||||||
|
|
||||||
while (!g_fxo->is_init<cpu_profiler>())
|
ensure(g_fxo->is_init<cpu_profiler>());
|
||||||
{
|
|
||||||
if (Emu.IsStopped())
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Can we have a little race, right? First thread is started concurrently with g_fxo->init()
|
|
||||||
thread_ctrl::wait_for(1000);
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (get_class())
|
switch (get_class())
|
||||||
{
|
{
|
||||||
case thread_class::ppu:
|
case thread_class::ppu:
|
||||||
{
|
{
|
||||||
//g_fxo->get<cpu_profiler>().registered.push(id);
|
if (g_cfg.core.ppu_prof)
|
||||||
|
{
|
||||||
|
g_fxo->get<cpu_profiler>().registered.push(id);
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case thread_class::spu:
|
case thread_class::spu:
|
||||||
|
|
@ -1546,7 +1565,7 @@ void cpu_thread::flush_profilers() noexcept
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (g_cfg.core.spu_prof || g_cfg.core.spu_debug)
|
if (g_cfg.core.spu_prof || g_cfg.core.spu_debug || g_cfg.core.ppu_prof)
|
||||||
{
|
{
|
||||||
g_fxo->get<cpu_profiler>().registered.push(0);
|
g_fxo->get<cpu_profiler>().registered.push(0);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -593,6 +593,11 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
|
||||||
{
|
{
|
||||||
callee = m_module->getOrInsertFunction(fmt::format("__0x%x", target_last - base), type);
|
callee = m_module->getOrInsertFunction(fmt::format("__0x%x", target_last - base), type);
|
||||||
cast<Function>(callee.getCallee())->setCallingConv(CallingConv::GHC);
|
cast<Function>(callee.getCallee())->setCallingConv(CallingConv::GHC);
|
||||||
|
|
||||||
|
if (g_cfg.core.ppu_prof)
|
||||||
|
{
|
||||||
|
m_ir->CreateStore(m_ir->getInt32(target_last), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_cia - m_locals)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
|
||||||
|
|
@ -57,6 +57,7 @@ struct cfg_root : cfg::node
|
||||||
cfg::_bool spu_verification{ this, "SPU Verification", true }; // Should be enabled
|
cfg::_bool spu_verification{ this, "SPU Verification", true }; // Should be enabled
|
||||||
cfg::_bool spu_cache{ this, "SPU Cache", true };
|
cfg::_bool spu_cache{ this, "SPU Cache", true };
|
||||||
cfg::_bool spu_prof{ this, "SPU Profiler", false };
|
cfg::_bool spu_prof{ this, "SPU Profiler", false };
|
||||||
|
cfg::_bool ppu_prof{ this, "PPU Profiler", false };
|
||||||
cfg::uint<0, 16> mfc_transfers_shuffling{ this, "MFC Commands Shuffling Limit", 0 };
|
cfg::uint<0, 16> mfc_transfers_shuffling{ this, "MFC Commands Shuffling Limit", 0 };
|
||||||
cfg::uint<0, 10000> mfc_transfers_timeout{ this, "MFC Commands Timeout", 0, true };
|
cfg::uint<0, 10000> mfc_transfers_timeout{ this, "MFC Commands Timeout", 0, true };
|
||||||
cfg::_bool mfc_shuffling_in_steps{ this, "MFC Commands Shuffling In Steps", false, true };
|
cfg::_bool mfc_shuffling_in_steps{ this, "MFC Commands Shuffling In Steps", false, true };
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue