rpcsx/rpcs3/Emu/perf_meter.cpp
Nekotekina 580bd2b25e Initial Linux Aarch64 support
* Update asmjit dependency (aarch64 branch)
* Disable USE_DISCORD_RPC by default
* Dump some JIT objects in rpcs3 cache dir
* Add SIGILL handler for all platforms
* Fix resetting zeroing denormals in thread pool
* Refactor most v128:: utils into global gv_** functions
* Refactor PPU interpreter (incomplete), remove "precise"
* - Instruction specializations with multiple accuracy flags
* - Adjust calling convention for speed
* - Removed precise/fast setting, replaced with static
* - Started refactoring interpreters for building at runtime JIT
*   (I got tired of poor compiler optimizations)
* - Expose some accuracy settings (SAT, NJ, VNAN, FPCC)
* - Add exec_bytes PPU thread variable (akin to cycle count)
* PPU LLVM: fix VCTUXS+VCTSXS instruction NaN results
* SPU interpreter: remove "precise" for now (extremely non-portable)
* - As with PPU, settings changed to static/dynamic for interpreters.
* - Precise options will be implemented later
* Fix termination after fatal error dialog
2022-01-15 06:48:04 +03:00

157 lines
3.4 KiB
C++

#include "stdafx.h"
#include "perf_meter.hpp"
#include "util/sysinfo.hpp"
#include "util/fence.hpp"
#include "util/tsc.hpp"
#include "Utilities/Thread.h"
#include <map>
#include <mutex>
// Drain a 66-slot counter array into this accumulator.
// Every slot of `ns` is atomically swapped with zero and the value that was
// taken out is added to the matching m_log slot. Nothing happens when ns[0]
// is zero.
void perf_stat_base::push(u64 ns[66]) noexcept
{
	if (ns[0])
	{
		for (u32 slot = 0; slot != 66; ++slot)
		{
			// Take the value and reset the source slot in one atomic step
			const u64 taken = atomic_storage<u64>::exchange(ns[slot], 0);
			m_log[slot] += taken;
		}
	}
}
// Log the accumulated statistics under the label `name`.
//
// Layout of m_log as used here:
//   m_log[0]     - total number of events (nothing is printed when it is zero)
//   m_log[1..64] - histogram slots; slot i + 1 is reported with the bound 2^i ns
//   m_log[65]    - total time in nanoseconds
//
// The slots are walked in five contiguous ranges that differ only in the unit
// used for the bound (us, ms, s, whole seconds, minutes). Previously the ranges
// were 0..12, 14..22, 24..32, 34..42 and 44..62, so the counts stored in slots
// 14, 24, 34, 44 and 64 were silently dropped from the report.
//
// NOTE(review): the timing push() in this file files an event under slot
// 64 - countl_zero(ns), i.e. slot i + 1 holds durations in [2^i, 2^(i+1)) ns,
// so the printed bound is the lower edge of the bucket. The "events <" wording
// is kept unchanged so existing log output stays comparable.
void perf_stat_base::print(const char* name) const noexcept
{
	if (u64 num_total = m_log[0].load())
	{
		perf_log.notice(u8"Perf stats for %s: total events: %u (total time %.4fs, avg %.4fµs)", name, num_total, m_log[65].load() / 1000'000'000., m_log[65].load() / 1000. / num_total);

		// Bounds 2^0 .. 2^13 ns, shown in microseconds
		for (u32 i = 0; i < 14; i++)
		{
			if (u64 count = m_log[i + 1].load())
			{
				perf_log.notice(u8"Perf stats for %s: events < %.3fµs: %u", name, std::pow(2., i) / 1000., count);
			}
		}

		// Bounds 2^14 .. 2^23 ns, shown in milliseconds
		for (u32 i = 14; i < 24; i++)
		{
			if (u64 count = m_log[i + 1].load()) [[unlikely]]
			{
				perf_log.notice("Perf stats for %s: events < %.3fms: %u", name, std::pow(2., i) / 1000'000., count);
			}
		}

		// Bounds 2^24 .. 2^33 ns, shown in seconds with three decimals
		for (u32 i = 24; i < 34; i++)
		{
			if (u64 count = m_log[i + 1].load()) [[unlikely]]
			{
				perf_log.notice("Perf stats for %s: events < %.3fs: %u", name, std::pow(2., i) / 1000'000'000., count);
			}
		}

		// Bounds 2^34 .. 2^43 ns, shown in whole seconds
		for (u32 i = 34; i < 44; i++)
		{
			if (u64 count = m_log[i + 1].load()) [[unlikely]]
			{
				perf_log.notice("Perf stats for %s: events < %.0f SEC: %u", name, std::pow(2., i) / 1000'000'000., count);
			}
		}

		// Bounds 2^44 .. 2^63 ns, shown in minutes (includes the last histogram slot, m_log[64])
		for (u32 i = 44; i < 64; i++)
		{
			if (u64 count = m_log[i + 1].load()) [[unlikely]]
			{
				perf_log.notice("Perf stats for %s: events < %.0f MIN: %u", name, std::pow(2., i) / 60'000'000'000., count);
			}
		}
	}
}
// Record one finished event into the 66-slot counter array `data`.
// `start_time` is a TSC value captured at the start of the event; the end is
// sampled here. `name` is only used for the optional per-event log line.
SAFE_BUFFERS(void) perf_stat_base::push(u64 data[66], u64 start_time, const char* name) noexcept
{
	// Sample the end of the event: fence first, then read the TSC
	utils::lfence();
	const u64 end_time = utils::get_tsc();

	// Elapsed time in seconds (TSC ticks divided by TSC frequency)
	const f64 elapsed_sec = (end_time - start_time) * 1. / utils::get_tsc_freq();

	// Same duration in microseconds, used for the threshold check and the log line
	const f64 elapsed_us = elapsed_sec * 1000'000.;

	// Whole nanoseconds, which is what gets stored in the counters
	const u64 ns = static_cast<u64>(elapsed_sec * 1000'000'000.);

	// Log individual events at or above the configured threshold (in microseconds)
	if (static_cast<u64>(elapsed_us) >= g_cfg.core.perf_report_threshold)
	{
		perf_log.notice(u8"%s: %.3fµs", name, elapsed_us);
	}

	// Slot 0 counts events: a non-zero duration bumps it here, while a zero
	// duration maps to histogram index 0 below and bumps it there instead.
	if (ns != 0)
	{
		data[0]++;
	}

	// Histogram index is the number of significant bits in ns
	const auto bucket = 64 - std::countl_zero(ns);
	data[bucket]++;

	// Slot 65 accumulates the total time in nanoseconds
	data[65] += ns;
}
// Locked by perf_stat_base::add(), remove() and report() before they touch the two maps below
static shared_mutex s_perf_mutex;
// Accumulated statistics keyed by name; filled from registered sources in remove() and report()
static std::map<std::string, perf_stat_base> s_perf_acc;
// Registered raw counter arrays keyed by name (several arrays may share one name)
static std::multimap<std::string, u64*> s_perf_sources;
// Register the counter array `ns` as a source for the statistics called `name`
// and make sure an accumulator entry with that name exists.
void perf_stat_base::add(u64 ns[66], const char* name) noexcept
{
	// Foreign/unnamed threads (no current thread_ctrl context) are not registered
	if (thread_ctrl::get_current())
	{
		std::lock_guard lock(s_perf_mutex);

		// operator[] default-constructs the accumulator if it is not there yet
		s_perf_acc[name];
		s_perf_sources.emplace(name, ns);
	}
}
// Unregister the counter array `ns` previously registered under `name`.
// If a matching registration is found, its counters are flushed into the
// accumulator for `name` before the registration is erased.
void perf_stat_base::remove(u64 ns[66], const char* name) noexcept
{
	// Mirrors add(): callers without a current thread_ctrl context are ignored
	if (!thread_ctrl::get_current())
	{
		return;
	}

	std::lock_guard lock(s_perf_mutex);

	// All registrations sharing this name
	auto [cur, last] = s_perf_sources.equal_range(name);

	// Look for the first entry that points at this exact array
	while (cur != last && cur->second != ns)
	{
		++cur;
	}

	if (cur != last)
	{
		s_perf_acc[name].push(ns);
		s_perf_sources.erase(cur);
	}
}
// Emit a full performance report to perf_log: flush every registered source
// into its accumulator, print each accumulator, then drop all accumulators.
void perf_stat_base::report() noexcept
{
	std::lock_guard lock(s_perf_mutex);

	perf_log.notice("Performance report begin (%u src, %u acc):", s_perf_sources.size(), s_perf_acc.size());

	// Pull pending counters out of every still-registered source
	for (auto src = s_perf_sources.begin(); src != s_perf_sources.end(); ++src)
	{
		s_perf_acc[src->first].push(src->second);
	}

	// Print each accumulator under its own name
	for (auto acc = s_perf_acc.begin(); acc != s_perf_acc.end(); ++acc)
	{
		acc->second.print(acc->first.c_str());
	}

	// Discard the accumulated data now that it has been printed
	s_perf_acc.clear();

	perf_log.notice("Performance report end.");
}