2020-12-06 17:34:16 +01:00
|
|
|
#pragma once
|
2020-10-18 14:00:10 +02:00
|
|
|
|
|
|
|
|
#include "Utilities/types.h"
|
|
|
|
|
#include "util/logs.hpp"
|
|
|
|
|
#include "Utilities/sysinfo.h"
|
|
|
|
|
#include "system_config.h"
|
|
|
|
|
#include "IdManager.h"
|
|
|
|
|
#include <array>
|
|
|
|
|
#include <cmath>
|
|
|
|
|
|
|
|
|
|
LOG_CHANNEL(perf_log, "PERF");
|
|
|
|
|
|
|
|
|
|
// TODO: constexpr with the help of bitcast
|
|
|
|
|
// Zero-terminated character buffer holding the raw object bytes of the
// template argument `Name`, usable as a C string via `.data()`.
// The buffer is one byte longer than `Name`, and that extra byte stays zero.
template <auto Name>
inline const auto perf_name = []
{
	// Value-initialized, so every byte (including the terminator) starts at zero
	std::array<char, sizeof(Name) + 1> buffer{};

	// Local copy gives the template argument an address to copy from
	constexpr auto value = Name;
	std::memcpy(buffer.data(), &value, sizeof(value));

	return buffer;
}();
|
|
|
|
|
|
|
|
|
|
class perf_stat_base
|
|
|
|
|
{
|
|
|
|
|
atomic_t<u64> m_log[66]{};
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
// Print accumulated values
|
|
|
|
|
void print(const char* name) noexcept;
|
|
|
|
|
|
|
|
|
|
// Accumulate values from a thread
|
|
|
|
|
void push(u64 ns[66]) noexcept;
|
|
|
|
|
|
2020-11-19 09:12:59 +01:00
|
|
|
// Register TLS storage for stats
|
|
|
|
|
static void add(u64 ns[66], const char* name) noexcept;
|
|
|
|
|
|
|
|
|
|
// Unregister TLS storage and drain its data
|
|
|
|
|
static void remove(u64 ns[66], const char* name) noexcept;
|
|
|
|
|
|
2020-10-18 14:00:10 +02:00
|
|
|
public:
|
|
|
|
|
perf_stat_base() noexcept = default;
|
|
|
|
|
|
|
|
|
|
perf_stat_base(const perf_stat_base&) = delete;
|
|
|
|
|
|
|
|
|
|
perf_stat_base& operator =(const perf_stat_base&) = delete;
|
|
|
|
|
|
|
|
|
|
~perf_stat_base() {}
|
2020-11-19 09:12:59 +01:00
|
|
|
|
|
|
|
|
// Collect all data, report it, and clean
|
|
|
|
|
static void report() noexcept;
|
2020-10-18 14:00:10 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Object that prints event length stats at the end
|
|
|
|
|
template <auto ShortName>
|
|
|
|
|
class perf_stat final : public perf_stat_base
|
|
|
|
|
{
|
|
|
|
|
static inline thread_local struct perf_stat_local
|
|
|
|
|
{
|
|
|
|
|
// Local non-atomic values for increments
|
|
|
|
|
u64 m_log[66]{};
|
|
|
|
|
|
2020-11-19 09:12:59 +01:00
|
|
|
perf_stat_local() noexcept
|
|
|
|
|
{
|
|
|
|
|
perf_stat_base::add(m_log, perf_name<ShortName>.data());
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-18 14:00:10 +02:00
|
|
|
~perf_stat_local()
|
|
|
|
|
{
|
2020-11-19 09:12:59 +01:00
|
|
|
perf_stat_base::remove(m_log, perf_name<ShortName>.data());
|
2020-10-18 14:00:10 +02:00
|
|
|
}
|
2020-11-19 09:12:59 +01:00
|
|
|
|
2020-10-18 14:00:10 +02:00
|
|
|
} g_tls_perf_stat;
|
|
|
|
|
|
|
|
|
|
public:
|
2020-11-25 05:26:37 +01:00
|
|
|
static NEVER_INLINE void push(u64 start_time) noexcept
|
2020-10-18 14:00:10 +02:00
|
|
|
{
|
2020-11-25 05:26:37 +01:00
|
|
|
// Event end
|
|
|
|
|
const u64 end_time = (_mm_lfence(), __rdtsc());
|
|
|
|
|
|
|
|
|
|
// Compute difference in seconds
|
|
|
|
|
const f64 diff = (end_time - start_time) * 1. / utils::get_tsc_freq();
|
|
|
|
|
|
|
|
|
|
// Register perf stat in nanoseconds
|
|
|
|
|
const u64 ns = static_cast<u64>(diff * 1000'000'000.);
|
|
|
|
|
|
|
|
|
|
// Print in microseconds
|
|
|
|
|
if (static_cast<u64>(diff * 1000'000.) >= g_cfg.core.perf_report_threshold)
|
|
|
|
|
{
|
|
|
|
|
perf_log.notice(u8"%s: %.3fµs", perf_name<ShortName>.data(), diff * 1000'000.);
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-18 14:00:10 +02:00
|
|
|
auto& data = g_tls_perf_stat.m_log;
|
|
|
|
|
data[0] += ns != 0;
|
|
|
|
|
data[64 - std::countl_zero(ns)]++;
|
|
|
|
|
data[65] += ns;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Object that prints event length at the end
|
|
|
|
|
template <auto ShortName, auto... SubEvents>
|
|
|
|
|
class perf_meter
|
|
|
|
|
{
|
|
|
|
|
// Initialize array (possibly only 1 element) with timestamp
|
2020-10-30 01:19:13 +01:00
|
|
|
u64 m_timestamps[1 + sizeof...(SubEvents)];
|
2020-10-18 14:00:10 +02:00
|
|
|
|
|
|
|
|
public:
|
2020-10-30 01:19:13 +01:00
|
|
|
SAFE_BUFFERS FORCE_INLINE perf_meter() noexcept
|
2020-10-18 14:00:10 +02:00
|
|
|
{
|
2020-10-30 01:19:13 +01:00
|
|
|
restart();
|
2020-10-18 14:00:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Copy first timestamp
|
|
|
|
|
template <auto SN, auto... S>
|
2020-10-30 01:19:13 +01:00
|
|
|
SAFE_BUFFERS FORCE_INLINE perf_meter(const perf_meter<SN, S...>& r) noexcept
|
2020-10-18 14:00:10 +02:00
|
|
|
{
|
|
|
|
|
m_timestamps[0] = r.get();
|
|
|
|
|
std::memset(m_timestamps + 1, 0, sizeof(m_timestamps) - sizeof(u64));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <auto SN, auto... S>
|
|
|
|
|
SAFE_BUFFERS perf_meter(perf_meter<SN, S...>&& r) noexcept
|
|
|
|
|
{
|
|
|
|
|
m_timestamps[0] = r.get();
|
|
|
|
|
r.reset();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Copy first timestamp
|
|
|
|
|
template <auto SN, auto... S>
|
|
|
|
|
SAFE_BUFFERS perf_meter& operator =(const perf_meter<SN, S...>& r) noexcept
|
|
|
|
|
{
|
|
|
|
|
m_timestamps[0] = r.get();
|
|
|
|
|
return *this;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <auto SN, auto... S>
|
|
|
|
|
SAFE_BUFFERS perf_meter& operator =(perf_meter<SN, S...>& r) noexcept
|
|
|
|
|
{
|
|
|
|
|
m_timestamps[0] = r.get();
|
|
|
|
|
r.reset();
|
|
|
|
|
return *this;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Push subevent data in array
|
|
|
|
|
template <auto Event, std::size_t Index = 0>
|
|
|
|
|
SAFE_BUFFERS void push() noexcept
|
|
|
|
|
{
|
|
|
|
|
// TODO: should use more efficient search with type comparison, then value comparison, or pattern matching
|
|
|
|
|
if constexpr (std::array<bool, sizeof...(SubEvents)>{(SubEvents == Event)...}[Index])
|
|
|
|
|
{
|
|
|
|
|
// Push actual timestamp into an array
|
|
|
|
|
m_timestamps[Index + 1] = __rdtsc();
|
|
|
|
|
}
|
|
|
|
|
else if constexpr (Index < sizeof...(SubEvents))
|
|
|
|
|
{
|
|
|
|
|
// Proceed search recursively
|
|
|
|
|
push<Event, Index + 1>();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Obtain initial timestamp
|
|
|
|
|
u64 get() const noexcept
|
|
|
|
|
{
|
|
|
|
|
return m_timestamps[0];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Disable this counter
|
2020-10-30 01:19:13 +01:00
|
|
|
SAFE_BUFFERS FORCE_INLINE void reset() noexcept
|
2020-10-18 14:00:10 +02:00
|
|
|
{
|
|
|
|
|
m_timestamps[0] = 0;
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-30 01:19:13 +01:00
|
|
|
// Re-initialize first timestamp
|
|
|
|
|
SAFE_BUFFERS FORCE_INLINE void restart() noexcept
|
|
|
|
|
{
|
|
|
|
|
m_timestamps[0] = __rdtsc();
|
|
|
|
|
std::memset(m_timestamps + 1, 0, sizeof(m_timestamps) - sizeof(u64));
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-18 14:00:10 +02:00
|
|
|
SAFE_BUFFERS ~perf_meter()
|
|
|
|
|
{
|
|
|
|
|
// Disabled counter
|
|
|
|
|
if (!m_timestamps[0]) [[unlikely]]
|
|
|
|
|
{
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!g_cfg.core.perf_report) [[likely]]
|
|
|
|
|
{
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Register perf stat in nanoseconds
|
2020-11-25 05:26:37 +01:00
|
|
|
perf_stat<ShortName>::push(m_timestamps[0]);
|
2020-10-18 14:00:10 +02:00
|
|
|
|
|
|
|
|
// TODO: handle push(), currently ignored
|
|
|
|
|
}
|
|
|
|
|
};
|