mirror of
https://github.com/RPCS3/rpcs3.git
synced 2026-04-04 14:08:30 +00:00
cell/scheduler: Manage thread placement depending on cpu hardware
This commit is contained in:
parent
54fbde0de1
commit
cbc8bf01a1
13 changed files with 185 additions and 40 deletions
|
|
@ -6,7 +6,9 @@
|
|||
#include "Emu/Cell/lv2/sys_mmapper.h"
|
||||
#include "Emu/Cell/lv2/sys_event.h"
|
||||
#include "Thread.h"
|
||||
#include "sysinfo.h"
|
||||
#include <typeinfo>
|
||||
#include <thread>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <Windows.h>
|
||||
|
|
@ -1547,6 +1549,8 @@ thread_local DECLARE(thread_ctrl::g_tls_this_thread) = nullptr;
|
|||
|
||||
extern thread_local std::string(*g_tls_log_prefix)();
|
||||
|
||||
// Out-of-class definition of the host core layout shared by all threads.
// Starts as 'undefined'; detect_cpu_layout() replaces it on first use.
// NOTE(review): DECLARE is a project macro not visible here - presumably expands to the member's declared type; confirm.
DECLARE(thread_ctrl::g_native_core_layout) { native_core_arrangement::undefined };
|
||||
|
||||
void thread_ctrl::start(const std::shared_ptr<thread_ctrl>& ctrl, task_stack task)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
|
|
@ -1853,6 +1857,89 @@ void thread_ctrl::test()
|
|||
}
|
||||
}
|
||||
|
||||
void thread_ctrl::detect_cpu_layout()
|
||||
{
|
||||
if (!g_native_core_layout.compare_and_swap_test(native_core_arrangement::undefined, native_core_arrangement::generic))
|
||||
return;
|
||||
|
||||
const auto system_id = utils::get_system_info();
|
||||
if (system_id.find("Ryzen") != std::string::npos)
|
||||
{
|
||||
g_native_core_layout.store(native_core_arrangement::amd_ccx);
|
||||
}
|
||||
else if (system_id.find("i3") != std::string::npos || system_id.find("i7") != std::string::npos)
|
||||
{
|
||||
g_native_core_layout.store(native_core_arrangement::intel_ht);
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a 16-bit core affinity mask (bit N = logical core N) for the given
// thread class, based on the detected host core layout.
//
// - If the hardware thread count is unknown (reported as 0), every bit is set.
// - generic layout (and any unrecognised layout): all available cores.
// - amd_ccx: rsx/ppu/spu threads are confined to one group of 8 logical cores,
//   every other class gets all available cores.
// - intel_ht with at most 4 hardware threads: rsx/ppu take the even cores,
//   spu takes the odd cores; everything else gets all available cores.
u16 thread_ctrl::get_affinity_mask(thread_class group)
{
	detect_cpu_layout();

	const auto hw_threads = std::thread::hardware_concurrency();

	if (!hw_threads)
	{
		// Core count could not be determined, do not restrict anything
		return UINT16_MAX;
	}

	// One bit per available logical core, saturating at 16 cores
	const u16 every_core = hw_threads >= 16 ? UINT16_MAX : static_cast<u16>(~(UINT16_MAX << hw_threads));

	const native_core_arrangement layout = g_native_core_layout;

	if (layout == native_core_arrangement::amd_ccx)
	{
		const bool is_emulation_thread =
			group == thread_class::rsx ||
			group == thread_class::ppu ||
			group == thread_class::spu;

		if (!is_emulation_thread)
		{
			return every_core;
		}

		if (hw_threads >= 16)
		{
			// Threadripper, R7
			// Assign logical cores 8-15
			// It appears some windows code is bound to lower core addresses, binding 8-15 is a lot faster than 0-7
			return 0b1111111100000000;
		}

		// R5 & R3 don't seem to improve performance no matter how these are shuffled (including 1600)
		return 0b11111111 & every_core;
	}

	if (layout == native_core_arrangement::intel_ht && hw_threads <= 4)
	{
		// i3 or worse
		if (group == thread_class::rsx || group == thread_class::ppu)
		{
			return (0b0101 & every_core);
		}

		if (group == thread_class::spu)
		{
			return (0b1010 & every_core);
		}
	}

	// generic layout, larger intel parts and general purpose threads
	return every_core;
}
|
||||
|
||||
void thread_ctrl::set_native_priority(int priority)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
|
|
@ -1886,24 +1973,31 @@ void thread_ctrl::set_native_priority(int priority)
|
|||
#endif
|
||||
}
|
||||
|
||||
void thread_ctrl::set_ideal_processor_core(int core)
|
||||
void thread_ctrl::set_thread_affinity_mask(u16 mask)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
HANDLE _this_thread = GetCurrentThread();
|
||||
SetThreadIdealProcessor(_this_thread, core);
|
||||
SetThreadAffinityMask(_this_thread, (DWORD_PTR)mask);
|
||||
#elif __APPLE__
|
||||
thread_affinity_policy_data_t policy = { static_cast<integer_t>(core) };
|
||||
thread_affinity_policy_data_t policy = { static_cast<integer_t>(mask) };
|
||||
thread_port_t mach_thread = pthread_mach_thread_np(pthread_self());
|
||||
thread_policy_set(mach_thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&policy, 1);
|
||||
#elif defined(__linux__) || defined(__DragonFly__) || defined(__FreeBSD__)
|
||||
cpu_set_t cs;
|
||||
CPU_ZERO(&cs);
|
||||
CPU_SET(core, &cs);
|
||||
|
||||
for (u32 core = 0; core < 16u; ++core)
|
||||
{
|
||||
if ((u32)mask & (1u << core))
|
||||
{
|
||||
CPU_SET(core, &cs);
|
||||
}
|
||||
}
|
||||
|
||||
pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cs);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// Default construction performs no work beyond member initialization
named_thread::named_thread() = default;
|
||||
|
|
|
|||
|
|
@ -16,6 +16,23 @@
|
|||
// Will report exception and call std::abort() if put in catch(...)
|
||||
[[noreturn]] void catch_all_exceptions();
|
||||
|
||||
// Hardware core layout
|
||||
enum class native_core_arrangement : u32
|
||||
{
|
||||
undefined,
|
||||
generic,
|
||||
intel_ht,
|
||||
amd_ccx
|
||||
};
|
||||
|
||||
enum class thread_class : u32
|
||||
{
|
||||
general,
|
||||
rsx,
|
||||
spu,
|
||||
ppu
|
||||
};
|
||||
|
||||
// Simple list of void() functors
|
||||
class task_stack
|
||||
{
|
||||
|
|
@ -91,6 +108,9 @@ class thread_ctrl final
|
|||
// Current thread
|
||||
static thread_local thread_ctrl* g_tls_this_thread;
|
||||
|
||||
// Target cpu core layout
|
||||
static atomic_t<native_core_arrangement> g_native_core_layout;
|
||||
|
||||
// Self pointer
|
||||
std::shared_ptr<thread_ctrl> m_self;
|
||||
|
||||
|
|
@ -234,8 +254,17 @@ public:
|
|||
thread_ctrl::start(out, std::forward<F>(func));
|
||||
}
|
||||
|
||||
// Detect layout
|
||||
static void detect_cpu_layout();
|
||||
|
||||
// Returns a core affinity mask for the given thread class, based on the detected cpu layout
|
||||
static u16 get_affinity_mask(thread_class group);
|
||||
|
||||
// Sets the native thread priority
|
||||
static void set_native_priority(int priority);
|
||||
static void set_ideal_processor_core(int core);
|
||||
|
||||
// Sets the preferred affinity mask for this thread
|
||||
static void set_thread_affinity_mask(u16 mask);
|
||||
};
|
||||
|
||||
class named_thread
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue