mirror of
https://github.com/RPCS3/rpcs3.git
synced 2026-04-04 22:18:53 +00:00
SPU: multithread compilation
Allow parallel compilation of SPU code, both at startup and runtime
Remove 'SPU Shared Runtime' option (it became obsolete)
Refactor spu_runtime class (now is common for ASMJIT and LLVM)
Implement SPU ubertrampoline generation in raw assembly (LLVM)
Minor improvement of balanced_wait_until<> and balanced_awaken<>
Make JIT MemoryManager2 shared (global)
Fix wrong assertion in cond_variable
This commit is contained in:
parent
8d5d44141e
commit
4f152ad126
9 changed files with 503 additions and 394 deletions
|
|
@ -95,6 +95,12 @@ static void* const s_memory = []() -> void*
|
|||
return utils::memory_reserve(s_memory_size);
|
||||
}();
|
||||
|
||||
// Reserve 2G of memory, should replace previous area for ASLR compatibility
|
||||
static void* const s_memory2 = utils::memory_reserve(0x80000000);
|
||||
|
||||
static u64 s_code_pos = 0;
|
||||
static u64 s_data_pos = 0;
|
||||
|
||||
static void* s_next = s_memory;
|
||||
|
||||
#ifdef _WIN32
|
||||
|
|
@ -129,6 +135,11 @@ extern void jit_finalize()
|
|||
utils::memory_decommit(s_memory, s_memory_size);
|
||||
|
||||
s_next = s_memory;
|
||||
|
||||
utils::memory_decommit(s_memory2, 0x80000000);
|
||||
|
||||
s_code_pos = 0;
|
||||
s_data_pos = 0;
|
||||
}
|
||||
|
||||
// Helper class
|
||||
|
|
@ -311,24 +322,25 @@ struct MemoryManager : llvm::RTDyldMemoryManager
|
|||
// Simple memory manager
|
||||
struct MemoryManager2 : llvm::RTDyldMemoryManager
|
||||
{
|
||||
// Reserve 2 GiB
|
||||
void* const m_memory = utils::memory_reserve(0x80000000);
|
||||
// Patchwork again...
|
||||
void* const m_memory = s_memory2;
|
||||
|
||||
u8* const m_code = static_cast<u8*>(m_memory) + 0x00000000;
|
||||
u8* const m_data = static_cast<u8*>(m_memory) + 0x40000000;
|
||||
|
||||
u64 m_code_pos = 0;
|
||||
u64 m_data_pos = 0;
|
||||
u64& m_code_pos = s_code_pos;
|
||||
u64& m_data_pos = s_data_pos;
|
||||
|
||||
MemoryManager2() = default;
|
||||
|
||||
~MemoryManager2() override
|
||||
{
|
||||
utils::memory_release(m_memory, 0x80000000);
|
||||
}
|
||||
|
||||
u8* allocateCodeSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name) override
|
||||
{
|
||||
std::lock_guard lock(s_mutex);
|
||||
|
||||
// Simple allocation
|
||||
const u64 old = m_code_pos;
|
||||
const u64 pos = ::align(m_code_pos, align);
|
||||
|
|
@ -349,12 +361,20 @@ struct MemoryManager2 : llvm::RTDyldMemoryManager
|
|||
utils::memory_commit(m_code + olda, newa - olda, utils::protection::wx);
|
||||
}
|
||||
|
||||
if (!sec_id && sec_name.empty())
|
||||
{
|
||||
// Special case: don't log
|
||||
return m_code + pos;
|
||||
}
|
||||
|
||||
LOG_NOTICE(GENERAL, "LLVM: Code section %u '%s' allocated -> %p (size=0x%x, align=0x%x)", sec_id, sec_name.data(), m_code + pos, size, align);
|
||||
return m_code + pos;
|
||||
}
|
||||
|
||||
u8* allocateDataSection(std::uintptr_t size, uint align, uint sec_id, llvm::StringRef sec_name, bool is_ro) override
|
||||
{
|
||||
std::lock_guard lock(s_mutex);
|
||||
|
||||
// Simple allocation
|
||||
const u64 old = m_data_pos;
|
||||
const u64 pos = ::align(m_data_pos, align);
|
||||
|
|
@ -642,33 +662,12 @@ u64 jit_compiler::get(const std::string& name)
|
|||
return m_engine->getGlobalValueAddress(name);
|
||||
}
|
||||
|
||||
std::unordered_map<std::string, u64> jit_compiler::add(std::unordered_map<std::string, std::string> data)
|
||||
u8* jit_compiler::alloc(u32 size)
|
||||
{
|
||||
// Lock memory manager
|
||||
std::lock_guard lock(s_mutex);
|
||||
// Dummy memory manager object
|
||||
MemoryManager2 mm;
|
||||
|
||||
std::unordered_map<std::string, u64> result;
|
||||
|
||||
std::size_t size = 0;
|
||||
|
||||
for (auto&& pair : data)
|
||||
{
|
||||
size += ::align(pair.second.size(), 16);
|
||||
}
|
||||
|
||||
utils::memory_commit(s_next, size, utils::protection::wx);
|
||||
std::memset(s_next, 0xc3, ::align(size, 4096));
|
||||
|
||||
for (auto&& pair : data)
|
||||
{
|
||||
std::memcpy(s_next, pair.second.data(), pair.second.size());
|
||||
result.emplace(pair.first, (u64)s_next);
|
||||
s_next = (void*)::align((u64)s_next + pair.second.size(), 16);
|
||||
}
|
||||
|
||||
s_next = (void*)::align((u64)s_next, 4096);
|
||||
|
||||
return result;
|
||||
return mm.allocateCodeSection(size, 16, 0, {});
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@ FT build_function_asm(F&& builder)
|
|||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "types.h"
|
||||
|
|
@ -129,8 +130,8 @@ public:
|
|||
// Get compiled function address
|
||||
u64 get(const std::string& name);
|
||||
|
||||
// Add functions directly to the memory manager (name -> code)
|
||||
static std::unordered_map<std::string, u64> add(std::unordered_map<std::string, std::string>);
|
||||
// Allocate writable executable memory (alignment is assumed 16)
|
||||
static u8* alloc(u32 size);
|
||||
|
||||
// Get CPU info
|
||||
static std::string cpu(const std::string& _cpu);
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@
|
|||
|
||||
bool cond_variable::imp_wait(u32 _old, u64 _timeout) noexcept
|
||||
{
|
||||
verify("cond_variable overflow" HERE), (_old & 0xffff) == 0; // Very unlikely: it requires 65535 distinct threads to wait simultaneously
|
||||
verify("cond_variable overflow" HERE), (_old & 0xffff) != 0xffff; // Very unlikely: it requires 65535 distinct threads to wait simultaneously
|
||||
|
||||
return balanced_wait_until(m_value, _timeout, [&](u32& value, auto... ret) -> int
|
||||
{
|
||||
|
|
@ -42,7 +42,8 @@ bool cond_variable::imp_wait(u32 _old, u64 _timeout) noexcept
|
|||
|
||||
void cond_variable::imp_wake(u32 _count) noexcept
|
||||
{
|
||||
balanced_awaken(m_value, m_value.atomic_op([&](u32& value) -> u32
|
||||
// TODO (notify_one)
|
||||
balanced_awaken<true>(m_value, m_value.atomic_op([&](u32& value) -> u32
|
||||
{
|
||||
// Subtract already signaled number from total amount of waiters
|
||||
const u32 can_sig = (value & 0xffff) - (value >> 16);
|
||||
|
|
@ -266,7 +267,7 @@ void cond_x16::imp_notify() noexcept
|
|||
return;
|
||||
}
|
||||
|
||||
balanced_awaken(m_cvx16, utils::popcnt16(wait_mask));
|
||||
balanced_awaken<true>(m_cvx16, utils::popcnt16(wait_mask));
|
||||
}
|
||||
|
||||
bool lf_queue_base::wait(u64 _timeout)
|
||||
|
|
|
|||
|
|
@ -186,7 +186,7 @@ bool balanced_wait_until(atomic_t<T>& var, u64 usec_timeout, Pred&& pred)
|
|||
{
|
||||
if (OptWaitOnAddress(&var, &value, sizeof(T), is_inf ? INFINITE : usec_timeout / 1000))
|
||||
{
|
||||
if (!test_pred(value) && !test_pred(value, nullptr))
|
||||
if (!test_pred(value, nullptr))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
|
@ -220,7 +220,7 @@ bool balanced_wait_until(atomic_t<T>& var, u64 usec_timeout, Pred&& pred)
|
|||
return true;
|
||||
}
|
||||
|
||||
if (!test_pred(value) && !test_pred(value, nullptr))
|
||||
if (!test_pred(value, nullptr))
|
||||
{
|
||||
// Stolen notification: restore balance
|
||||
NtReleaseKeyedEvent(nullptr, &var, false, nullptr);
|
||||
|
|
@ -237,7 +237,7 @@ bool balanced_wait_until(atomic_t<T>& var, u64 usec_timeout, Pred&& pred)
|
|||
{
|
||||
if (futex(&var, FUTEX_WAIT_PRIVATE, static_cast<u32>(value), is_inf ? nullptr : &timeout) == 0)
|
||||
{
|
||||
if (!test_pred(value) && !test_pred(value, nullptr))
|
||||
if (!test_pred(value, nullptr))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
|
@ -257,7 +257,7 @@ bool balanced_wait_until(atomic_t<T>& var, u64 usec_timeout, Pred&& pred)
|
|||
#endif
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
template <bool All = false, typename T>
|
||||
void balanced_awaken(atomic_t<T>& var, u32 weight)
|
||||
{
|
||||
static_assert(sizeof(T) == 4 || sizeof(T) == 8);
|
||||
|
|
@ -265,11 +265,13 @@ void balanced_awaken(atomic_t<T>& var, u32 weight)
|
|||
#ifdef _WIN32
|
||||
if (OptWaitOnAddress)
|
||||
{
|
||||
if (weight > 1)
|
||||
if (All || weight > 3)
|
||||
{
|
||||
OptWakeByAddressAll(&var);
|
||||
return;
|
||||
}
|
||||
else if (weight == 1)
|
||||
|
||||
for (u32 i = 0; i < weight; i++)
|
||||
{
|
||||
OptWakeByAddressSingle(&var);
|
||||
}
|
||||
|
|
@ -282,9 +284,9 @@ void balanced_awaken(atomic_t<T>& var, u32 weight)
|
|||
NtReleaseKeyedEvent(nullptr, &var, false, nullptr);
|
||||
}
|
||||
#else
|
||||
if (weight)
|
||||
if (All || weight)
|
||||
{
|
||||
futex(&var, FUTEX_WAKE_PRIVATE, std::min<u32>(INT_MAX, weight));
|
||||
futex(&var, FUTEX_WAKE_PRIVATE, All ? INT_MAX : std::min<u32>(INT_MAX, weight));
|
||||
}
|
||||
|
||||
return;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue