SPU: multithreaded compilation

Allow parallel compilation of SPU code, both at startup and runtime
Remove 'SPU Shared Runtime' option (it became obsolete)
Refactor spu_runtime class (it is now common to both ASMJIT and LLVM)
Implement SPU ubertrampoline generation in raw assembly (LLVM)
Minor improvement of balanced_wait_until<> and balanced_awaken<>
Make JIT MemoryManager2 shared (global)
Fix wrong assertion in cond_variable
This commit is contained in:
Nekotekina 2019-01-21 21:04:32 +03:00
parent 8d5d44141e
commit 4f152ad126
9 changed files with 503 additions and 394 deletions

View file

@ -32,33 +32,8 @@ std::unique_ptr<spu_recompiler_base> spu_recompiler_base::make_asmjit_recompiler
return std::make_unique<spu_recompiler>();
}
// Constructs the SPU recompiler runtime: stores the cache path, touches the
// debug log file when SPU debugging is enabled, logs a success message, and
// points every dispatcher entry (plus the empty-block map entry) at
// spu_recompiler_base::dispatch.
spu_runtime::spu_runtime()
{
// Cache path is taken from the ppu_module object obtained through fxm.
// NOTE(review): the pointer returned by check_unlocked is dereferenced without
// a null check — confirm a ppu_module always exists at this point.
m_cache_path = fxm::check_unlocked<ppu_module>()->cache;
if (g_cfg.core.spu_debug)
{
// Opens "spu.log" under the cache path with fs::rewrite; the temporary is
// discarded immediately (presumably this creates/truncates the log — confirm
// against fs::file semantics).
fs::file(m_cache_path + "spu.log", fs::rewrite);
}
LOG_SUCCESS(SPU, "SPU Recompiler Runtime (ASMJIT) initialized...");
// Initialize lookup table
for (auto& v : m_dispatcher)
{
// Every slot starts out at the generic dispatch routine
v.raw() = &spu_recompiler_base::dispatch;
}
// Initialize "empty" block
// (an empty instruction vector is the key; it also maps to the generic dispatch routine)
m_map[std::vector<u32>()] = &spu_recompiler_base::dispatch;
}
// Constructs an ASMJIT SPU recompiler. When the spu_shared_runtime config
// option is off, the instance allocates its own spu_runtime; otherwise m_spurt
// is not assigned here.
// NOTE(review): the commit message says the 'SPU Shared Runtime' option is
// removed, so this branch looks like pre-change code shown by the diff — confirm
// which lines survive in the final file.
spu_recompiler::spu_recompiler()
{
if (!g_cfg.core.spu_shared_runtime)
{
// Private runtime held through a shared_ptr
m_spurt = std::make_shared<spu_runtime>();
}
}
void spu_recompiler::init()
@ -68,6 +43,7 @@ void spu_recompiler::init()
{
m_cache = fxm::get<spu_cache>();
m_spurt = fxm::get_always<spu_runtime>();
m_asmrt = m_spurt->get_asmjit_rt();
}
}
@ -83,19 +59,22 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
{
init();
// Don't lock without shared runtime
std::unique_lock lock(m_spurt->m_mutex, std::defer_lock);
if (g_cfg.core.spu_shared_runtime)
{
lock.lock();
}
std::unique_lock lock(m_spurt->m_mutex);
// Try to find existing function, register new one if necessary
const auto fn_info = m_spurt->m_map.emplace(std::move(func_rv), nullptr);
auto& fn_location = fn_info.first->second;
if (!fn_location && !fn_info.second)
{
// Wait if already in progress
while (!fn_location)
{
m_spurt->m_cond.wait(lock);
}
}
if (fn_location)
{
return fn_location;
@ -103,6 +82,8 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
auto& func = fn_info.first->first;
lock.unlock();
using namespace asmjit;
SPUDisAsm dis_asm(CPUDisAsm_InterpreterMode);
@ -124,7 +105,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
}
CodeHolder code;
code.init(m_spurt->m_jitrt.getCodeInfo());
code.init(m_asmrt->getCodeInfo());
code._globalHints = asmjit::CodeEmitter::kHintOptimizedAlign;
X86Assembler compiler(&code);
@ -861,14 +842,11 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
// Compile and get function address
spu_function_t fn;
if (m_spurt->m_jitrt.add(&fn, &code))
if (m_asmrt->add(&fn, &code))
{
LOG_FATAL(SPU, "Failed to build a function");
}
// Register function
fn_location = fn;
if (g_cfg.core.spu_debug)
{
// Add ASMJIT logs
@ -885,6 +863,11 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
m_cache->add(func);
}
lock.lock();
// Register function (possibly temporarily)
fn_location = fn;
// Generate a dispatcher (übertrampoline)
std::vector<u32> addrv{func[0]};
const auto beg = m_spurt->m_map.lower_bound(addrv);
@ -899,19 +882,11 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
else
{
CodeHolder code;
code.init(m_spurt->m_jitrt.getCodeInfo());
code.init(m_asmrt->getCodeInfo());
X86Assembler compiler(&code);
this->c = &compiler;
if (g_cfg.core.spu_debug)
{
// Set logger
code.setLogger(&logger);
}
compiler.comment("\n\nTrampoline:\n\n");
struct work
{
u32 size;
@ -1110,7 +1085,7 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
spu_function_t tr;
if (m_spurt->m_jitrt.add(&tr, &code))
if (m_asmrt->add(&tr, &code))
{
LOG_FATAL(SPU, "Failed to build a trampoline");
}
@ -1118,6 +1093,9 @@ spu_function_t spu_recompiler::compile(std::vector<u32>&& func_rv)
m_spurt->m_dispatcher[func[0] / 4] = tr;
}
lock.unlock();
m_spurt->m_cond.notify_all();
return fn;
}