mirror of
https://github.com/RPCSX/rpcsx.git
synced 2026-04-06 06:55:09 +00:00
LLVM: Slice PPU executable memory
This commit is contained in:
parent
7b8fee7cdb
commit
9d5b75bb7a
7 changed files with 486 additions and 126 deletions
|
|
@ -66,6 +66,7 @@
|
|||
#include <cctype>
|
||||
#include <span>
|
||||
#include <optional>
|
||||
#include <charconv>
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "util/vm.hpp"
|
||||
|
|
@ -176,7 +177,7 @@ bool serialize<ppu_thread::cr_bits>(utils::serial& ar, typename ppu_thread::cr_b
|
|||
extern void ppu_initialize();
|
||||
extern void ppu_finalize(const ppu_module<lv2_obj>& info, bool force_mem_release = false);
|
||||
extern bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only = false, u64 file_size = 0);
|
||||
static void ppu_initialize2(class jit_compiler& jit, const ppu_module<lv2_obj>& module_part, const std::string& cache_path, const std::string& obj_name, const ppu_module<lv2_obj>& whole_module);
|
||||
static void ppu_initialize2(class jit_compiler& jit, const ppu_module<lv2_obj>& module_part, const std::string& cache_path, const std::string& obj_name);
|
||||
extern bool ppu_load_exec(const ppu_exec_object&, bool virtual_load, const std::string&, utils::serial* = nullptr);
|
||||
extern std::pair<shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_exec_object&, bool virtual_load, const std::string& path, s64 file_offset, utils::serial* = nullptr);
|
||||
extern void ppu_unload_prx(const lv2_prx&);
|
||||
|
|
@ -342,11 +343,10 @@ const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway",
|
|||
// Load offset value
|
||||
c.mov(cia_addr_reg, Imm(static_cast<u64>(::offset32(&ppu_thread::cia))));
|
||||
// Load cia
|
||||
c.ldr(a64::w15, arm::Mem(ppu_t_base, cia_addr_reg));
|
||||
c.ldr(pc.w(), arm::Mem(ppu_t_base, cia_addr_reg));
|
||||
|
||||
// Multiply by 2 to index into ptr table
|
||||
const arm::GpX index_shift = a64::x12;
|
||||
c.mov(index_shift, Imm(2));
|
||||
c.mul(pc, pc, index_shift);
|
||||
c.add(pc, pc, pc);
|
||||
|
||||
// Load call target
|
||||
const arm::GpX call_target = a64::x13;
|
||||
|
|
@ -355,7 +355,7 @@ const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway",
|
|||
const arm::GpX reg_hp = a64::x21;
|
||||
c.mov(reg_hp, call_target);
|
||||
c.lsr(reg_hp, reg_hp, 48);
|
||||
c.lsl(a64::w21, a64::w21, 13);
|
||||
c.lsl(reg_hp.w(), reg_hp.w(), 13);
|
||||
|
||||
// Zero top 16 bits of call target
|
||||
c.lsl(call_target, call_target, Imm(16));
|
||||
|
|
@ -3665,6 +3665,9 @@ struct jit_core_allocator
|
|||
// Initialize global semaphore with the max number of threads
|
||||
::semaphore<0x7fff> sem{std::max<s16>(thread_count, 1)};
|
||||
|
||||
// Mutex for special extra-large modules to compile alone
|
||||
shared_mutex shared_mtx;
|
||||
|
||||
static s16 limit()
|
||||
{
|
||||
return static_cast<s16>(std::min<s32>(0x7fff, utils::get_thread_count()));
|
||||
|
|
@ -3677,8 +3680,8 @@ namespace
|
|||
// Compiled PPU module info
|
||||
struct jit_module
|
||||
{
|
||||
void(*symbol_resolver)(u8*, u64) = nullptr;
|
||||
std::shared_ptr<jit_compiler> pjit;
|
||||
std::vector<void(*)(u8*, u64)> symbol_resolvers;
|
||||
std::vector<std::shared_ptr<jit_compiler>> pjit;
|
||||
bool init = false;
|
||||
};
|
||||
|
||||
|
|
@ -3729,6 +3732,7 @@ namespace
|
|||
}
|
||||
|
||||
to_destroy.pjit = std::move(found->second.pjit);
|
||||
to_destroy.symbol_resolvers = std::move(found->second.symbol_resolvers);
|
||||
|
||||
bucket.map.erase(found);
|
||||
}
|
||||
|
|
@ -4445,7 +4449,7 @@ extern void ppu_initialize()
|
|||
|
||||
idm::select<lv2_obj, lv2_prx>([&](u32, lv2_prx& _module)
|
||||
{
|
||||
if (_module.funcs.empty())
|
||||
if (_module.get_funcs().empty())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
|
@ -4556,7 +4560,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
|
||||
auto& ppu_toc = toc_manager.toc_map;
|
||||
|
||||
for (const auto& func : info.funcs)
|
||||
for (const auto& func : info.get_funcs())
|
||||
{
|
||||
if (func.size && func.blocks.empty())
|
||||
{
|
||||
|
|
@ -4659,11 +4663,14 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
jit_module& jit_mod = g_fxo->get<jit_module_manager>().get(cache_path + "_" + std::to_string(std::bit_cast<usz>(info.segs[0].ptr)));
|
||||
|
||||
// Compiler instance (deferred initialization)
|
||||
std::shared_ptr<jit_compiler>& jit = jit_mod.pjit;
|
||||
std::vector<std::shared_ptr<jit_compiler>>& jits = jit_mod.pjit;
|
||||
|
||||
// Split module into fragments <= 1 MiB
|
||||
usz fpos = 0;
|
||||
|
||||
// Modules counted so far
|
||||
usz module_counter = 0;
|
||||
|
||||
// Difference between function name and current location
|
||||
const u32 reloc = info.relocs.empty() ? 0 : ::at32(info.segs, 0).addr;
|
||||
|
||||
|
|
@ -4684,14 +4691,14 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
|
||||
const cpu_thread* cpu = cpu_thread::get_current();
|
||||
|
||||
for (auto& func : info.funcs)
|
||||
for (auto& func : info.get_funcs())
|
||||
{
|
||||
if (func.size == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const auto& [addr, size] : func.blocks)
|
||||
for (const auto [addr, size] : func)
|
||||
{
|
||||
if (size == 0)
|
||||
{
|
||||
|
|
@ -4724,26 +4731,138 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
|
||||
u32 total_compile = 0;
|
||||
|
||||
while (!jit_mod.init && fpos < info.funcs.size())
|
||||
// Limit how many modules are per JIt instance
|
||||
// Advantage to lower the limit:
|
||||
// 1. Lowering contoniues memory requirements for allocations
|
||||
// Its disadvantage:
|
||||
// 1. B instruction can wander up to 16MB relatively to its range,
|
||||
// each additional split of JIT instance results in a downgraded version of around (100% / N-1th) - (100% / Nth) percent of instructions
|
||||
// where N is the total amunt of JIT instances
|
||||
// Subject to change
|
||||
constexpr u32 c_moudles_per_jit = 100;
|
||||
|
||||
std::shared_ptr<std::pair<u32, u32>> local_jit_bounds = std::make_shared<std::pair<u32, u32>>(u32{umax}, 0);
|
||||
|
||||
const auto shared_runtime = make_shared<jit_runtime>();
|
||||
const auto shared_map = make_shared<std::unordered_map<u32, u64>>();
|
||||
const auto shared_mtx = make_shared<shared_mutex>();
|
||||
|
||||
auto symbols_cement = [runtime = shared_runtime, reloc, bound = info.segs[0].addr + info.segs[0].size - reloc, func_map = shared_map, shared_mtx](const std::string& name) -> u64
|
||||
{
|
||||
// Initialize compiler instance
|
||||
if (!jit && is_being_used_in_emulation)
|
||||
u32 func_addr = umax;
|
||||
|
||||
if (name.starts_with("__0x"))
|
||||
{
|
||||
jit = std::make_shared<jit_compiler>(s_link_table, g_cfg.core.llvm_cpu);
|
||||
u32 addr = umax;
|
||||
auto res = std::from_chars(name.c_str() + 4, name.c_str() + name.size(), addr, 16);
|
||||
|
||||
if (res.ec == std::errc() && res.ptr == name.c_str() + name.size() && addr < bound)
|
||||
{
|
||||
func_addr = addr + reloc;
|
||||
}
|
||||
}
|
||||
|
||||
// Copy module information (TODO: optimize)
|
||||
if (func_addr == umax)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
reader_lock rlock(*shared_mtx);
|
||||
|
||||
if (auto it = func_map->find(func_addr); it != func_map->end())
|
||||
{
|
||||
return it->second;
|
||||
}
|
||||
|
||||
rlock.upgrade();
|
||||
|
||||
u64& code_ptr = (*func_map)[func_addr];
|
||||
|
||||
if (code_ptr)
|
||||
{
|
||||
return +code_ptr;
|
||||
}
|
||||
|
||||
using namespace asmjit;
|
||||
|
||||
auto func = build_function_asm<u8*(*)(ppu_thread&, u64, u8*, u64, u64, u64)>(name, [&](native_asm& c, auto& args)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
c.mov(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_exec_addr)));
|
||||
c.mov(x86::edx, func_addr); // Load PC
|
||||
c.mov(x86::dword_ptr(x86::rbp, ::offset32(&ppu_thread::cia)), x86::edx);
|
||||
|
||||
c.mov(x86::rax, x86::qword_ptr(x86::rax, x86::edx, 1, 0)); // Load call target
|
||||
c.mov(x86::rdx, x86::rax);
|
||||
c.shl(x86::rax, 16);
|
||||
c.shr(x86::rax, 16);
|
||||
c.shr(x86::rdx, 48);
|
||||
c.shl(x86::edx, 13);
|
||||
c.mov(x86::r12d, x86::edx); // Load relocation base
|
||||
c.jmp(x86::rax);
|
||||
#else
|
||||
// Load REG_Base - use absolute jump target to bypass rel jmp range limits
|
||||
// X19 contains vm::g_exec_addr
|
||||
const arm::GpX exec_addr = a64::x19;
|
||||
|
||||
// X20 contains ppu_thread*
|
||||
const arm::GpX ppu_t_base = a64::x20;
|
||||
|
||||
// Load PC
|
||||
const arm::GpX pc = a64::x15;
|
||||
const arm::GpX cia_addr_reg = a64::x11;
|
||||
|
||||
// Load offset value
|
||||
c.mov(cia_addr_reg, static_cast<u64>(::offset32(&ppu_thread::cia)));
|
||||
|
||||
// Update CIA
|
||||
c.mov(pc.w(), func_addr);
|
||||
c.str(pc.w(), arm::Mem(ppu_t_base, cia_addr_reg));
|
||||
|
||||
// Multiply by 2 to index into ptr table
|
||||
c.add(pc, pc, pc);
|
||||
|
||||
// Load call target
|
||||
const arm::GpX call_target = a64::x13;
|
||||
c.ldr(call_target, arm::Mem(exec_addr, pc));
|
||||
|
||||
// Compute REG_Hp
|
||||
const arm::GpX reg_hp = a64::x21;
|
||||
c.mov(reg_hp, call_target);
|
||||
c.lsr(reg_hp, reg_hp, 48);
|
||||
c.lsl(reg_hp.w(), reg_hp.w(), 13);
|
||||
|
||||
// Zero top 16 bits of call target
|
||||
c.lsl(call_target, call_target, 16);
|
||||
c.lsr(call_target, call_target, 16);
|
||||
|
||||
// Execute LLE call
|
||||
c.br(call_target);
|
||||
#endif
|
||||
}, runtime.get());
|
||||
|
||||
code_ptr = reinterpret_cast<u64>(func);
|
||||
return code_ptr;
|
||||
};
|
||||
|
||||
if (has_mfvscr && g_cfg.core.ppu_set_sat_bit)
|
||||
{
|
||||
info.attr += ppu_attr::has_mfvscr;
|
||||
}
|
||||
|
||||
while (!jit_mod.init && fpos < info.get_funcs().size())
|
||||
{
|
||||
// Copy module information
|
||||
ppu_module<lv2_obj> part;
|
||||
part.copy_part(info);
|
||||
part.funcs.reserve(16000);
|
||||
|
||||
// Overall block size in bytes
|
||||
usz bsize = 0;
|
||||
usz bcount = 0;
|
||||
|
||||
while (fpos < info.funcs.size())
|
||||
while (fpos < info.get_funcs().size())
|
||||
{
|
||||
auto& func = info.funcs[fpos];
|
||||
auto& func = info.get_funcs()[fpos];
|
||||
|
||||
if (!func.size)
|
||||
{
|
||||
|
|
@ -4767,9 +4886,9 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
{
|
||||
auto far_jump = ensure(g_fxo->get<ppu_far_jumps_t>().gen_jump(source));
|
||||
|
||||
if (source == func.addr && jit)
|
||||
if (source == func.addr)
|
||||
{
|
||||
jit->update_global_mapping(fmt::format("__0x%x", func.addr - reloc), reinterpret_cast<u64>(far_jump));
|
||||
(*shared_map)[func.addr - reloc] = reinterpret_cast<u64>(far_jump);
|
||||
}
|
||||
|
||||
ppu_register_function_at(source, 4, far_jump);
|
||||
|
|
@ -4783,22 +4902,14 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
}
|
||||
}
|
||||
|
||||
// Copy block or function entry
|
||||
ppu_function& entry = part.funcs.emplace_back(func);
|
||||
local_jit_bounds->first = std::min<u32>(local_jit_bounds->first, func.addr);
|
||||
local_jit_bounds->second = std::max<u32>(local_jit_bounds->second, func.addr + func.size);
|
||||
|
||||
part.local_bounds.first = std::min<u32>(part.local_bounds.first, func.addr);
|
||||
part.local_bounds.second = std::max<u32>(part.local_bounds.second, func.addr + func.size);
|
||||
|
||||
// Fixup some information
|
||||
entry.name = fmt::format("__0x%x", entry.addr - reloc);
|
||||
|
||||
if (has_mfvscr && g_cfg.core.ppu_set_sat_bit)
|
||||
{
|
||||
// TODO
|
||||
entry.attr += ppu_attr::has_mfvscr;
|
||||
}
|
||||
|
||||
if (entry.blocks.empty())
|
||||
{
|
||||
entry.blocks.emplace(func.addr, func.size);
|
||||
}
|
||||
func.name = fmt::format("__0x%x", func.addr - reloc);
|
||||
|
||||
bsize += func.size;
|
||||
|
||||
|
|
@ -4815,7 +4926,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
|
||||
int has_dcbz = !!g_cfg.core.accurate_cache_line_stores;
|
||||
|
||||
for (const auto& func : part.funcs)
|
||||
for (const auto& func : part.get_funcs())
|
||||
{
|
||||
if (func.size == 0)
|
||||
{
|
||||
|
|
@ -4827,7 +4938,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
sha1_update(&ctx, reinterpret_cast<const u8*>(&addr), sizeof(addr));
|
||||
sha1_update(&ctx, reinterpret_cast<const u8*>(&size), sizeof(size));
|
||||
|
||||
for (const auto& block : func.blocks)
|
||||
for (const auto block : func)
|
||||
{
|
||||
if (block.second == 0 || reloc)
|
||||
{
|
||||
|
|
@ -4898,7 +5009,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
sha1_update(&ctx, ensure(info.get_ptr<const u8>(func.addr)), func.size);
|
||||
}
|
||||
|
||||
if (!workload.empty() && fpos >= info.funcs.size())
|
||||
if (fpos >= info.get_funcs().size() || module_counter % c_moudles_per_jit == c_moudles_per_jit - 1)
|
||||
{
|
||||
// Hash the entire function grouped addresses for the integrity of the symbol resolver function
|
||||
// Potentially occuring during patches
|
||||
|
|
@ -4906,7 +5017,13 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
|
||||
std::vector<be_t<u32>> addrs;
|
||||
|
||||
for (const ppu_function& func : info.funcs)
|
||||
constexpr auto compare = [](const ppu_function& a, u32 addr) { return a.addr < addr; };
|
||||
|
||||
const auto start = std::lower_bound(info.funcs.begin(), info.funcs.end(), local_jit_bounds->first, compare);
|
||||
|
||||
std::span<const ppu_function> span_range{ start, std::lower_bound(start, info.funcs.end(), local_jit_bounds->second, compare) };
|
||||
|
||||
for (const ppu_function& func : span_range)
|
||||
{
|
||||
if (func.size == 0)
|
||||
{
|
||||
|
|
@ -4919,7 +5036,13 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
// Hash its size too
|
||||
addrs.emplace_back(::size32(addrs));
|
||||
|
||||
sha1_update(&ctx, reinterpret_cast<const u8*>(addrs.data()), addrs.size() * sizeof(be_t<u32>));
|
||||
if (module_counter != 0)
|
||||
{
|
||||
sha1_update(&ctx, reinterpret_cast<const u8*>(addrs.data()), addrs.size() * sizeof(be_t<u32>));
|
||||
}
|
||||
|
||||
part.jit_bounds = std::move(local_jit_bounds);
|
||||
local_jit_bounds = std::make_shared<std::pair<u32, u32>>(u32{umax}, 0);
|
||||
}
|
||||
|
||||
if (false)
|
||||
|
|
@ -4974,7 +5097,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
settings += ppu_settings::accurate_vnan, settings -= ppu_settings::fixup_vnan, fmt::throw_exception("VNAN Not implemented");
|
||||
if (g_cfg.core.ppu_use_nj_bit)
|
||||
settings += ppu_settings::accurate_nj_mode, settings -= ppu_settings::fixup_nj_denormals, fmt::throw_exception("NJ Not implemented");
|
||||
if (fpos >= info.funcs.size())
|
||||
if (fpos >= info.get_funcs().size() || module_counter % c_moudles_per_jit == c_moudles_per_jit - 1)
|
||||
settings += ppu_settings::contains_symbol_resolver; // Avoid invalidating all modules for this purpose
|
||||
|
||||
// Write version, hash, CPU, settings
|
||||
|
|
@ -4986,6 +5109,8 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
break;
|
||||
}
|
||||
|
||||
module_counter++;
|
||||
|
||||
if (!check_only)
|
||||
{
|
||||
total_compile++;
|
||||
|
|
@ -4996,13 +5121,14 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
// Check object file
|
||||
if (jit_compiler::check(cache_path + obj_name))
|
||||
{
|
||||
if (!jit && !check_only)
|
||||
if (!is_being_used_in_emulation && !check_only)
|
||||
{
|
||||
ppu_log.success("LLVM: Module exists: %s", obj_name);
|
||||
|
||||
// Done already, revert total amount increase
|
||||
// Avoid incrementing "pdone" instead because it creates false appreciation for both the progress dialog and the user
|
||||
total_compile--;
|
||||
link_workload.pop_back();
|
||||
}
|
||||
|
||||
continue;
|
||||
|
|
@ -5113,11 +5239,26 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
// Keep allocating workload
|
||||
const auto& [obj_name, part] = std::as_const(workload)[i];
|
||||
|
||||
std::shared_lock rlock(g_fxo->get<jit_core_allocator>().shared_mtx, std::defer_lock);
|
||||
std::unique_lock lock(g_fxo->get<jit_core_allocator>().shared_mtx, std::defer_lock);
|
||||
|
||||
if (part.jit_bounds && part.parent->funcs.size() >= 0x8000)
|
||||
{
|
||||
// Make a large symbol-resolving function compile alone because it has massive memory requirements
|
||||
lock.lock();
|
||||
}
|
||||
else
|
||||
{
|
||||
rlock.lock();
|
||||
}
|
||||
|
||||
ppu_log.warning("LLVM: Compiling module %s%s", cache_path, obj_name);
|
||||
|
||||
// Use another JIT instance
|
||||
jit_compiler jit2({}, g_cfg.core.llvm_cpu, 0x1);
|
||||
ppu_initialize2(jit2, part, cache_path, obj_name, i == workload.size() - 1 ? main_module : part);
|
||||
{
|
||||
// Use another JIT instance
|
||||
jit_compiler jit2({}, g_cfg.core.llvm_cpu, 0x1);
|
||||
ppu_initialize2(jit2, part, cache_path, obj_name);
|
||||
}
|
||||
|
||||
ppu_log.success("LLVM: Compiled module %s", obj_name);
|
||||
}
|
||||
|
|
@ -5145,6 +5286,17 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
g_watchdog_hold_ctr--;
|
||||
}
|
||||
|
||||
// Initialize compiler instance
|
||||
while (jits.size() < utils::aligned_div<u64>(module_counter, c_moudles_per_jit) && is_being_used_in_emulation)
|
||||
{
|
||||
jits.emplace_back(std::make_shared<jit_compiler>(s_link_table, g_cfg.core.llvm_cpu, 0, symbols_cement));
|
||||
}
|
||||
|
||||
if (jit_mod.symbol_resolvers.empty() && is_being_used_in_emulation)
|
||||
{
|
||||
jit_mod.symbol_resolvers.resize(jits.size());
|
||||
}
|
||||
|
||||
bool failed_to_load = false;
|
||||
{
|
||||
if (!is_being_used_in_emulation || (cpu ? cpu->state.all_of(cpu_flag::exit) : Emu.IsStopped()))
|
||||
|
|
@ -5158,14 +5310,18 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
*progress_dialog = get_localized_string(localized_string_id::PROGRESS_DIALOG_LINKING_PPU_MODULES);
|
||||
}
|
||||
|
||||
usz mod_index = umax;
|
||||
|
||||
for (const auto& [obj_name, is_compiled] : link_workload)
|
||||
{
|
||||
mod_index++;
|
||||
|
||||
if (cpu ? cpu->state.all_of(cpu_flag::exit) : Emu.IsStopped())
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if (!failed_to_load && !jit->add(cache_path + obj_name))
|
||||
if (!failed_to_load && !jits[mod_index / c_moudles_per_jit]->add(cache_path + obj_name))
|
||||
{
|
||||
ppu_log.error("LLVM: Failed to load module %s", obj_name);
|
||||
failed_to_load = true;
|
||||
|
|
@ -5205,10 +5361,10 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
|
||||
progress_dialog = get_localized_string(localized_string_id::PROGRESS_DIALOG_APPLYING_PPU_CODE);
|
||||
|
||||
if (!jit)
|
||||
if (jits.empty())
|
||||
{
|
||||
// No functions - nothing to do
|
||||
ensure(info.funcs.empty());
|
||||
ensure(info.get_funcs().empty());
|
||||
return compiled_new;
|
||||
}
|
||||
|
||||
|
|
@ -5216,25 +5372,27 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
|
||||
if (is_first)
|
||||
{
|
||||
jit->fin();
|
||||
}
|
||||
|
||||
if (is_first)
|
||||
{
|
||||
jit_mod.symbol_resolver = reinterpret_cast<void(*)(u8*, u64)>(jit->get("__resolve_symbols"));
|
||||
ensure(jit_mod.symbol_resolver);
|
||||
}
|
||||
else
|
||||
{
|
||||
ensure(jit_mod.symbol_resolver);
|
||||
for (auto& jit : jits)
|
||||
{
|
||||
jit->fin();
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __APPLE__
|
||||
// Symbol resolver is in JIT mem, so we must enable execution
|
||||
pthread_jit_write_protect_np(true);
|
||||
#endif
|
||||
{
|
||||
usz index = umax;
|
||||
|
||||
jit_mod.symbol_resolver(vm::g_exec_addr, info.segs[0].addr);
|
||||
for (auto& sim : jit_mod.symbol_resolvers)
|
||||
{
|
||||
index++;
|
||||
|
||||
sim = ensure(!is_first ? sim : reinterpret_cast<void(*)(u8*, u64)>(jits[index]->get("__resolve_symbols")));
|
||||
sim(vm::g_exec_addr, info.segs[0].addr);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __APPLE__
|
||||
// Symbol resolver is in JIT mem, so we must enable execution
|
||||
|
|
@ -5242,7 +5400,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
#endif
|
||||
|
||||
// Find a BLR-only function in order to copy it to all BLRs (some games need it)
|
||||
for (const auto& func : info.funcs)
|
||||
for (const auto& func : info.get_funcs())
|
||||
{
|
||||
if (func.size == 4 && *info.get_ptr<u32>(func.addr) == ppu_instructions::BLR())
|
||||
{
|
||||
|
|
@ -5281,7 +5439,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
#endif
|
||||
}
|
||||
|
||||
static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module_part, const std::string& cache_path, const std::string& obj_name, const ppu_module<lv2_obj>& whole_module)
|
||||
static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module_part, const std::string& cache_path, const std::string& obj_name)
|
||||
{
|
||||
#ifdef LLVM_AVAILABLE
|
||||
using namespace llvm;
|
||||
|
|
@ -5307,8 +5465,11 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module
|
|||
translator.get_type<u64>(), // r2
|
||||
}, false);
|
||||
|
||||
// Difference between function name and current location
|
||||
const u32 reloc = module_part.get_relocs().empty() ? 0 : ::at32(module_part.segs, 0).addr;
|
||||
|
||||
// Initialize function list
|
||||
for (const auto& func : module_part.funcs)
|
||||
for (const auto& func : module_part.get_funcs())
|
||||
{
|
||||
if (func.size)
|
||||
{
|
||||
|
|
@ -5374,8 +5535,14 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module
|
|||
fpm.addPass(EarlyCSEPass());
|
||||
#endif
|
||||
|
||||
u32 guest_code_size = 0;
|
||||
u32 min_addr = umax;
|
||||
u32 max_addr = 0;
|
||||
u32 num_func = 0;
|
||||
|
||||
// Translate functions
|
||||
for (usz fi = 0, fmax = module_part.funcs.size(); fi < fmax; fi++)
|
||||
// Start with the lowest bound of the module, function list is sorted
|
||||
for (const auto& mod_func : module_part.get_funcs())
|
||||
{
|
||||
if (Emu.IsStopped())
|
||||
{
|
||||
|
|
@ -5383,10 +5550,15 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module
|
|||
return;
|
||||
}
|
||||
|
||||
if (module_part.funcs[fi].size)
|
||||
if (mod_func.size)
|
||||
{
|
||||
num_func++;
|
||||
guest_code_size += mod_func.size;
|
||||
max_addr = std::max<u32>(max_addr, mod_func.addr + mod_func.size);
|
||||
min_addr = std::min<u32>(min_addr, mod_func.addr);
|
||||
|
||||
// Translate
|
||||
if (const auto func = translator.Translate(module_part.funcs[fi]))
|
||||
if (const auto func = translator.Translate(mod_func))
|
||||
{
|
||||
#ifdef ARCH_X64 // TODO
|
||||
// Run optimization passes
|
||||
|
|
@ -5405,10 +5577,10 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module
|
|||
}
|
||||
}
|
||||
|
||||
// Run this only in one module for all functions
|
||||
if (&whole_module != &module_part)
|
||||
// Run this only in one module for all functions compiled
|
||||
if (module_part.jit_bounds)
|
||||
{
|
||||
if (const auto func = translator.GetSymbolResolver(whole_module))
|
||||
if (const auto func = translator.GetSymbolResolver(module_part))
|
||||
{
|
||||
#ifdef ARCH_X64 // TODO
|
||||
// Run optimization passes
|
||||
|
|
@ -5452,7 +5624,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module
|
|||
return;
|
||||
}
|
||||
|
||||
ppu_log.notice("LLVM: %zu functions generated", _module->getFunctionList().size());
|
||||
ppu_log.notice("LLVM: %zu functions generated (code_size=0x%x, num_func=%d, max_addr(-)min_addr=0x%x)", _module->getFunctionList().size(), guest_code_size, num_func, max_addr - min_addr);
|
||||
}
|
||||
|
||||
// Load or compile module
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue