LLVM: Slice PPU executable memory

2026-04-06 06:55:09 +00:00 · 2025-01-10 17:34:24 +02:00 · 2025-01-10 17:34:24 +02:00 · 9d5b75bb7a
commit 9d5b75bb7a
parent 7b8fee7cdb
7 changed files with 486 additions and 126 deletions
--- a/rpcs3/Emu/Cell/PPUThread.cpp
+++ b/rpcs3/Emu/Cell/PPUThread.cpp
@ -66,6 +66,7 @@
 #include <cctype>
 #include <span>
 #include <optional>
+#include <charconv>

 #include "util/asm.hpp"
 #include "util/vm.hpp"
@ -176,7 +177,7 @@ bool serialize<ppu_thread::cr_bits>(utils::serial& ar, typename ppu_thread::cr_b
 extern void ppu_initialize();
 extern void ppu_finalize(const ppu_module<lv2_obj>& info, bool force_mem_release = false);
 extern bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only = false, u64 file_size = 0);
-static void ppu_initialize2(class jit_compiler& jit, const ppu_module<lv2_obj>& module_part, const std::string& cache_path, const std::string& obj_name, const ppu_module<lv2_obj>& whole_module);
+static void ppu_initialize2(class jit_compiler& jit, const ppu_module<lv2_obj>& module_part, const std::string& cache_path, const std::string& obj_name);
 extern bool ppu_load_exec(const ppu_exec_object&, bool virtual_load, const std::string&, utils::serial* = nullptr);
 extern std::pair<shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_exec_object&, bool virtual_load, const std::string& path, s64 file_offset, utils::serial* = nullptr);
 extern void ppu_unload_prx(const lv2_prx&);
@ -342,11 +343,10 @@ const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway",
 	// Load offset value
 	c.mov(cia_addr_reg, Imm(static_cast<u64>(::offset32(&ppu_thread::cia))));
 	// Load cia
-	c.ldr(a64::w15, arm::Mem(ppu_t_base, cia_addr_reg));
+	c.ldr(pc.w(), arm::Mem(ppu_t_base, cia_addr_reg));
+
 	// Multiply by 2 to index into ptr table
-	const arm::GpX index_shift = a64::x12;
-	c.mov(index_shift, Imm(2));
-	c.mul(pc, pc, index_shift);
+	c.add(pc, pc, pc);

 	// Load call target
 	const arm::GpX call_target = a64::x13;
@ -355,7 +355,7 @@ const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway",
 	const arm::GpX reg_hp = a64::x21;
 	c.mov(reg_hp, call_target);
 	c.lsr(reg_hp, reg_hp, 48);
-	c.lsl(a64::w21, a64::w21, 13);
+	c.lsl(reg_hp.w(), reg_hp.w(), 13);

 	// Zero top 16 bits of call target
 	c.lsl(call_target, call_target, Imm(16));
@ -3665,6 +3665,9 @@ struct jit_core_allocator
 	// Initialize global semaphore with the max number of threads
 	::semaphore<0x7fff> sem{std::max<s16>(thread_count, 1)};

+	// Mutex for special extra-large modules to compile alone
+	shared_mutex shared_mtx;
+
 	static s16 limit()
 	{
 		return static_cast<s16>(std::min<s32>(0x7fff, utils::get_thread_count()));
@ -3677,8 +3680,8 @@ namespace
 	// Compiled PPU module info
 	struct jit_module
 	{
-		void(*symbol_resolver)(u8*, u64) = nullptr;
-		std::shared_ptr<jit_compiler> pjit;
+		std::vector<void(*)(u8*, u64)> symbol_resolvers;
+		std::vector<std::shared_ptr<jit_compiler>> pjit;
 		bool init = false;
 	};

@ -3729,6 +3732,7 @@ namespace
 			}

 			to_destroy.pjit = std::move(found->second.pjit);
+			to_destroy.symbol_resolvers = std::move(found->second.symbol_resolvers);

 			bucket.map.erase(found);
 		}
@ -4445,7 +4449,7 @@ extern void ppu_initialize()

 	idm::select<lv2_obj, lv2_prx>([&](u32, lv2_prx& _module)
 	{
-		if (_module.funcs.empty())
+		if (_module.get_funcs().empty())
 		{
 			return;
 		}
@ -4556,7 +4560,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s

 		auto& ppu_toc = toc_manager.toc_map;

-		for (const auto& func : info.funcs)
+		for (const auto& func : info.get_funcs())
 		{
 			if (func.size && func.blocks.empty())
 			{
@ -4659,11 +4663,14 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 	jit_module& jit_mod = g_fxo->get<jit_module_manager>().get(cache_path + "_" + std::to_string(std::bit_cast<usz>(info.segs[0].ptr)));

 	// Compiler instance (deferred initialization)
-	std::shared_ptr<jit_compiler>& jit = jit_mod.pjit;
+	std::vector<std::shared_ptr<jit_compiler>>& jits = jit_mod.pjit;

 	// Split module into fragments <= 1 MiB
 	usz fpos = 0;

+	// Modules counted so far
+	usz module_counter = 0;
+
 	// Difference between function name and current location
 	const u32 reloc = info.relocs.empty() ? 0 : ::at32(info.segs, 0).addr;

@ -4684,14 +4691,14 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s

 	const cpu_thread* cpu = cpu_thread::get_current();

-	for (auto& func : info.funcs)
+	for (auto& func : info.get_funcs())
 	{
 		if (func.size == 0)
 		{
 			continue;
 		}

-		for (const auto& [addr, size] : func.blocks)
+		for (const auto [addr, size] : func)
 		{
 			if (size == 0)
 			{
@ -4724,26 +4731,138 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s

 	u32 total_compile = 0;

-	while (!jit_mod.init && fpos < info.funcs.size())
+	// Limit how many modules are per JIt instance
+	// Advantage to lower the limit:
+	// 1. Lowering contoniues memory requirements for allocations
+	// Its disadvantage:
+	// 1. B instruction can wander up to 16MB relatively to its range,
+	// each additional split of JIT instance results in a downgraded version of around (100% / N-1th) - (100% / Nth) percent of instructions
+	// where N is the total amunt of JIT instances
+	// Subject to change
+	constexpr u32 c_moudles_per_jit = 100;
+
+	std::shared_ptr<std::pair<u32, u32>> local_jit_bounds = std::make_shared<std::pair<u32, u32>>(u32{umax}, 0);
+
+	const auto shared_runtime = make_shared<jit_runtime>();
+	const auto shared_map = make_shared<std::unordered_map<u32, u64>>();
+	const auto shared_mtx = make_shared<shared_mutex>();
+
+	auto symbols_cement = [runtime = shared_runtime, reloc, bound = info.segs[0].addr + info.segs[0].size - reloc, func_map = shared_map, shared_mtx](const std::string& name) -> u64
 	{
-		// Initialize compiler instance
-		if (!jit && is_being_used_in_emulation)
+		u32 func_addr = umax;
+
+		if (name.starts_with("__0x"))
 		{
-			jit = std::make_shared<jit_compiler>(s_link_table, g_cfg.core.llvm_cpu);
+			u32 addr = umax;
+			auto res = std::from_chars(name.c_str() + 4, name.c_str() + name.size(), addr, 16);
+
+			if (res.ec == std::errc() && res.ptr == name.c_str() + name.size() && addr < bound)
+			{
+				func_addr = addr + reloc;
+			}
 		}

-		// Copy module information (TODO: optimize)
+		if (func_addr == umax)
+		{
+			return {};
+		}
+
+		reader_lock rlock(*shared_mtx);
+
+		if (auto it = func_map->find(func_addr); it != func_map->end())
+		{
+			return it->second;
+		}
+
+		rlock.upgrade();
+
+		u64& code_ptr = (*func_map)[func_addr];
+
+		if (code_ptr)
+		{
+			return +code_ptr;
+		}
+
+		using namespace asmjit;
+
+		auto func = build_function_asm<u8*(*)(ppu_thread&, u64, u8*, u64, u64, u64)>(name, [&](native_asm& c, auto& args)
+		{
+#if defined(ARCH_X64)
+			c.mov(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_exec_addr)));
+			c.mov(x86::edx, func_addr); // Load PC
+			c.mov(x86::dword_ptr(x86::rbp, ::offset32(&ppu_thread::cia)), x86::edx);
+
+			c.mov(x86::rax, x86::qword_ptr(x86::rax, x86::edx, 1, 0)); // Load call target
+			c.mov(x86::rdx, x86::rax);
+			c.shl(x86::rax, 16);
+			c.shr(x86::rax, 16);
+			c.shr(x86::rdx, 48);
+			c.shl(x86::edx, 13);
+			c.mov(x86::r12d, x86::edx); // Load relocation base
+			c.jmp(x86::rax);
+#else
+			// Load REG_Base - use absolute jump target to bypass rel jmp range limits
+			// X19 contains vm::g_exec_addr
+			const arm::GpX exec_addr = a64::x19;
+
+			// X20 contains ppu_thread*
+			const arm::GpX ppu_t_base = a64::x20;
+
+			// Load PC
+			const arm::GpX pc = a64::x15;
+			const arm::GpX cia_addr_reg = a64::x11;
+
+			// Load offset value
+			c.mov(cia_addr_reg, static_cast<u64>(::offset32(&ppu_thread::cia)));
+
+			// Update CIA
+			c.mov(pc.w(), func_addr);
+			c.str(pc.w(), arm::Mem(ppu_t_base, cia_addr_reg));
+
+			// Multiply by 2 to index into ptr table
+			c.add(pc, pc, pc);
+
+			// Load call target
+			const arm::GpX call_target = a64::x13;
+			c.ldr(call_target, arm::Mem(exec_addr, pc));
+
+			// Compute REG_Hp
+			const arm::GpX reg_hp = a64::x21;
+			c.mov(reg_hp, call_target);
+			c.lsr(reg_hp, reg_hp, 48);
+			c.lsl(reg_hp.w(), reg_hp.w(), 13);
+
+			// Zero top 16 bits of call target
+			c.lsl(call_target, call_target, 16);
+			c.lsr(call_target, call_target, 16);
+
+			// Execute LLE call
+			c.br(call_target);
+#endif
+		}, runtime.get());
+
+		code_ptr = reinterpret_cast<u64>(func);
+		return code_ptr;
+	};
+
+	if (has_mfvscr && g_cfg.core.ppu_set_sat_bit)
+	{
+		info.attr += ppu_attr::has_mfvscr;
+	}
+
+	while (!jit_mod.init && fpos < info.get_funcs().size())
+	{
+		// Copy module information
 		ppu_module<lv2_obj> part;
 		part.copy_part(info);
-		part.funcs.reserve(16000);

 		// Overall block size in bytes
 		usz bsize = 0;
 		usz bcount = 0;

-		while (fpos < info.funcs.size())
+		while (fpos < info.get_funcs().size())
 		{
-			auto& func = info.funcs[fpos];
+			auto& func = info.get_funcs()[fpos];

 			if (!func.size)
 			{
@ -4767,9 +4886,9 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 				{
 					auto far_jump = ensure(g_fxo->get<ppu_far_jumps_t>().gen_jump(source));

-					if (source == func.addr && jit)
+					if (source == func.addr)
 					{
-						jit->update_global_mapping(fmt::format("__0x%x", func.addr - reloc), reinterpret_cast<u64>(far_jump));
+						(*shared_map)[func.addr - reloc] = reinterpret_cast<u64>(far_jump);
 					}

 					ppu_register_function_at(source, 4, far_jump);
@ -4783,22 +4902,14 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 				}
 			}

-			// Copy block or function entry
-			ppu_function& entry = part.funcs.emplace_back(func);
+			local_jit_bounds->first = std::min<u32>(local_jit_bounds->first, func.addr);
+			local_jit_bounds->second = std::max<u32>(local_jit_bounds->second, func.addr + func.size);
+
+			part.local_bounds.first = std::min<u32>(part.local_bounds.first, func.addr);
+			part.local_bounds.second = std::max<u32>(part.local_bounds.second, func.addr + func.size);

 			// Fixup some information
-			entry.name = fmt::format("__0x%x", entry.addr - reloc);
-
-			if (has_mfvscr && g_cfg.core.ppu_set_sat_bit)
-			{
-				// TODO
-				entry.attr += ppu_attr::has_mfvscr;
-			}
-
-			if (entry.blocks.empty())
-			{
-				entry.blocks.emplace(func.addr, func.size);
-			}
+			func.name = fmt::format("__0x%x", func.addr - reloc);

 			bsize += func.size;

@ -4815,7 +4926,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s

 			int has_dcbz = !!g_cfg.core.accurate_cache_line_stores;

-			for (const auto& func : part.funcs)
+			for (const auto& func : part.get_funcs())
 			{
 				if (func.size == 0)
 				{
@ -4827,7 +4938,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 				sha1_update(&ctx, reinterpret_cast<const u8*>(&addr), sizeof(addr));
 				sha1_update(&ctx, reinterpret_cast<const u8*>(&size), sizeof(size));

-				for (const auto& block : func.blocks)
+				for (const auto block : func)
 				{
 					if (block.second == 0 || reloc)
 					{
@ -4898,7 +5009,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 				sha1_update(&ctx, ensure(info.get_ptr<const u8>(func.addr)), func.size);
 			}

-			if (!workload.empty() && fpos >= info.funcs.size())
+			if (fpos >= info.get_funcs().size() || module_counter % c_moudles_per_jit == c_moudles_per_jit - 1)
 			{
 				// Hash the entire function grouped addresses for the integrity of the symbol resolver function
 				// Potentially occuring during patches
@ -4906,7 +5017,13 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s

 				std::vector<be_t<u32>> addrs;

-				for (const ppu_function& func : info.funcs)
+				constexpr auto compare = [](const ppu_function& a, u32 addr) { return a.addr < addr; };
+
+				const auto start = std::lower_bound(info.funcs.begin(), info.funcs.end(), local_jit_bounds->first, compare);
+
+				std::span<const ppu_function> span_range{ start, std::lower_bound(start, info.funcs.end(), local_jit_bounds->second, compare) };
+
+				for (const ppu_function& func : span_range)
 				{
 					if (func.size == 0)
 					{
@ -4919,7 +5036,13 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 				// Hash its size too
 				addrs.emplace_back(::size32(addrs));

-				sha1_update(&ctx, reinterpret_cast<const u8*>(addrs.data()), addrs.size() * sizeof(be_t<u32>));
+				if (module_counter != 0)
+				{
+					sha1_update(&ctx, reinterpret_cast<const u8*>(addrs.data()), addrs.size() * sizeof(be_t<u32>));
+				}
+
+				part.jit_bounds = std::move(local_jit_bounds); 
+				local_jit_bounds = std::make_shared<std::pair<u32, u32>>(u32{umax}, 0);
 			}

 			if (false)
@ -4974,7 +5097,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 				settings += ppu_settings::accurate_vnan, settings -= ppu_settings::fixup_vnan, fmt::throw_exception("VNAN Not implemented");
 			if (g_cfg.core.ppu_use_nj_bit)
 				settings += ppu_settings::accurate_nj_mode, settings -= ppu_settings::fixup_nj_denormals, fmt::throw_exception("NJ Not implemented");
-			if (fpos >= info.funcs.size())
+			if (fpos >= info.get_funcs().size() || module_counter % c_moudles_per_jit == c_moudles_per_jit - 1)
 				settings += ppu_settings::contains_symbol_resolver; // Avoid invalidating all modules for this purpose

 			// Write version, hash, CPU, settings
@ -4986,6 +5109,8 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 			break;
 		}

+		module_counter++;
+
 		if (!check_only)
 		{
 			total_compile++;
@ -4996,13 +5121,14 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 		// Check object file
 		if (jit_compiler::check(cache_path + obj_name))
 		{
-			if (!jit && !check_only)
+			if (!is_being_used_in_emulation && !check_only)
 			{
 				ppu_log.success("LLVM: Module exists: %s", obj_name);

 				// Done already, revert total amount increase
 				// Avoid incrementing "pdone" instead because it creates false appreciation for both the progress dialog and the user
 				total_compile--;
+				link_workload.pop_back();
 			}

 			continue;
@ -5113,11 +5239,26 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 					// Keep allocating workload
 					const auto& [obj_name, part] = std::as_const(workload)[i];

+					std::shared_lock rlock(g_fxo->get<jit_core_allocator>().shared_mtx, std::defer_lock);
+					std::unique_lock lock(g_fxo->get<jit_core_allocator>().shared_mtx, std::defer_lock);
+
+					if (part.jit_bounds && part.parent->funcs.size() >= 0x8000)
+					{
+						// Make a large symbol-resolving function compile alone because it has massive memory requirements
+						lock.lock();
+					}
+					else
+					{
+						rlock.lock();
+					}
+
 					ppu_log.warning("LLVM: Compiling module %s%s", cache_path, obj_name);

-					// Use another JIT instance
-					jit_compiler jit2({}, g_cfg.core.llvm_cpu, 0x1);
-					ppu_initialize2(jit2, part, cache_path, obj_name, i == workload.size() - 1 ? main_module : part);
+					{
+						// Use another JIT instance
+						jit_compiler jit2({}, g_cfg.core.llvm_cpu, 0x1);
+						ppu_initialize2(jit2, part, cache_path, obj_name);
+					}

 					ppu_log.success("LLVM: Compiled module %s", obj_name);
 				}
@ -5145,6 +5286,17 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 		g_watchdog_hold_ctr--;
 	}

+	// Initialize compiler instance
+	while (jits.size() < utils::aligned_div<u64>(module_counter, c_moudles_per_jit) && is_being_used_in_emulation)
+	{
+		jits.emplace_back(std::make_shared<jit_compiler>(s_link_table, g_cfg.core.llvm_cpu, 0, symbols_cement));
+	}
+
+	if (jit_mod.symbol_resolvers.empty() && is_being_used_in_emulation)
+	{
+		jit_mod.symbol_resolvers.resize(jits.size());
+	}
+
 	bool failed_to_load = false;
 	{
 		if (!is_being_used_in_emulation || (cpu ? cpu->state.all_of(cpu_flag::exit) : Emu.IsStopped()))
@ -5158,14 +5310,18 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 			*progress_dialog = get_localized_string(localized_string_id::PROGRESS_DIALOG_LINKING_PPU_MODULES);
 		}

+		usz mod_index = umax;
+
 		for (const auto& [obj_name, is_compiled] : link_workload)
 		{
+			mod_index++;
+
 			if (cpu ? cpu->state.all_of(cpu_flag::exit) : Emu.IsStopped())
 			{
 				break;
 			}

-			if (!failed_to_load && !jit->add(cache_path + obj_name))
+			if (!failed_to_load && !jits[mod_index / c_moudles_per_jit]->add(cache_path + obj_name))
 			{
 				ppu_log.error("LLVM: Failed to load module %s", obj_name);
 				failed_to_load = true;
@ -5205,10 +5361,10 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s

 	progress_dialog = get_localized_string(localized_string_id::PROGRESS_DIALOG_APPLYING_PPU_CODE);

-	if (!jit)
+	if (jits.empty())
 	{
 		// No functions - nothing to do
-		ensure(info.funcs.empty());
+		ensure(info.get_funcs().empty());
 		return compiled_new;
 	}

@ -5216,25 +5372,27 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s

 	if (is_first)
 	{
-		jit->fin();
-	}
-
-	if (is_first)
-	{
-		jit_mod.symbol_resolver = reinterpret_cast<void(*)(u8*, u64)>(jit->get("__resolve_symbols"));
-		ensure(jit_mod.symbol_resolver);
-	}
-	else
-	{
-		ensure(jit_mod.symbol_resolver);
+		for (auto& jit : jits)
+		{
+			jit->fin();
+		}
 	}

 #ifdef __APPLE__
 	// Symbol resolver is in JIT mem, so we must enable execution
 	pthread_jit_write_protect_np(true);
 #endif
+	{
+		usz index = umax;

-	jit_mod.symbol_resolver(vm::g_exec_addr, info.segs[0].addr);
+		for (auto& sim : jit_mod.symbol_resolvers)
+		{
+			index++;
+
+			sim = ensure(!is_first ? sim : reinterpret_cast<void(*)(u8*, u64)>(jits[index]->get("__resolve_symbols")));
+			sim(vm::g_exec_addr, info.segs[0].addr);
+		}
+	}

 #ifdef __APPLE__
 	// Symbol resolver is in JIT mem, so we must enable execution
@ -5242,7 +5400,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 #endif

 	// Find a BLR-only function in order to copy it to all BLRs (some games need it)
-	for (const auto& func : info.funcs)
+	for (const auto& func : info.get_funcs())
 	{
 		if (func.size == 4 && *info.get_ptr<u32>(func.addr) == ppu_instructions::BLR())
 		{
@ -5281,7 +5439,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
 #endif
 }

-static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module_part, const std::string& cache_path, const std::string& obj_name, const ppu_module<lv2_obj>& whole_module)
+static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module_part, const std::string& cache_path, const std::string& obj_name)
 {
 #ifdef LLVM_AVAILABLE
 	using namespace llvm;
@ -5307,8 +5465,11 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module
 		translator.get_type<u64>(), // r2
 		}, false);

+	// Difference between function name and current location
+	const u32 reloc = module_part.get_relocs().empty() ? 0 : ::at32(module_part.segs, 0).addr;
+
 	// Initialize function list
-	for (const auto& func : module_part.funcs)
+	for (const auto& func : module_part.get_funcs())
 	{
 		if (func.size)
 		{
@ -5374,8 +5535,14 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module
 		fpm.addPass(EarlyCSEPass());
 #endif

+		u32 guest_code_size = 0;
+		u32 min_addr = umax;
+		u32 max_addr = 0;
+		u32 num_func = 0;
+
 		// Translate functions
-		for (usz fi = 0, fmax = module_part.funcs.size(); fi < fmax; fi++)
+		// Start with the lowest bound of the module, function list is sorted
+		for (const auto& mod_func : module_part.get_funcs())
 		{
 			if (Emu.IsStopped())
 			{
@ -5383,10 +5550,15 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module
 				return;
 			}

-			if (module_part.funcs[fi].size)
+			if (mod_func.size)
 			{
+				num_func++;
+				guest_code_size += mod_func.size;
+				max_addr = std::max<u32>(max_addr, mod_func.addr + mod_func.size);
+				min_addr = std::min<u32>(min_addr, mod_func.addr);
+
 				// Translate
-				if (const auto func = translator.Translate(module_part.funcs[fi]))
+				if (const auto func = translator.Translate(mod_func))
 				{
 #ifdef ARCH_X64 // TODO
 					// Run optimization passes
@ -5405,10 +5577,10 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module
 			}
 		}

-		// Run this only in one module for all functions
-		if (&whole_module != &module_part)
+		// Run this only in one module for all functions compiled
+		if (module_part.jit_bounds)
 		{
-			if (const auto func = translator.GetSymbolResolver(whole_module))
+			if (const auto func = translator.GetSymbolResolver(module_part))
 			{
 #ifdef ARCH_X64 // TODO
 				// Run optimization passes
@ -5452,7 +5624,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module
 			return;
 		}

-		ppu_log.notice("LLVM: %zu functions generated", _module->getFunctionList().size());
+		ppu_log.notice("LLVM: %zu functions generated (code_size=0x%x, num_func=%d, max_addr(-)min_addr=0x%x)", _module->getFunctionList().size(), guest_code_size, num_func, max_addr - min_addr);
 	}

 	// Load or compile module