mirror of
https://github.com/RPCSX/rpcsx.git
synced 2025-12-06 07:12:14 +01:00
llvm: try better protect from oom
This commit is contained in:
parent
745b0c45e0
commit
0e639725c1
|
|
@ -167,9 +167,104 @@ bool serialize<ppu_thread::cr_bits>(utils::serial& ar, typename ppu_thread::cr_b
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class concurent_memory_limit
|
||||||
|
{
|
||||||
|
u32 m_total = 0;
|
||||||
|
atomic_t<u32> m_free = 0;
|
||||||
|
|
||||||
|
static constexpr auto k_block_size = 1024 * 8;
|
||||||
|
|
||||||
|
public:
|
||||||
|
class [[nodiscard]] user
|
||||||
|
{
|
||||||
|
concurent_memory_limit *m_limit = nullptr;
|
||||||
|
u32 m_used = 0;
|
||||||
|
|
||||||
|
public:
|
||||||
|
user(concurent_memory_limit *limit, u32 used) : m_limit(limit), m_used(used) {}
|
||||||
|
user() = default;
|
||||||
|
user(user &&other)
|
||||||
|
{
|
||||||
|
*this = std::move(other);
|
||||||
|
}
|
||||||
|
|
||||||
|
~user()
|
||||||
|
{
|
||||||
|
if (m_used != 0)
|
||||||
|
{
|
||||||
|
m_limit->release(m_used);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
user &operator=(user &&other)
|
||||||
|
{
|
||||||
|
std::swap(other.m_limit, m_limit);
|
||||||
|
std::swap(other.m_used, m_used);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
explicit operator bool() const { return m_limit != nullptr; }
|
||||||
|
};
|
||||||
|
|
||||||
|
concurent_memory_limit(u64 total)
|
||||||
|
: m_total(u32(std::min<u64>(total / k_block_size, std::numeric_limits<u32>::max()))), m_free(m_total) {}
|
||||||
|
|
||||||
|
|
||||||
|
user acquire(u64 amount)
|
||||||
|
{
|
||||||
|
amount = utils::aligned_div<u64>(amount, k_block_size);
|
||||||
|
|
||||||
|
u32 allocated = 0;
|
||||||
|
while (!m_free.fetch_op([&, this](u32& value)
|
||||||
|
{
|
||||||
|
if (value >= amount || value == m_total)
|
||||||
|
{
|
||||||
|
// Allow at least allocation, make 0 the "memory unavailable" sign value for atomic waiting efficiency
|
||||||
|
const u32 new_val = static_cast<u32>(utils::sub_saturate<u64>(value, amount));
|
||||||
|
allocated = value - new_val;
|
||||||
|
value = new_val;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resort to waiting
|
||||||
|
allocated = 0;
|
||||||
|
return Emu.IsStopped();
|
||||||
|
}).second)
|
||||||
|
{
|
||||||
|
// Wait until not 0
|
||||||
|
m_free.wait(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Emu.IsStopped())
|
||||||
|
{
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
return user(this, allocated);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t free_memory() const {
|
||||||
|
return m_free.load() * k_block_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::uint64_t total_memory() const {
|
||||||
|
return m_total * k_block_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void release(u32 amount)
|
||||||
|
{
|
||||||
|
if (!m_free.fetch_add(amount))
|
||||||
|
{
|
||||||
|
m_free.notify_all();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
extern void ppu_initialize();
|
extern void ppu_initialize();
|
||||||
extern void ppu_finalize(const ppu_module<lv2_obj>& info, bool force_mem_release = false);
|
extern void ppu_finalize(const ppu_module<lv2_obj>& info, bool force_mem_release = false);
|
||||||
extern bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only = false, u64 file_size = 0);
|
extern bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only = false, u64 file_size = 0);
|
||||||
|
extern bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_size, concurent_memory_limit &memory_limit);
|
||||||
static void ppu_initialize2(class jit_compiler& jit, const ppu_module<lv2_obj>& module_part, const std::string& cache_path, const std::string& obj_name);
|
static void ppu_initialize2(class jit_compiler& jit, const ppu_module<lv2_obj>& module_part, const std::string& cache_path, const std::string& obj_name);
|
||||||
extern bool ppu_load_exec(const ppu_exec_object&, bool virtual_load, const std::string&, utils::serial* = nullptr);
|
extern bool ppu_load_exec(const ppu_exec_object&, bool virtual_load, const std::string&, utils::serial* = nullptr);
|
||||||
extern std::pair<shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_exec_object&, bool virtual_load, const std::string& path, s64 file_offset, utils::serial* = nullptr);
|
extern std::pair<shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_exec_object&, bool virtual_load, const std::string& path, s64 file_offset, utils::serial* = nullptr);
|
||||||
|
|
@ -4171,13 +4266,7 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
||||||
|
|
||||||
lf_queue<file_info> possible_exec_file_paths;
|
lf_queue<file_info> possible_exec_file_paths;
|
||||||
|
|
||||||
// Allow to allocate 2000 times the size of each file for the use of LLVM
|
concurent_memory_limit memory_limit(utils::get_total_memory() / 3);
|
||||||
// This works very nicely with Metal Gear Solid 4 for example:
|
|
||||||
// 2 7MB overlay files -> 14GB
|
|
||||||
// The growth in memory requirements of LLVM is not linear with file size of course
|
|
||||||
// But these estimates should hopefully protect RPCS3 in the coming years
|
|
||||||
// Especially when thread count is on the rise with each CPU generation
|
|
||||||
atomic_t<u32> file_size_limit = static_cast<u32>(std::clamp<u64>(utils::aligned_div<u64>(utils::get_total_memory(), 2000), 65536, u32{umax}));
|
|
||||||
|
|
||||||
const u32 software_thread_limit = std::min<u32>(g_cfg.core.llvm_threads ? g_cfg.core.llvm_threads : u32{umax}, ::size32(file_queue));
|
const u32 software_thread_limit = std::min<u32>(g_cfg.core.llvm_threads ? g_cfg.core.llvm_threads : u32{umax}, ::size32(file_queue));
|
||||||
const u32 cpu_thread_limit = utils::get_thread_count() > 8u ? std::max<u32>(utils::get_thread_count(), 2) - 1 : utils::get_thread_count(); // One LLVM thread less
|
const u32 cpu_thread_limit = utils::get_thread_count() > 8u ? std::max<u32>(utils::get_thread_count(), 2) - 1 : utils::get_thread_count(); // One LLVM thread less
|
||||||
|
|
@ -4236,7 +4325,6 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
||||||
// Set low priority
|
// Set low priority
|
||||||
thread_ctrl::scoped_priority low_prio(-1);
|
thread_ctrl::scoped_priority low_prio(-1);
|
||||||
u32 inc_fdone = 1;
|
u32 inc_fdone = 1;
|
||||||
u32 restore_mem = 0;
|
|
||||||
|
|
||||||
for (usz func_i = fnext++; func_i < file_queue.size(); func_i = fnext++, g_progr_fdone += std::exchange(inc_fdone, 1))
|
for (usz func_i = fnext++; func_i < file_queue.size(); func_i = fnext++, g_progr_fdone += std::exchange(inc_fdone, 1))
|
||||||
{
|
{
|
||||||
|
|
@ -4245,20 +4333,12 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (restore_mem)
|
|
||||||
{
|
|
||||||
if (!file_size_limit.fetch_add(restore_mem))
|
|
||||||
{
|
|
||||||
file_size_limit.notify_all();
|
|
||||||
}
|
|
||||||
|
|
||||||
restore_mem = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto& [path, offset, file_size] = file_queue[func_i];
|
auto& [path, offset, file_size] = file_queue[func_i];
|
||||||
|
|
||||||
ppu_log.notice("Trying to load: %s", path);
|
ppu_log.notice("Trying to load: %s", path);
|
||||||
|
|
||||||
|
auto file_allocation = memory_limit.acquire(file_size * 2);
|
||||||
|
|
||||||
// Load MSELF, SPRX or SELF
|
// Load MSELF, SPRX or SELF
|
||||||
fs::file src{path};
|
fs::file src{path};
|
||||||
|
|
||||||
|
|
@ -4322,52 +4402,15 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto wait_for_memory = [&]() -> bool
|
|
||||||
{
|
|
||||||
// Try not to process too many files at once because it seems to reduce performance and cause RAM shortages
|
|
||||||
// Concurrently compiling more OVL or huge PRX files does not have much theoretical benefit
|
|
||||||
while (!file_size_limit.fetch_op([&](u32& value)
|
|
||||||
{
|
|
||||||
if (value)
|
|
||||||
{
|
|
||||||
// Allow at least one file, make 0 the "memory unavailable" sign value for atomic waiting efficiency
|
|
||||||
const u32 new_val = static_cast<u32>(utils::sub_saturate<u64>(value, file_size));
|
|
||||||
restore_mem = value - new_val;
|
|
||||||
value = new_val;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Resort to waiting
|
|
||||||
restore_mem = 0;
|
|
||||||
return false;
|
|
||||||
}).second)
|
|
||||||
{
|
|
||||||
// Wait until not 0
|
|
||||||
file_size_limit.wait(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (Emu.IsStopped())
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
};
|
|
||||||
|
|
||||||
elf_error prx_err{}, ovl_err{};
|
elf_error prx_err{}, ovl_err{};
|
||||||
|
|
||||||
if (ppu_prx_object obj = src; (prx_err = obj, obj == elf_error::ok))
|
if (ppu_prx_object obj = src; (prx_err = obj, obj == elf_error::ok))
|
||||||
{
|
{
|
||||||
if (!wait_for_memory())
|
|
||||||
{
|
|
||||||
// Emulation stopped
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (auto prx = ppu_load_prx(obj, true, path, offset))
|
if (auto prx = ppu_load_prx(obj, true, path, offset))
|
||||||
{
|
{
|
||||||
obj.clear(), src.close(); // Clear decrypted file and elf object memory
|
obj.clear(), src.close(); // Clear decrypted file and elf object memory
|
||||||
ppu_initialize(*prx, false, file_size);
|
file_allocation = {}; // release used file memory
|
||||||
|
ppu_initialize(*prx, false, file_size, memory_limit);
|
||||||
ppu_finalize(*prx, true);
|
ppu_finalize(*prx, true);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
@ -4400,11 +4443,8 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!wait_for_memory())
|
obj.clear(), src.close(); // Clear decrypted file and elf object memory
|
||||||
{
|
file_allocation = {}; // release used file memory
|
||||||
// Emulation stopped
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Participate in thread execution limitation (takes a long time)
|
// Participate in thread execution limitation (takes a long time)
|
||||||
if (std::lock_guard lock(g_fxo->get<jit_core_allocator>().sem); !ovlm->analyse(0, ovlm->entry, ovlm->seg0_code_end, ovlm->applied_patches, std::vector<u32>{}, []()
|
if (std::lock_guard lock(g_fxo->get<jit_core_allocator>().sem); !ovlm->analyse(0, ovlm->entry, ovlm->seg0_code_end, ovlm->applied_patches, std::vector<u32>{}, []()
|
||||||
|
|
@ -4416,8 +4456,7 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
obj.clear(), src.close(); // Clear decrypted file and elf object memory
|
ppu_initialize(*ovlm, false, file_size, memory_limit);
|
||||||
ppu_initialize(*ovlm, false, file_size);
|
|
||||||
ppu_finalize(*ovlm, true);
|
ppu_finalize(*ovlm, true);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -4432,14 +4471,6 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
||||||
possible_exec_file_paths.push(path, offset, file_size);
|
possible_exec_file_paths.push(path, offset, file_size);
|
||||||
inc_fdone = 0;
|
inc_fdone = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (restore_mem)
|
|
||||||
{
|
|
||||||
if (!file_size_limit.fetch_add(restore_mem))
|
|
||||||
{
|
|
||||||
file_size_limit.notify_all();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
|
|
||||||
// Join every thread
|
// Join every thread
|
||||||
|
|
@ -4482,6 +4513,8 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto file_allocation = memory_limit.acquire(file_size * 2);
|
||||||
|
|
||||||
for (usz i = 0;; i++)
|
for (usz i = 0;; i++)
|
||||||
{
|
{
|
||||||
if (i > decrypt_klics.size())
|
if (i > decrypt_klics.size())
|
||||||
|
|
@ -4550,10 +4583,11 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
||||||
}
|
}
|
||||||
|
|
||||||
obj.clear(), src.close(); // Clear decrypted file and elf object memory
|
obj.clear(), src.close(); // Clear decrypted file and elf object memory
|
||||||
|
file_allocation = {};
|
||||||
|
|
||||||
_main.name = ' '; // Make ppu_finalize work
|
_main.name = ' '; // Make ppu_finalize work
|
||||||
Emu.ConfigurePPUCache();
|
Emu.ConfigurePPUCache();
|
||||||
ppu_initialize(_main, false, file_size);
|
ppu_initialize(_main, false, file_size, memory_limit);
|
||||||
spu_cache::initialize(false);
|
spu_cache::initialize(false);
|
||||||
ppu_finalize(_main, true);
|
ppu_finalize(_main, true);
|
||||||
_main = {};
|
_main = {};
|
||||||
|
|
@ -4719,7 +4753,7 @@ extern void ppu_initialize()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_size)
|
bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_size, concurent_memory_limit &memory_limit)
|
||||||
{
|
{
|
||||||
if (g_cfg.core.ppu_decoder != ppu_decoder_type::llvm)
|
if (g_cfg.core.ppu_decoder != ppu_decoder_type::llvm)
|
||||||
{
|
{
|
||||||
|
|
@ -5466,6 +5500,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||||
|
|
||||||
struct thread_op
|
struct thread_op
|
||||||
{
|
{
|
||||||
|
concurent_memory_limit &memory_limit;
|
||||||
atomic_t<u32>& work_cv;
|
atomic_t<u32>& work_cv;
|
||||||
std::vector<std::pair<std::string, ppu_module<lv2_obj>>>& workload;
|
std::vector<std::pair<std::string, ppu_module<lv2_obj>>>& workload;
|
||||||
const ppu_module<lv2_obj>& main_module;
|
const ppu_module<lv2_obj>& main_module;
|
||||||
|
|
@ -5474,10 +5509,11 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||||
|
|
||||||
std::unique_lock<decltype(jit_core_allocator::sem)> core_lock;
|
std::unique_lock<decltype(jit_core_allocator::sem)> core_lock;
|
||||||
|
|
||||||
thread_op(atomic_t<u32>& work_cv, std::vector<std::pair<std::string, ppu_module<lv2_obj>>>& workload
|
thread_op(concurent_memory_limit &memory_limit, atomic_t<u32>& work_cv, std::vector<std::pair<std::string, ppu_module<lv2_obj>>>& workload
|
||||||
, const cpu_thread* cpu, const ppu_module<lv2_obj>& main_module, const std::string& cache_path, decltype(jit_core_allocator::sem)& sem) noexcept
|
, const cpu_thread* cpu, const ppu_module<lv2_obj>& main_module, const std::string& cache_path, decltype(jit_core_allocator::sem)& sem) noexcept
|
||||||
|
|
||||||
: work_cv(work_cv)
|
: memory_limit(memory_limit)
|
||||||
|
, work_cv(work_cv)
|
||||||
, workload(workload)
|
, workload(workload)
|
||||||
, main_module(main_module)
|
, main_module(main_module)
|
||||||
, cache_path(cache_path)
|
, cache_path(cache_path)
|
||||||
|
|
@ -5488,7 +5524,8 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||||
}
|
}
|
||||||
|
|
||||||
thread_op(const thread_op& other) noexcept
|
thread_op(const thread_op& other) noexcept
|
||||||
: work_cv(other.work_cv)
|
: memory_limit(other.memory_limit)
|
||||||
|
, work_cv(other.work_cv)
|
||||||
, workload(other.workload)
|
, workload(other.workload)
|
||||||
, main_module(other.main_module)
|
, main_module(other.main_module)
|
||||||
, cache_path(other.cache_path)
|
, cache_path(other.cache_path)
|
||||||
|
|
@ -5521,6 +5558,16 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||||
// Keep allocating workload
|
// Keep allocating workload
|
||||||
const auto& [obj_name, part] = std::as_const(workload)[i];
|
const auto& [obj_name, part] = std::as_const(workload)[i];
|
||||||
|
|
||||||
|
|
||||||
|
std::size_t total_fn_size = 0;
|
||||||
|
for (auto &fn : part.get_funcs())
|
||||||
|
{
|
||||||
|
total_fn_size += fn.size;
|
||||||
|
}
|
||||||
|
|
||||||
|
ppu_log.warning("LLVM: reporting used memory %u (free/total: %u/%u) by %s%s", total_fn_size * 1024 * 16, memory_limit.free_memory(), memory_limit.total_memory(), cache_path, obj_name);
|
||||||
|
auto used_memory = memory_limit.acquire(total_fn_size * 1024 * 16);
|
||||||
|
|
||||||
std::shared_lock rlock(g_fxo->get<jit_core_allocator>().shared_mtx, std::defer_lock);
|
std::shared_lock rlock(g_fxo->get<jit_core_allocator>().shared_mtx, std::defer_lock);
|
||||||
std::unique_lock lock(g_fxo->get<jit_core_allocator>().shared_mtx, std::defer_lock);
|
std::unique_lock lock(g_fxo->get<jit_core_allocator>().shared_mtx, std::defer_lock);
|
||||||
|
|
||||||
|
|
@ -5553,7 +5600,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||||
g_watchdog_hold_ctr++;
|
g_watchdog_hold_ctr++;
|
||||||
|
|
||||||
named_thread_group threads(fmt::format("PPUW.%u.", ++g_fxo->get<thread_index_allocator>().index), thread_count
|
named_thread_group threads(fmt::format("PPUW.%u.", ++g_fxo->get<thread_index_allocator>().index), thread_count
|
||||||
, thread_op(work_cv, workload, cpu, info, cache_path, g_fxo->get<jit_core_allocator>().sem)
|
, thread_op(memory_limit, work_cv, workload, cpu, info, cache_path, g_fxo->get<jit_core_allocator>().sem)
|
||||||
, [&](u32 /*thread_index*/, thread_op& op)
|
, [&](u32 /*thread_index*/, thread_op& op)
|
||||||
{
|
{
|
||||||
// Allocate "core"
|
// Allocate "core"
|
||||||
|
|
@ -5728,6 +5775,12 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_size)
|
||||||
|
{
|
||||||
|
concurent_memory_limit memory_limit(utils::aligned_div<u64>(utils::get_total_memory(), 2));
|
||||||
|
return ppu_initialize(info, check_only, file_size, memory_limit);
|
||||||
|
}
|
||||||
|
|
||||||
static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module_part, const std::string& cache_path, const std::string& obj_name)
|
static void ppu_initialize2(jit_compiler& jit, const ppu_module<lv2_obj>& module_part, const std::string& cache_path, const std::string& obj_name)
|
||||||
{
|
{
|
||||||
#ifdef LLVM_AVAILABLE
|
#ifdef LLVM_AVAILABLE
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue