[CPU] Detect corrupted global_mutex pointer and fail fast to avoid crashes

Adds comprehensive validation of critical pointers in PPCContext and
BuiltinFunction to detect memory corruption before it causes crashes
in mutex operations.

Key improvements:
- Pre-execution validation of the global_mutex pointer in Processor::Execute
- Post-execution validation to identify which function caused the corruption
- Stricter BuiltinFunction arg0 pointer validation (low-address threshold
  raised from 0x1000 to 0x10000) with detailed errors
- Pre- and post-execution validation of global_mutex in GuestFunction::Call

These checks help identify the source of memory corruption (likely guest
code buffer overflows writing beyond VMX register arrays) and provide
detailed diagnostic information including function addresses, thread IDs,
and stack pointers.

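The assertions ensure the emulator fails fast with clear error messages
rather than crashing with cryptic segfaults in std::recursive_mutex::unlock().
Note that a corrupted global_mutex pointer is no longer silently restored:
the affected call now asserts and returns false instead of continuing.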
This commit is contained in:
I'm Matheus 2025-11-20 11:58:44 -03:00
parent 23ec2a4c80
commit 493af44de1
2 changed files with 82 additions and 18 deletions

View file

@ -44,12 +44,22 @@ bool BuiltinFunction::Call(ThreadState* thread_state, uint32_t return_address) {
}
assert_not_null(handler_);
// Detect corruption of builtin argument pointers (e.g., global mutex
// accidentally overwritten by guest code). A very low non-null address is
// almost certainly invalid here and has led to crashes in unlock().
if (arg0_ && reinterpret_cast<uintptr_t>(arg0_) < 0x1000) {
XELOGE("BuiltinFunction '{}' arg0 pointer appears corrupt: {:p}", name(), arg0_);
// Detect corrupted builtin argument pointers before calling the handler.
// A very low non-null address (< 0x10000) is almost certainly invalid and
// indicates memory corruption, likely from guest code buffer overflow.
// This check helps identify the problem before it causes a crash in the
// mutex operations within builtin handlers.
if (arg0_ && reinterpret_cast<uintptr_t>(arg0_) < 0x10000) {
XELOGE(
"BuiltinFunction '{}' detected corrupted arg0 pointer: {:p}. "
"This likely indicates memory corruption from guest code. "
"The emulation cannot continue safely.",
name(), arg0_);
assert_always("BuiltinFunction arg0 corrupted - guest code memory corruption detected");
return false;
}
handler_(thread_state->context(), arg0_, arg1_);
if (original_thread_state != thread_state) {
@ -135,18 +145,40 @@ bool GuestFunction::Call(ThreadState* thread_state, uint32_t return_address) {
ThreadState::Bind(thread_state);
}
// Validate the global mutex pointer before executing guest code to help
// diagnose crashes where std::recursive_mutex::unlock() sees an invalid
// 'this' (e.g., 0x1).
// Validate PPCContext critical pointers before executing guest code.
// This detects corruption that may have occurred from a previous function.
auto ctx = thread_state->context();
auto& expected_global_mutex = xe::global_critical_region::mutex();
if (ctx->global_mutex != &expected_global_mutex) {
XELOGE("GuestFunction '{}' executing with corrupted global_mutex {:p}; restoring", name(), ctx->global_mutex);
ctx->global_mutex = &expected_global_mutex;
uintptr_t corrupt_ptr = reinterpret_cast<uintptr_t>(ctx->global_mutex);
XELOGE(
"GuestFunction '{}' at 0x{:08X} called with corrupted PPCContext. "
"global_mutex pointer is {:p} / 0x{:X} (expected {:p}). "
"Corruption likely occurred in a previous function call.",
name(), address(), ctx->global_mutex, corrupt_ptr,
static_cast<void*>(&expected_global_mutex));
assert_always(
"PPCContext already corrupted before function execution. Previous "
"guest function likely has buffer overflow.");
return false;
}
bool result = CallImpl(thread_state, return_address);
// Validate context after execution to catch corruption during this function.
if (ctx->global_mutex != &expected_global_mutex) {
uintptr_t corrupt_ptr = reinterpret_cast<uintptr_t>(ctx->global_mutex);
XELOGE(
"GuestFunction '{}' at 0x{:08X} CORRUPTED PPCContext during "
"execution. global_mutex changed to {:p} / 0x{:X}. "
"This function has a buffer overflow or invalid memory write.",
name(), address(), ctx->global_mutex, corrupt_ptr);
assert_always(
"Memory corruption detected in guest function execution. "
"The function has a buffer overflow bug.");
return false;
}
if (original_thread_state != thread_state) {
ThreadState::Bind(original_thread_state);
}

View file

@ -337,14 +337,29 @@ bool Processor::Execute(ThreadState* thread_state, uint32_t address) {
auto context = thread_state->context();
// Defensive: ensure the context's global mutex pointer hasn't been clobbered
// by guest code scribbling over the red zone. A corrupt pointer (like 0x1)
// leads to a crash when unlock() is invoked by translated code paths.
// Validate critical PPCContext pointers before executing guest code.
// The global_mutex pointer is particularly susceptible to corruption from
// guest code writing beyond array bounds (e.g., VMX register array overflow).
// Detecting corruption here helps identify the source before crashes occur.
auto& expected_global_mutex = xe::global_critical_region::mutex();
if (context->global_mutex != &expected_global_mutex) {
uintptr_t raw_ptr = reinterpret_cast<uintptr_t>(context->global_mutex);
XELOGE("PPCContext global_mutex pointer corrupted (was {:p} / 0x{:X}), restoring", context->global_mutex, raw_ptr);
context->global_mutex = &expected_global_mutex;
uintptr_t corrupt_ptr = reinterpret_cast<uintptr_t>(context->global_mutex);
XELOGE(
"PPCContext global_mutex pointer corrupted (expected {:p}, got {:p} / "
"0x{:X}). This indicates guest code is writing beyond allocated "
"boundaries. Common causes: VMX register overflow, stack corruption, or "
"invalid memory access in translated code. Thread ID: {}",
static_cast<void*>(&expected_global_mutex), context->global_mutex,
corrupt_ptr, thread_state->thread_id());
// Log additional context for debugging
XELOGE(" Function address: 0x{:08X}", address);
XELOGE(" Stack pointer (r1): 0x{:08X}", context->r[1]);
assert_always(
"PPCContext corruption detected - cannot continue safely. Check for "
"guest code buffer overflows or emulator bugs in array bound checks.");
return false;
}
// Pad out stack a bit, as some games seem to overwrite the caller by about
@ -359,6 +374,23 @@ bool Processor::Execute(ThreadState* thread_state, uint32_t address) {
// Execute the function.
auto result = function->Call(thread_state, uint32_t(context->lr));
// Validate context integrity after execution to detect corruption during
// the function call. This helps narrow down which guest functions cause
// memory corruption.
if (context->global_mutex != &expected_global_mutex) {
uintptr_t corrupt_ptr = reinterpret_cast<uintptr_t>(context->global_mutex);
XELOGE(
"PPCContext global_mutex corrupted DURING function execution at "
"0x{:08X}. Pointer changed from {:p} to {:p} / 0x{:X}. This "
"indicates the executed function wrote beyond its allocated memory.",
address, static_cast<void*>(&expected_global_mutex),
context->global_mutex, corrupt_ptr);
assert_always(
"Memory corruption detected during function execution. The executed "
"guest code has a buffer overflow or invalid memory write.");
return false;
}
context->lr = previous_lr;
context->r[1] += 64 + 112;