diff --git a/src/xenia/cpu/backend/x64/x64_backend.cc b/src/xenia/cpu/backend/x64/x64_backend.cc index 41c9832ee..708cd884b 100644 --- a/src/xenia/cpu/backend/x64/x64_backend.cc +++ b/src/xenia/cpu/backend/x64/x64_backend.cc @@ -407,13 +407,25 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() { // rdx = arg0 (context) // r8 = arg1 (guest return address) + struct _code_offsets { + size_t prolog; + size_t body; + size_t epilog; + size_t tail; + } code_offsets = {}; + const size_t stack_size = StackLayout::THUNK_STACK_SIZE; + + code_offsets.prolog = getSize(); + // rsp + 0 = return address mov(qword[rsp + 8 * 3], r8); mov(qword[rsp + 8 * 2], rdx); mov(qword[rsp + 8 * 1], rcx); sub(rsp, stack_size); + code_offsets.body = getSize(); + // Save nonvolatile registers. EmitSaveNonvolatileRegs(); @@ -424,13 +436,26 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() { EmitLoadNonvolatileRegs(); + code_offsets.epilog = getSize(); + add(rsp, stack_size); mov(rcx, qword[rsp + 8 * 1]); mov(rdx, qword[rsp + 8 * 2]); mov(r8, qword[rsp + 8 * 3]); ret(); - void* fn = Emplace(stack_size); + code_offsets.tail = getSize(); + + assert_zero(code_offsets.prolog); + EmitFunctionInfo func_info = {}; + func_info.code_size.total = getSize(); + func_info.code_size.prolog = code_offsets.body - code_offsets.prolog; + func_info.code_size.body = code_offsets.epilog - code_offsets.body; + func_info.code_size.epilog = code_offsets.tail - code_offsets.epilog; + func_info.code_size.tail = getSize() - code_offsets.tail; + func_info.stack_size = stack_size; + + void* fn = Emplace(func_info); return (HostToGuestThunk)fn; } @@ -440,10 +465,22 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() { // r8 = arg1 // r9 = arg2 + struct _code_offsets { + size_t prolog; + size_t body; + size_t epilog; + size_t tail; + } code_offsets = {}; + const size_t stack_size = StackLayout::THUNK_STACK_SIZE; + + code_offsets.prolog = getSize(); + // rsp + 0 = return address sub(rsp, stack_size); + 
code_offsets.body = getSize(); + // Save off volatile registers. EmitSaveVolatileRegs(); @@ -453,10 +490,23 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() { EmitLoadVolatileRegs(); + code_offsets.epilog = getSize(); + add(rsp, stack_size); ret(); - void* fn = Emplace(stack_size); + code_offsets.tail = getSize(); + + assert_zero(code_offsets.prolog); + EmitFunctionInfo func_info = {}; + func_info.code_size.total = getSize(); + func_info.code_size.prolog = code_offsets.body - code_offsets.prolog; + func_info.code_size.body = code_offsets.epilog - code_offsets.body; + func_info.code_size.epilog = code_offsets.tail - code_offsets.epilog; + func_info.code_size.tail = getSize() - code_offsets.tail; + func_info.stack_size = stack_size; + + void* fn = Emplace(func_info); return (GuestToHostThunk)fn; } @@ -466,11 +516,23 @@ extern "C" uint64_t ResolveFunction(void* raw_context, uint32_t target_address); ResolveFunctionThunk X64ThunkEmitter::EmitResolveFunctionThunk() { // ebx = target PPC address // rcx = context + + struct _code_offsets { + size_t prolog; + size_t body; + size_t epilog; + size_t tail; + } code_offsets = {}; + const size_t stack_size = StackLayout::THUNK_STACK_SIZE; + code_offsets.prolog = getSize(); + // rsp + 0 = return address sub(rsp, stack_size); + code_offsets.body = getSize(); + // Save volatile registers EmitSaveVolatileRegs(); @@ -481,10 +543,23 @@ ResolveFunctionThunk X64ThunkEmitter::EmitResolveFunctionThunk() { EmitLoadVolatileRegs(); + code_offsets.epilog = getSize(); + add(rsp, stack_size); jmp(rax); - void* fn = Emplace(stack_size); + code_offsets.tail = getSize(); + + assert_zero(code_offsets.prolog); + EmitFunctionInfo func_info = {}; + func_info.code_size.total = getSize(); + func_info.code_size.prolog = code_offsets.body - code_offsets.prolog; + func_info.code_size.body = code_offsets.epilog - code_offsets.body; + func_info.code_size.epilog = code_offsets.tail - code_offsets.epilog; + func_info.code_size.tail = getSize() - 
code_offsets.tail; + func_info.stack_size = stack_size; + + void* fn = Emplace(func_info); return (ResolveFunctionThunk)fn; } diff --git a/src/xenia/cpu/backend/x64/x64_code_cache.cc b/src/xenia/cpu/backend/x64/x64_code_cache.cc index e4a23248e..85adb6579 100644 --- a/src/xenia/cpu/backend/x64/x64_code_cache.cc +++ b/src/xenia/cpu/backend/x64/x64_code_cache.cc @@ -125,15 +125,14 @@ void X64CodeCache::CommitExecutableRange(uint32_t guest_low, } void* X64CodeCache::PlaceHostCode(uint32_t guest_address, void* machine_code, - size_t code_size, size_t stack_size) { + const EmitFunctionInfo& func_info) { // Same for now. We may use different pools or whatnot later on, like when // we only want to place guest code in a serialized cache on disk. - return PlaceGuestCode(guest_address, machine_code, code_size, stack_size, - nullptr); + return PlaceGuestCode(guest_address, machine_code, func_info, nullptr); } void* X64CodeCache::PlaceGuestCode(uint32_t guest_address, void* machine_code, - size_t code_size, size_t stack_size, + const EmitFunctionInfo& func_info, GuestFunction* function_info) { // Hold a lock while we bump the pointers up. This is important as the // unwind table requires entries AND code to be sorted in order. @@ -149,7 +148,7 @@ void* X64CodeCache::PlaceGuestCode(uint32_t guest_address, void* machine_code, // Reserve code. // Always move the code to land on 16b alignment. code_address = generated_code_base_ + generated_code_offset_; - generated_code_offset_ += xe::round_up(code_size, 16); + generated_code_offset_ += xe::round_up(func_info.code_size.total, 16); // Reserve unwind info. // We go on the high size of the unwind info as we don't know how big we @@ -187,15 +186,17 @@ void* X64CodeCache::PlaceGuestCode(uint32_t guest_address, void* machine_code, old_commit_mark, new_commit_mark)); // Copy code. 
- std::memcpy(code_address, machine_code, code_size); + std::memcpy(code_address, machine_code, func_info.code_size.total); // Fill unused slots with 0xCC std::memset( - code_address + code_size, 0xCC, - xe::round_up(code_size + unwind_reservation.data_size, 16) - code_size); + code_address + func_info.code_size.total, 0xCC, + xe::round_up(func_info.code_size.total + unwind_reservation.data_size, + 16) - + func_info.code_size.total); // Notify subclasses of placed code. - PlaceCode(guest_address, machine_code, code_size, stack_size, code_address, + PlaceCode(guest_address, machine_code, func_info, code_address, unwind_reservation); } diff --git a/src/xenia/cpu/backend/x64/x64_code_cache.h b/src/xenia/cpu/backend/x64/x64_code_cache.h index 8fef0273e..5dc2b580e 100644 --- a/src/xenia/cpu/backend/x64/x64_code_cache.h +++ b/src/xenia/cpu/backend/x64/x64_code_cache.h @@ -25,6 +25,17 @@ namespace cpu { namespace backend { namespace x64 { +struct EmitFunctionInfo { + struct _code_size { + size_t prolog; + size_t body; + size_t epilog; + size_t tail; + size_t total; + } code_size; + size_t stack_size; +}; + class X64CodeCache : public CodeCache { public: ~X64CodeCache() override; @@ -48,9 +59,9 @@ class X64CodeCache : public CodeCache { void CommitExecutableRange(uint32_t guest_low, uint32_t guest_high); void* PlaceHostCode(uint32_t guest_address, void* machine_code, - size_t code_size, size_t stack_size); + const EmitFunctionInfo& func_info); void* PlaceGuestCode(uint32_t guest_address, void* machine_code, - size_t code_size, size_t stack_size, + const EmitFunctionInfo& func_info, GuestFunction* function_info); uint32_t PlaceData(const void* data, size_t length); @@ -84,8 +95,7 @@ class X64CodeCache : public CodeCache { return UnwindReservation(); } virtual void PlaceCode(uint32_t guest_address, void* machine_code, - size_t code_size, size_t stack_size, - void* code_address, + const EmitFunctionInfo& func_info, void* code_address, UnwindReservation unwind_reservation) {} 
std::wstring file_name_; diff --git a/src/xenia/cpu/backend/x64/x64_code_cache_win.cc b/src/xenia/cpu/backend/x64/x64_code_cache_win.cc index 8e895adb0..9cd910d6a 100644 --- a/src/xenia/cpu/backend/x64/x64_code_cache_win.cc +++ b/src/xenia/cpu/backend/x64/x64_code_cache_win.cc @@ -112,13 +112,13 @@ class Win32X64CodeCache : public X64CodeCache { private: UnwindReservation RequestUnwindReservation(uint8_t* entry_address) override; - void PlaceCode(uint32_t guest_address, void* machine_code, size_t code_size, - size_t stack_size, void* code_address, + void PlaceCode(uint32_t guest_address, void* machine_code, + const EmitFunctionInfo& func_info, void* code_address, UnwindReservation unwind_reservation) override; void InitializeUnwindEntry(uint8_t* unwind_entry_address, size_t unwind_table_slot, void* code_address, - size_t code_size, size_t stack_size); + const EmitFunctionInfo& func_info); // Growable function table system handle. void* unwind_table_handle_ = nullptr; @@ -222,13 +222,12 @@ Win32X64CodeCache::RequestUnwindReservation(uint8_t* entry_address) { } void Win32X64CodeCache::PlaceCode(uint32_t guest_address, void* machine_code, - size_t code_size, size_t stack_size, + const EmitFunctionInfo& func_info, void* code_address, UnwindReservation unwind_reservation) { // Add unwind info. InitializeUnwindEntry(unwind_reservation.entry_address, - unwind_reservation.table_slot, code_address, code_size, - stack_size); + unwind_reservation.table_slot, code_address, func_info); if (supports_growable_table_) { // Notify that the unwind table has grown. @@ -237,29 +236,29 @@ void Win32X64CodeCache::PlaceCode(uint32_t guest_address, void* machine_code, } // This isn't needed on x64 (probably), but is convention. 
- FlushInstructionCache(GetCurrentProcess(), code_address, code_size); + FlushInstructionCache(GetCurrentProcess(), code_address, + func_info.code_size.total); } -void Win32X64CodeCache::InitializeUnwindEntry(uint8_t* unwind_entry_address, - size_t unwind_table_slot, - void* code_address, - size_t code_size, - size_t stack_size) { +void Win32X64CodeCache::InitializeUnwindEntry( + uint8_t* unwind_entry_address, size_t unwind_table_slot, void* code_address, + const EmitFunctionInfo& func_info) { auto unwind_info = reinterpret_cast<UNWIND_INFO*>(unwind_entry_address); UNWIND_CODE* unwind_code = nullptr; - if (!stack_size) { - // https://msdn.microsoft.com/en-us/library/ddssxxy8.aspx + assert_true(func_info.code_size.prolog < 256); // needs to fit into a uint8_t + auto prolog_size = static_cast<uint8_t>(func_info.code_size.prolog); + + if (!func_info.stack_size) { + // https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64#struct-unwind_info unwind_info->Version = 1; unwind_info->Flags = 0; - unwind_info->SizeOfProlog = 0; + unwind_info->SizeOfProlog = prolog_size; unwind_info->CountOfCodes = 0; unwind_info->FrameRegister = 0; unwind_info->FrameOffset = 0; - } else if (stack_size <= 128) { - uint8_t prolog_size = 4; - - // https://msdn.microsoft.com/en-us/library/ddssxxy8.aspx + } else if (func_info.stack_size <= 128) { + // https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64#struct-unwind_info unwind_info->Version = 1; unwind_info->Flags = 0; unwind_info->SizeOfProlog = prolog_size; @@ -267,17 +266,16 @@ void Win32X64CodeCache::InitializeUnwindEntry(uint8_t* unwind_entry_address, unwind_info->FrameRegister = 0; unwind_info->FrameOffset = 0; - // https://msdn.microsoft.com/en-us/library/ck9asaa9.aspx + // https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64#struct-unwind_code unwind_code = &unwind_info->UnwindCode[unwind_info->CountOfCodes++]; unwind_code->CodeOffset = 14; // end of instruction + 1 == offset of next instruction unwind_code->UnwindOp = 
UWOP_ALLOC_SMALL; - unwind_code->OpInfo = stack_size / 8 - 1; + unwind_code->OpInfo = func_info.stack_size / 8 - 1; } else { // TODO(benvanik): take as parameters? - uint8_t prolog_size = 7; - - // https://msdn.microsoft.com/en-us/library/ddssxxy8.aspx + // https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64#struct-unwind_info unwind_info->Version = 1; unwind_info->Flags = 0; unwind_info->SizeOfProlog = prolog_size; @@ -285,16 +283,16 @@ void Win32X64CodeCache::InitializeUnwindEntry(uint8_t* unwind_entry_address, unwind_info->FrameRegister = 0; unwind_info->FrameOffset = 0; - // https://msdn.microsoft.com/en-us/library/ck9asaa9.aspx + // https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64#struct-unwind_code unwind_code = &unwind_info->UnwindCode[unwind_info->CountOfCodes++]; unwind_code->CodeOffset = 7; // end of instruction + 1 == offset of next instruction unwind_code->UnwindOp = UWOP_ALLOC_LARGE; unwind_code->OpInfo = 0; // One slot for size - assert_true((stack_size / 8) < 65536u); + assert_true((func_info.stack_size / 8) < 65536u); unwind_code = &unwind_info->UnwindCode[unwind_info->CountOfCodes++]; - unwind_code->FrameOffset = (USHORT)(stack_size) / 8; + unwind_code->FrameOffset = (USHORT)(func_info.stack_size) / 8; } if (unwind_info->CountOfCodes % 1) { @@ -307,7 +305,8 @@ void Win32X64CodeCache::InitializeUnwindEntry(uint8_t* unwind_entry_address, auto& fn_entry = unwind_table_[unwind_table_slot]; fn_entry.BeginAddress = (DWORD)(reinterpret_cast<uint8_t*>(code_address) - generated_code_base_); - fn_entry.EndAddress = (DWORD)(fn_entry.BeginAddress + code_size); + fn_entry.EndAddress = + (DWORD)(fn_entry.BeginAddress + func_info.code_size.total); fn_entry.UnwindData = (DWORD)(unwind_entry_address - generated_code_base_); } diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc index 1d85cbe5f..52d4ff020 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.cc +++ b/src/xenia/cpu/backend/x64/x64_emitter.cc 
@@ -102,14 +102,14 @@ bool X64Emitter::Emit(GuestFunction* function, HIRBuilder* builder, source_map_arena_.Reset(); // Fill the generator with code. - size_t stack_size = 0; - if (!Emit(builder, &stack_size)) { + EmitFunctionInfo func_info = {}; + if (!Emit(builder, func_info)) { return false; } // Copy the final code to the cache and relocate it. *out_code_size = getSize(); - *out_code_address = Emplace(stack_size, function); + *out_code_address = Emplace(func_info, function); // Stash source map. source_map_arena_.CloneContents(out_source_map); @@ -117,18 +117,20 @@ bool X64Emitter::Emit(GuestFunction* function, HIRBuilder* builder, return true; } -void* X64Emitter::Emplace(size_t stack_size, GuestFunction* function) { +void* X64Emitter::Emplace(const EmitFunctionInfo& func_info, + GuestFunction* function) { // To avoid changing xbyak, we do a switcharoo here. // top_ points to the Xbyak buffer, and since we are in AutoGrow mode // it has pending relocations. We copy the top_ to our buffer, swap the // pointer, relocate, then return the original scratch pointer for use. 
uint8_t* old_address = top_; void* new_address; + assert_true(func_info.code_size.total == size_); if (function) { - new_address = code_cache_->PlaceGuestCode(function->address(), top_, size_, - stack_size, function); + new_address = code_cache_->PlaceGuestCode(function->address(), top_, + func_info, function); } else { - new_address = code_cache_->PlaceHostCode(0, top_, size_, stack_size); + new_address = code_cache_->PlaceHostCode(0, top_, func_info); } top_ = reinterpret_cast<uint8_t*>(new_address); ready(); @@ -137,7 +139,7 @@ void* X64Emitter::Emplace(size_t stack_size, GuestFunction* function) { return new_address; } -bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) { +bool X64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) { Xbyak::Label epilog_label; epilog_label_ = &epilog_label; @@ -159,6 +161,15 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) { stack_offset -= StackLayout::GUEST_STACK_SIZE; stack_offset = xe::align(stack_offset, static_cast<size_t>(16)); + struct _code_offsets { + size_t prolog; + size_t body; + size_t epilog; + size_t tail; + } code_offsets = {}; + + code_offsets.prolog = getSize(); + // Function prolog. // Must be 16b aligned. // Windows is very strict about the form of this and the epilog: @@ -168,7 +179,7 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) { // Adding or changing anything here must be matched! const size_t stack_size = StackLayout::GUEST_STACK_SIZE + stack_offset; assert_true((stack_size + 8) % 16 == 0); - *out_stack_size = stack_size; + func_info.stack_size = stack_size; stack_size_ = stack_size; sub(rsp, (uint32_t)stack_size); @@ -208,6 +219,8 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) { mov(GetMembaseReg(), qword[GetContextReg() + offsetof(ppc::PPCContext, virtual_membase)]); + code_offsets.body = getSize(); + // Body. 
auto block = builder->first_block(); while (block) { @@ -236,6 +249,8 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) { block = block->next; } + code_offsets.epilog = getSize(); + // Function epilog. L(epilog_label); epilog_label_ = nullptr; @@ -244,6 +259,8 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) { add(rsp, (uint32_t)stack_size); ret(); + code_offsets.tail = getSize(); + if (cvars::emit_source_annotations) { nop(); nop(); @@ -252,6 +269,13 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) { nop(); } + assert_zero(code_offsets.prolog); + func_info.code_size.total = getSize(); + func_info.code_size.prolog = code_offsets.body - code_offsets.prolog; + func_info.code_size.body = code_offsets.epilog - code_offsets.body; + func_info.code_size.epilog = code_offsets.tail - code_offsets.epilog; + func_info.code_size.tail = getSize() - code_offsets.tail; + return true; } diff --git a/src/xenia/cpu/backend/x64/x64_emitter.h b/src/xenia/cpu/backend/x64/x64_emitter.h index fd9ef943f..626894aaa 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.h +++ b/src/xenia/cpu/backend/x64/x64_emitter.h @@ -39,6 +39,8 @@ namespace x64 { class X64Backend; class X64CodeCache; +struct EmitFunctionInfo; + enum RegisterFlags { REG_DEST = (1 << 0), REG_ABCD = (1 << 1), @@ -222,8 +224,9 @@ class X64Emitter : public Xbyak::CodeGenerator { size_t stack_size() const { return stack_size_; } protected: - void* Emplace(size_t stack_size, GuestFunction* function = nullptr); - bool Emit(hir::HIRBuilder* builder, size_t* out_stack_size); + void* Emplace(const EmitFunctionInfo& func_info, + GuestFunction* function = nullptr); + bool Emit(hir::HIRBuilder* builder, EmitFunctionInfo& func_info); void EmitGetCurrentThreadId(); void EmitTraceUserCallReturn();