From beb5ee40ee58db2555bd70ca776fcc25ba5b6e32 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Mon, 2 Jun 2014 07:49:45 -0700 Subject: [PATCH] Dirty page table. Not yet used. --- src/alloy/backend/ivm/ivm_function.cc | 1 + src/alloy/backend/ivm/ivm_intcode.cc | 11 +++++++++++ src/alloy/backend/ivm/ivm_intcode.h | 1 + src/alloy/backend/x64/x64_emitter.cc | 5 +++++ src/alloy/backend/x64/x64_emitter.h | 2 ++ src/alloy/backend/x64/x64_sequences.cc | 20 ++++++++++++++++++++ src/alloy/memory.h | 2 ++ src/xenia/cpu/xenon_memory.cc | 13 ++++++++++--- src/xenia/cpu/xenon_memory.h | 4 ++++ 9 files changed, 56 insertions(+), 3 deletions(-) diff --git a/src/alloy/backend/ivm/ivm_function.cc b/src/alloy/backend/ivm/ivm_function.cc index 72c564210..88306b228 100644 --- a/src/alloy/backend/ivm/ivm_function.cc +++ b/src/alloy/backend/ivm/ivm_function.cc @@ -118,6 +118,7 @@ int IVMFunction::CallImpl(ThreadState* thread_state, uint64_t return_address) { ics.locals = local_stack; ics.context = (uint8_t*)thread_state->raw_context(); ics.membase = memory->membase(); + ics.page_table = ics.membase + memory->page_table(); ics.did_carry = 0; ics.did_saturate = 0; ics.thread_state = thread_state; diff --git a/src/alloy/backend/ivm/ivm_intcode.cc b/src/alloy/backend/ivm/ivm_intcode.cc index 1f24bc6ea..1badeab7e 100644 --- a/src/alloy/backend/ivm/ivm_intcode.cc +++ b/src/alloy/backend/ivm/ivm_intcode.cc @@ -1445,6 +1445,10 @@ int Translate_LOAD(TranslationContext& ctx, Instr* i) { return DispatchToC(ctx, i, fns[i->dest->type]); } +void MarkPageDirty(IntCodeState& ics, uint32_t address) { + // 16KB pages. 
+ ics.page_table[(address >> 14) & 0x7FFF] = 1; +} uint32_t IntCode_STORE_I8(IntCodeState& ics, const IntCode* i) { uint32_t address = ics.rf[i->src1_reg].u32; if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) { @@ -1455,6 +1459,7 @@ uint32_t IntCode_STORE_I8(IntCodeState& ics, const IntCode* i) { address, ics.rf[i->src2_reg].i8, ics.rf[i->src2_reg].u8); DFLUSH(); *((int8_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i8; + MarkPageDirty(ics, address); return IA_NEXT; } uint32_t IntCode_STORE_I16(IntCodeState& ics, const IntCode* i) { @@ -1468,6 +1473,7 @@ uint32_t IntCode_STORE_I16(IntCodeState& ics, const IntCode* i) { address, ics.rf[i->src2_reg].i16, ics.rf[i->src2_reg].u16); DFLUSH(); *((int16_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i16; + MarkPageDirty(ics, address); return IA_NEXT; } uint32_t IntCode_STORE_I32(IntCodeState& ics, const IntCode* i) { @@ -1481,6 +1487,7 @@ uint32_t IntCode_STORE_I32(IntCodeState& ics, const IntCode* i) { address, ics.rf[i->src2_reg].i32, ics.rf[i->src2_reg].u32); DFLUSH(); *((int32_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i32; + MarkPageDirty(ics, address); return IA_NEXT; } uint32_t IntCode_STORE_I64(IntCodeState& ics, const IntCode* i) { @@ -1494,6 +1501,7 @@ uint32_t IntCode_STORE_I64(IntCodeState& ics, const IntCode* i) { address, ics.rf[i->src2_reg].i64, ics.rf[i->src2_reg].u64); DFLUSH(); *((int64_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i64; + MarkPageDirty(ics, address); return IA_NEXT; } uint32_t IntCode_STORE_F32(IntCodeState& ics, const IntCode* i) { @@ -1502,6 +1510,7 @@ uint32_t IntCode_STORE_F32(IntCodeState& ics, const IntCode* i) { address, ics.rf[i->src2_reg].f32, ics.rf[i->src2_reg].u32); DFLUSH(); *((float*)(ics.membase + address)) = ics.rf[i->src2_reg].f32; + MarkPageDirty(ics, address); return IA_NEXT; } uint32_t IntCode_STORE_F64(IntCodeState& ics, const IntCode* i) { @@ -1510,6 +1519,7 @@ uint32_t IntCode_STORE_F64(IntCodeState& ics, const IntCode* i) { address, 
ics.rf[i->src2_reg].f64, ics.rf[i->src2_reg].u64); DFLUSH(); *((double*)(ics.membase + address)) = ics.rf[i->src2_reg].f64; + MarkPageDirty(ics, address); return IA_NEXT; } uint32_t IntCode_STORE_V128(IntCodeState& ics, const IntCode* i) { @@ -1520,6 +1530,7 @@ uint32_t IntCode_STORE_V128(IntCodeState& ics, const IntCode* i) { VECI4(ics.rf[i->src2_reg].v128,0), VECI4(ics.rf[i->src2_reg].v128,1), VECI4(ics.rf[i->src2_reg].v128,2), VECI4(ics.rf[i->src2_reg].v128,3)); DFLUSH(); *((vec128_t*)(ics.membase + address)) = ics.rf[i->src2_reg].v128; + MarkPageDirty(ics, address); return IA_NEXT; } int Translate_STORE(TranslationContext& ctx, Instr* i) { diff --git a/src/alloy/backend/ivm/ivm_intcode.h b/src/alloy/backend/ivm/ivm_intcode.h index 340bb4dd3..389ccbef2 100644 --- a/src/alloy/backend/ivm/ivm_intcode.h +++ b/src/alloy/backend/ivm/ivm_intcode.h @@ -43,6 +43,7 @@ typedef struct { uint8_t* locals; uint8_t* context; uint8_t* membase; + uint8_t* page_table; int8_t did_carry; int8_t did_saturate; runtime::ThreadState* thread_state; diff --git a/src/alloy/backend/x64/x64_emitter.cc b/src/alloy/backend/x64/x64_emitter.cc index 090c8fe9a..8674459bf 100644 --- a/src/alloy/backend/x64/x64_emitter.cc +++ b/src/alloy/backend/x64/x64_emitter.cc @@ -446,6 +446,11 @@ void X64Emitter::StoreEflags() { #endif // STORE_EFLAGS } +uint32_t X64Emitter::page_table_address() const { + uint64_t addr = runtime_->memory()->page_table(); + return static_cast<uint32_t>(addr); +} + bool X64Emitter::ConstantFitsIn32Reg(uint64_t v) { if ((v & ~0x7FFFFFFF) == 0) { // Fits under 31 bits, so just load using normal mov. diff --git a/src/alloy/backend/x64/x64_emitter.h b/src/alloy/backend/x64/x64_emitter.h index a720e1970..785ff5ac7 100644 --- a/src/alloy/backend/x64/x64_emitter.h +++ b/src/alloy/backend/x64/x64_emitter.h @@ -132,6 +132,8 @@ public: void LoadEflags(); void StoreEflags(); + uint32_t page_table_address() const; + // Moves a 64bit immediate into memory. 
bool ConstantFitsIn32Reg(uint64_t v); void MovMem64(const Xbyak::RegExp& addr, uint64_t v); diff --git a/src/alloy/backend/x64/x64_sequences.cc b/src/alloy/backend/x64/x64_sequences.cc index e12491f6a..50981f1cb 100644 --- a/src/alloy/backend/x64/x64_sequences.cc +++ b/src/alloy/backend/x64/x64_sequences.cc @@ -1563,6 +1563,12 @@ EMITTER_OPCODE_TABLE( // OPCODE_STORE // ============================================================================ // Note: most *should* be aligned, but needs to be checked! +void EmitMarkPageDirty(X64Emitter& e, RegExp& addr) { + // 16KB pages. + e.shr(e.eax, 14); + e.and(e.eax, 0x7FFF); + e.mov(e.byte[e.rdx + e.rax + e.page_table_address()], 1); +} EMITTER(STORE_I8, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I8<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { auto addr = ComputeMemoryAddress(e, i.src1); @@ -1571,7 +1577,9 @@ EMITTER(STORE_I8, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I8<>>)) { } else { e.mov(e.byte[addr], i.src2); } + EmitMarkPageDirty(e, addr); if (IsTracingData()) { + auto addr = ComputeMemoryAddress(e, i.src1); e.mov(e.r8b, e.byte[addr]); e.lea(e.rdx, e.ptr[addr]); e.CallNative(TraceMemoryStoreI8); @@ -1586,7 +1594,9 @@ EMITTER(STORE_I16, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I16<>>)) { } else { e.mov(e.word[addr], i.src2); } + EmitMarkPageDirty(e, addr); if (IsTracingData()) { + auto addr = ComputeMemoryAddress(e, i.src1); e.mov(e.r8w, e.word[addr]); e.lea(e.rdx, e.ptr[addr]); e.CallNative(TraceMemoryStoreI16); @@ -1601,7 +1611,9 @@ EMITTER(STORE_I32, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I32<>>)) { } else { e.mov(e.dword[addr], i.src2); } + EmitMarkPageDirty(e, addr); if (IsTracingData()) { + auto addr = ComputeMemoryAddress(e, i.src1); e.mov(e.r8d, e.dword[addr]); e.lea(e.rdx, e.ptr[addr]); e.CallNative(TraceMemoryStoreI32); @@ -1616,7 +1628,9 @@ EMITTER(STORE_I64, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I64<>>)) { } else { e.mov(e.qword[addr], i.src2); } + EmitMarkPageDirty(e, addr); if (IsTracingData()) { + auto addr = ComputeMemoryAddress(e, i.src1); e.mov(e.r8, e.qword[addr]); e.lea(e.rdx, e.ptr[addr]); e.CallNative(TraceMemoryStoreI64); @@ 
-1631,7 +1645,9 @@ EMITTER(STORE_F32, MATCH(I<OPCODE_STORE, VoidOp, I64<>, F32<>>)) { } else { e.vmovss(e.dword[addr], i.src2); } + EmitMarkPageDirty(e, addr); if (IsTracingData()) { + auto addr = ComputeMemoryAddress(e, i.src1); e.lea(e.r8, e.ptr[addr]); e.lea(e.rdx, e.ptr[addr]); e.CallNative(TraceMemoryStoreF32); @@ -1646,7 +1662,9 @@ EMITTER(STORE_F64, MATCH(I<OPCODE_STORE, VoidOp, I64<>, F64<>>)) { } else { e.vmovsd(e.qword[addr], i.src2); } + EmitMarkPageDirty(e, addr); if (IsTracingData()) { + auto addr = ComputeMemoryAddress(e, i.src1); e.lea(e.r8, e.ptr[addr]); e.lea(e.rdx, e.ptr[addr]); e.CallNative(TraceMemoryStoreF64); @@ -1662,7 +1680,9 @@ EMITTER(STORE_V128, MATCH(I<OPCODE_STORE, VoidOp, I64<>, V128<>>)) { } else { e.vmovaps(e.ptr[addr], i.src2); } + EmitMarkPageDirty(e, addr); if (IsTracingData()) { + auto addr = ComputeMemoryAddress(e, i.src1); e.lea(e.r8, e.ptr[addr]); e.lea(e.rdx, e.ptr[addr]); e.CallNative(TraceMemoryStoreV128); diff --git a/src/alloy/memory.h b/src/alloy/memory.h index d51d4dc65..72719cc4a 100644 --- a/src/alloy/memory.h +++ b/src/alloy/memory.h @@ -34,6 +34,8 @@ public: }; inline uint32_t* reserve_address() { return &reserve_address_; } + virtual uint64_t page_table() const = 0; + virtual int Initialize(); void Zero(uint64_t address, size_t size); diff --git a/src/xenia/cpu/xenon_memory.cc b/src/xenia/cpu/xenon_memory.cc index 1e4116bf3..22f928022 100644 --- a/src/xenia/cpu/xenon_memory.cc +++ b/src/xenia/cpu/xenon_memory.cc @@ -225,9 +225,9 @@ LONG CALLBACK CheckMMIOHandler(PEXCEPTION_POINTERS ex_info) { } // namespace -XenonMemory::XenonMemory() : - mapping_(0), mapping_base_(0), - Memory() { +XenonMemory::XenonMemory() + : Memory(), + mapping_(0), mapping_base_(0), page_table_(0) { virtual_heap_ = new XenonMemoryHeap(this, false); physical_heap_ = new XenonMemoryHeap(this, true); } @@ -329,6 +329,13 @@ int XenonMemory::Initialize() { //AddVectoredContinueHandler(1, CheckMMIOHandler); } + // Allocate dirty page table. + // This must live within our low heap. 
Ideally we'd hardcode the address but + // this is more flexible. + page_table_ = physical_heap_->Alloc( + 0, (512 * 1024 * 1024) / (16 * 1024), + X_MEM_COMMIT, 16 * 1024); + return 0; XECLEANUP: diff --git a/src/xenia/cpu/xenon_memory.h b/src/xenia/cpu/xenon_memory.h index 5c97649a4..05872d12e 100644 --- a/src/xenia/cpu/xenon_memory.h +++ b/src/xenia/cpu/xenon_memory.h @@ -33,6 +33,8 @@ public: int Initialize() override; + uint64_t page_table() const override { return page_table_; } + bool AddMappedRange(uint64_t address, uint64_t mask, uint64_t size, void* context, @@ -83,6 +85,8 @@ private: XenonMemoryHeap* virtual_heap_; XenonMemoryHeap* physical_heap_; + uint64_t page_table_; + friend class XenonMemoryHeap; };