// Patch overview. This is commentary only: it sits ahead of the first
// "diff --git" header and is not part of any hunk below.
//
// Purpose: give the IVM backend real handlers for the LOAD_ACQUIRE and
// STORE_RELEASE opcodes. Before this patch both dispatch_table slots pointed
// at TranslateInvalid.
//
// 1. ivm_function.cc, IVMFunction::CallImpl
//    Keeps thread_state->memory() in a local `memory` and uses it to fill
//    ics.membase and the new ics.reserve_address field.
//
// 2. ivm_intcode.cc
//    - IntCode_LOAD_ACQUIRE_{I8,I16,I32,I64,F32,F64,V128}: read the address
//      from the src1 register as u32, pass it to xe_atomic_exchange_32 along
//      with ics.reserve_address, then load the typed value at
//      ics.membase + address into the dest register and return IA_NEXT.
//      The V128 variant loads from (address & ~0xF) but hands the unmasked
//      address to xe_atomic_exchange_32.
//    - IntCode_STORE_RELEASE_{I8,...,V128}: call
//      xe_atomic_exchange_32(0, ics.reserve_address) and write the src2
//      register value to ics.membase + address only when the returned value
//      equals the address taken from src1. The dest register's i8 field is
//      set to 1 when the write happened and 0 otherwise; IA_NEXT is returned
//      in both cases. The V128 variant writes to (address & ~0xF) but
//      compares against the unmasked address.
//    - Translate_LOAD_ACQUIRE / Translate_STORE_RELEASE: choose a handler by
//      indexing a local fns[] table with i->dest->type and
//      i->src2.value->type respectively, then call DispatchToC.
//      NOTE(review): this assumes the type enum is ordered
//      I8, I16, I32, I64, F32, F64, V128 -- confirm against its declaration.
//    - dispatch_table: the two TranslateInvalid placeholders are replaced by
//      Translate_LOAD_ACQUIRE and Translate_STORE_RELEASE.
//
// 3. ivm_intcode.h
//    Adds `uint32_t* reserve_address` next to rf/context/membase in the
//    interpreter state struct.
//
// 4. memory.h
//    Adds the protected member `uint32_t reserve_address_` and an inline
//    accessor reserve_address() that returns a pointer to it.
//
// NOTE(review): xe_atomic_exchange_32 is defined outside this patch. From the
//   way its result is compared with `address`, it presumably stores its first
//   argument at the pointer and returns the previous value -- confirm.
// NOTE(review): no initialization of reserve_address_ is visible in these
//   hunks -- confirm it is given a defined value before the first
//   STORE_RELEASE reads it.
// NOTE(review): the new handlers do not use the DYNAMIC_REGISTER_ACCESS_CHECK
//   test that appears in the IntCode_STORE_I8 context lines -- confirm that
//   skipping it is intended.
diff --git a/src/alloy/backend/ivm/ivm_function.cc b/src/alloy/backend/ivm/ivm_function.cc index 0782f3a4b..b0aacc1fb 100644 --- a/src/alloy/backend/ivm/ivm_function.cc +++ b/src/alloy/backend/ivm/ivm_function.cc @@ -39,10 +39,13 @@ int IVMFunction::CallImpl(ThreadState* thread_state, uint64_t return_address) { size_t register_file_size = register_count_ * sizeof(Register); Register* register_file = (Register*)alloca(register_file_size); + Memory* memory = thread_state->memory(); + IntCodeState ics; ics.rf = register_file; ics.context = (uint8_t*)thread_state->raw_context(); - ics.membase = thread_state->memory()->membase(); + ics.membase = memory->membase(); + ics.reserve_address = memory->reserve_address(); ics.did_carry = 0; ics.access_callbacks = thread_state->runtime()->access_callbacks(); ics.thread_state = thread_state; diff --git a/src/alloy/backend/ivm/ivm_intcode.cc b/src/alloy/backend/ivm/ivm_intcode.cc index d4f98d19a..0437c1d40 100644 --- a/src/alloy/backend/ivm/ivm_intcode.cc +++ b/src/alloy/backend/ivm/ivm_intcode.cc @@ -1246,6 +1246,61 @@ int Translate_LOAD(TranslationContext& ctx, Instr* i) { return DispatchToC(ctx, i, fns[i->dest->type]); } +uint32_t IntCode_LOAD_ACQUIRE_I8(IntCodeState& ics, const IntCode* i) { + uint32_t address = ics.rf[i->src1_reg].u32; + xe_atomic_exchange_32(address, ics.reserve_address); + ics.rf[i->dest_reg].i8 = *((int8_t*)(ics.membase + address)); + return IA_NEXT; +} +uint32_t IntCode_LOAD_ACQUIRE_I16(IntCodeState& ics, const IntCode* i) { + uint32_t address = ics.rf[i->src1_reg].u32; + xe_atomic_exchange_32(address, ics.reserve_address); + ics.rf[i->dest_reg].i16 = *((int16_t*)(ics.membase + address)); + return IA_NEXT; +} +uint32_t IntCode_LOAD_ACQUIRE_I32(IntCodeState& ics, const IntCode* i) { + uint32_t address = ics.rf[i->src1_reg].u32; + xe_atomic_exchange_32(address, ics.reserve_address); + ics.rf[i->dest_reg].i32 = *((int32_t*)(ics.membase + address)); + return IA_NEXT; +} +uint32_t 
IntCode_LOAD_ACQUIRE_I64(IntCodeState& ics, const IntCode* i) { + uint32_t address = ics.rf[i->src1_reg].u32; + xe_atomic_exchange_32(address, ics.reserve_address); + ics.rf[i->dest_reg].i64 = *((int64_t*)(ics.membase + address)); + return IA_NEXT; +} +uint32_t IntCode_LOAD_ACQUIRE_F32(IntCodeState& ics, const IntCode* i) { + uint32_t address = ics.rf[i->src1_reg].u32; + xe_atomic_exchange_32(address, ics.reserve_address); + ics.rf[i->dest_reg].f32 = *((float*)(ics.membase + address)); + return IA_NEXT; +} +uint32_t IntCode_LOAD_ACQUIRE_F64(IntCodeState& ics, const IntCode* i) { + uint32_t address = ics.rf[i->src1_reg].u32; + xe_atomic_exchange_32(address, ics.reserve_address); + ics.rf[i->dest_reg].f64 = *((double*)(ics.membase + address)); + return IA_NEXT; +} +uint32_t IntCode_LOAD_ACQUIRE_V128(IntCodeState& ics, const IntCode* i) { + uint32_t address = ics.rf[i->src1_reg].u32; + xe_atomic_exchange_32(address, ics.reserve_address); + ics.rf[i->dest_reg].v128 = *((vec128_t*)(ics.membase + (address & ~0xF))); + return IA_NEXT; +} +int Translate_LOAD_ACQUIRE(TranslationContext& ctx, Instr* i) { + static IntCodeFn fns[] = { + IntCode_LOAD_ACQUIRE_I8, + IntCode_LOAD_ACQUIRE_I16, + IntCode_LOAD_ACQUIRE_I32, + IntCode_LOAD_ACQUIRE_I64, + IntCode_LOAD_ACQUIRE_F32, + IntCode_LOAD_ACQUIRE_F64, + IntCode_LOAD_ACQUIRE_V128, + }; + return DispatchToC(ctx, i, fns[i->dest->type]); +} + uint32_t IntCode_STORE_I8(IntCodeState& ics, const IntCode* i) { uint32_t address = ics.rf[i->src1_reg].u32; if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) { @@ -1331,6 +1386,89 @@ int Translate_STORE(TranslationContext& ctx, Instr* i) { return DispatchToC(ctx, i, fns[i->src2.value->type]); } +uint32_t IntCode_STORE_RELEASE_I8(IntCodeState& ics, const IntCode* i) { + uint32_t address = ics.rf[i->src1_reg].u32; + int8_t stored = 0; + if (xe_atomic_exchange_32(0, ics.reserve_address) == address) { + *((int8_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i8; + stored = 1; + } + 
ics.rf[i->dest_reg].i8 = stored; + return IA_NEXT; +} +uint32_t IntCode_STORE_RELEASE_I16(IntCodeState& ics, const IntCode* i) { + uint32_t address = ics.rf[i->src1_reg].u32; + int8_t stored = 0; + if (xe_atomic_exchange_32(0, ics.reserve_address) == address) { + *((int16_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i16; + stored = 1; + } + ics.rf[i->dest_reg].i8 = stored; + return IA_NEXT; +} +uint32_t IntCode_STORE_RELEASE_I32(IntCodeState& ics, const IntCode* i) { + uint32_t address = ics.rf[i->src1_reg].u32; + int8_t stored = 0; + if (xe_atomic_exchange_32(0, ics.reserve_address) == address) { + *((int32_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i32; + stored = 1; + } + ics.rf[i->dest_reg].i8 = stored; + return IA_NEXT; +} +uint32_t IntCode_STORE_RELEASE_I64(IntCodeState& ics, const IntCode* i) { + uint32_t address = ics.rf[i->src1_reg].u32; + int8_t stored = 0; + if (xe_atomic_exchange_32(0, ics.reserve_address) == address) { + *((int64_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i64; + stored = 1; + } + ics.rf[i->dest_reg].i8 = stored; + return IA_NEXT; +} +uint32_t IntCode_STORE_RELEASE_F32(IntCodeState& ics, const IntCode* i) { + uint32_t address = ics.rf[i->src1_reg].u32; + int8_t stored = 0; + if (xe_atomic_exchange_32(0, ics.reserve_address) == address) { + *((float*)(ics.membase + address)) = ics.rf[i->src2_reg].f32; + stored = 1; + } + ics.rf[i->dest_reg].i8 = stored; + return IA_NEXT; +} +uint32_t IntCode_STORE_RELEASE_F64(IntCodeState& ics, const IntCode* i) { + uint32_t address = ics.rf[i->src1_reg].u32; + int8_t stored = 0; + if (xe_atomic_exchange_32(0, ics.reserve_address) == address) { + *((double*)(ics.membase + address)) = ics.rf[i->src2_reg].f64; + stored = 1; + } + ics.rf[i->dest_reg].i8 = stored; + return IA_NEXT; +} +uint32_t IntCode_STORE_RELEASE_V128(IntCodeState& ics, const IntCode* i) { + uint32_t address = ics.rf[i->src1_reg].u32; + int8_t stored = 0; + if (xe_atomic_exchange_32(0, ics.reserve_address) == address) { 
+ *((vec128_t*)(ics.membase + (address & ~0xF))) = ics.rf[i->src2_reg].v128; + stored = 1; + } + ics.rf[i->dest_reg].i8 = stored; + return IA_NEXT; +} +int Translate_STORE_RELEASE(TranslationContext& ctx, Instr* i) { + static IntCodeFn fns[] = { + IntCode_STORE_RELEASE_I8, + IntCode_STORE_RELEASE_I16, + IntCode_STORE_RELEASE_I32, + IntCode_STORE_RELEASE_I64, + IntCode_STORE_RELEASE_F32, + IntCode_STORE_RELEASE_F64, + IntCode_STORE_RELEASE_V128, + }; + return DispatchToC(ctx, i, fns[i->src2.value->type]); +} + uint32_t IntCode_PREFETCH(IntCodeState& ics, const IntCode* i) { return IA_NEXT; } @@ -2865,9 +3003,9 @@ static const TranslateFn dispatch_table[] = { Translate_STORE_CONTEXT, Translate_LOAD, - TranslateInvalid, //Translate_LOAD_ACQUIRE, + Translate_LOAD_ACQUIRE, Translate_STORE, - TranslateInvalid, //Translate_STORE_RELEASE, + Translate_STORE_RELEASE, Translate_PREFETCH, TranslateInvalid, //Translate_MAX, diff --git a/src/alloy/backend/ivm/ivm_intcode.h b/src/alloy/backend/ivm/ivm_intcode.h index 68aa69763..9312ff2ce 100644 --- a/src/alloy/backend/ivm/ivm_intcode.h +++ b/src/alloy/backend/ivm/ivm_intcode.h @@ -43,6 +43,7 @@ typedef struct { Register* rf; uint8_t* context; uint8_t* membase; + uint32_t* reserve_address; int8_t did_carry; runtime::RegisterAccessCallbacks* access_callbacks; runtime::ThreadState* thread_state; diff --git a/src/alloy/memory.h b/src/alloy/memory.h index 04e6ad6ff..8ceb1ee63 100644 --- a/src/alloy/memory.h +++ b/src/alloy/memory.h @@ -32,6 +32,7 @@ public: inline uint8_t* Translate(uint64_t guest_address) const { return membase_ + guest_address; }; + inline uint32_t* reserve_address() { return &reserve_address_; } virtual int Initialize(); @@ -53,6 +54,7 @@ public: protected: size_t system_page_size_; uint8_t* membase_; + uint32_t reserve_address_; };