diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h
index 390db51594..455e058658 100644
--- a/rpcs3/Emu/Cell/SPURecompiler.h
+++ b/rpcs3/Emu/Cell/SPURecompiler.h
@@ -78,6 +78,7 @@ public:
 	SPUInterpreter* inter;
 	JitRuntime runtime;
 	bool first;
+	bool need_check;
 
 	struct SPURecEntry
 	{
@@ -457,7 +458,7 @@ private:
 		c.mov(cpu_dword(PC), CPU.PC);
 		// This instruction must be used following a store instruction that modifies the instruction stream.
 		c.mfence();
-		c.mov(*pos_var, (CPU.PC >> 2) + 1);
+		c.mov(*pos_var, (CPU.PC >> 2) + 1 + 0x2000000);
 		do_finalize = true;
 		LOG_OPCODE();
 	}
@@ -1142,6 +1143,7 @@ private:
 		c.mov(*addr, CPU.PC + 4);
 		c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
+		if (ra) c.or_(*pos_var, 0x2000000 << 2); // rude (check if not LR)
 		c.cmp(cpu_dword(GPR[rt]._u32[3]), 0);
 		c.cmovne(*pos_var, *addr);
 		c.shr(*pos_var, 2);
@@ -1160,6 +1162,7 @@ private:
 		c.mov(*addr, CPU.PC + 4);
 		c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
+		if (ra) c.or_(*pos_var, 0x2000000 << 2); // rude (check if not LR)
 		c.cmp(cpu_dword(GPR[rt]._u32[3]), 0);
 		c.cmove(*pos_var, *addr);
 		c.shr(*pos_var, 2);
@@ -1178,6 +1181,7 @@ private:
 		c.mov(*addr, CPU.PC + 4);
 		c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
+		if (ra) c.or_(*pos_var, 0x2000000 << 2); // rude (check if not LR)
 		c.cmp(cpu_word(GPR[rt]._u16[6]), 0);
 		c.cmovne(*pos_var, *addr);
 		c.shr(*pos_var, 2);
@@ -1196,6 +1200,7 @@ private:
 		c.mov(*addr, CPU.PC + 4);
 		c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
+		if (ra) c.or_(*pos_var, 0x2000000 << 2); // rude (check if not LR)
 		c.cmp(cpu_word(GPR[rt]._u16[6]), 0);
 		c.cmove(*pos_var, *addr);
 		c.shr(*pos_var, 2);
@@ -1244,6 +1249,7 @@ private:
 		do_finalize = true;
 
 		c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
+		if (ra) c.or_(*pos_var, 0x2000000 << 2); // rude (check if not LR)
 		c.shr(*pos_var, 2);
 		LOG_OPCODE();
 	}
@@ -1267,6 +1273,7 @@ private:
 		c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
 		c.mov(cpu_dword(GPR[rt]._u32[3]), CPU.PC + 4);
 		c.shr(*pos_var, 2);
+		c.or_(*pos_var, 0x2000000);
 		LOG_OPCODE();
 	}
 	void IRET(u32 ra)
@@ -1947,11 +1954,10 @@ private:
 	{
 		c.mov(*addr, cpu_dword(GPR[ra]._s32[3]));
 		c.cmp(*addr, cpu_dword(GPR[rb]._s32[3]));
-		c.mov(*addr, 0);
 		c.setg(addr->r8());
-		c.neg(*addr);
+		c.shl(*addr, 24);
 		c.mov(*pos_var, (CPU.PC >> 2) + 1);
-		c.xor_(*pos_var, *addr);
+		c.or_(*pos_var, *addr);
 		do_finalize = true;
 		LOG_OPCODE();
 	}
@@ -2308,11 +2314,10 @@ private:
 	{
 		c.mov(*addr, cpu_dword(GPR[ra]._u32[3]));
 		c.cmp(*addr, cpu_dword(GPR[rb]._u32[3]));
-		c.mov(*addr, 0);
 		c.seta(addr->r8());
-		c.neg(*addr);
+		c.shl(*addr, 24);
 		c.mov(*pos_var, (CPU.PC >> 2) + 1);
-		c.xor_(*pos_var, *addr);
+		c.or_(*pos_var, *addr);
 		do_finalize = true;
 		LOG_OPCODE();
 	}
@@ -2662,11 +2667,10 @@ private:
 	{
 		c.mov(*addr, cpu_dword(GPR[ra]._s32[3]));
 		c.cmp(*addr, cpu_dword(GPR[rb]._s32[3]));
-		c.mov(*addr, 0);
 		c.sete(addr->r8());
-		c.neg(*addr);
+		c.shl(*addr, 24);
 		c.mov(*pos_var, (CPU.PC >> 2) + 1);
-		c.xor_(*pos_var, *addr);
+		c.or_(*pos_var, *addr);
 		do_finalize = true;
 		LOG_OPCODE();
 	}
@@ -3324,11 +3328,10 @@ private:
 	{
 		c.mov(*addr, cpu_dword(GPR[ra]._s32[3]));
 		c.cmp(*addr, i10);
-		c.mov(*addr, 0);
 		c.setg(addr->r8());
-		c.neg(*addr);
+		c.shl(*addr, 24);
 		c.mov(*pos_var, (CPU.PC >> 2) + 1);
-		c.xor_(*pos_var, *addr);
+		c.or_(*pos_var, *addr);
 		do_finalize = true;
 		LOG_OPCODE();
 	}
@@ -3390,11 +3393,10 @@ private:
 	{
 		c.mov(*addr, cpu_dword(GPR[ra]._u32[3]));
 		c.cmp(*addr, i10);
-		c.mov(*addr, 0);
 		c.seta(addr->r8());
-		c.neg(*addr);
+		c.shl(*addr, 24);
 		c.mov(*pos_var, (CPU.PC >> 2) + 1);
-		c.xor_(*pos_var, *addr);
+		c.or_(*pos_var, *addr);
 		do_finalize = true;
 		LOG_OPCODE();
 	}
@@ -3441,11 +3443,10 @@ private:
 	{
 		c.mov(*addr, cpu_dword(GPR[ra]._u32[3]));
 		c.cmp(*addr, i10);
-		c.mov(*addr, 0);
 		c.sete(addr->r8());
-		c.neg(*addr);
+		c.shl(*addr, 24);
 		c.mov(*pos_var, (CPU.PC >> 2) + 1);
-		c.xor_(*pos_var, *addr);
+		c.or_(*pos_var, *addr);
 		do_finalize = true;
 		LOG_OPCODE();
 	}
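Note: the halt encoding changes here. Previously a taken HGT/HGTI/HEQ/... negated the whole return value (mov 0 / set / neg / xor), and the caller recovered it with res = ~res after testing res > 0xffff. The rewritten sequences (set / shl 24 / or) together with the 0x2000000 constants instead reserve bit 24 as a "halted" flag and bit 25 as a "re-validate compiled blocks" flag on top of the instruction-word index. A minimal sketch of the convention as I read it (constant and function names are mine, not from the patch):

	// Illustrative only: how a compiled block's u32 result appears to be packed.
	#include <cstdint>

	constexpr uint32_t SPU_RES_HALT  = 0x1000000; // bit 24: conditional halt was taken
	constexpr uint32_t SPU_RES_CHECK = 0x2000000; // bit 25: LS code may have changed, re-check

	inline uint32_t pack_result(uint32_t next_word_index, bool halt, bool check)
	{
		uint32_t res = next_word_index;    // e.g. (CPU.PC >> 2) + 1
		if (halt)  res |= SPU_RES_HALT;    // set<cc> + shl 24 + or in the JIT sequences
		if (check) res |= SPU_RES_CHECK;   // SYNC and the indirect branches set this
		return res;
	}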
diff --git a/rpcs3/Emu/Cell/SPURecompilerCore.cpp b/rpcs3/Emu/Cell/SPURecompilerCore.cpp
index 0427e8effb..68e929edf3 100644
--- a/rpcs3/Emu/Cell/SPURecompilerCore.cpp
+++ b/rpcs3/Emu/Cell/SPURecompilerCore.cpp
@@ -20,6 +20,7 @@ SPURecompilerCore::SPURecompilerCore(SPUThread& cpu)
 	, inter(new SPUInterpreter(cpu))
 	, CPU(cpu)
 	, first(true)
+	, need_check(false)
 {
 	memset(entry, 0, sizeof(entry));
 	X86CpuInfo inf;
@@ -192,20 +193,26 @@ u8 SPURecompilerCore::DecodeMemory(const u32 address)
 	{
 		// check data (hard way)
 		bool is_valid = true;
-		//for (u32 i = pos; i < (u32)(entry[pos].count + pos); i++)
-		//{
-		//	if (entry[i].valid != ls[i])
-		//	{
-		//		is_valid = false;
-		//		break;
-		//	}
-		//}
+		if (need_check)
+		{
+			for (u32 i = 0; i < 0x10000; i++)
+			{
+				if (entry[i].valid && entry[i].valid != ls[i])
+				{
+					is_valid = false;
+					break;
+				}
+			}
+			need_check = false;
+		}
 		// invalidate if necessary
 		if (!is_valid)
 		{
 			for (u32 i = 0; i < 0x10000; i++)
 			{
-				if (entry[i].pointer &&
+				if (!entry[i].pointer) continue;
+
+				if (!entry[i].valid || entry[i].valid != ls[i] ||
 					i + (u32)entry[i].count > (u32)pos && i < (u32)pos + (u32)entry[pos].count)
 				{
@@ -214,6 +221,11 @@ u8 SPURecompilerCore::DecodeMemory(const u32 address)
 					//RtlDeleteFunctionTable(&entry[i].info);
 #endif
 					entry[i].pointer = nullptr;
+					for (u32 j = i; j < i + (u32)entry[i].count; j++)
+					{
+						entry[j].valid = 0;
+					}
+					//need_check = true;
 				}
 			}
 			//LOG_ERROR(Log::SPU, "SPURecompilerCore::DecodeMemory(ls_addr=0x%x): code has changed", pos * sizeof(u32));
@@ -254,11 +266,17 @@ u8 SPURecompilerCore::DecodeMemory(const u32 address)
 	u32 res = pos;
 	res = func(cpu, vm::get_ptr<void>(m_offset), imm_table.data(), &g_imm_table);
 
-	if (res > 0xffff)
+	if (res & 0x1000000)
 	{
 		CPU.SPU.Status.SetValue(SPU_STATUS_STOPPED_BY_HALT);
 		CPU.Stop();
-		res = ~res;
+		res &= ~0x1000000;
+	}
+
+	if (res & 0x2000000)
+	{
+		need_check = true;
+		res &= ~0x2000000;
 	}
 
 	if (did_compile)
diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h
index e33291cf8c..ca17de309d 100644
--- a/rpcs3/Emu/Cell/SPUThread.h
+++ b/rpcs3/Emu/Cell/SPUThread.h
@@ -358,7 +358,7 @@ public:
 		}
 		else
 		{
-			InterlockedOr((volatile u64*)m_indval, ((u64)value << 32) | 1);
+			InterlockedOr(&m_indval, ((u64)value << 32) | 1);
 		}
 	}
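Note: the InterlockedOr above packs an SPU channel into a single 64-bit word as I read it — the value is OR'ed into the high half and bit 0 serves as the occupancy count — so one atomic op both merges the value and marks the channel readable. A self-contained rendering of the same idea with std::atomic (names are hypothetical, not rpcs3's):

	#include <atomic>
	#include <cstdint>

	std::atomic<uint64_t> indval{0}; // mirrors m_indval: [63:32] value, bit 0 = count

	void push_or(uint32_t value)
	{
		// same effect as InterlockedOr(&m_indval, ((u64)value << 32) | 1)
		indval.fetch_or((uint64_t(value) << 32) | 1u);
	}

	bool     has_value()  { return (indval.load() & 1) != 0; }
	uint32_t peek_value() { return uint32_t(indval.load() >> 32); }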
diff --git a/rpcs3/Emu/Memory/vm_atomic.h b/rpcs3/Emu/Memory/vm_atomic.h
index fc4912a419..867a8f0e9d 100644
--- a/rpcs3/Emu/Memory/vm_atomic.h
+++ b/rpcs3/Emu/Memory/vm_atomic.h
@@ -25,34 +25,34 @@ namespace vm
 	template<typename T> class _atomic_base
 	{
-		volatile T data;
 		typedef typename _to_atomic<T, sizeof(T)>::type atomic_type;
+		atomic_type data;
 
 	public:
 		// atomically compare data with cmp, replace with exch if equal, return previous data value anyway
 		__forceinline const T compare_and_swap(const T& cmp, const T& exch) volatile
 		{
-			const atomic_type res = InterlockedCompareExchange((volatile atomic_type*)&data, (atomic_type&)exch, (atomic_type&)cmp);
+			const atomic_type res = InterlockedCompareExchange(&data, (atomic_type&)(exch), (atomic_type&)(cmp));
 			return (T&)res;
 		}
 
 		// atomically compare data with cmp, replace with exch if equal, return true if data was replaced
 		__forceinline bool compare_and_swap_test(const T& cmp, const T& exch) volatile
 		{
-			return InterlockedCompareExchange((volatile atomic_type*)&data, (atomic_type&)exch, (atomic_type&)cmp) == (atomic_type&)cmp;
+			return InterlockedCompareExchange(&data, (atomic_type&)(exch), (atomic_type&)(cmp)) == (atomic_type&)(cmp);
 		}
 
 		// read data with memory barrier
 		__forceinline const T read_sync() const volatile
 		{
-			const atomic_type res = InterlockedCompareExchange((volatile atomic_type*)&data, 0, 0);
+			const atomic_type res = InterlockedCompareExchange(const_cast<volatile atomic_type*>(&data), 0, 0);
 			return (T&)res;
 		}
 
 		// atomically replace data with exch, return previous data value
 		__forceinline const T exchange(const T& exch) volatile
 		{
-			const atomic_type res = InterlockedExchange((volatile atomic_type*)&data, (atomic_type&)exch);
+			const atomic_type res = InterlockedExchange(&data, (atomic_type&)(exch));
 			return (T&)res;
 		}
@@ -65,7 +65,7 @@ namespace vm
 		// write data without memory barrier
 		__forceinline void write_relaxed(const T& value) volatile
 		{
-			(T&)data = value;
+			data = (atomic_type&)(value);
 		}
 
 		// perform atomic operation on data
@@ -81,7 +81,7 @@ namespace vm
 		}
 
 		// perform atomic operation on data with special exit condition (if intermediate result != proceed_value)
-		template<typename RT, typename FT> __forceinline RT atomic_op(const RT& proceed_value, const FT atomic_proc) volatile
+		template<typename RT, typename FT> __forceinline RT atomic_op(const RT proceed_value, const FT atomic_proc) volatile
 		{
 			while (true)
 			{
@@ -95,37 +95,37 @@ namespace vm
 
 		__forceinline const T _or(const T& right) volatile
 		{
-			const atomic_type res = InterlockedOr((volatile atomic_type*)&data, (atomic_type&)right);
+			const atomic_type res = InterlockedOr(&data, (atomic_type&)(right));
 			return (T&)res;
 		}
 
 		__forceinline const T _and(const T& right) volatile
 		{
-			const atomic_type res = InterlockedAnd((volatile atomic_type*)&data, (atomic_type&)right);
+			const atomic_type res = InterlockedAnd(&data, (atomic_type&)(right));
 			return (T&)res;
 		}
 
 		__forceinline const T _xor(const T& right) volatile
 		{
-			const atomic_type res = InterlockedXor((volatile atomic_type*)&data, (atomic_type&)right);
+			const atomic_type res = InterlockedXor(&data, (atomic_type&)(right));
 			return (T&)res;
 		}
 
 		__forceinline const T operator |= (const T& right) volatile
 		{
-			const atomic_type res = InterlockedOr((volatile atomic_type*)&data, (atomic_type&)right) | (atomic_type&)right;
+			const atomic_type res = InterlockedOr(&data, (atomic_type&)(right)) | (atomic_type&)(right);
 			return (T&)res;
 		}
 
 		__forceinline const T operator &= (const T& right) volatile
 		{
-			const atomic_type res = InterlockedAnd((volatile atomic_type*)&data, (atomic_type&)right) & (atomic_type&)right;
+			const atomic_type res = InterlockedAnd(&data, (atomic_type&)(right)) & (atomic_type&)(right);
 			return (T&)res;
 		}
 
 		__forceinline const T operator ^= (const T& right) volatile
 		{
-			const atomic_type res = InterlockedXor((volatile atomic_type*)&data, (atomic_type&)right) ^ (atomic_type&)right;
+			const atomic_type res = InterlockedXor(&data, (atomic_type&)(right)) ^ (atomic_type&)(right);
 			return (T&)res;
 		}
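Note: declaring data as atomic_type removes the pointer cast at every call site; only read_sync(), being const volatile, still needs a const_cast. The calling pattern these wrappers enable is load-snapshot / mutate-copy / publish-by-CAS. A generic, self-contained sketch of that pattern (stand-in types; the real fields are be_t<u16> inside vm::atomic):

	#include <atomic>
	#include <cstdint>
	#include <cstring>

	struct pop1_view { uint16_t h1, h2, h3, h4; }; // stand-in for CellSyncLFQueue::pop1_t

	bool bump_h4(std::atomic<uint64_t>& pop1)
	{
		for (;;)
		{
			uint64_t old_bits = pop1.load();       // read_sync() in vm::atomic
			pop1_view v;
			std::memcpy(&v, &old_bits, sizeof(v)); // typed copy of the snapshot
			v.h4++;                                // mutate the copy, never the shared word
			uint64_t new_bits;
			std::memcpy(&new_bits, &v, sizeof(v));
			if (pop1.compare_exchange_strong(old_bits, new_bits)) return true; // CAS(old, new)
		}
	}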
diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
index 6b7ba92de5..440c2f08f4 100644
--- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
+++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
@@ -20,17 +20,86 @@ extern u32 libsre;
 extern u32 libsre_rtoc;
 #endif
 
-s64 spursCreateLv2EventQueue(vm::ptr<CellSpurs> spurs, u32& queue_id, u32 arg3, u32 arg4, u64 name_u64)
+s64 spursAttachLv2EventQueue(vm::ptr<CellSpurs> spurs, u32 queue, vm::ptr<u8> port, s32 isDynamic, bool wasCreated)
 {
+#ifdef PRX_DEBUG_XXX
+	return cb_call<s64, vm::ptr<CellSpurs>, u32, vm::ptr<u8>, s32, bool>(GetCurrentPPUThread(), libsre + 0xAE34, libsre_rtoc,
+		spurs, queue, port, isDynamic, wasCreated);
+#else
+	if (!spurs || !port)
+	{
+		return CELL_SPURS_CORE_ERROR_NULL_POINTER;
+	}
+	if (spurs.addr() % 128)
+	{
+		return CELL_SPURS_CORE_ERROR_ALIGN;
+	}
+	if (spurs->m.unk21.ToBE())
+	{
+		return CELL_SPURS_CORE_ERROR_STAT;
+	}
+
+	u32 unk1 = 0;
 #ifdef PRX_DEBUG
+	unk1 = cb_call<u32>(GetCurrentPPUThread(), libsre + 0x10900, libsre_rtoc);
+#endif
+
+	u8 _port = 0x3f;
+	u8 port_start = 0x10;
+	u64 port_mask = 0;
+	if (isDynamic == 0)
+	{
+		_port = *port;
+		if (_port > 0x3f)
+		{
+			return CELL_SPURS_CORE_ERROR_INVAL;
+		}
+		if (unk1 <= 0x17ffff && _port > 0xf)
+		{
+			return CELL_SPURS_CORE_ERROR_PERM;
+		}
+		port_start = _port;
+	}
+
+	for (u32 i = port_start + 1; i < _port; i++)
+	{
+		port_mask |= 1ull << (i - 1);
+	}
+
+	if (s32 res = sys_spu_thread_group_connect_event_all_threads(spurs->m.spuTG, queue, port_mask, port))
+	{
+		if (res == CELL_EISCONN)
+		{
+			return CELL_SPURS_CORE_ERROR_BUSY;
+		}
+		return res;
+	}
+
+	if (!wasCreated)
+	{
+		spurs->m.spups |= be_t<u64>::make(1ull << *port); // atomic bitwise or
+	}
+
+	return CELL_OK;
+#endif
+}
+
+s64 spursCreateLv2EventQueue(vm::ptr<CellSpurs> spurs, u32& queue_id, vm::ptr<u8> port, s32 size, u64 name_u64)
+{
+#ifdef PRX_DEBUG_XXX
 	vm::var<be_t<u32>> queue;
-	s32 res = cb_call<s32, vm::ptr<CellSpurs>, vm::ptr<be_t<u32>>, u32, u32, u32>(GetCurrentPPUThread(), libsre + 0xB14C, libsre_rtoc,
-		spurs, queue, arg3, arg4, vm::read32(libsre_rtoc - 0x7E2C));
+	s32 res = cb_call<s32, vm::ptr<CellSpurs>, vm::ptr<be_t<u32>>, vm::ptr<u8>, s32, u32>(GetCurrentPPUThread(), libsre + 0xB14C, libsre_rtoc,
+		spurs, queue, port, size, vm::read32(libsre_rtoc - 0x7E2C));
 	queue_id = queue->ToLE();
 	return res;
 #else
-	// TODO
-	queue_id = event_queue_create(SYS_SYNC_PRIORITY, SYS_PPU_QUEUE, *(u64*)"+QUEUE+", 0, 1);
+	queue_id = event_queue_create(SYS_SYNC_FIFO, SYS_PPU_QUEUE, name_u64, 0, size);
+	if (!queue_id)
+	{
+		return CELL_EAGAIN; // rough
+	}
+
+	assert(spursAttachLv2EventQueue(spurs, queue_id, port, 1, true) == CELL_OK);
 	return CELL_OK;
 #endif
 }
@@ -108,7 +177,7 @@ s64 spursInit(
 	for (u32 i = 0; i < 0x10; i++)
 	{
 		sem = semaphore_create(0, 1, SYS_SYNC_PRIORITY, *(u64*)"_spuWkl");
-		assert(sem && ~sem); // should rollback if semaphore creating failed and return the error
+		assert(sem && ~sem); // should rollback if semaphore creation failed and return the error
 		spurs->m.sub1[i].sem = sem;
 	}
 	if (isSecond)
@@ -182,7 +251,7 @@ s64 spursInit(
 	spurs->m.ppuPriority = ppuPriority;
 
 	u32 queue;
-	assert(spursCreateLv2EventQueue(spurs, queue, spurs.addr() + 0xc9, 0x2a, *(u64*)"_spuPrv") == CELL_OK);
+	assert(spursCreateLv2EventQueue(spurs, queue, vm::ptr<u8>::make(spurs.addr() + 0xc9), 0x2a, *(u64*)"_spuPrv") == CELL_OK);
 	spurs->m.queue = queue;
 
 	u32 port = event_port_create(0);
@@ -618,13 +687,13 @@ s64 cellSpursSetPreemptionVictimHints(vm::ptr<CellSpurs> spurs, vm::ptr
 
 s64 cellSpursAttachLv2EventQueue(vm::ptr<CellSpurs> spurs, u32 queue, vm::ptr<u8> port, s32 isDynamic)
 {
-#ifdef PRX_DEBUG
 	cellSpurs->Warning("cellSpursAttachLv2EventQueue(spurs_addr=0x%x, queue=%d, port_addr=0x%x, isDynamic=%d)",
 		spurs.addr(), queue, port.addr(), isDynamic);
+
+#ifdef PRX_DEBUG_XXX
 	return GetCurrentPPUThread().FastCall2(libsre + 0xAFE0, libsre_rtoc);
 #else
-	UNIMPLEMENTED_FUNC(cellSpurs);
-	return CELL_OK;
+	return spursAttachLv2EventQueue(spurs, queue, port, isDynamic, false);
 #endif
 }
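Note: spups (the new header field, presumably "SPU port bits") tracks which of the 64 event ports have a queue attached. In the static path an explicit port above 0xf is rejected on older SDKs (the unk1 <= 0x17ffff check); in the dynamic path the port_mask handed to sys_spu_thread_group_connect_event_all_threads appears to let the syscall pick a free port in 0x10..0x3f. A small sketch of the bit bookkeeping only (helper names are mine, and the real update is atomic via m.spups |= be_t<u64>::make(...)):

	#include <cstdint>

	constexpr unsigned MAX_SPU_PORT = 0x3f;

	inline uint64_t port_bit(uint8_t port) { return 1ull << port; }

	inline bool try_reserve_port(uint64_t& spups, uint8_t port)
	{
		if (port > MAX_SPU_PORT || (spups & port_bit(port)))
		{
			return false; // out of range or already attached
		}
		spups |= port_bit(port);
		return true;
	}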
diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h
index 717e9afe94..d3ab987c2d 100644
--- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h
+++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h
@@ -185,7 +185,7 @@ struct CellSpurs
 	u8 unknown0[0xB0 - 0x78];
 	be_t<u32> unk0; // 0x0B0
 	u8 unknown2[0xC0 - 0xB4];
-	u8 unk6[0x10]; // 0x0C0
+	u8 unk6[0x10]; // 0x0C0 (SPU port at 0xc9)
 	u8 unknown1[0x100 - 0x0D0];
 	_sub_str1 sub1[0x10]; // 0x100
 	be_t<u64> unk22; // 0x900
@@ -219,7 +219,7 @@ struct CellSpurs
 	be_t<u32> unk5; // 0xD9C
 	be_t<u32> revision; // 0xDA0
 	be_t<u32> sdkVersion; // 0xDA4
-	u8 unknown8[0xDB0 - 0xDA8];
+	vm::atomic<u64> spups; // 0xDA8
 	sys_lwmutex_t mutex; // 0xDB0
 	sys_lwcond_t cond; // 0xDC8
 	u8 unknown9[0xF00 - 0xDD0];
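Note: carving a typed field out of an opaque unknown8 byte range only works if the surrounding offsets stay put. A sketch of how such reverse-engineered layouts are usually pinned down (stand-in struct, not the real CellSpurs):

	#include <cstddef>
	#include <cstdint>

	struct spurs_probe
	{
		uint8_t  pad[0xDA8];  // everything up to 0xDA8
		uint64_t spups;       // expected at 0xDA8
		uint8_t  mutex[0x18]; // sys_lwmutex_t placeholder, expected at 0xDB0
	};

	static_assert(offsetof(spurs_probe, spups) == 0xDA8, "spups moved");
	static_assert(offsetof(spurs_probe, mutex) == 0xDB0, "mutex moved");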
diff --git a/rpcs3/Emu/SysCalls/Modules/cellSync.cpp b/rpcs3/Emu/SysCalls/Modules/cellSync.cpp
index a09d4c6103..793115d879 100644
--- a/rpcs3/Emu/SysCalls/Modules/cellSync.cpp
+++ b/rpcs3/Emu/SysCalls/Modules/cellSync.cpp
@@ -923,17 +923,16 @@ s32 syncLFQueueInitialize(vm::ptr<CellSyncLFQueue> queue, vm::ptr<u8> buffer, u3
 	u32 old_value;
 	while (true)
 	{
-		const u32 old_data = queue->m_data();
-		CellSyncLFQueue new_data;
-		new_data.m_data() = old_data;
+		const auto old = queue->init.read_relaxed();
+		auto init = old;
 
-		if (old_data)
+		if (old.ToBE())
 		{
-			if (sdk_ver > 0x17ffff && old_data != se32(2))
+			if (sdk_ver > 0x17ffff && old != 2)
 			{
 				return CELL_SYNC_ERROR_STAT;
 			}
-			old_value = old_data;
+			old_value = old.ToLE();
 		}
 		else
 		{
@@ -948,14 +947,14 @@ s32 syncLFQueueInitialize(vm::ptr<CellSyncLFQueue> queue, vm::ptr<u8> buffer, u3
 				}
 			}
 		}
-			new_data.m_data() = se32(1);
-			old_value = se32(1);
+			init = 1;
+			old_value = 1;
 		}
 
-		if (InterlockedCompareExchange(&queue->m_data(), new_data.m_data(), old_data) == old_data) break;
+		if (queue->init.compare_and_swap_test(old, init)) break;
 	}
 
-	if (old_value == se32(2))
+	if (old_value == 2)
 	{
 		if ((u32)queue->m_size != size || (u32)queue->m_depth != depth || queue->m_buffer.addr() != buffer.addr())
 		{
@@ -975,12 +974,11 @@ s32 syncLFQueueInitialize(vm::ptr<CellSyncLFQueue> queue, vm::ptr<u8> buffer, u3
 		syncLFQueueInit(queue, buffer, size, depth, direction, eaSignal);
 
 		// prx: sync, zeroize u32 at 0x2c offset
-		InterlockedCompareExchange(&queue->m_data(), 0, 0);
-		queue->m_data() = 0;
+		queue->init.exchange({});
 	}
 
 	// prx: sync
-	InterlockedCompareExchange(&queue->m_data(), 0, 0);
+	queue->init.read_sync();
 
 	return CELL_OK;
 #endif
 }
@@ -1011,28 +1009,27 @@ s32 syncLFQueueGetPushPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32
 		return -1;
 	}
 
-	const u64 old_data = InterlockedCompareExchange(&queue->m_push1(), 0, 0);
-	CellSyncLFQueue new_queue;
-	new_queue.m_push1() = old_data;
+	const auto old = queue->push1.read_sync();
+	auto push = old;
 
 	if (var1)
 	{
-		new_queue.m_h7 = 0;
+		push.m_h7 = 0;
 	}
 	if (isBlocking && useEventQueue && *(u32*)queue->m_bs == -1)
 	{
 		return CELL_SYNC_ERROR_STAT;
 	}
 
-	s32 var2 = (s32)(s16)new_queue.m_h8;
+	s32 var2 = (s32)(s16)push.m_h8;
 	s32 res;
-	if (useEventQueue && ((s32)(u16)new_queue.m_h5 != var2 || new_queue.m_h7.ToBE() != 0))
+	if (useEventQueue && ((s32)push.m_h5 != var2 || push.m_h7.ToBE() != 0))
 	{
 		res = CELL_SYNC_ERROR_BUSY;
 	}
 	else
 	{
-		var2 -= (s32)(u16)queue->m_h1;
+		var2 -= (s32)(u16)queue->pop1.read_relaxed().m_h1;
 		if (var2 < 0)
 		{
 			var2 += depth * 2;
@@ -1040,21 +1037,21 @@ s32 syncLFQueueGetPushPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32
 		if (var2 < depth)
 		{
-			pointer = (s16)new_queue.m_h8;
+			pointer = (s16)push.m_h8;
 			if (pointer + 1 >= depth * 2)
 			{
-				new_queue.m_h8 = 0;
+				push.m_h8 = 0;
 			}
 			else
 			{
-				new_queue.m_h8++;
+				push.m_h8++;
 			}
 			res = CELL_OK;
 		}
 		else if (!isBlocking)
 		{
 			res = CELL_SYNC_ERROR_AGAIN;
-			if (!new_queue.m_h7.ToBE() || res)
+			if (!push.m_h7.ToBE() || res)
 			{
 				return res;
 			}
@@ -1067,7 +1064,7 @@ s32 syncLFQueueGetPushPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32
 		else
 		{
 			res = CELL_OK;
-			new_queue.m_h7 = 3;
+			push.m_h7 = 3;
 			if (isBlocking != 3)
 			{
 				break;
@@ -1075,9 +1072,9 @@ s32 syncLFQueueGetPushPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32
 		}
 	}
 
-	if (InterlockedCompareExchange(&queue->m_push1(), new_queue.m_push1(), old_data) == old_data)
+	if (queue->push1.compare_and_swap_test(old, push))
 	{
-		if (!new_queue.m_h7.ToBE() || res)
+		if (!push.m_h7.ToBE() || res)
 		{
 			return res;
 		}
@@ -1138,19 +1135,19 @@ s32 syncLFQueueCompletePushPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer,
 	while (true)
 	{
 		const u32 old_data = InterlockedCompareExchange(&queue->m_push2(), 0, 0);
-		CellSyncLFQueue new_queue;
-		new_queue.m_push2() = old_data;
+		CellSyncLFQueue new_;
+		new_.m_push2() = old_data;
 
-		const u32 old_data2 = queue->m_push3();
-		new_queue.m_push3() = old_data2;
+		const auto old2 = queue->push3.read_relaxed();
+		auto push = old2;
 
-		s32 var1 = pointer - (u16)new_queue.m_h5;
+		s32 var1 = pointer - (u16)push.m_h5;
 		if (var1 < 0)
 		{
 			var1 += depth * 2;
 		}
 
-		s32 var2 = (s32)(s16)queue->m_h4 - (s32)(u16)queue->m_h1;
+		s32 var2 = (s32)(s16)queue->pop1.read_relaxed().m_h4 - (s32)(u16)queue->pop1.read_relaxed().m_h1;
 		if (var2 < 0)
 		{
 			var2 += depth * 2;
@@ -1166,7 +1163,7 @@ s32 syncLFQueueCompletePushPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer,
 		{
 			var9_ = 1 << var9_;
 		}
-		s32 var9 = ~(var9_ | (u16)new_queue.m_h6);
+		s32 var9 = ~(var9_ | (u16)push.m_h6);
 		// count leading zeros in u16
 		{
 			u16 v = var9;
@@ -1179,7 +1176,7 @@ s32 syncLFQueueCompletePushPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer,
 			}
 		}
 
-		s32 var5 = (s32)(u16)new_queue.m_h6 | var9_;
+		s32 var5 = (s32)(u16)push.m_h6 | var9_;
 		if (var9 & 0x30)
 		{
 			var5 = 0;
@@ -1189,13 +1186,13 @@ s32 syncLFQueueCompletePushPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer,
 			var5 <<= var9;
 		}
 
-		s32 var3 = (u16)new_queue.m_h5 + var9;
+		s32 var3 = (u16)push.m_h5 + var9;
 		if (var3 >= depth * 2)
 		{
 			var3 -= depth * 2;
 		}
 
-		u16 pack = new_queue.m_hs[0]; // three packed 5-bit fields
+		u16 pack = new_.m_hs[0]; // three packed 5-bit fields
 
 		s32 var4 = ((pack >> 10) & 0x1f) - ((pack >> 5) & 0x1f);
 		if (var4 < 0)
@@ -1233,7 +1230,7 @@ s32 syncLFQueueCompletePushPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer,
 				var12 = (var12 + 1) << 10;
 			}
 
-			new_queue.m_hs[0] = (pack & 0x83ff) | var12;
+			new_.m_hs[0] = (pack & 0x83ff) | var12;
 			var6 = (u16)queue->m_hs[1 + 2 * var11];
 		}
 		else
@@ -1241,14 +1238,15 @@ s32 syncLFQueueCompletePushPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer,
 			var6 = -1;
 		}
 
-		s32 var7 = (var3 << 16) | (var5 & 0xffff);
+		push.m_h5 = (u16)var3;
+		push.m_h6 = (u16)var5;
 
-		if (InterlockedCompareExchange(&queue->m_push2(), new_queue.m_push2(), old_data) == old_data)
+		if (InterlockedCompareExchange(&queue->m_push2(), new_.m_push2(), old_data) == old_data)
 		{
 			assert(var2 + var4 < 16);
 			if (var6 != -1)
 			{
-				bool exch = InterlockedCompareExchange(&queue->m_push3(), re32(var7), old_data2) == old_data2;
+				bool exch = queue->push3.compare_and_swap_test(old2, push);
 				assert(exch);
 				if (exch)
 				{
@@ -1261,7 +1259,7 @@ s32 syncLFQueueCompletePushPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer,
 			pack = queue->m_hs[0];
 			if ((pack & 0x1f) == ((pack >> 10) & 0x1f))
 			{
-				if (InterlockedCompareExchange(&queue->m_push3(), re32(var7), old_data2) == old_data2)
+				if (queue->push3.compare_and_swap_test(old2, push))
 				{
 					return CELL_OK;
 				}
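Note: the removed s32 var7 = (var3 << 16) | (var5 & 0xffff) and its re32(var7) CAS are byte-for-byte equivalent to writing the two big-endian halfwords of push3_t directly, which is what push.m_h5/m_h6 plus push3.compare_and_swap_test now do. A stand-in sketch of that equivalence (plain u16 fields here; the real ones are be_t<u16>):

	#include <cstdint>

	struct push3_view { uint16_t m_h5, m_h6; };

	inline push3_view make_push3(int32_t var3, int32_t var5)
	{
		push3_view push;
		push.m_h5 = (uint16_t)var3; // high halfword of the old re32(var7) image
		push.m_h6 = (uint16_t)var5; // low halfword
		return push;
	}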
@@ -1409,28 +1407,27 @@ s32 syncLFQueueGetPopPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32 i
 		return -1;
 	}
 
-	const u64 old_data = InterlockedCompareExchange(&queue->m_pop1(), 0, 0);
-	CellSyncLFQueue new_queue;
-	new_queue.m_pop1() = old_data;
+	const auto old = queue->pop1.read_sync();
+	auto pop = old;
 
 	if (var1)
 	{
-		new_queue.m_h3 = 0;
+		pop.m_h3 = 0;
 	}
 	if (isBlocking && useEventQueue && *(u32*)queue->m_bs == -1)
 	{
 		return CELL_SYNC_ERROR_STAT;
 	}
 
-	s32 var2 = (s32)(s16)new_queue.m_h4;
+	s32 var2 = (s32)(s16)pop.m_h4;
 	s32 res;
-	if (useEventQueue && ((s32)(u16)new_queue.m_h1 != var2 || new_queue.m_h3.ToBE() != 0))
+	if (useEventQueue && ((s32)(u16)pop.m_h1 != var2 || pop.m_h3.ToBE() != 0))
 	{
 		res = CELL_SYNC_ERROR_BUSY;
 	}
 	else
 	{
-		var2 = (s32)(u16)queue->m_h5 - var2;
+		var2 = (s32)(u16)queue->push1.read_relaxed().m_h5 - var2;
 		if (var2 < 0)
 		{
 			var2 += depth * 2;
@@ -1438,21 +1435,21 @@ s32 syncLFQueueGetPopPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32 i
 		if (var2 > 0)
 		{
-			pointer = (s16)new_queue.m_h4;
+			pointer = (s16)pop.m_h4;
 			if (pointer + 1 >= depth * 2)
 			{
-				new_queue.m_h4 = 0;
+				pop.m_h4 = 0;
 			}
 			else
 			{
-				new_queue.m_h4++;
+				pop.m_h4++;
 			}
 			res = CELL_OK;
 		}
 		else if (!isBlocking)
 		{
 			res = CELL_SYNC_ERROR_AGAIN;
-			if (!new_queue.m_h3.ToBE() || res)
+			if (!pop.m_h3.ToBE() || res)
 			{
 				return res;
 			}
@@ -1465,7 +1462,7 @@ s32 syncLFQueueGetPopPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32 i
 		else
 		{
 			res = CELL_OK;
-			new_queue.m_h3 = 3;
+			pop.m_h3 = 3;
 			if (isBlocking != 3)
 			{
 				break;
@@ -1473,9 +1470,9 @@ s32 syncLFQueueGetPopPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32 i
 		}
 	}
 
-	if (InterlockedCompareExchange(&queue->m_pop1(), new_queue.m_pop1(), old_data) == old_data)
+	if (queue->pop1.compare_and_swap_test(old, pop))
 	{
-		if (!new_queue.m_h3.ToBE() || res)
+		if (!pop.m_h3.ToBE() || res)
 		{
 			return res;
 		}
@@ -1536,19 +1533,19 @@ s32 syncLFQueueCompletePopPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer, c
 	while (true)
 	{
 		const u32 old_data = InterlockedCompareExchange(&queue->m_pop2(), 0, 0);
-		CellSyncLFQueue new_queue;
-		new_queue.m_pop2() = old_data;
+		CellSyncLFQueue new_;
+		new_.m_pop2() = old_data;
 
-		const u32 old_data2 = queue->m_pop3();
-		new_queue.m_pop3() = old_data2;
+		const auto old2 = queue->pop3.read_relaxed();
+		auto pop = old2;
 
-		s32 var1 = pointer - (u16)new_queue.m_h1;
+		s32 var1 = pointer - (u16)pop.m_h1;
 		if (var1 < 0)
 		{
 			var1 += depth * 2;
 		}
 
-		s32 var2 = (s32)(s16)queue->m_h8 - (s32)(u16)queue->m_h5;
+		s32 var2 = (s32)(s16)queue->push1.read_relaxed().m_h8 - (s32)(u16)queue->push1.read_relaxed().m_h5;
 		if (var2 < 0)
 		{
 			var2 += depth * 2;
@@ -1564,7 +1561,7 @@ s32 syncLFQueueCompletePopPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer, c
 		{
 			var9_ = 1 << var9_;
 		}
-		s32 var9 = ~(var9_ | (u16)new_queue.m_h2);
+		s32 var9 = ~(var9_ | (u16)pop.m_h2);
 		// count leading zeros in u16
 		{
 			u16 v = var9;
@@ -1577,7 +1574,7 @@ s32 syncLFQueueCompletePopPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer, c
 			}
 		}
 
-		s32 var5 = (s32)(u16)new_queue.m_h2 | var9_;
+		s32 var5 = (s32)(u16)pop.m_h2 | var9_;
 		if (var9 & 0x30)
 		{
 			var5 = 0;
@@ -1587,13 +1584,13 @@ s32 syncLFQueueCompletePopPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer, c
 			var5 <<= var9;
 		}
 
-		s32 var3 = (u16)new_queue.m_h1 + var9;
+		s32 var3 = (u16)pop.m_h1 + var9;
 		if (var3 >= depth * 2)
 		{
 			var3 -= depth * 2;
 		}
 
-		u16 pack = new_queue.m_hs[16]; // three packed 5-bit fields
+		u16 pack = new_.m_hs[16]; // three packed 5-bit fields
 
 		s32 var4 = ((pack >> 10) & 0x1f) - ((pack >> 5) & 0x1f);
 		if (var4 < 0)
@@ -1635,17 +1632,18 @@ s32 syncLFQueueCompletePopPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer, c
 				var12 = (var12 + 1) << 10;
 			}
 
-			new_queue.m_hs[0] = (pack & 0x83ff) | var12;
+			new_.m_hs[0] = (pack & 0x83ff) | var12;
 			var6 = (u16)queue->m_hs[17 + 2 * var11];
 		}
 
-		s32 var7 = (var3 << 16) | (var5 & 0xffff);
+		pop.m_h1 = (u16)var3;
+		pop.m_h2 = (u16)var5;
 
-		if (InterlockedCompareExchange(&queue->m_pop2(), new_queue.m_pop2(), old_data) == old_data)
+		if (InterlockedCompareExchange(&queue->m_pop2(), new_.m_pop2(), old_data) == old_data)
 		{
 			if (var6 != -1)
 			{
-				bool exch = InterlockedCompareExchange(&queue->m_pop3(), re32(var7), old_data2) == old_data2;
+				bool exch = queue->pop3.compare_and_swap_test(old2, pop);
 				assert(exch);
 				if (exch)
 				{
@@ -1658,7 +1656,7 @@ s32 syncLFQueueCompletePopPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer, c
 			pack = queue->m_hs[16];
 			if ((pack & 0x1f) == ((pack >> 10) & 0x1f))
 			{
-				if (InterlockedCompareExchange(&queue->m_pop3(), re32(var7), old_data2) == old_data2)
+				if (queue->pop3.compare_and_swap_test(old2, pop))
 				{
 					return CELL_OK;
 				}
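Note: both complete-pointer paths depend on the inline "count leading zeros in u16" block to locate the first free slot bit in the mask. A self-contained version of that step, for reference only (the patch keeps the original shift loop):

	#include <cstdint>

	inline int clz16(uint16_t v)
	{
		int n = 0;
		for (uint16_t mask = 0x8000; mask != 0 && (v & mask) == 0; mask >>= 1)
		{
			n++;
		}
		return n; // 16 when v == 0
	}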
@@ -1798,12 +1796,10 @@ s32 cellSyncLFQueueClear(vm::ptr<CellSyncLFQueue> queue)
 	// TODO: optimize if possible
 	while (true)
 	{
-		const u64 old_data = InterlockedCompareExchange(&queue->m_pop1(), 0, 0);
-		CellSyncLFQueue new_queue;
-		new_queue.m_pop1() = old_data;
+		const auto old = queue->pop1.read_sync();
+		auto pop = old;
 
-		const u64 new_data = queue->m_push1();
-		new_queue.m_push1() = new_data;
+		const auto push = queue->push1.read_relaxed();
 
 		s32 var1, var2;
 		if (queue->m_direction.ToBE() != se32(CELL_SYNC_QUEUE_ANY2ANY))
@@ -1812,19 +1808,24 @@ s32 cellSyncLFQueueClear(vm::ptr<CellSyncLFQueue> queue)
 		}
 		else
 		{
-			var1 = (u16)new_queue.m_h7;
-			var2 = (u16)new_queue.m_h3;
+			var1 = (u16)push.m_h7;
+			var2 = (u16)pop.m_h3;
 		}
 
-		if ((s32)(s16)new_queue.m_h4 != (s32)(u16)new_queue.m_h1 ||
-			(s32)(s16)new_queue.m_h8 != (s32)(u16)new_queue.m_h5 ||
+		if ((s32)(s16)pop.m_h4 != (s32)(u16)pop.m_h1 ||
+			(s32)(s16)push.m_h8 != (s32)(u16)push.m_h5 ||
 			((var2 >> 10) & 0x1f) != (var2 & 0x1f) ||
 			((var1 >> 10) & 0x1f) != (var1 & 0x1f))
 		{
 			return CELL_SYNC_ERROR_BUSY;
 		}
 
-		if (InterlockedCompareExchange(&queue->m_pop1(), new_data, old_data) == old_data) break;
+		pop.m_h1 = push.m_h5;
+		pop.m_h2 = push.m_h6;
+		pop.m_h3 = push.m_h7;
+		pop.m_h4 = push.m_h8;
+
+		if (queue->pop1.compare_and_swap_test(old, pop)) break;
 	}
 
 	return CELL_OK;
@@ -1846,12 +1847,12 @@ s32 cellSyncLFQueueSize(vm::ptr<CellSyncLFQueue> queue, vm::ptr<be_t<u32>> size)
 	// TODO: optimize if possible
 	while (true)
 	{
-		const u32 old_data = InterlockedCompareExchange(&queue->m_pop3(), 0, 0);
+		const auto old = queue->pop3.read_sync();
 
-		u32 var1 = (u16)queue->m_h1;
-		u32 var2 = (u16)queue->m_h5;
+		u32 var1 = (u16)queue->pop1.read_relaxed().m_h1;
+		u32 var2 = (u16)queue->push1.read_relaxed().m_h5;
 
-		if (InterlockedCompareExchange(&queue->m_pop3(), old_data, old_data) == old_data)
+		if (queue->pop3.compare_and_swap_test(old, old))
 		{
 			if (var1 <= var2)
 			{
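Note: cellSyncLFQueueSize validates its two relaxed reads by re-CASing pop3 with itself — if compare_and_swap_test(old, old) succeeds, the guard word never changed while m_h1/m_h5 were read, so the snapshot is consistent; otherwise it retries. The same pattern in generic form (hypothetical helper, std::atomic stand-ins):

	#include <atomic>
	#include <cstdint>

	template<typename T>
	bool read_consistent(std::atomic<uint32_t>& guard, T& out, T (*read_fields)())
	{
		const uint32_t before = guard.load(); // read_sync() in the patch
		out = read_fields();                  // relaxed reads of the pop1/push1 halfwords
		uint32_t expected = before;
		return guard.compare_exchange_strong(expected, before); // CAS(old, old)
	}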
diff --git a/rpcs3/Emu/SysCalls/Modules/cellSync.h b/rpcs3/Emu/SysCalls/Modules/cellSync.h
index a2cb2fcadd..4a8086624b 100644
--- a/rpcs3/Emu/SysCalls/Modules/cellSync.h
+++ b/rpcs3/Emu/SysCalls/Modules/cellSync.h
@@ -97,11 +97,6 @@ enum CellSyncQueueDirection : u32 // CellSyncLFQueueDirection
 struct CellSyncLFQueue
 {
-	struct init_t
-	{
-		be_t<u32> m_sync;
-	};
-
 	struct pop1_t
 	{
 		be_t<u16> m_h1;
@@ -110,6 +105,12 @@ struct CellSyncLFQueue
 		be_t<u16> m_h4;
 	};
 
+	struct pop3_t
+	{
+		be_t<u16> m_h1;
+		be_t<u16> m_h2;
+	};
+
 	struct push1_t
 	{
 		be_t<u16> m_h5;
@@ -118,72 +119,44 @@ struct CellSyncLFQueue
 		be_t<u16> m_h8;
 	};
 
-	union
+	struct push3_t
 	{
-		struct
-		{
-			vm::atomic<pop1_t> pop1;
-			vm::atomic<push1_t> push1;
-		};
-		struct
-		{
-			be_t<u16> m_h1; // 0x0
-			be_t<u16> m_h2; // 0x2
-			be_t<u16> m_h3; // 0x4
-			be_t<u16> m_h4; // 0x6
-			be_t<u16> m_h5; // 0x8
-			be_t<u16> m_h6; // 0xA
-			be_t<u16> m_h7; // 0xC
-			be_t<u16> m_h8; // 0xE
-		};
+		be_t<u16> m_h5;
+		be_t<u16> m_h6;
 	};
 
+	union
+	{
+		vm::atomic<pop1_t> pop1;
+		vm::atomic<pop3_t> pop3;
+	};
+	union
+	{
+		vm::atomic<push1_t> push1;
+		vm::atomic<push3_t> push3;
+	};
 	be_t<u32> m_size; // 0x10
 	be_t<u32> m_depth; // 0x14
 	vm::bptr<u8, 1, u64> m_buffer; // 0x18
 	u8 m_bs[4]; // 0x20
 	be_t<CellSyncQueueDirection> m_direction; // 0x24
 	be_t<u32> m_v1; // 0x28
-	vm::atomic<init_t> init; // 0x2C
+	vm::atomic<be_t<u32>> init; // 0x2C
 	be_t<u16> m_hs[32]; // 0x30
 	vm::bptr<void, 1, u64> m_eaSignal; // 0x70
 	be_t<u32> m_v2; // 0x78
 	be_t<u32> m_v3; // 0x7C
 
-	volatile u32& m_data()
-	{
-		return *reinterpret_cast<volatile u32*>((u8*)this + 0x2c);
-	}
-
-	volatile u64& m_push1()
-	{
-		return *reinterpret_cast<volatile u64*>((u8*)this + 0x8);
-	}
 
 	volatile u32& m_push2()
 	{
 		return *reinterpret_cast<volatile u32*>((u8*)this + 0x30);
 	}
 
-	volatile u32& m_push3()
-	{
-		return *reinterpret_cast<volatile u32*>((u8*)this + 0x8);
-	}
-
-	volatile u64& m_pop1()
-	{
-		return *reinterpret_cast<volatile u64*>((u8*)this + 0x0);
-	}
 
 	volatile u32& m_pop2()
 	{
 		return *reinterpret_cast<volatile u32*>((u8*)this + 0x50);
 	}
-
-	volatile u32& m_pop3()
-	{
-		return *reinterpret_cast<volatile u32*>((u8*)this + 0x0);
-	}
 };
 
 static_assert(sizeof(CellSyncLFQueue) == 128, "CellSyncLFQueue: wrong size");
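Note: the rewritten struct relies on pop1/pop3 aliasing at offset 0x0 and push1/push3 aliasing at offset 0x8 — exactly the overlap the deleted m_pop1()..m_push3() reinterpret_cast accessors encoded by hand. A stand-in probe of that aliasing (plain integers instead of vm::atomic<...>):

	#include <cstddef>
	#include <cstdint>

	struct lf_queue_probe
	{
		union { uint64_t pop1;  uint32_t pop3;  }; // 0x00: m_h1..m_h4 / m_h1..m_h2
		union { uint64_t push1; uint32_t push3; }; // 0x08: m_h5..m_h8 / m_h5..m_h6
		uint32_t size;  // 0x10
		uint32_t depth; // 0x14
	};

	static_assert(offsetof(lf_queue_probe, pop1) == 0x00, "pop1/pop3 must alias at 0x0");
	static_assert(offsetof(lf_queue_probe, push1) == 0x08, "push1/push3 must alias at 0x8");
	static_assert(offsetof(lf_queue_probe, size) == 0x10, "fixed fields follow the unions");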