More changes

Nekotekina 2014-09-23 03:07:40 +04:00
parent 449e7c1049
commit e5a485e50b
8 changed files with 250 additions and 188 deletions

View file

@@ -78,6 +78,7 @@ public:
SPUInterpreter* inter;
JitRuntime runtime;
bool first;
bool need_check;
struct SPURecEntry
{
@@ -457,7 +458,7 @@ private:
c.mov(cpu_dword(PC), CPU.PC);
// This instruction must be used following a store instruction that modifies the instruction stream.
c.mfence();
c.mov(*pos_var, (CPU.PC >> 2) + 1);
c.mov(*pos_var, (CPU.PC >> 2) + 1 + 0x2000000);
do_finalize = true;
LOG_OPCODE();
}
@@ -1142,6 +1143,7 @@ private:
c.mov(*addr, CPU.PC + 4);
c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
if (ra) c.or_(*pos_var, 0x2000000 << 2); // crude (check if not LR)
c.cmp(cpu_dword(GPR[rt]._u32[3]), 0);
c.cmovne(*pos_var, *addr);
c.shr(*pos_var, 2);
@@ -1160,6 +1162,7 @@ private:
c.mov(*addr, CPU.PC + 4);
c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
if (ra) c.or_(*pos_var, 0x2000000 << 2); // crude (check if not LR)
c.cmp(cpu_dword(GPR[rt]._u32[3]), 0);
c.cmove(*pos_var, *addr);
c.shr(*pos_var, 2);
@@ -1178,6 +1181,7 @@ private:
c.mov(*addr, CPU.PC + 4);
c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
if (ra) c.or_(*pos_var, 0x2000000 << 2); // crude (check if not LR)
c.cmp(cpu_word(GPR[rt]._u16[6]), 0);
c.cmovne(*pos_var, *addr);
c.shr(*pos_var, 2);
@@ -1196,6 +1200,7 @@ private:
c.mov(*addr, CPU.PC + 4);
c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
if (ra) c.or_(*pos_var, 0x2000000 << 2); // crude (check if not LR)
c.cmp(cpu_word(GPR[rt]._u16[6]), 0);
c.cmove(*pos_var, *addr);
c.shr(*pos_var, 2);
@@ -1244,6 +1249,7 @@ private:
do_finalize = true;
c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
if (ra) c.or_(*pos_var, 0x2000000 << 2); // crude (check if not LR)
c.shr(*pos_var, 2);
LOG_OPCODE();
}
@@ -1267,6 +1273,7 @@ private:
c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
c.mov(cpu_dword(GPR[rt]._u32[3]), CPU.PC + 4);
c.shr(*pos_var, 2);
c.or_(*pos_var, 0x2000000);
LOG_OPCODE();
}
void IRET(u32 ra)
@@ -1947,11 +1954,10 @@ private:
{
c.mov(*addr, cpu_dword(GPR[ra]._s32[3]));
c.cmp(*addr, cpu_dword(GPR[rb]._s32[3]));
c.mov(*addr, 0);
c.setg(addr->r8());
c.neg(*addr);
c.shl(*addr, 24);
c.mov(*pos_var, (CPU.PC >> 2) + 1);
c.xor_(*pos_var, *addr);
c.or_(*pos_var, *addr);
do_finalize = true;
LOG_OPCODE();
}
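This hunk and the five like it below rework what appears to be the conditional-halt family (HGT/HLGT/HEQ and their immediate forms): a compare followed by setcc/neg/shl builds a branch-free mask, and switching the final xor_ to or_ makes the result fit the new flag encoding. A sketch of the equivalent scalar logic in C++ (the function name is illustrative, not from the source):

#include <cassert>
#include <cstdint>

// Branch-free equivalent of the emitted cmp/setg/neg/shl/or_ sequence:
// the mask is 0 when the condition fails and 0xFF000000 (which contains
// the 0x1000000 halt bit) when it holds.
uint32_t halt_if_greater(int32_t a, int32_t b, uint32_t next_pos)
{
    uint32_t cond = (a > b) ? 1u : 0u;  // setg
    uint32_t mask = (0u - cond) << 24;  // neg + shl 24
    return next_pos | mask;             // or_ (previously xor_)
}

int main()
{
    assert(halt_if_greater(2, 1, 0x41) & 0x1000000); // taken: halt flag set
    assert(halt_if_greater(1, 2, 0x41) == 0x41);     // not taken: plain next position
}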
@@ -2308,11 +2314,10 @@ private:
{
c.mov(*addr, cpu_dword(GPR[ra]._u32[3]));
c.cmp(*addr, cpu_dword(GPR[rb]._u32[3]));
c.mov(*addr, 0);
c.seta(addr->r8());
c.neg(*addr);
c.shl(*addr, 24);
c.mov(*pos_var, (CPU.PC >> 2) + 1);
c.xor_(*pos_var, *addr);
c.or_(*pos_var, *addr);
do_finalize = true;
LOG_OPCODE();
}
@@ -2662,11 +2667,10 @@ private:
{
c.mov(*addr, cpu_dword(GPR[ra]._s32[3]));
c.cmp(*addr, cpu_dword(GPR[rb]._s32[3]));
c.mov(*addr, 0);
c.sete(addr->r8());
c.neg(*addr);
c.shl(*addr, 24);
c.mov(*pos_var, (CPU.PC >> 2) + 1);
c.xor_(*pos_var, *addr);
c.or_(*pos_var, *addr);
do_finalize = true;
LOG_OPCODE();
}
@@ -3324,11 +3328,10 @@ private:
{
c.mov(*addr, cpu_dword(GPR[ra]._s32[3]));
c.cmp(*addr, i10);
c.mov(*addr, 0);
c.setg(addr->r8());
c.neg(*addr);
c.shl(*addr, 24);
c.mov(*pos_var, (CPU.PC >> 2) + 1);
c.xor_(*pos_var, *addr);
c.or_(*pos_var, *addr);
do_finalize = true;
LOG_OPCODE();
}
@@ -3390,11 +3393,10 @@ private:
{
c.mov(*addr, cpu_dword(GPR[ra]._u32[3]));
c.cmp(*addr, i10);
c.mov(*addr, 0);
c.seta(addr->r8());
c.neg(*addr);
c.shl(*addr, 24);
c.mov(*pos_var, (CPU.PC >> 2) + 1);
c.xor_(*pos_var, *addr);
c.or_(*pos_var, *addr);
do_finalize = true;
LOG_OPCODE();
}
@@ -3441,11 +3443,10 @@ private:
{
c.mov(*addr, cpu_dword(GPR[ra]._u32[3]));
c.cmp(*addr, i10);
c.mov(*addr, 0);
c.sete(addr->r8());
c.neg(*addr);
c.shl(*addr, 24);
c.mov(*pos_var, (CPU.PC >> 2) + 1);
c.xor_(*pos_var, *addr);
c.or_(*pos_var, *addr);
do_finalize = true;
LOG_OPCODE();
}
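Taken together, the changes in this file switch the block exit protocol: instead of returning a complemented position on a special exit, a block now returns the next word index with flag bits OR'ed in, 0x1000000 to halt and 0x2000000 to request a code re-check (the dispatcher in the next file strips them). A small sketch of that encoding, with a made-up helper name:

#include <cassert>
#include <cstdint>

constexpr uint32_t HALT_FLAG  = 0x1000000; // stop the SPU thread
constexpr uint32_t CHECK_FLAG = 0x2000000; // re-validate compiled code

// Hypothetical helper mirroring what the emitted code computes.
uint32_t encode_exit(uint32_t next_pc, bool halt, bool check)
{
    uint32_t res = next_pc >> 2;   // word index of the next instruction
    if (halt)  res |= HALT_FLAG;
    if (check) res |= CHECK_FLAG;
    return res;
}

int main()
{
    // STOPD-style exit above: (CPU.PC >> 2) + 1 + 0x2000000
    uint32_t res = encode_exit(0x104, false, true);
    assert(res & CHECK_FLAG);
    res &= ~CHECK_FLAG;  // the dispatcher strips the flag...
    assert(res == 0x41); // ...and resumes at the word index
}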

View file

@@ -20,6 +20,7 @@ SPURecompilerCore::SPURecompilerCore(SPUThread& cpu)
, inter(new SPUInterpreter(cpu))
, CPU(cpu)
, first(true)
, need_check(false)
{
memset(entry, 0, sizeof(entry));
X86CpuInfo inf;
@@ -192,20 +193,26 @@ u8 SPURecompilerCore::DecodeMemory(const u32 address)
{
// check data (hard way)
bool is_valid = true;
//for (u32 i = pos; i < (u32)(entry[pos].count + pos); i++)
//{
// if (entry[i].valid != ls[i])
// {
// is_valid = false;
// break;
// }
//}
if (need_check)
{
for (u32 i = 0; i < 0x10000; i++)
{
if (entry[i].valid && entry[i].valid != ls[i])
{
is_valid = false;
break;
}
}
need_check = false;
}
// invalidate if necessary
if (!is_valid)
{
for (u32 i = 0; i < 0x10000; i++)
{
if (entry[i].pointer &&
if (!entry[i].pointer) continue;
if (!entry[i].valid || entry[i].valid != ls[i] ||
i + (u32)entry[i].count > (u32)pos &&
i < (u32)pos + (u32)entry[pos].count)
{
@@ -214,6 +221,11 @@ u8 SPURecompilerCore::DecodeMemory(const u32 address)
//RtlDeleteFunctionTable(&entry[i].info);
#endif
entry[i].pointer = nullptr;
for (u32 j = i; j < i + (u32)entry[i].count; j++)
{
entry[j].valid = 0;
}
//need_check = true;
}
}
//LOG_ERROR(Log::SPU, "SPURecompilerCore::DecodeMemory(ls_addr=0x%x): code has changed", pos * sizeof(u32));
@@ -254,11 +266,17 @@ u8 SPURecompilerCore::DecodeMemory(const u32 address)
u32 res = pos;
res = func(cpu, vm::get_ptr<void>(m_offset), imm_table.data(), &g_imm_table);
if (res > 0xffff)
if (res & 0x1000000)
{
CPU.SPU.Status.SetValue(SPU_STATUS_STOPPED_BY_HALT);
CPU.Stop();
res = ~res;
res &= ~0x1000000;
}
if (res & 0x2000000)
{
need_check = true;
res &= ~0x2000000;
}
if (did_compile)
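The rewritten scan makes invalidation lazier: each compiled entry caches the opcode it was built from in valid, and a full pass over all 0x10000 words happens only when a block exits with the 0x2000000 flag. A simplified sketch of the idea (struct and function names are placeholders, and the overlap handling above is omitted):

#include <cstdint>

struct RecEntry { void* pointer; uint32_t valid; uint16_t count; };

// Rescan the whole LS once, dropping any compiled block whose cached
// first opcode no longer matches local storage.
void check_and_invalidate(RecEntry* entry, const uint32_t* ls, bool& need_check)
{
    if (!need_check) return;
    need_check = false;
    for (uint32_t i = 0; i < 0x10000; i++)
    {
        if (!entry[i].pointer) continue;
        if (entry[i].valid && entry[i].valid != ls[i])
        {
            entry[i].pointer = nullptr; // discard the compiled block
            for (uint32_t j = i; j < i + entry[i].count; j++)
                entry[j].valid = 0;     // forget its cached opcodes
        }
    }
}

int main()
{
    static RecEntry entry[0x10000]{};
    static uint32_t ls[0x10000]{};
    bool need_check = true;
    check_and_invalidate(entry, ls, need_check); // no-op on an empty table
}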

View file

@@ -358,7 +358,7 @@ public:
}
else
{
InterlockedOr((volatile u64*)m_indval, ((u64)value << 32) | 1);
InterlockedOr(&m_indval, ((u64)value << 32) | 1);
}
}
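The cast disappears because m_indval is now a properly typed 64-bit atomic; the idiom itself packs the payload into the high 32 bits and uses bit 0 as an occupied flag, so one atomic OR publishes both. A sketch with std::atomic standing in for the Interlocked intrinsic:

#include <atomic>
#include <cassert>
#include <cstdint>

std::atomic<uint64_t> indval{0};

void push_value(uint32_t value)
{
    // value in the high half, bit 0 says "a value is present"
    indval.fetch_or(((uint64_t)value << 32) | 1);
}

int main()
{
    push_value(0xBEEF);
    const uint64_t v = indval.load();
    assert((v & 1) && (uint32_t)(v >> 32) == 0xBEEF);
}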

View file

@@ -25,34 +25,34 @@ namespace vm
template<typename T>
class _atomic_base
{
volatile T data;
typedef typename _to_atomic<T, sizeof(T)>::type atomic_type;
atomic_type data;
public:
// atomically compare data with cmp, replace with exch if equal, return previous data value anyway
__forceinline const T compare_and_swap(const T& cmp, const T& exch) volatile
{
const atomic_type res = InterlockedCompareExchange((volatile atomic_type*)&data, (atomic_type&)exch, (atomic_type&)cmp);
const atomic_type res = InterlockedCompareExchange(&data, (atomic_type&)(exch), (atomic_type&)(cmp));
return (T&)res;
}
// atomically compare data with cmp, replace with exch if equal, return true if data was replaced
__forceinline bool compare_and_swap_test(const T& cmp, const T& exch) volatile
{
return InterlockedCompareExchange((volatile atomic_type*)&data, (atomic_type&)exch, (atomic_type&)cmp) == (atomic_type&)cmp;
return InterlockedCompareExchange(&data, (atomic_type&)(exch), (atomic_type&)(cmp)) == (atomic_type&)(cmp);
}
// read data with memory barrier
__forceinline const T read_sync() const volatile
{
const atomic_type res = InterlockedCompareExchange((volatile atomic_type*)&data, 0, 0);
const atomic_type res = InterlockedCompareExchange(const_cast<volatile atomic_type*>(&data), 0, 0);
return (T&)res;
}
// atomically replace data with exch, return previous data value
__forceinline const T exchange(const T& exch) volatile
{
const atomic_type res = InterlockedExchange((volatile atomic_type*)&data, (atomic_type&)exch);
const atomic_type res = InterlockedExchange(&data, (atomic_type&)(exch));
return (T&)res;
}
@@ -65,7 +65,7 @@ namespace vm
// write data without memory barrier
__forceinline void write_relaxed(const T& value) volatile
{
(T&)data = value;
data = (atomic_type&)(value);
}
// perform atomic operation on data
@@ -81,7 +81,7 @@ namespace vm
}
// perform atomic operation on data with special exit condition (if intermediate result != proceed_value)
template<typename RT, typename FT> __forceinline RT atomic_op(const RT& proceed_value, const FT atomic_proc) volatile
template<typename RT, typename FT> __forceinline RT atomic_op(const RT proceed_value, const FT atomic_proc) volatile
{
while (true)
{
@@ -95,37 +95,37 @@ namespace vm
__forceinline const T _or(const T& right) volatile
{
const atomic_type res = InterlockedOr((volatile atomic_type*)&data, (atomic_type&)right);
const atomic_type res = InterlockedOr(&data, (atomic_type&)(right));
return (T&)res;
}
__forceinline const T _and(const T& right) volatile
{
const atomic_type res = InterlockedAnd((volatile atomic_type*)&data, (atomic_type&)right);
const atomic_type res = InterlockedAnd(&data, (atomic_type&)(right));
return (T&)res;
}
__forceinline const T _xor(const T& right) volatile
{
const atomic_type res = InterlockedXor((volatile atomic_type*)&data, (atomic_type&)right);
const atomic_type res = InterlockedXor(&data, (atomic_type&)(right));
return (T&)res;
}
__forceinline const T operator |= (const T& right) volatile
{
const atomic_type res = InterlockedOr((volatile atomic_type*)&data, (atomic_type&)right) | (atomic_type&)right;
const atomic_type res = InterlockedOr(&data, (atomic_type&)(right)) | (atomic_type&)(right);
return (T&)res;
}
__forceinline const T operator &= (const T& right) volatile
{
const atomic_type res = InterlockedAnd((volatile atomic_type*)&data, (atomic_type&)right) & (atomic_type&)right;
const atomic_type res = InterlockedAnd(&data, (atomic_type&)(right)) & (atomic_type&)(right);
return (T&)res;
}
__forceinline const T operator ^= (const T& right) volatile
{
const atomic_type res = InterlockedXor((volatile atomic_type*)&data, (atomic_type&)right) ^ (atomic_type&)right;
const atomic_type res = InterlockedXor(&data, (atomic_type&)(right)) ^ (atomic_type&)(right);
return (T&)res;
}
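The pattern these wrappers are built for, and which the cellSync rewrite below leans on, is read-modify-CAS: copy the value out, edit the copy, and publish it with compare_and_swap_test in a retry loop. A self-contained analogue using std::atomic in place of the Interlocked* intrinsics:

#include <atomic>
#include <cassert>
#include <cstdint>

int main()
{
    std::atomic<uint64_t> data{5};

    uint64_t old_value = data.load(); // read_sync analogue
    while (true)
    {
        const uint64_t new_value = old_value + 1; // modify a local copy
        // compare_exchange_weak reloads old_value on failure, so just retry
        if (data.compare_exchange_weak(old_value, new_value)) break;
    }
    assert(data.load() == 6);
}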

View file

@@ -20,17 +20,86 @@ extern u32 libsre;
extern u32 libsre_rtoc;
#endif
s64 spursCreateLv2EventQueue(vm::ptr<CellSpurs> spurs, u32& queue_id, u32 arg3, u32 arg4, u64 name_u64)
s64 spursAttachLv2EventQueue(vm::ptr<CellSpurs> spurs, u32 queue, vm::ptr<u8> port, s32 isDynamic, bool wasCreated)
{
#ifdef PRX_DEBUG_XXX
return cb_call<s32, vm::ptr<CellSpurs>, u32, vm::ptr<u8>, s32, bool>(GetCurrentPPUThread(), libsre + 0xAE34, libsre_rtoc,
spurs, queue, port, isDynamic, wasCreated);
#else
if (!spurs || !port)
{
return CELL_SPURS_CORE_ERROR_NULL_POINTER;
}
if (spurs.addr() % 128)
{
return CELL_SPURS_CORE_ERROR_ALIGN;
}
if (spurs->m.unk21.ToBE())
{
return CELL_SPURS_CORE_ERROR_STAT;
}
u32 unk1 = 0;
#ifdef PRX_DEBUG
unk1 = cb_call<u32>(GetCurrentPPUThread(), libsre + 0x10900, libsre_rtoc);
#endif
u8 _port = 0x3f;
u8 port_start = 0x10;
u64 port_mask = 0;
if (isDynamic == 0)
{
_port = *port;
if (_port > 0x3f)
{
return CELL_SPURS_CORE_ERROR_INVAL;
}
if (unk1 <= 0x17ffff && _port > 0xf)
{
return CELL_SPURS_CORE_ERROR_PERM;
}
port_start = _port;
}
for (u32 i = port_start + 1; i < _port; i++)
{
port_mask |= 1ull << (i - 1);
}
if (s32 res = sys_spu_thread_group_connect_event_all_threads(spurs->m.spuTG, queue, port_mask, port))
{
if (res == CELL_EISCONN)
{
return CELL_SPURS_CORE_ERROR_BUSY;
}
return res;
}
if (!wasCreated)
{
spurs->m.spups |= be_t<u64>::make(1ull << *port); // atomic bitwise or
}
return CELL_OK;
#endif
}
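The "atomic bitwise or" noted above treats spups as a 64-bit bitmap of handed-out SPU ports. A sketch of that bookkeeping (std::atomic stands in for the vm::atomic<u64> field, and the helper name is invented):

#include <atomic>
#include <cassert>
#include <cstdint>

std::atomic<uint64_t> spups{0}; // bit N set = SPU port N is taken

bool claim_port(uint8_t port)
{
    const uint64_t bit = 1ull << port;
    return !(spups.fetch_or(bit) & bit); // false if already taken
}

int main()
{
    assert(claim_port(0x11));  // first claim succeeds
    assert(!claim_port(0x11)); // second claim sees the bit set
}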
s64 spursCreateLv2EventQueue(vm::ptr<CellSpurs> spurs, u32& queue_id, vm::ptr<u8> port, s32 size, u64 name_u64)
{
#ifdef PRX_DEBUG_XXX
vm::var<be_t<u32>> queue;
s32 res = cb_call<s32, vm::ptr<CellSpurs>, vm::ptr<be_t<u32>>, u32, u32, u32>(GetCurrentPPUThread(), libsre + 0xB14C, libsre_rtoc,
spurs, queue, arg3, arg4, vm::read32(libsre_rtoc - 0x7E2C));
s32 res = cb_call<s32, vm::ptr<CellSpurs>, vm::ptr<be_t<u32>>, vm::ptr<u8>, s32, u32>(GetCurrentPPUThread(), libsre + 0xB14C, libsre_rtoc,
spurs, queue, port, size, vm::read32(libsre_rtoc - 0x7E2C));
queue_id = queue->ToLE();
return res;
#else
// TODO
queue_id = event_queue_create(SYS_SYNC_PRIORITY, SYS_PPU_QUEUE, *(u64*)"+QUEUE+", 0, 1);
queue_id = event_queue_create(SYS_SYNC_FIFO, SYS_PPU_QUEUE, name_u64, 0, size);
if (!queue_id)
{
return CELL_EAGAIN; // rough
}
assert(spursAttachLv2EventQueue(spurs, queue_id, port, 1, true) == CELL_OK);
return CELL_OK;
#endif
}
@@ -108,7 +177,7 @@ s64 spursInit(
for (u32 i = 0; i < 0x10; i++)
{
sem = semaphore_create(0, 1, SYS_SYNC_PRIORITY, *(u64*)"_spuWkl");
assert(sem && ~sem); // should rollback if semaphore creating failed and return the error
assert(sem && ~sem); // should rollback if semaphore creation failed and return the error
spurs->m.sub1[i].sem = sem;
}
if (isSecond)
@@ -182,7 +251,7 @@ s64 spursInit(
spurs->m.ppuPriority = ppuPriority;
u32 queue;
assert(spursCreateLv2EventQueue(spurs, queue, spurs.addr() + 0xc9, 0x2a, *(u64*)"_spuPrv") == CELL_OK);
assert(spursCreateLv2EventQueue(spurs, queue, vm::ptr<u8>::make(spurs.addr() + 0xc9), 0x2a, *(u64*)"_spuPrv") == CELL_OK);
spurs->m.queue = queue;
u32 port = event_port_create(0);
@@ -618,13 +687,13 @@ s64 cellSpursSetPreemptionVictimHints(vm::ptr<CellSpurs> spurs, vm::ptr<const bo
s64 cellSpursAttachLv2EventQueue(vm::ptr<CellSpurs> spurs, u32 queue, vm::ptr<u8> port, s32 isDynamic)
{
#ifdef PRX_DEBUG
cellSpurs->Warning("cellSpursAttachLv2EventQueue(spurs_addr=0x%x, queue=%d, port_addr=0x%x, isDynamic=%d)",
spurs.addr(), queue, port.addr(), isDynamic);
#ifdef PRX_DEBUG_XXX
return GetCurrentPPUThread().FastCall2(libsre + 0xAFE0, libsre_rtoc);
#else
UNIMPLEMENTED_FUNC(cellSpurs);
return CELL_OK;
return spursAttachLv2EventQueue(spurs, queue, port, isDynamic, false);
#endif
}

View file

@@ -185,7 +185,7 @@ struct CellSpurs
u8 unknown0[0xB0 - 0x78];
be_t<u32> unk0; // 0x0B0
u8 unknown2[0xC0 - 0xB4];
u8 unk6[0x10]; // 0x0C0
u8 unk6[0x10]; // 0x0C0 (SPU port at 0xc9)
u8 unknown1[0x100 - 0x0D0];
_sub_str1 sub1[0x10]; // 0x100
be_t<u64> unk22; // 0x900
@@ -219,7 +219,7 @@ struct CellSpurs
be_t<u32> unk5; // 0xD9C
be_t<u32> revision; // 0xDA0
be_t<u32> sdkVersion; // 0xDA4
u8 unknown8[0xDB0 - 0xDA8];
vm::atomic<u64> spups;// 0xDA8
sys_lwmutex_t mutex; // 0xDB0
sys_lwcond_t cond; // 0xDC8
u8 unknown9[0xF00 - 0xDD0];

View file

@@ -923,17 +923,16 @@ s32 syncLFQueueInitialize(vm::ptr<CellSyncLFQueue> queue, vm::ptr<u8> buffer, u3
u32 old_value;
while (true)
{
const u32 old_data = queue->m_data();
CellSyncLFQueue new_data;
new_data.m_data() = old_data;
const auto old = queue->init.read_relaxed();
auto init = old;
if (old_data)
if (old.ToBE())
{
if (sdk_ver > 0x17ffff && old_data != se32(2))
if (sdk_ver > 0x17ffff && old != 2)
{
return CELL_SYNC_ERROR_STAT;
}
old_value = old_data;
old_value = old.ToLE();
}
else
{
@@ -948,14 +947,14 @@ s32 syncLFQueueInitialize(vm::ptr<CellSyncLFQueue> queue, vm::ptr<u8> buffer, u3
}
}
}
new_data.m_data() = se32(1);
old_value = se32(1);
init = 1;
old_value = 1;
}
if (InterlockedCompareExchange(&queue->m_data(), new_data.m_data(), old_data) == old_data) break;
if (queue->init.compare_and_swap_test(old, init)) break;
}
if (old_value == se32(2))
if (old_value == 2)
{
if ((u32)queue->m_size != size || (u32)queue->m_depth != depth || queue->m_buffer.addr() != buffer.addr())
{
@@ -975,12 +974,11 @@ s32 syncLFQueueInitialize(vm::ptr<CellSyncLFQueue> queue, vm::ptr<u8> buffer, u3
syncLFQueueInit(queue, buffer, size, depth, direction, eaSignal);
// prx: sync, zeroize u32 at 0x2c offset
InterlockedCompareExchange(&queue->m_data(), 0, 0);
queue->m_data() = 0;
queue->init.exchange({});
}
// prx: sync
InterlockedCompareExchange(&queue->m_data(), 0, 0);
queue->init.read_sync();
return CELL_OK;
#endif
}
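The init field now behaves as a small CAS-guarded state machine: 0 is untouched, 1 is set by the thread performing set-up, and 2 marks a fully initialized queue (the only value newer SDKs tolerate on re-initialization). A minimal sketch of the claim step:

#include <atomic>
#include <cassert>
#include <cstdint>

std::atomic<uint32_t> init{0}; // 0 = untouched, 1 = initializing, 2 = done

bool try_begin_init()
{
    uint32_t expected = 0;
    return init.compare_exchange_strong(expected, 1); // only one winner
}

int main()
{
    assert(try_begin_init());  // first caller claims initialization
    assert(!try_begin_init()); // later callers see it already claimed
}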
@@ -1011,28 +1009,27 @@ s32 syncLFQueueGetPushPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32
return -1;
}
const u64 old_data = InterlockedCompareExchange(&queue->m_push1(), 0, 0);
CellSyncLFQueue new_queue;
new_queue.m_push1() = old_data;
const auto old = queue->push1.read_sync();
auto push = old;
if (var1)
{
new_queue.m_h7 = 0;
push.m_h7 = 0;
}
if (isBlocking && useEventQueue && *(u32*)queue->m_bs == -1)
{
return CELL_SYNC_ERROR_STAT;
}
s32 var2 = (s32)(s16)new_queue.m_h8;
s32 var2 = (s32)(s16)push.m_h8;
s32 res;
if (useEventQueue && ((s32)(u16)new_queue.m_h5 != var2 || new_queue.m_h7.ToBE() != 0))
if (useEventQueue && ((s32)push.m_h5 != var2 || push.m_h7.ToBE() != 0))
{
res = CELL_SYNC_ERROR_BUSY;
}
else
{
var2 -= (s32)(u16)queue->m_h1;
var2 -= (s32)(u16)queue->pop1.read_relaxed().m_h1;
if (var2 < 0)
{
var2 += depth * 2;
@@ -1040,21 +1037,21 @@ s32 syncLFQueueGetPushPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32
if (var2 < depth)
{
pointer = (s16)new_queue.m_h8;
pointer = (s16)push.m_h8;
if (pointer + 1 >= depth * 2)
{
new_queue.m_h8 = 0;
push.m_h8 = 0;
}
else
{
new_queue.m_h8++;
push.m_h8++;
}
res = CELL_OK;
}
else if (!isBlocking)
{
res = CELL_SYNC_ERROR_AGAIN;
if (!new_queue.m_h7.ToBE() || res)
if (!push.m_h7.ToBE() || res)
{
return res;
}
@@ -1067,7 +1064,7 @@ s32 syncLFQueueGetPushPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32
else
{
res = CELL_OK;
new_queue.m_h7 = 3;
push.m_h7 = 3;
if (isBlocking != 3)
{
break;
@@ -1075,9 +1072,9 @@ s32 syncLFQueueGetPushPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32
}
}
if (InterlockedCompareExchange(&queue->m_push1(), new_queue.m_push1(), old_data) == old_data)
if (queue->push1.compare_and_swap_test(old, push))
{
if (!new_queue.m_h7.ToBE() || res)
if (!push.m_h7.ToBE() || res)
{
return res;
}
@@ -1138,19 +1135,19 @@ s32 syncLFQueueCompletePushPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer,
while (true)
{
const u32 old_data = InterlockedCompareExchange(&queue->m_push2(), 0, 0);
CellSyncLFQueue new_queue;
new_queue.m_push2() = old_data;
CellSyncLFQueue new_;
new_.m_push2() = old_data;
const u32 old_data2 = queue->m_push3();
new_queue.m_push3() = old_data2;
const auto old2 = queue->push3.read_relaxed();
auto push = old2;
s32 var1 = pointer - (u16)new_queue.m_h5;
s32 var1 = pointer - (u16)push.m_h5;
if (var1 < 0)
{
var1 += depth * 2;
}
s32 var2 = (s32)(s16)queue->m_h4 - (s32)(u16)queue->m_h1;
s32 var2 = (s32)(s16)queue->pop1.read_relaxed().m_h4 - (s32)(u16)queue->pop1.read_relaxed().m_h1;
if (var2 < 0)
{
var2 += depth * 2;
@@ -1166,7 +1163,7 @@ s32 syncLFQueueCompletePushPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer,
{
var9_ = 1 << var9_;
}
s32 var9 = ~(var9_ | (u16)new_queue.m_h6);
s32 var9 = ~(var9_ | (u16)push.m_h6);
// count leading zeros in u16
{
u16 v = var9;
@@ -1179,7 +1176,7 @@ s32 syncLFQueueCompletePushPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer,
}
}
s32 var5 = (s32)(u16)new_queue.m_h6 | var9_;
s32 var5 = (s32)(u16)push.m_h6 | var9_;
if (var9 & 0x30)
{
var5 = 0;
@@ -1189,13 +1186,13 @@ s32 syncLFQueueCompletePushPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer,
var5 <<= var9;
}
s32 var3 = (u16)new_queue.m_h5 + var9;
s32 var3 = (u16)push.m_h5 + var9;
if (var3 >= depth * 2)
{
var3 -= depth * 2;
}
u16 pack = new_queue.m_hs[0]; // three packed 5-bit fields
u16 pack = new_.m_hs[0]; // three packed 5-bit fields
s32 var4 = ((pack >> 10) & 0x1f) - ((pack >> 5) & 0x1f);
if (var4 < 0)
@@ -1233,7 +1230,7 @@ s32 syncLFQueueCompletePushPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer,
var12 = (var12 + 1) << 10;
}
new_queue.m_hs[0] = (pack & 0x83ff) | var12;
new_.m_hs[0] = (pack & 0x83ff) | var12;
var6 = (u16)queue->m_hs[1 + 2 * var11];
}
else
@@ -1241,14 +1238,15 @@ s32 syncLFQueueCompletePushPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer,
var6 = -1;
}
s32 var7 = (var3 << 16) | (var5 & 0xffff);
push.m_h5 = (u16)var3;
push.m_h6 = (u16)var5;
if (InterlockedCompareExchange(&queue->m_push2(), new_queue.m_push2(), old_data) == old_data)
if (InterlockedCompareExchange(&queue->m_push2(), new_.m_push2(), old_data) == old_data)
{
assert(var2 + var4 < 16);
if (var6 != -1)
{
bool exch = InterlockedCompareExchange(&queue->m_push3(), re32(var7), old_data2) == old_data2;
bool exch = queue->push3.compare_and_swap_test(old2, push);
assert(exch);
if (exch)
{
@@ -1261,7 +1259,7 @@ s32 syncLFQueueCompletePushPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer,
pack = queue->m_hs[0];
if ((pack & 0x1f) == ((pack >> 10) & 0x1f))
{
if (InterlockedCompareExchange(&queue->m_push3(), re32(var7), old_data2) == old_data2)
if (queue->push3.compare_and_swap_test(old2, push))
{
return CELL_OK;
}
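The m_hs header words each pack three 5-bit counters, which the code above extracts with shifts and rebuilds through the 0x83ff mask (clearing bits 10-14 before inserting the updated field). Illustrative accessors, with field roles guessed from the arithmetic:

#include <cassert>
#include <cstdint>

constexpr uint16_t get_lo (uint16_t pack) { return pack & 0x1f; }         // bits 0-4
constexpr uint16_t get_mid(uint16_t pack) { return (pack >> 5) & 0x1f; }  // bits 5-9
constexpr uint16_t get_hi (uint16_t pack) { return (pack >> 10) & 0x1f; } // bits 10-14

int main()
{
    const uint16_t pack = (7 << 10) | (3 << 5) | 1;
    assert(get_hi(pack) == 7 && get_mid(pack) == 3 && get_lo(pack) == 1);
    // 0x83ff clears bits 10-14, as in new_.m_hs[0] = (pack & 0x83ff) | var12
    assert(get_hi(pack & 0x83ff) == 0);
}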
@@ -1409,28 +1407,27 @@ s32 syncLFQueueGetPopPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32 i
return -1;
}
const u64 old_data = InterlockedCompareExchange(&queue->m_pop1(), 0, 0);
CellSyncLFQueue new_queue;
new_queue.m_pop1() = old_data;
const auto old = queue->pop1.read_sync();
auto pop = old;
if (var1)
{
new_queue.m_h3 = 0;
pop.m_h3 = 0;
}
if (isBlocking && useEventQueue && *(u32*)queue->m_bs == -1)
{
return CELL_SYNC_ERROR_STAT;
}
s32 var2 = (s32)(s16)new_queue.m_h4;
s32 var2 = (s32)(s16)pop.m_h4;
s32 res;
if (useEventQueue && ((s32)(u16)new_queue.m_h1 != var2 || new_queue.m_h3.ToBE() != 0))
if (useEventQueue && ((s32)(u16)pop.m_h1 != var2 || pop.m_h3.ToBE() != 0))
{
res = CELL_SYNC_ERROR_BUSY;
}
else
{
var2 = (s32)(u16)queue->m_h5 - var2;
var2 = (s32)(u16)queue->push1.read_relaxed().m_h5 - var2;
if (var2 < 0)
{
var2 += depth * 2;
@@ -1438,21 +1435,21 @@ s32 syncLFQueueGetPopPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32 i
if (var2 > 0)
{
pointer = (s16)new_queue.m_h4;
pointer = (s16)pop.m_h4;
if (pointer + 1 >= depth * 2)
{
new_queue.m_h4 = 0;
pop.m_h4 = 0;
}
else
{
new_queue.m_h4++;
pop.m_h4++;
}
res = CELL_OK;
}
else if (!isBlocking)
{
res = CELL_SYNC_ERROR_AGAIN;
if (!new_queue.m_h3.ToBE() || res)
if (!pop.m_h3.ToBE() || res)
{
return res;
}
@@ -1465,7 +1462,7 @@ s32 syncLFQueueGetPopPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32 i
else
{
res = CELL_OK;
new_queue.m_h3 = 3;
pop.m_h3 = 3;
if (isBlocking != 3)
{
break;
@@ -1473,9 +1470,9 @@ s32 syncLFQueueGetPopPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32 i
}
}
if (InterlockedCompareExchange(&queue->m_pop1(), new_queue.m_pop1(), old_data) == old_data)
if (queue->pop1.compare_and_swap_test(old, pop))
{
if (!new_queue.m_h3.ToBE() || res)
if (!pop.m_h3.ToBE() || res)
{
return res;
}
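Throughout this file the raw-offset InterlockedCompareExchange calls give way to one typed pattern: read a whole 64-bit header struct, edit a copy, publish with a single CAS. A standalone analogue (std::atomic and memcpy stand in for vm::atomic and the big-endian wrappers):

#include <atomic>
#include <cassert>
#include <cstdint>
#include <cstring>

struct pop1_t { uint16_t m_h1, m_h2, m_h3, m_h4; }; // 8 bytes, CAS'd as one word

std::atomic<uint64_t> pop1_storage{0};

static uint64_t to_u64(pop1_t v)     { uint64_t r; std::memcpy(&r, &v, 8); return r; }
static pop1_t   from_u64(uint64_t r) { pop1_t v; std::memcpy(&v, &r, 8); return v; }

int main()
{
    while (true)
    {
        uint64_t old_raw = pop1_storage.load(); // read_sync analogue
        pop1_t pop = from_u64(old_raw);         // auto pop = old;
        pop.m_h4++;                             // advance the pop pointer
        if (pop1_storage.compare_exchange_strong(old_raw, to_u64(pop)))
            break;                              // compare_and_swap_test
    }
    assert(from_u64(pop1_storage.load()).m_h4 == 1);
}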
@@ -1536,19 +1533,19 @@ s32 syncLFQueueCompletePopPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer, c
while (true)
{
const u32 old_data = InterlockedCompareExchange(&queue->m_pop2(), 0, 0);
CellSyncLFQueue new_queue;
new_queue.m_pop2() = old_data;
CellSyncLFQueue new_;
new_.m_pop2() = old_data;
const u32 old_data2 = queue->m_pop3();
new_queue.m_pop3() = old_data2;
const auto old2 = queue->pop3.read_relaxed();
auto pop = old2;
s32 var1 = pointer - (u16)new_queue.m_h1;
s32 var1 = pointer - (u16)pop.m_h1;
if (var1 < 0)
{
var1 += depth * 2;
}
s32 var2 = (s32)(s16)queue->m_h8 - (s32)(u16)queue->m_h5;
s32 var2 = (s32)(s16)queue->push1.read_relaxed().m_h8 - (s32)(u16)queue->push1.read_relaxed().m_h5;
if (var2 < 0)
{
var2 += depth * 2;
@@ -1564,7 +1561,7 @@ s32 syncLFQueueCompletePopPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer, c
{
var9_ = 1 << var9_;
}
s32 var9 = ~(var9_ | (u16)new_queue.m_h2);
s32 var9 = ~(var9_ | (u16)pop.m_h2);
// count leading zeros in u16
{
u16 v = var9;
@@ -1577,7 +1574,7 @@ s32 syncLFQueueCompletePopPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer, c
}
}
s32 var5 = (s32)(u16)new_queue.m_h2 | var9_;
s32 var5 = (s32)(u16)pop.m_h2 | var9_;
if (var9 & 0x30)
{
var5 = 0;
@@ -1587,13 +1584,13 @@ s32 syncLFQueueCompletePopPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer, c
var5 <<= var9;
}
s32 var3 = (u16)new_queue.m_h1 + var9;
s32 var3 = (u16)pop.m_h1 + var9;
if (var3 >= depth * 2)
{
var3 -= depth * 2;
}
u16 pack = new_queue.m_hs[16]; // three packed 5-bit fields
u16 pack = new_.m_hs[16]; // three packed 5-bit fields
s32 var4 = ((pack >> 10) & 0x1f) - ((pack >> 5) & 0x1f);
if (var4 < 0)
@@ -1635,17 +1632,18 @@ s32 syncLFQueueCompletePopPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer, c
var12 = (var12 + 1) << 10;
}
new_queue.m_hs[0] = (pack & 0x83ff) | var12;
new_.m_hs[0] = (pack & 0x83ff) | var12;
var6 = (u16)queue->m_hs[17 + 2 * var11];
}
s32 var7 = (var3 << 16) | (var5 & 0xffff);
pop.m_h1 = (u16)var3;
pop.m_h2 = (u16)var5;
if (InterlockedCompareExchange(&queue->m_pop2(), new_queue.m_pop2(), old_data) == old_data)
if (InterlockedCompareExchange(&queue->m_pop2(), new_.m_pop2(), old_data) == old_data)
{
if (var6 != -1)
{
bool exch = InterlockedCompareExchange(&queue->m_pop3(), re32(var7), old_data2) == old_data2;
bool exch = queue->pop3.compare_and_swap_test(old2, pop);
assert(exch);
if (exch)
{
@@ -1658,7 +1656,7 @@ s32 syncLFQueueCompletePopPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer, c
pack = queue->m_hs[16];
if ((pack & 0x1f) == ((pack >> 10) & 0x1f))
{
if (InterlockedCompareExchange(&queue->m_pop3(), re32(var7), old_data2) == old_data2)
if (queue->pop3.compare_and_swap_test(old2, pop))
{
return CELL_OK;
}
@@ -1798,12 +1796,10 @@ s32 cellSyncLFQueueClear(vm::ptr<CellSyncLFQueue> queue)
// TODO: optimize if possible
while (true)
{
const u64 old_data = InterlockedCompareExchange(&queue->m_pop1(), 0, 0);
CellSyncLFQueue new_queue;
new_queue.m_pop1() = old_data;
const auto old = queue->pop1.read_sync();
auto pop = old;
const u64 new_data = queue->m_push1();
new_queue.m_push1() = new_data;
const auto push = queue->push1.read_relaxed();
s32 var1, var2;
if (queue->m_direction.ToBE() != se32(CELL_SYNC_QUEUE_ANY2ANY))
@@ -1812,19 +1808,24 @@ s32 cellSyncLFQueueClear(vm::ptr<CellSyncLFQueue> queue)
}
else
{
var1 = (u16)new_queue.m_h7;
var2 = (u16)new_queue.m_h3;
var1 = (u16)push.m_h7;
var2 = (u16)pop.m_h3;
}
if ((s32)(s16)new_queue.m_h4 != (s32)(u16)new_queue.m_h1 ||
(s32)(s16)new_queue.m_h8 != (s32)(u16)new_queue.m_h5 ||
if ((s32)(s16)pop.m_h4 != (s32)(u16)pop.m_h1 ||
(s32)(s16)push.m_h8 != (s32)(u16)push.m_h5 ||
((var2 >> 10) & 0x1f) != (var2 & 0x1f) ||
((var1 >> 10) & 0x1f) != (var1 & 0x1f))
{
return CELL_SYNC_ERROR_BUSY;
}
if (InterlockedCompareExchange(&queue->m_pop1(), new_data, old_data) == old_data) break;
pop.m_h1 = push.m_h5;
pop.m_h2 = push.m_h6;
pop.m_h3 = push.m_h7;
pop.m_h4 = push.m_h8;
if (queue->pop1.compare_and_swap_test(old, pop)) break;
}
return CELL_OK;
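The rewritten clear no longer copies the raw push word over the pop word; it builds a new pop header whose fields mirror the push header, which is what leaves zero readable elements. Sketched with plain structs (the field correspondence is taken from the four assignments above):

#include <cassert>
#include <cstdint>

struct pop1_t  { uint16_t m_h1, m_h2, m_h3, m_h4; };
struct push1_t { uint16_t m_h5, m_h6, m_h7, m_h8; };

// After the copy, pop and push pointers coincide: the ring reads empty.
pop1_t clear_pop(push1_t push)
{
    return pop1_t{ push.m_h5, push.m_h6, push.m_h7, push.m_h8 };
}

int main()
{
    const push1_t push{ 5, 0, 0, 5 };
    const pop1_t pop = clear_pop(push);
    assert(pop.m_h1 == push.m_h5 && pop.m_h4 == push.m_h8);
}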
@@ -1846,12 +1847,12 @@ s32 cellSyncLFQueueSize(vm::ptr<CellSyncLFQueue> queue, vm::ptr<be_t<u32>> size)
// TODO: optimize if possible
while (true)
{
const u32 old_data = InterlockedCompareExchange(&queue->m_pop3(), 0, 0);
const auto old = queue->pop3.read_sync();
u32 var1 = (u16)queue->m_h1;
u32 var2 = (u16)queue->m_h5;
u32 var1 = (u16)queue->pop1.read_relaxed().m_h1;
u32 var2 = (u16)queue->push1.read_relaxed().m_h5;
if (InterlockedCompareExchange(&queue->m_pop3(), old_data, old_data) == old_data)
if (queue->pop3.compare_and_swap_test(old, old))
{
if (var1 <= var2)
{

View file

@@ -97,11 +97,6 @@ enum CellSyncQueueDirection : u32 // CellSyncLFQueueDirection
struct CellSyncLFQueue
{
struct init_t
{
be_t<u32> m_sync;
};
struct pop1_t
{
be_t<u16> m_h1;
@@ -110,6 +105,12 @@ struct CellSyncLFQueue
be_t<u16> m_h4;
};
struct pop3_t
{
be_t<u16> m_h1;
be_t<u16> m_h2;
};
struct push1_t
{
be_t<u16> m_h5;
@@ -118,72 +119,44 @@ struct CellSyncLFQueue
be_t<u16> m_h8;
};
union
struct push3_t
{
struct
{
vm::atomic<pop1_t> pop1;
vm::atomic<push1_t> push1;
};
struct
{
be_t<u16> m_h1; // 0x0
be_t<u16> m_h2; // 0x2
be_t<u16> m_h3; // 0x4
be_t<u16> m_h4; // 0x6
be_t<u16> m_h5; // 0x8
be_t<u16> m_h6; // 0xA
be_t<u16> m_h7; // 0xC
be_t<u16> m_h8; // 0xE
};
be_t<u16> m_h5;
be_t<u16> m_h6;
};
union
{
vm::atomic<pop1_t> pop1;
vm::atomic<pop3_t> pop3;
};
union
{
vm::atomic<push1_t> push1;
vm::atomic<push3_t> push3;
};
be_t<u32> m_size; // 0x10
be_t<u32> m_depth; // 0x14
vm::bptr<u8, 1, u64> m_buffer; // 0x18
u8 m_bs[4]; // 0x20
be_t<CellSyncQueueDirection> m_direction; // 0x24
be_t<u32> m_v1; // 0x28
vm::atomic<init_t> init; // 0x2C
vm::atomic<u32> init; // 0x2C
be_t<u16> m_hs[32]; // 0x30
vm::bptr<void, 1, u64> m_eaSignal; // 0x70
be_t<u32> m_v2; // 0x78
be_t<u32> m_v3; // 0x7C
volatile u32& m_data()
{
return *reinterpret_cast<u32*>((u8*)this + 0x2c);
}
volatile u64& m_push1()
{
return *reinterpret_cast<u64*>((u8*)this + 0x8);
}
volatile u32& m_push2()
{
return *reinterpret_cast<u32*>((u8*)this + 0x30);
}
volatile u32& m_push3()
{
return *reinterpret_cast<u32*>((u8*)this + 0x8);
}
volatile u64& m_pop1()
{
return *reinterpret_cast<u64*>((u8*)this + 0x0);
}
volatile u32& m_pop2()
{
return *reinterpret_cast<u32*>((u8*)this + 0x50);
}
volatile u32& m_pop3()
{
return *reinterpret_cast<u32*>((u8*)this + 0x0);
}
};
static_assert(sizeof(CellSyncLFQueue) == 128, "CellSyncLFQueue: wrong size");
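Since the new header relies on pop1/pop3 and push1/push3 aliasing the same bytes, a layout check in the spirit of the existing static_assert may be worth keeping nearby; plain structs stand in for vm::atomic and be_t in this sketch:

#include <cstddef>
#include <cstdint>

struct pop1_t  { uint16_t m_h1, m_h2, m_h3, m_h4; };
struct pop3_t  { uint16_t m_h1, m_h2; };
struct push1_t { uint16_t m_h5, m_h6, m_h7, m_h8; };
struct push3_t { uint16_t m_h5, m_h6; };

struct Queue
{
    union { pop1_t pop1; pop3_t pop3; };     // 0x00
    union { push1_t push1; push3_t push3; }; // 0x08
    uint32_t m_size, m_depth;                // 0x10, 0x14
    uint64_t m_buffer;                       // 0x18
    uint8_t  m_bs[4];                        // 0x20
    uint32_t m_direction, m_v1;              // 0x24, 0x28
    uint32_t init;                           // 0x2C
    uint16_t m_hs[32];                       // 0x30
    uint64_t m_eaSignal;                     // 0x70
    uint32_t m_v2, m_v3;                     // 0x78, 0x7C
};

static_assert(offsetof(Queue, push1) == 0x08, "push1/push3 must alias at 0x08");
static_assert(offsetof(Queue, init) == 0x2C, "init must sit at 0x2C");
static_assert(sizeof(Queue) == 128, "CellSyncLFQueue analogue: wrong size");

int main() {}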