More changes

Nekotekina 2014-09-23 03:07:40 +04:00
parent 449e7c1049
commit e5a485e50b
8 changed files with 250 additions and 188 deletions

View file

@@ -78,6 +78,7 @@ public:
SPUInterpreter* inter;
JitRuntime runtime;
bool first;
bool need_check;
struct SPURecEntry
{
@@ -457,7 +458,7 @@ private:
c.mov(cpu_dword(PC), CPU.PC);
// This instruction must be used following a store instruction that modifies the instruction stream.
c.mfence();
c.mov(*pos_var, (CPU.PC >> 2) + 1);
c.mov(*pos_var, (CPU.PC >> 2) + 1 + 0x2000000);
do_finalize = true;
LOG_OPCODE();
}
@@ -1142,6 +1143,7 @@ private:
c.mov(*addr, CPU.PC + 4);
c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
if (ra) c.or_(*pos_var, 0x2000000 << 2); // crude (check if not LR)
c.cmp(cpu_dword(GPR[rt]._u32[3]), 0);
c.cmovne(*pos_var, *addr);
c.shr(*pos_var, 2);
@@ -1160,6 +1162,7 @@ private:
c.mov(*addr, CPU.PC + 4);
c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
if (ra) c.or_(*pos_var, 0x2000000 << 2); // crude (check if not LR)
c.cmp(cpu_dword(GPR[rt]._u32[3]), 0);
c.cmove(*pos_var, *addr);
c.shr(*pos_var, 2);
@@ -1178,6 +1181,7 @@ private:
c.mov(*addr, CPU.PC + 4);
c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
if (ra) c.or_(*pos_var, 0x2000000 << 2); // crude (check if not LR)
c.cmp(cpu_word(GPR[rt]._u16[6]), 0);
c.cmovne(*pos_var, *addr);
c.shr(*pos_var, 2);
@@ -1196,6 +1200,7 @@ private:
c.mov(*addr, CPU.PC + 4);
c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
if (ra) c.or_(*pos_var, 0x2000000 << 2); // crude (check if not LR)
c.cmp(cpu_word(GPR[rt]._u16[6]), 0);
c.cmove(*pos_var, *addr);
c.shr(*pos_var, 2);
@@ -1244,6 +1249,7 @@ private:
do_finalize = true;
c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
if (ra) c.or_(*pos_var, 0x2000000 << 2); // crude (check if not LR)
c.shr(*pos_var, 2);
LOG_OPCODE();
}
@@ -1267,6 +1273,7 @@ private:
c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3]));
c.mov(cpu_dword(GPR[rt]._u32[3]), CPU.PC + 4);
c.shr(*pos_var, 2);
c.or_(*pos_var, 0x2000000);
LOG_OPCODE();
}
void IRET(u32 ra)
@@ -1947,11 +1954,10 @@ private:
{
c.mov(*addr, cpu_dword(GPR[ra]._s32[3]));
c.cmp(*addr, cpu_dword(GPR[rb]._s32[3]));
c.mov(*addr, 0);
c.setg(addr->r8());
c.neg(*addr);
c.shl(*addr, 24);
c.mov(*pos_var, (CPU.PC >> 2) + 1);
c.xor_(*pos_var, *addr);
c.or_(*pos_var, *addr);
do_finalize = true;
LOG_OPCODE();
}
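This hunk and the five like it below rework what appears to be the conditional-halt family (HGT/HLGT/HEQ and their immediate forms): a compare followed by setcc/neg/shl builds a branch-free mask, and switching the final xor_ to or_ makes the result fit the new flag encoding. A sketch of the equivalent scalar logic in C++ (the function name is illustrative, not from the source):

#include <cassert>
#include <cstdint>

// Branch-free equivalent of the emitted cmp/setg/neg/shl/or_ sequence:
// the mask is 0 when the condition fails and 0xFF000000 (which contains
// the 0x1000000 halt bit) when it holds.
uint32_t halt_if_greater(int32_t a, int32_t b, uint32_t next_pos)
{
    uint32_t cond = (a > b) ? 1u : 0u;  // setg
    uint32_t mask = (0u - cond) << 24;  // neg + shl 24
    return next_pos | mask;             // or_ (previously xor_)
}

int main()
{
    assert(halt_if_greater(2, 1, 0x41) & 0x1000000); // taken: halt flag set
    assert(halt_if_greater(1, 2, 0x41) == 0x41);     // not taken: plain next position
}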
@@ -2308,11 +2314,10 @@ private:
{
c.mov(*addr, cpu_dword(GPR[ra]._u32[3]));
c.cmp(*addr, cpu_dword(GPR[rb]._u32[3]));
c.mov(*addr, 0);
c.seta(addr->r8());
c.neg(*addr);
c.shl(*addr, 24);
c.mov(*pos_var, (CPU.PC >> 2) + 1);
c.xor_(*pos_var, *addr);
c.or_(*pos_var, *addr);
do_finalize = true;
LOG_OPCODE();
}
@@ -2662,11 +2667,10 @@ private:
{
c.mov(*addr, cpu_dword(GPR[ra]._s32[3]));
c.cmp(*addr, cpu_dword(GPR[rb]._s32[3]));
c.mov(*addr, 0);
c.sete(addr->r8());
c.neg(*addr);
c.shl(*addr, 24);
c.mov(*pos_var, (CPU.PC >> 2) + 1);
c.xor_(*pos_var, *addr);
c.or_(*pos_var, *addr);
do_finalize = true;
LOG_OPCODE();
}
@@ -3324,11 +3328,10 @@ private:
{
c.mov(*addr, cpu_dword(GPR[ra]._s32[3]));
c.cmp(*addr, i10);
c.mov(*addr, 0);
c.setg(addr->r8());
c.neg(*addr);
c.shl(*addr, 24);
c.mov(*pos_var, (CPU.PC >> 2) + 1);
c.xor_(*pos_var, *addr);
c.or_(*pos_var, *addr);
do_finalize = true;
LOG_OPCODE();
}
@@ -3390,11 +3393,10 @@ private:
{
c.mov(*addr, cpu_dword(GPR[ra]._u32[3]));
c.cmp(*addr, i10);
c.mov(*addr, 0);
c.seta(addr->r8());
c.neg(*addr);
c.shl(*addr, 24);
c.mov(*pos_var, (CPU.PC >> 2) + 1);
c.xor_(*pos_var, *addr);
c.or_(*pos_var, *addr);
do_finalize = true;
LOG_OPCODE();
}
@@ -3441,11 +3443,10 @@ private:
{
c.mov(*addr, cpu_dword(GPR[ra]._u32[3]));
c.cmp(*addr, i10);
c.mov(*addr, 0);
c.sete(addr->r8());
c.neg(*addr);
c.shl(*addr, 24);
c.mov(*pos_var, (CPU.PC >> 2) + 1);
c.xor_(*pos_var, *addr);
c.or_(*pos_var, *addr);
do_finalize = true;
LOG_OPCODE();
}
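Taken together, the changes in this file switch the block exit protocol: instead of returning a complemented position on a special exit, a block now returns the next word index with flag bits OR'ed in, 0x1000000 to halt and 0x2000000 to request a code re-check (the dispatcher in the next file strips them). A small sketch of that encoding, with a made-up helper name:

#include <cassert>
#include <cstdint>

constexpr uint32_t HALT_FLAG  = 0x1000000; // stop the SPU thread
constexpr uint32_t CHECK_FLAG = 0x2000000; // re-validate compiled code

// Hypothetical helper mirroring what the emitted code computes.
uint32_t encode_exit(uint32_t next_pc, bool halt, bool check)
{
    uint32_t res = next_pc >> 2;   // word index of the next instruction
    if (halt)  res |= HALT_FLAG;
    if (check) res |= CHECK_FLAG;
    return res;
}

int main()
{
    // STOPD-style exit above: (CPU.PC >> 2) + 1 + 0x2000000
    uint32_t res = encode_exit(0x104, false, true);
    assert(res & CHECK_FLAG);
    res &= ~CHECK_FLAG;  // the dispatcher strips the flag...
    assert(res == 0x41); // ...and resumes at the word index
}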

View file

@@ -20,6 +20,7 @@ SPURecompilerCore::SPURecompilerCore(SPUThread& cpu)
, inter(new SPUInterpreter(cpu))
, CPU(cpu)
, first(true)
, need_check(false)
{
memset(entry, 0, sizeof(entry));
X86CpuInfo inf;
@@ -192,20 +193,26 @@ u8 SPURecompilerCore::DecodeMemory(const u32 address)
{
// check data (hard way)
bool is_valid = true;
//for (u32 i = pos; i < (u32)(entry[pos].count + pos); i++)
//{
// if (entry[i].valid != ls[i])
// {
// is_valid = false;
// break;
// }
//}
if (need_check)
{
for (u32 i = 0; i < 0x10000; i++)
{
if (entry[i].valid && entry[i].valid != ls[i])
{
is_valid = false;
break;
}
}
need_check = false;
}
// invalidate if necessary
if (!is_valid)
{
for (u32 i = 0; i < 0x10000; i++)
{
if (entry[i].pointer &&
if (!entry[i].pointer) continue;
if (!entry[i].valid || entry[i].valid != ls[i] ||
i + (u32)entry[i].count > (u32)pos &&
i < (u32)pos + (u32)entry[pos].count)
{
@@ -214,6 +221,11 @@ u8 SPURecompilerCore::DecodeMemory(const u32 address)
//RtlDeleteFunctionTable(&entry[i].info);
#endif
entry[i].pointer = nullptr;
for (u32 j = i; j < i + (u32)entry[i].count; j++)
{
entry[j].valid = 0;
}
//need_check = true;
}
}
//LOG_ERROR(Log::SPU, "SPURecompilerCore::DecodeMemory(ls_addr=0x%x): code has changed", pos * sizeof(u32));
@@ -254,11 +266,17 @@ u8 SPURecompilerCore::DecodeMemory(const u32 address)
u32 res = pos;
res = func(cpu, vm::get_ptr<void>(m_offset), imm_table.data(), &g_imm_table);
if (res > 0xffff)
if (res & 0x1000000)
{
CPU.SPU.Status.SetValue(SPU_STATUS_STOPPED_BY_HALT);
CPU.Stop();
res = ~res;
res &= ~0x1000000;
}
if (res & 0x2000000)
{
need_check = true;
res &= ~0x2000000;
}
if (did_compile)
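The rewritten scan makes invalidation lazier: each compiled entry caches the opcode it was built from in valid, and a full pass over all 0x10000 words happens only when a block exits with the 0x2000000 flag. A simplified sketch of the idea (struct and function names are placeholders, and the overlap handling above is omitted):

#include <cstdint>

struct RecEntry { void* pointer; uint32_t valid; uint16_t count; };

// Rescan the whole LS once, dropping any compiled block whose cached
// first opcode no longer matches local storage.
void check_and_invalidate(RecEntry* entry, const uint32_t* ls, bool& need_check)
{
    if (!need_check) return;
    need_check = false;
    for (uint32_t i = 0; i < 0x10000; i++)
    {
        if (!entry[i].pointer) continue;
        if (entry[i].valid && entry[i].valid != ls[i])
        {
            entry[i].pointer = nullptr; // discard the compiled block
            for (uint32_t j = i; j < i + entry[i].count; j++)
                entry[j].valid = 0;     // forget its cached opcodes
        }
    }
}

int main()
{
    static RecEntry entry[0x10000]{};
    static uint32_t ls[0x10000]{};
    bool need_check = true;
    check_and_invalidate(entry, ls, need_check); // no-op on an empty table
}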

View file

@@ -358,7 +358,7 @@ public:
}
else
{
InterlockedOr((volatile u64*)m_indval, ((u64)value << 32) | 1);
InterlockedOr(&m_indval, ((u64)value << 32) | 1);
}
}
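The cast disappears because m_indval is now a properly typed 64-bit atomic; the idiom itself packs the payload into the high 32 bits and uses bit 0 as an occupied flag, so one atomic OR publishes both. A sketch with std::atomic standing in for the Interlocked intrinsic:

#include <atomic>
#include <cassert>
#include <cstdint>

std::atomic<uint64_t> indval{0};

void push_value(uint32_t value)
{
    // value in the high half, bit 0 says "a value is present"
    indval.fetch_or(((uint64_t)value << 32) | 1);
}

int main()
{
    push_value(0xBEEF);
    const uint64_t v = indval.load();
    assert((v & 1) && (uint32_t)(v >> 32) == 0xBEEF);
}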

View file

@@ -25,34 +25,34 @@ namespace vm
template<typename T>
class _atomic_base
{
volatile T data;
typedef typename _to_atomic<T, sizeof(T)>::type atomic_type;
atomic_type data;
public:
// atomically compare data with cmp, replace with exch if equal, return previous data value anyway
__forceinline const T compare_and_swap(const T& cmp, const T& exch) volatile
{
const atomic_type res = InterlockedCompareExchange((volatile atomic_type*)&data, (atomic_type&)exch, (atomic_type&)cmp);
const atomic_type res = InterlockedCompareExchange(&data, (atomic_type&)(exch), (atomic_type&)(cmp));
return (T&)res;
}
// atomically compare data with cmp, replace with exch if equal, return true if data was replaced
__forceinline bool compare_and_swap_test(const T& cmp, const T& exch) volatile
{
return InterlockedCompareExchange((volatile atomic_type*)&data, (atomic_type&)exch, (atomic_type&)cmp) == (atomic_type&)cmp;
return InterlockedCompareExchange(&data, (atomic_type&)(exch), (atomic_type&)(cmp)) == (atomic_type&)(cmp);
}
// read data with memory barrier
__forceinline const T read_sync() const volatile
{
const atomic_type res = InterlockedCompareExchange((volatile atomic_type*)&data, 0, 0);
const atomic_type res = InterlockedCompareExchange(const_cast<volatile atomic_type*>(&data), 0, 0);
return (T&)res;
}
// atomically replace data with exch, return previous data value
__forceinline const T exchange(const T& exch) volatile
{
const atomic_type res = InterlockedExchange((volatile atomic_type*)&data, (atomic_type&)exch);
const atomic_type res = InterlockedExchange(&data, (atomic_type&)(exch));
return (T&)res;
}
@@ -65,7 +65,7 @@ namespace vm
// write data without memory barrier
__forceinline void write_relaxed(const T& value) volatile
{
(T&)data = value;
data = (atomic_type&)(value);
}
// perform atomic operation on data
@@ -81,7 +81,7 @@ namespace vm
}
// perform atomic operation on data with special exit condition (if intermediate result != proceed_value)
template<typename RT, typename FT> __forceinline RT atomic_op(const RT& proceed_value, const FT atomic_proc) volatile
template<typename RT, typename FT> __forceinline RT atomic_op(const RT proceed_value, const FT atomic_proc) volatile
{
while (true)
{
@@ -95,37 +95,37 @@ namespace vm
__forceinline const T _or(const T& right) volatile
{
const atomic_type res = InterlockedOr((volatile atomic_type*)&data, (atomic_type&)right);
const atomic_type res = InterlockedOr(&data, (atomic_type&)(right));
return (T&)res;
}
__forceinline const T _and(const T& right) volatile
{
const atomic_type res = InterlockedAnd((volatile atomic_type*)&data, (atomic_type&)right);
const atomic_type res = InterlockedAnd(&data, (atomic_type&)(right));
return (T&)res;
}
__forceinline const T _xor(const T& right) volatile
{
const atomic_type res = InterlockedXor((volatile atomic_type*)&data, (atomic_type&)right);
const atomic_type res = InterlockedXor(&data, (atomic_type&)(right));
return (T&)res;
}
__forceinline const T operator |= (const T& right) volatile
{
const atomic_type res = InterlockedOr((volatile atomic_type*)&data, (atomic_type&)right) | (atomic_type&)right;
const atomic_type res = InterlockedOr(&data, (atomic_type&)(right)) | (atomic_type&)(right);
return (T&)res;
}
__forceinline const T operator &= (const T& right) volatile
{
const atomic_type res = InterlockedAnd((volatile atomic_type*)&data, (atomic_type&)right) & (atomic_type&)right;
const atomic_type res = InterlockedAnd(&data, (atomic_type&)(right)) & (atomic_type&)(right);
return (T&)res;
}
__forceinline const T operator ^= (const T& right) volatile
{
const atomic_type res = InterlockedXor((volatile atomic_type*)&data, (atomic_type&)right) ^ (atomic_type&)right;
const atomic_type res = InterlockedXor(&data, (atomic_type&)(right)) ^ (atomic_type&)(right);
return (T&)res;
}
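The pattern these wrappers are built for, and which the cellSync rewrite below leans on, is read-modify-CAS: copy the value out, edit the copy, and publish it with compare_and_swap_test in a retry loop. A self-contained analogue using std::atomic in place of the Interlocked* intrinsics:

#include <atomic>
#include <cassert>
#include <cstdint>

int main()
{
    std::atomic<uint64_t> data{5};

    uint64_t old_value = data.load(); // read_sync analogue
    while (true)
    {
        const uint64_t new_value = old_value + 1; // modify a local copy
        // compare_exchange_weak reloads old_value on failure, so just retry
        if (data.compare_exchange_weak(old_value, new_value)) break;
    }
    assert(data.load() == 6);
}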

View file

@@ -20,17 +20,86 @@ extern u32 libsre;
extern u32 libsre_rtoc;
#endif
s64 spursCreateLv2EventQueue(vm::ptr<CellSpurs> spurs, u32& queue_id, u32 arg3, u32 arg4, u64 name_u64)
s64 spursAttachLv2EventQueue(vm::ptr<CellSpurs> spurs, u32 queue, vm::ptr<u8> port, s32 isDynamic, bool wasCreated)
{
#ifdef PRX_DEBUG_XXX
return cb_call<s32, vm::ptr<CellSpurs>, u32, vm::ptr<u8>, s32, bool>(GetCurrentPPUThread(), libsre + 0xAE34, libsre_rtoc,
spurs, queue, port, isDynamic, wasCreated);
#else
if (!spurs || !port)
{
return CELL_SPURS_CORE_ERROR_NULL_POINTER;
}
if (spurs.addr() % 128)
{
return CELL_SPURS_CORE_ERROR_ALIGN;
}
if (spurs->m.unk21.ToBE())
{
return CELL_SPURS_CORE_ERROR_STAT;
}
u32 unk1 = 0;
#ifdef PRX_DEBUG
unk1 = cb_call<u32>(GetCurrentPPUThread(), libsre + 0x10900, libsre_rtoc);
#endif
u8 _port = 0x3f;
u8 port_start = 0x10;
u64 port_mask = 0;
if (isDynamic == 0)
{
_port = *port;
if (_port > 0x3f)
{
return CELL_SPURS_CORE_ERROR_INVAL;
}
if (unk1 <= 0x17ffff && _port > 0xf)
{
return CELL_SPURS_CORE_ERROR_PERM;
}
port_start = _port;
}
for (u32 i = port_start + 1; i < _port; i++)
{
port_mask |= 1ull << (i - 1);
}
if (s32 res = sys_spu_thread_group_connect_event_all_threads(spurs->m.spuTG, queue, port_mask, port))
{
if (res == CELL_EISCONN)
{
return CELL_SPURS_CORE_ERROR_BUSY;
}
return res;
}
if (!wasCreated)
{
spurs->m.spups |= be_t<u64>::make(1ull << *port); // atomic bitwise or
}
return CELL_OK;
#endif
}
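The "atomic bitwise or" noted above treats spups as a 64-bit bitmap of handed-out SPU ports. A sketch of that bookkeeping (std::atomic stands in for the vm::atomic<u64> field, and the helper name is invented):

#include <atomic>
#include <cassert>
#include <cstdint>

std::atomic<uint64_t> spups{0}; // bit N set = SPU port N is taken

bool claim_port(uint8_t port)
{
    const uint64_t bit = 1ull << port;
    return !(spups.fetch_or(bit) & bit); // false if already taken
}

int main()
{
    assert(claim_port(0x11));  // first claim succeeds
    assert(!claim_port(0x11)); // second claim sees the bit set
}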
s64 spursCreateLv2EventQueue(vm::ptr<CellSpurs> spurs, u32& queue_id, vm::ptr<u8> port, s32 size, u64 name_u64)
{
#ifdef PRX_DEBUG_XXX
vm::var<be_t<u32>> queue;
s32 res = cb_call<s32, vm::ptr<CellSpurs>, vm::ptr<be_t<u32>>, u32, u32, u32>(GetCurrentPPUThread(), libsre + 0xB14C, libsre_rtoc,
spurs, queue, arg3, arg4, vm::read32(libsre_rtoc - 0x7E2C));
s32 res = cb_call<s32, vm::ptr<CellSpurs>, vm::ptr<be_t<u32>>, vm::ptr<u8>, s32, u32>(GetCurrentPPUThread(), libsre + 0xB14C, libsre_rtoc,
spurs, queue, port, size, vm::read32(libsre_rtoc - 0x7E2C));
queue_id = queue->ToLE();
return res;
#else
// TODO
queue_id = event_queue_create(SYS_SYNC_PRIORITY, SYS_PPU_QUEUE, *(u64*)"+QUEUE+", 0, 1);
queue_id = event_queue_create(SYS_SYNC_FIFO, SYS_PPU_QUEUE, name_u64, 0, size);
if (!queue_id)
{
return CELL_EAGAIN; // rough
}
assert(spursAttachLv2EventQueue(spurs, queue_id, port, 1, true) == CELL_OK);
return CELL_OK;
#endif
}
@@ -108,7 +177,7 @@ s64 spursInit(
for (u32 i = 0; i < 0x10; i++)
{
sem = semaphore_create(0, 1, SYS_SYNC_PRIORITY, *(u64*)"_spuWkl");
assert(sem && ~sem); // should rollback if semaphore creating failed and return the error
assert(sem && ~sem); // should rollback if semaphore creation failed and return the error
spurs->m.sub1[i].sem = sem;
}
if (isSecond)
@@ -182,7 +251,7 @@ s64 spursInit(
spurs->m.ppuPriority = ppuPriority;
u32 queue;
assert(spursCreateLv2EventQueue(spurs, queue, spurs.addr() + 0xc9, 0x2a, *(u64*)"_spuPrv") == CELL_OK);
assert(spursCreateLv2EventQueue(spurs, queue, vm::ptr<u8>::make(spurs.addr() + 0xc9), 0x2a, *(u64*)"_spuPrv") == CELL_OK);
spurs->m.queue = queue;
u32 port = event_port_create(0);
@@ -618,13 +687,13 @@ s64 cellSpursSetPreemptionVictimHints(vm::ptr<CellSpurs> spurs, vm::ptr<const bo
s64 cellSpursAttachLv2EventQueue(vm::ptr<CellSpurs> spurs, u32 queue, vm::ptr<u8> port, s32 isDynamic)
{
#ifdef PRX_DEBUG
cellSpurs->Warning("cellSpursAttachLv2EventQueue(spurs_addr=0x%x, queue=%d, port_addr=0x%x, isDynamic=%d)",
spurs.addr(), queue, port.addr(), isDynamic);
#ifdef PRX_DEBUG_XXX
return GetCurrentPPUThread().FastCall2(libsre + 0xAFE0, libsre_rtoc);
#else
UNIMPLEMENTED_FUNC(cellSpurs);
return CELL_OK;
return spursAttachLv2EventQueue(spurs, queue, port, isDynamic, false);
#endif
}

View file

@@ -185,7 +185,7 @@ struct CellSpurs
u8 unknown0[0xB0 - 0x78];
be_t<u32> unk0; // 0x0B0
u8 unknown2[0xC0 - 0xB4];
u8 unk6[0x10]; // 0x0C0
u8 unk6[0x10]; // 0x0C0 (SPU port at 0xc9)
u8 unknown1[0x100 - 0x0D0];
_sub_str1 sub1[0x10]; // 0x100
be_t<u64> unk22; // 0x900
@@ -219,7 +219,7 @@ struct CellSpurs
be_t<u32> unk5; // 0xD9C
be_t<u32> revision; // 0xDA0
be_t<u32> sdkVersion; // 0xDA4
u8 unknown8[0xDB0 - 0xDA8];
vm::atomic<u64> spups;// 0xDA8
sys_lwmutex_t mutex; // 0xDB0
sys_lwcond_t cond; // 0xDC8
u8 unknown9[0xF00 - 0xDD0];

View file

@@ -923,17 +923,16 @@ s32 syncLFQueueInitialize(vm::ptr<CellSyncLFQueue> queue, vm::ptr<u8> buffer, u3
u32 old_value;
while (true)
{
const u32 old_data = queue->m_data();
CellSyncLFQueue new_data;
new_data.m_data() = old_data;
const auto old = queue->init.read_relaxed();
auto init = old;
if (old_data)
if (old.ToBE())
{
if (sdk_ver > 0x17ffff && old_data != se32(2))
if (sdk_ver > 0x17ffff && old != 2)
{
return CELL_SYNC_ERROR_STAT;
}
old_value = old_data;
old_value = old.ToLE();
}
else
{
@@ -948,14 +947,14 @@ s32 syncLFQueueInitialize(vm::ptr<CellSyncLFQueue> queue, vm::ptr<u8> buffer, u3
}
}
}
new_data.m_data() = se32(1);
old_value = se32(1);
init = 1;
old_value = 1;
}
if (InterlockedCompareExchange(&queue->m_data(), new_data.m_data(), old_data) == old_data) break;
if (queue->init.compare_and_swap_test(old, init)) break;
}
if (old_value == se32(2))
if (old_value == 2)
{
if ((u32)queue->m_size != size || (u32)queue->m_depth != depth || queue->m_buffer.addr() != buffer.addr())
{
@@ -975,12 +974,11 @@ s32 syncLFQueueInitialize(vm::ptr<CellSyncLFQueue> queue, vm::ptr<u8> buffer, u3
syncLFQueueInit(queue, buffer, size, depth, direction, eaSignal);
// prx: sync, zeroize u32 at 0x2c offset
InterlockedCompareExchange(&queue->m_data(), 0, 0);
queue->m_data() = 0;
queue->init.exchange({});
}
// prx: sync
InterlockedCompareExchange(&queue->m_data(), 0, 0);
queue->init.read_sync();
return CELL_OK;
#endif
}
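The init field now behaves as a small CAS-guarded state machine: 0 is untouched, 1 is set by the thread performing set-up, and 2 marks a fully initialized queue (the only value newer SDKs tolerate on re-initialization). A minimal sketch of the claim step:

#include <atomic>
#include <cassert>
#include <cstdint>

std::atomic<uint32_t> init{0}; // 0 = untouched, 1 = initializing, 2 = done

bool try_begin_init()
{
    uint32_t expected = 0;
    return init.compare_exchange_strong(expected, 1); // only one winner
}

int main()
{
    assert(try_begin_init());  // first caller claims initialization
    assert(!try_begin_init()); // later callers see it already claimed
}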
@@ -1011,28 +1009,27 @@ s32 syncLFQueueGetPushPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32
return -1;
}
const u64 old_data = InterlockedCompareExchange(&queue->m_push1(), 0, 0);
CellSyncLFQueue new_queue;
new_queue.m_push1() = old_data;
const auto old = queue->push1.read_sync();
auto push = old;
if (var1)
{
new_queue.m_h7 = 0;
push.m_h7 = 0;
}
if (isBlocking && useEventQueue && *(u32*)queue->m_bs == -1)
{
return CELL_SYNC_ERROR_STAT;
}
s32 var2 = (s32)(s16)new_queue.m_h8;
s32 var2 = (s32)(s16)push.m_h8;
s32 res;
if (useEventQueue && ((s32)(u16)new_queue.m_h5 != var2 || new_queue.m_h7.ToBE() != 0))
if (useEventQueue && ((s32)push.m_h5 != var2 || push.m_h7.ToBE() != 0))
{
res = CELL_SYNC_ERROR_BUSY;
}
else
{
var2 -= (s32)(u16)queue->m_h1;
var2 -= (s32)(u16)queue->pop1.read_relaxed().m_h1;
if (var2 < 0)
{
var2 += depth * 2;
@@ -1040,21 +1037,21 @@ s32 syncLFQueueGetPushPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32
if (var2 < depth)
{
pointer = (s16)new_queue.m_h8;
pointer = (s16)push.m_h8;
if (pointer + 1 >= depth * 2)
{
new_queue.m_h8 = 0;
push.m_h8 = 0;
}
else
{
new_queue.m_h8++;
push.m_h8++;
}
res = CELL_OK;
}
else if (!isBlocking)
{
res = CELL_SYNC_ERROR_AGAIN;
if (!new_queue.m_h7.ToBE() || res)
if (!push.m_h7.ToBE() || res)
{
return res;
}
@@ -1067,7 +1064,7 @@ s32 syncLFQueueGetPushPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32
else
{
res = CELL_OK;
new_queue.m_h7 = 3;
push.m_h7 = 3;
if (isBlocking != 3)
{
break;
@@ -1075,9 +1072,9 @@ s32 syncLFQueueGetPushPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32
}
}
if (InterlockedCompareExchange(&queue->m_push1(), new_queue.m_push1(), old_data) == old_data)
if (queue->push1.compare_and_swap_test(old, push))
{
if (!new_queue.m_h7.ToBE() || res)
if (!push.m_h7.ToBE() || res)
{
return res;
}
@@ -1138,19 +1135,19 @@ s32 syncLFQueueCompletePushPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer,
while (true)
{
const u32 old_data = InterlockedCompareExchange(&queue->m_push2(), 0, 0);
CellSyncLFQueue new_queue;
new_queue.m_push2() = old_data;
CellSyncLFQueue new_;
new_.m_push2() = old_data;
const u32 old_data2 = queue->m_push3();
new_queue.m_push3() = old_data2;
const auto old2 = queue->push3.read_relaxed();
auto push = old2;
s32 var1 = pointer - (u16)new_queue.m_h5;
s32 var1 = pointer - (u16)push.m_h5;
if (var1 < 0)
{
var1 += depth * 2;
}
s32 var2 = (s32)(s16)queue->m_h4 - (s32)(u16)queue->m_h1;
s32 var2 = (s32)(s16)queue->pop1.read_relaxed().m_h4 - (s32)(u16)queue->pop1.read_relaxed().m_h1;
if (var2 < 0)
{
var2 += depth * 2;
@@ -1166,7 +1163,7 @@ s32 syncLFQueueCompletePushPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer,
{
var9_ = 1 << var9_;
}
s32 var9 = ~(var9_ | (u16)new_queue.m_h6);
s32 var9 = ~(var9_ | (u16)push.m_h6);
// count leading zeros in u16
{
u16 v = var9;
@@ -1179,7 +1176,7 @@ s32 syncLFQueueCompletePushPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer,
}
}
s32 var5 = (s32)(u16)new_queue.m_h6 | var9_;
s32 var5 = (s32)(u16)push.m_h6 | var9_;
if (var9 & 0x30)
{
var5 = 0;
@@ -1189,13 +1186,13 @@ s32 syncLFQueueCompletePushPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer,
var5 <<= var9;
}
s32 var3 = (u16)new_queue.m_h5 + var9;
s32 var3 = (u16)push.m_h5 + var9;
if (var3 >= depth * 2)
{
var3 -= depth * 2;
}
u16 pack = new_queue.m_hs[0]; // three packed 5-bit fields
u16 pack = new_.m_hs[0]; // three packed 5-bit fields
s32 var4 = ((pack >> 10) & 0x1f) - ((pack >> 5) & 0x1f);
if (var4 < 0)
@@ -1233,7 +1230,7 @@ s32 syncLFQueueCompletePushPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer,
var12 = (var12 + 1) << 10;
}
new_queue.m_hs[0] = (pack & 0x83ff) | var12;
new_.m_hs[0] = (pack & 0x83ff) | var12;
var6 = (u16)queue->m_hs[1 + 2 * var11];
}
else
@@ -1241,14 +1238,15 @@ s32 syncLFQueueCompletePushPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer,
var6 = -1;
}
s32 var7 = (var3 << 16) | (var5 & 0xffff);
push.m_h5 = (u16)var3;
push.m_h6 = (u16)var5;
if (InterlockedCompareExchange(&queue->m_push2(), new_queue.m_push2(), old_data) == old_data)
if (InterlockedCompareExchange(&queue->m_push2(), new_.m_push2(), old_data) == old_data)
{
assert(var2 + var4 < 16);
if (var6 != -1)
{
bool exch = InterlockedCompareExchange(&queue->m_push3(), re32(var7), old_data2) == old_data2;
bool exch = queue->push3.compare_and_swap_test(old2, push);
assert(exch);
if (exch)
{
@@ -1261,7 +1259,7 @@ s32 syncLFQueueCompletePushPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer,
pack = queue->m_hs[0];
if ((pack & 0x1f) == ((pack >> 10) & 0x1f))
{
if (InterlockedCompareExchange(&queue->m_push3(), re32(var7), old_data2) == old_data2)
if (queue->push3.compare_and_swap_test(old2, push))
{
return CELL_OK;
}
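The m_hs header words each pack three 5-bit counters, which the code above extracts with shifts and rebuilds through the 0x83ff mask (clearing bits 10-14 before inserting the updated field). Illustrative accessors, with field roles guessed from the arithmetic:

#include <cassert>
#include <cstdint>

constexpr uint16_t get_lo (uint16_t pack) { return pack & 0x1f; }         // bits 0-4
constexpr uint16_t get_mid(uint16_t pack) { return (pack >> 5) & 0x1f; }  // bits 5-9
constexpr uint16_t get_hi (uint16_t pack) { return (pack >> 10) & 0x1f; } // bits 10-14

int main()
{
    const uint16_t pack = (7 << 10) | (3 << 5) | 1;
    assert(get_hi(pack) == 7 && get_mid(pack) == 3 && get_lo(pack) == 1);
    // 0x83ff clears bits 10-14, as in new_.m_hs[0] = (pack & 0x83ff) | var12
    assert(get_hi(pack & 0x83ff) == 0);
}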
@@ -1409,28 +1407,27 @@ s32 syncLFQueueGetPopPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32 i
return -1;
}
const u64 old_data = InterlockedCompareExchange(&queue->m_pop1(), 0, 0);
CellSyncLFQueue new_queue;
new_queue.m_pop1() = old_data;
const auto old = queue->pop1.read_sync();
auto pop = old;
if (var1)
{
new_queue.m_h3 = 0;
pop.m_h3 = 0;
}
if (isBlocking && useEventQueue && *(u32*)queue->m_bs == -1)
{
return CELL_SYNC_ERROR_STAT;
}
s32 var2 = (s32)(s16)new_queue.m_h4;
s32 var2 = (s32)(s16)pop.m_h4;
s32 res;
if (useEventQueue && ((s32)(u16)new_queue.m_h1 != var2 || new_queue.m_h3.ToBE() != 0))
if (useEventQueue && ((s32)(u16)pop.m_h1 != var2 || pop.m_h3.ToBE() != 0))
{
res = CELL_SYNC_ERROR_BUSY;
}
else
{
var2 = (s32)(u16)queue->m_h5 - var2;
var2 = (s32)(u16)queue->push1.read_relaxed().m_h5 - var2;
if (var2 < 0)
{
var2 += depth * 2;
@@ -1438,21 +1435,21 @@ s32 syncLFQueueGetPopPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32 i
if (var2 > 0)
{
pointer = (s16)new_queue.m_h4;
pointer = (s16)pop.m_h4;
if (pointer + 1 >= depth * 2)
{
new_queue.m_h4 = 0;
pop.m_h4 = 0;
}
else
{
new_queue.m_h4++;
pop.m_h4++;
}
res = CELL_OK;
}
else if (!isBlocking)
{
res = CELL_SYNC_ERROR_AGAIN;
if (!new_queue.m_h3.ToBE() || res)
if (!pop.m_h3.ToBE() || res)
{
return res;
}
@@ -1465,7 +1462,7 @@ s32 syncLFQueueGetPopPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32 i
else
{
res = CELL_OK;
new_queue.m_h3 = 3;
pop.m_h3 = 3;
if (isBlocking != 3)
{
break;
@@ -1473,9 +1470,9 @@ s32 syncLFQueueGetPopPointer(vm::ptr<CellSyncLFQueue> queue, s32& pointer, u32 i
}
}
if (InterlockedCompareExchange(&queue->m_pop1(), new_queue.m_pop1(), old_data) == old_data)
if (queue->pop1.compare_and_swap_test(old, pop))
{
if (!new_queue.m_h3.ToBE() || res)
if (!pop.m_h3.ToBE() || res)
{
return res;
}
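Throughout this file the raw-offset InterlockedCompareExchange calls give way to one typed pattern: read a whole 64-bit header struct, edit a copy, publish with a single CAS. A standalone analogue (std::atomic and memcpy stand in for vm::atomic and the big-endian wrappers):

#include <atomic>
#include <cassert>
#include <cstdint>
#include <cstring>

struct pop1_t { uint16_t m_h1, m_h2, m_h3, m_h4; }; // 8 bytes, CAS'd as one word

std::atomic<uint64_t> pop1_storage{0};

static uint64_t to_u64(pop1_t v)     { uint64_t r; std::memcpy(&r, &v, 8); return r; }
static pop1_t   from_u64(uint64_t r) { pop1_t v; std::memcpy(&v, &r, 8); return v; }

int main()
{
    while (true)
    {
        uint64_t old_raw = pop1_storage.load(); // read_sync analogue
        pop1_t pop = from_u64(old_raw);         // auto pop = old;
        pop.m_h4++;                             // advance the pop pointer
        if (pop1_storage.compare_exchange_strong(old_raw, to_u64(pop)))
            break;                              // compare_and_swap_test
    }
    assert(from_u64(pop1_storage.load()).m_h4 == 1);
}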
@@ -1536,19 +1533,19 @@ s32 syncLFQueueCompletePopPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer, c
while (true)
{
const u32 old_data = InterlockedCompareExchange(&queue->m_pop2(), 0, 0);
CellSyncLFQueue new_queue;
new_queue.m_pop2() = old_data;
CellSyncLFQueue new_;
new_.m_pop2() = old_data;
const u32 old_data2 = queue->m_pop3();
new_queue.m_pop3() = old_data2;
const auto old2 = queue->pop3.read_relaxed();
auto pop = old2;
s32 var1 = pointer - (u16)new_queue.m_h1;
s32 var1 = pointer - (u16)pop.m_h1;
if (var1 < 0)
{
var1 += depth * 2;
}
s32 var2 = (s32)(s16)queue->m_h8 - (s32)(u16)queue->m_h5;
s32 var2 = (s32)(s16)queue->push1.read_relaxed().m_h8 - (s32)(u16)queue->push1.read_relaxed().m_h5;
if (var2 < 0)
{
var2 += depth * 2;
@@ -1564,7 +1561,7 @@ s32 syncLFQueueCompletePopPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer, c
{
var9_ = 1 << var9_;
}
s32 var9 = ~(var9_ | (u16)new_queue.m_h2);
s32 var9 = ~(var9_ | (u16)pop.m_h2);
// count leading zeros in u16
{
u16 v = var9;
@@ -1577,7 +1574,7 @@ s32 syncLFQueueCompletePopPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer, c
}
}
s32 var5 = (s32)(u16)new_queue.m_h2 | var9_;
s32 var5 = (s32)(u16)pop.m_h2 | var9_;
if (var9 & 0x30)
{
var5 = 0;
@@ -1587,13 +1584,13 @@ s32 syncLFQueueCompletePopPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer, c
var5 <<= var9;
}
s32 var3 = (u16)new_queue.m_h1 + var9;
s32 var3 = (u16)pop.m_h1 + var9;
if (var3 >= depth * 2)
{
var3 -= depth * 2;
}
u16 pack = new_queue.m_hs[16]; // three packed 5-bit fields
u16 pack = new_.m_hs[16]; // three packed 5-bit fields
s32 var4 = ((pack >> 10) & 0x1f) - ((pack >> 5) & 0x1f);
if (var4 < 0)
@@ -1635,17 +1632,18 @@ s32 syncLFQueueCompletePopPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer, c
var12 = (var12 + 1) << 10;
}
new_queue.m_hs[0] = (pack & 0x83ff) | var12;
new_.m_hs[0] = (pack & 0x83ff) | var12;
var6 = (u16)queue->m_hs[17 + 2 * var11];
}
s32 var7 = (var3 << 16) | (var5 & 0xffff);
pop.m_h1 = (u16)var3;
pop.m_h2 = (u16)var5;
if (InterlockedCompareExchange(&queue->m_pop2(), new_queue.m_pop2(), old_data) == old_data)
if (InterlockedCompareExchange(&queue->m_pop2(), new_.m_pop2(), old_data) == old_data)
{
if (var6 != -1)
{
bool exch = InterlockedCompareExchange(&queue->m_pop3(), re32(var7), old_data2) == old_data2;
bool exch = queue->pop3.compare_and_swap_test(old2, pop);
assert(exch);
if (exch)
{
@@ -1658,7 +1656,7 @@ s32 syncLFQueueCompletePopPointer(vm::ptr<CellSyncLFQueue> queue, s32 pointer, c
pack = queue->m_hs[16];
if ((pack & 0x1f) == ((pack >> 10) & 0x1f))
{
if (InterlockedCompareExchange(&queue->m_pop3(), re32(var7), old_data2) == old_data2)
if (queue->pop3.compare_and_swap_test(old2, pop))
{
return CELL_OK;
}
@@ -1798,12 +1796,10 @@ s32 cellSyncLFQueueClear(vm::ptr<CellSyncLFQueue> queue)
// TODO: optimize if possible
while (true)
{
const u64 old_data = InterlockedCompareExchange(&queue->m_pop1(), 0, 0);
CellSyncLFQueue new_queue;
new_queue.m_pop1() = old_data;
const auto old = queue->pop1.read_sync();
auto pop = old;
const u64 new_data = queue->m_push1();
new_queue.m_push1() = new_data;
const auto push = queue->push1.read_relaxed();
s32 var1, var2;
if (queue->m_direction.ToBE() != se32(CELL_SYNC_QUEUE_ANY2ANY))
@@ -1812,19 +1808,24 @@ s32 cellSyncLFQueueClear(vm::ptr<CellSyncLFQueue> queue)
}
else
{
var1 = (u16)new_queue.m_h7;
var2 = (u16)new_queue.m_h3;
var1 = (u16)push.m_h7;
var2 = (u16)pop.m_h3;
}
if ((s32)(s16)new_queue.m_h4 != (s32)(u16)new_queue.m_h1 ||
(s32)(s16)new_queue.m_h8 != (s32)(u16)new_queue.m_h5 ||
if ((s32)(s16)pop.m_h4 != (s32)(u16)pop.m_h1 ||
(s32)(s16)push.m_h8 != (s32)(u16)push.m_h5 ||
((var2 >> 10) & 0x1f) != (var2 & 0x1f) ||
((var1 >> 10) & 0x1f) != (var1 & 0x1f))
{
return CELL_SYNC_ERROR_BUSY;
}
if (InterlockedCompareExchange(&queue->m_pop1(), new_data, old_data) == old_data) break;
pop.m_h1 = push.m_h5;
pop.m_h2 = push.m_h6;
pop.m_h3 = push.m_h7;
pop.m_h4 = push.m_h8;
if (queue->pop1.compare_and_swap_test(old, pop)) break;
}
return CELL_OK;
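The rewritten clear no longer copies the raw push word over the pop word; it builds a new pop header whose fields mirror the push header, which is what leaves zero readable elements. Sketched with plain structs (the field correspondence is taken from the four assignments above):

#include <cassert>
#include <cstdint>

struct pop1_t  { uint16_t m_h1, m_h2, m_h3, m_h4; };
struct push1_t { uint16_t m_h5, m_h6, m_h7, m_h8; };

// After the copy, pop and push pointers coincide: the ring reads empty.
pop1_t clear_pop(push1_t push)
{
    return pop1_t{ push.m_h5, push.m_h6, push.m_h7, push.m_h8 };
}

int main()
{
    const push1_t push{ 5, 0, 0, 5 };
    const pop1_t pop = clear_pop(push);
    assert(pop.m_h1 == push.m_h5 && pop.m_h4 == push.m_h8);
}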
@@ -1846,12 +1847,12 @@ s32 cellSyncLFQueueSize(vm::ptr<CellSyncLFQueue> queue, vm::ptr<be_t<u32>> size)
// TODO: optimize if possible
while (true)
{
const u32 old_data = InterlockedCompareExchange(&queue->m_pop3(), 0, 0);
const auto old = queue->pop3.read_sync();
u32 var1 = (u16)queue->m_h1;
u32 var2 = (u16)queue->m_h5;
u32 var1 = (u16)queue->pop1.read_relaxed().m_h1;
u32 var2 = (u16)queue->push1.read_relaxed().m_h5;
if (InterlockedCompareExchange(&queue->m_pop3(), old_data, old_data) == old_data)
if (queue->pop3.compare_and_swap_test(old, old))
{
if (var1 <= var2)
{

View file

@@ -97,11 +97,6 @@ enum CellSyncQueueDirection : u32 // CellSyncLFQueueDirection
struct CellSyncLFQueue
{
struct init_t
{
be_t<u32> m_sync;
};
struct pop1_t
{
be_t<u16> m_h1;
@@ -110,6 +105,12 @@ struct CellSyncLFQueue
be_t<u16> m_h4;
};
struct pop3_t
{
be_t<u16> m_h1;
be_t<u16> m_h2;
};
struct push1_t
{
be_t<u16> m_h5;
@@ -118,72 +119,44 @@ struct CellSyncLFQueue
be_t<u16> m_h8;
};
union
struct push3_t
{
struct
{
vm::atomic<pop1_t> pop1;
vm::atomic<push1_t> push1;
};
struct
{
be_t<u16> m_h1; // 0x0
be_t<u16> m_h2; // 0x2
be_t<u16> m_h3; // 0x4
be_t<u16> m_h4; // 0x6
be_t<u16> m_h5; // 0x8
be_t<u16> m_h6; // 0xA
be_t<u16> m_h7; // 0xC
be_t<u16> m_h8; // 0xE
};
be_t<u16> m_h5;
be_t<u16> m_h6;
};
union
{
vm::atomic<pop1_t> pop1;
vm::atomic<pop3_t> pop3;
};
union
{
vm::atomic<push1_t> push1;
vm::atomic<push3_t> push3;
};
be_t<u32> m_size; // 0x10
be_t<u32> m_depth; // 0x14
vm::bptr<u8, 1, u64> m_buffer; // 0x18
u8 m_bs[4]; // 0x20
be_t<CellSyncQueueDirection> m_direction; // 0x24
be_t<u32> m_v1; // 0x28
vm::atomic<init_t> init; // 0x2C
vm::atomic<u32> init; // 0x2C
be_t<u16> m_hs[32]; // 0x30
vm::bptr<void, 1, u64> m_eaSignal; // 0x70
be_t<u32> m_v2; // 0x78
be_t<u32> m_v3; // 0x7C
volatile u32& m_data()
{
return *reinterpret_cast<u32*>((u8*)this + 0x2c);
}
volatile u64& m_push1()
{
return *reinterpret_cast<u64*>((u8*)this + 0x8);
}
volatile u32& m_push2()
{
return *reinterpret_cast<u32*>((u8*)this + 0x30);
}
volatile u32& m_push3()
{
return *reinterpret_cast<u32*>((u8*)this + 0x8);
}
volatile u64& m_pop1()
{
return *reinterpret_cast<u64*>((u8*)this + 0x0);
}
volatile u32& m_pop2()
{
return *reinterpret_cast<u32*>((u8*)this + 0x50);
}
volatile u32& m_pop3()
{
return *reinterpret_cast<u32*>((u8*)this + 0x0);
}
};
static_assert(sizeof(CellSyncLFQueue) == 128, "CellSyncLFQueue: wrong size");
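Since the new header relies on pop1/pop3 and push1/push3 aliasing the same bytes, a layout check in the spirit of the existing static_assert may be worth keeping nearby; plain structs stand in for vm::atomic and be_t in this sketch:

#include <cstddef>
#include <cstdint>

struct pop1_t  { uint16_t m_h1, m_h2, m_h3, m_h4; };
struct pop3_t  { uint16_t m_h1, m_h2; };
struct push1_t { uint16_t m_h5, m_h6, m_h7, m_h8; };
struct push3_t { uint16_t m_h5, m_h6; };

struct Queue
{
    union { pop1_t pop1; pop3_t pop3; };     // 0x00
    union { push1_t push1; push3_t push3; }; // 0x08
    uint32_t m_size, m_depth;                // 0x10, 0x14
    uint64_t m_buffer;                       // 0x18
    uint8_t  m_bs[4];                        // 0x20
    uint32_t m_direction, m_v1;              // 0x24, 0x28
    uint32_t init;                           // 0x2C
    uint16_t m_hs[32];                       // 0x30
    uint64_t m_eaSignal;                     // 0x70
    uint32_t m_v2, m_v3;                     // 0x78, 0x7C
};

static_assert(offsetof(Queue, push1) == 0x08, "push1/push3 must alias at 0x08");
static_assert(offsetof(Queue, init) == 0x2C, "init must sit at 0x2C");
static_assert(sizeof(Queue) == 128, "CellSyncLFQueue analogue: wrong size");

int main() {}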