#include "stdafx.h" #include "Emu/Memory/Memory.h" #include "Emu/System.h" #include "Emu/Cell/SPUThread.h" #include "Emu/SysCalls/Modules.h" #include "Emu/SysCalls/lv2/sys_lwmutex.h" #include "Emu/SysCalls/lv2/sys_lwcond.h" #include "Emu/SysCalls/lv2/sys_spu.h" #include "Emu/SysCalls/Modules/cellSpurs.h" // // SPURS utility functions // void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag); u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status); // // SPURS Kernel functions // void spursKernelSelectWorkload(SPUThread & spu); void spursKernelSelectWorkload2(SPUThread & spu); // // SPURS system service workload functions // void spursSysServiceCleanupAfterPreemption(SPUThread & spu, SpursKernelMgmtData * mgmt); void spursSysServiceUpdateTraceCount(SPUThread & spu, SpursKernelMgmtData * mgmt); void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 arg2, u32 arg3, u32 arg4); void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 wklShutdownBitSet); void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt); void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt); void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt); void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus); void spursSysServiceWorkloadEntry(SPUThread & spu); // // SPURS taskset polict module functions // extern Module *cellSpurs; ////////////////////////////////////////////////////////////////////////////// // SPURS utility functions ////////////////////////////////////////////////////////////////////////////// /// Output trace information void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag) { // TODO: Implement this } /// Check for execution right requests u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status) { auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); spu.GPR[3]._u32[3] = 1; if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) { spursKernelSelectWorkload2(spu); } else { spursKernelSelectWorkload(spu); } auto result = spu.GPR[3]._u64[1]; if (status) { *status = (u32)result; } u32 wklId = result >> 32; return wklId == mgmt->wklCurrentId ? 0 : 1; } ////////////////////////////////////////////////////////////////////////////// // SPURS kernel functions ////////////////////////////////////////////////////////////////////////////// /// Select a workload to run void spursKernelSelectWorkload(SPUThread & spu) { LV2_LOCK(0); // TODO: lock-free implementation if possible auto mgmt = vm::get_ptr(spu.ls_offset + 0x100); // The first and only argument to this function is a boolean that is set to false if the function // is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus. // If the first argument is true then the shared data is not updated with the result. 
    const auto isPoll = spu.GPR[3]._u32[3];

    // Calculate the contention (number of SPUs used) for each workload
    u8 contention[CELL_SPURS_MAX_WORKLOAD];
    u8 pendingContention[CELL_SPURS_MAX_WORKLOAD];
    for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
        contention[i] = mgmt->spurs->m.wklCurrentContention[i] - mgmt->wklLocContention[i];

        // If this is a poll request then the number of SPUs pending to context switch is also added to the
        // contention, presumably to prevent unnecessary jumps to the kernel
        if (isPoll) {
            pendingContention[i] = mgmt->spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i];
            if (i != mgmt->wklCurrentId) {
                contention[i] += pendingContention[i];
            }
        }
    }

    u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
    u32 pollStatus = 0;

    // The system service workload has the highest priority. Select the system service workload if
    // the system service message bit for this SPU is set.
    if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) {
        mgmt->spuIdling = 0;
        if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
            // Clear the message bit
            mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum));
        }
    } else {
        // Calculate the scheduling weight for each workload
        u16 maxWeight = 0;
        for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
            u16 runnable = mgmt->wklRunnable1 & (0x8000 >> i);
            u16 wklSignal = mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i);
            u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
            u8 readyCount = mgmt->spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklReadyCount1[i].read_relaxed();
            u8 idleSpuCount = mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed();
            u8 requestCount = readyCount + idleSpuCount;

            // For a workload to be considered for scheduling:
            // 1. Its priority must not be 0
            // 2. The number of SPUs used by it must be less than the max contention for that workload
            // 3. The workload should be in runnable state
            // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount)
            //    OR the workload must be signalled
            //    OR the workload flag is 0 and the workload is configured as the workload flag receiver
            if (runnable && mgmt->priority[i] != 0 && mgmt->spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) {
                if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) {
                    // The scheduling weight of the workload is formed from the following parameters in decreasing order of priority:
                    // 1. Workload signal set or workload flag or ready count > contention
                    // 2. Priority of the workload on the SPU
                    // 3. Is the workload the last selected workload
                    // 4. Minimum contention of the workload
                    // 5. Number of SPUs that are being used by the workload (lesser the number, more the weight)
                    // 6. Is the workload executable same as the currently loaded executable
                    // 7. The workload id (lesser the number, more the weight)
                    u16 weight = (wklFlag || wklSignal || (readyCount > contention[i])) ? 0x8000 : 0;
                    weight |= (u16)(mgmt->priority[i] & 0x7F) << 8;
                    weight |= i == mgmt->wklCurrentId ? 0x80 : 0x00;
                    weight |= (contention[i] > 0 && mgmt->spurs->m.wklMinContention[i] > contention[i]) ? 0x40 : 0x00;
                    weight |= ((CELL_SPURS_MAX_SPU - contention[i]) & 0x0F) << 2;
                    weight |= mgmt->wklUniqueId[i] == mgmt->wklCurrentUniqueId ? 0x02 : 0x00;
                    weight |= 0x01;

                    // In case of a tie the lower numbered workload is chosen
                    if (weight > maxWeight) {
                        wklSelectedId = i;
                        maxWeight = weight;
                        pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0;
                        pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0;
                        pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0;
                    }
                }
            }
        }

        // Not sure what this does. Possibly mark the SPU as idle/in use.
        mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0;

        if (!isPoll || wklSelectedId == mgmt->wklCurrentId) {
            // Clear workload signal for the selected workload
            mgmt->spurs->m.wklSignal1.write_relaxed(be_t<u16>::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId)));
            mgmt->spurs->m.wklSignal2.write_relaxed(be_t<u16>::make(mgmt->spurs->m.wklSignal2.read_relaxed() & ~(0x80000000u >> wklSelectedId)));

            // If the selected workload is the wklFlag workload then set the wklFlag to all 1s
            if (wklSelectedId == mgmt->spurs->m.wklFlagReceiver.read_relaxed()) {
                mgmt->spurs->m.wklFlag.flag.write_relaxed(be_t<u32>::make(0xFFFFFFFF));
            }
        }
    }

    if (!isPoll) {
        // Called by kernel
        // Increment the contention for the selected workload
        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
            contention[wklSelectedId]++;
        }

        for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
            mgmt->spurs->m.wklCurrentContention[i] = contention[i];
            mgmt->wklLocContention[i] = 0;
            mgmt->wklLocPendingContention[i] = 0;
        }

        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
            mgmt->wklLocContention[wklSelectedId] = 1;
        }

        mgmt->wklCurrentId = wklSelectedId;
    } else if (wklSelectedId != mgmt->wklCurrentId) {
        // Not called by kernel but a context switch is required
        // Increment the pending contention for the selected workload
        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
            pendingContention[wklSelectedId]++;
        }

        for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
            mgmt->spurs->m.wklPendingContention[i] = pendingContention[i];
            mgmt->wklLocPendingContention[i] = 0;
        }

        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
            mgmt->wklLocPendingContention[wklSelectedId] = 1;
        }
    }

    u64 result = (u64)wklSelectedId << 32;
    result |= pollStatus;
    spu.GPR[3]._u64[1] = result;
}

/// Select a workload to run
void spursKernelSelectWorkload2(SPUThread & spu) {
    LV2_LOCK(0); // TODO: lock-free implementation if possible

    auto mgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + 0x100);

    // The first and only argument to this function is a boolean that is set to false if the function
    // is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus.
    // If the first argument is true then the shared data is not updated with the result.
    const auto isPoll = spu.GPR[3]._u32[3];

    // Calculate the contention (number of SPUs used) for each workload
    u8 contention[CELL_SPURS_MAX_WORKLOAD2];
    u8 pendingContention[CELL_SPURS_MAX_WORKLOAD2];
    for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) {
        contention[i] = mgmt->spurs->m.wklCurrentContention[i & 0x0F] - mgmt->wklLocContention[i & 0x0F];
contention[i] & 0x0F : contention[i] >> 4; // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably // to prevent unnecessary jumps to the kernel if (isPoll) { pendingContention[i] = mgmt->spurs->m.wklPendingContention[i & 0x0F] - mgmt->wklLocPendingContention[i & 0x0F]; pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4; if (i != mgmt->wklCurrentId) { contention[i] += pendingContention[i]; } } } u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; u32 pollStatus = 0; // The system service workload has the highest priority. Select the system service workload if // the system service message bit for this SPU is set. if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) { // Not sure what this does. Possibly Mark the SPU as in use. mgmt->spuIdling = 0; if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { // Clear the message bit mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum)); } } else { // Caclulate the scheduling weight for each workload u8 maxWeight = 0; for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { auto j = i & 0x0F; u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j); u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4; u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : mgmt->spurs->m.wklMaxContention[j].read_relaxed() >> 4; u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : mgmt->spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklReadyCount1[j].read_relaxed() : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed(); // For a workload to be considered for scheduling: // 1. Its priority must be greater than 0 // 2. The number of SPUs used by it must be less than the max contention for that workload // 3. The workload should be in runnable state // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount) // OR the workload must be signalled // OR the workload flag is 0 and the workload is configured as the wokload receiver if (runnable && priority > 0 && maxContention > contention[i]) { if (wklFlag || wklSignal || readyCount > contention[i]) { // The scheduling weight of the workload is equal to the priority of the workload for the SPU. // The current workload is given a sligtly higher weight presumably to reduce the number of context switches. // In case of a tie the lower numbered workload is chosen. u8 weight = priority << 4; if (mgmt->wklCurrentId == i) { weight |= 0x04; } if (weight > maxWeight) { wklSelectedId = i; maxWeight = weight; pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0; pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0; pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0; } } } } // Not sure what this does. Possibly mark the SPU as idle/in use. mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 
1 : 0; if (!isPoll || wklSelectedId == mgmt->wklCurrentId) { // Clear workload signal for the selected workload mgmt->spurs->m.wklSignal1.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId))); mgmt->spurs->m.wklSignal2.write_relaxed(be_t::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId))); // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s if (wklSelectedId == mgmt->spurs->m.wklFlagReceiver.read_relaxed()) { mgmt->spurs->m.wklFlag.flag.write_relaxed(be_t::make(0xFFFFFFFF)); } } } if (!isPoll) { // Called by kernel // Increment the contention for the selected workload if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { contention[wklSelectedId]++; } for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) { mgmt->spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4); mgmt->wklLocContention[i] = 0; mgmt->wklLocPendingContention[i] = 0; } mgmt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; mgmt->wklCurrentId = wklSelectedId; } else if (wklSelectedId != mgmt->wklCurrentId) { // Not called by kernel but a context switch is required // Increment the pending contention for the selected workload if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { pendingContention[wklSelectedId]++; } for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) { mgmt->spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4); mgmt->wklLocPendingContention[i] = 0; } mgmt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0; } u64 result = (u64)wklSelectedId << 32; result |= pollStatus; spu.GPR[3]._u64[1] = result; } /// Entry point of the SPURS kernel void spursKernelMain(SPUThread & spu) { SpursKernelMgmtData * mgmt = vm::get_ptr(spu.ls_offset + 0x100); mgmt->spuNum = spu.GPR[3]._u32[3]; mgmt->dmaTagId = 0x1F; mgmt->spurs.set(spu.GPR[4]._u64[1]); mgmt->wklCurrentId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; mgmt->wklCurrentUniqueId = 0x20; bool isSecond = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false; mgmt->yieldToKernelAddr = isSecond ? 0x838 : 0x808; mgmt->selectWorkloadAddr = 0x290; spu.WriteLS32(mgmt->yieldToKernelAddr, 2); // hack for cellSpursModuleExit spu.WriteLS32(mgmt->selectWorkloadAddr, 3); // hack for cellSpursModulePollStatus spu.WriteLS32(mgmt->selectWorkloadAddr + 4, 0x35000000); // bi $0 spu.m_code3_func = isSecond ? spursKernelSelectWorkload2 : spursKernelSelectWorkload; u32 wid = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; u32 pollStatus = 0; while (true) { if (Emu.IsStopped()) { cellSpurs->Warning("Spurs Kernel aborted"); return; } // Get current workload info auto & wkl = wid < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklInfo1[wid] : (wid < CELL_SPURS_MAX_WORKLOAD2 && isSecond ? 
        // Get current workload info
        auto & wkl = wid < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklInfo1[wid] :
                     (wid < CELL_SPURS_MAX_WORKLOAD2 && isSecond ? mgmt->spurs->m.wklInfo2[wid & 0xf] : mgmt->spurs->m.wklInfoSysSrv);

        if (mgmt->wklCurrentAddr != wkl.addr) {
            if (wkl.addr.addr() != SPURS_IMG_ADDR_SYS_SRV_WORKLOAD) {
                // Load executable code
                memcpy(vm::get_ptr<void>(spu.ls_offset + 0xA00), wkl.addr.get_ptr(), wkl.size);
            }
            mgmt->wklCurrentAddr = wkl.addr;
            mgmt->wklCurrentUniqueId = wkl.uniqueId.read_relaxed();
        }

        if (!isSecond) {
            mgmt->moduleId[0] = 0;
            mgmt->moduleId[1] = 0;
        }

        // Run workload
        spu.GPR[1]._u32[3] = 0x3FFB0;
        spu.GPR[3]._u32[3] = 0x100;
        spu.GPR[4]._u64[1] = wkl.arg;
        spu.GPR[5]._u32[3] = pollStatus;
        spu.SetPc(0xA00);
        switch (mgmt->wklCurrentAddr.addr()) {
        case SPURS_IMG_ADDR_SYS_SRV_WORKLOAD:
            spursSysServiceWorkloadEntry(spu);
            break;
        default:
            spu.FastCall(0xA00);
            break;
        }

        // Check status
        auto status = spu.SPU.Status.GetValue();
        if (status == SPU_STATUS_STOPPED_BY_STOP) {
            return;
        } else {
            assert(status == SPU_STATUS_RUNNING);
        }

        // Select next workload to run
        spu.GPR[3].clear();
        if (isSecond) {
            spursKernelSelectWorkload2(spu);
        } else {
            spursKernelSelectWorkload(spu);
        }

        u64 res = spu.GPR[3]._u64[1];
        pollStatus = (u32)(res);
        wid = (u32)(res >> 32);
    }
}

//////////////////////////////////////////////////////////////////////////////
// SPURS system workload functions
//////////////////////////////////////////////////////////////////////////////

/// Restore scheduling parameters after a workload has been preempted by the system service workload
void spursSysServiceCleanupAfterPreemption(SPUThread & spu, SpursKernelMgmtData * mgmt) {
    if (mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum] != 0xFF) {
        auto wklId = mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum];
        mgmt->spurs->m.sysSrvWorkload[mgmt->spuNum] = 0xFF;

        spursSysServiceUpdateWorkload(spu, mgmt);
        if (wklId >= CELL_SPURS_MAX_WORKLOAD) {
            // Workloads 16..31 use the high nibble of the contention byte and the second ready count array
            mgmt->spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x10;
            mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1);
        } else {
            mgmt->spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x01;
            mgmt->spurs->m.wklReadyCount1[wklId & 0x0F].write_relaxed(mgmt->spurs->m.wklReadyCount1[wklId & 0x0F].read_relaxed() - 1);
        }

        // Set the current workload id to the id of the pre-empted workload since cellSpursModulePutTrace
        // uses the current workload id to determine the workload to which the trace belongs
        auto wklIdSaved = mgmt->wklCurrentId;
        mgmt->wklCurrentId = wklId;

        // Trace - STOP: GUID
        CellSpursTracePacket pkt;
        memset(&pkt, 0, sizeof(pkt));
        pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP;
        pkt.data.stop = SPURS_GUID_SYS_WKL;
        cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);

        mgmt->wklCurrentId = wklIdSaved;
    }
}

/// Update the trace count for this SPU in CellSpurs
void spursSysServiceUpdateTraceCount(SPUThread & spu, SpursKernelMgmtData * mgmt) {
    if (mgmt->traceBuffer) {
        auto traceInfo = vm::ptr<CellSpursTraceInfo>::make((u32)(mgmt->traceBuffer - (mgmt->spurs->m.traceStartIndex[mgmt->spuNum] << 4)));
        traceInfo->count[mgmt->spuNum] = mgmt->traceMsgCount;
    }
}

/// Update trace control in SPU from CellSpurs
void spursSysServiceUpdateTrace(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 arg2, u32 arg3, u32 arg4) {
    auto sysSrvMsgUpdateTrace = mgmt->spurs->m.sysSrvMsgUpdateTrace;
    mgmt->spurs->m.sysSrvMsgUpdateTrace &= ~(1 << mgmt->spuNum);
    mgmt->spurs->m.xCC &= ~(1 << mgmt->spuNum);
    mgmt->spurs->m.xCC |= arg2 << mgmt->spuNum;

    bool notify = false;
    if (((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.sysSrvMsgUpdateTrace == 0) && (mgmt->spurs->m.xCD != 0)) {
        mgmt->spurs->m.xCD = 0;
        notify = true;
    }

    if (arg4 && mgmt->spurs->m.xCD != 0) {
        mgmt->spurs->m.xCD = 0;
        notify = true;
    }

    // Get trace parameters from CellSpurs and store them in the LS
    if (((sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) != 0) || (arg3 != 0)) {
        if (mgmt->traceMsgCount != 0xFF || mgmt->spurs->m.traceBuffer.addr() == 0) {
            spursSysServiceUpdateTraceCount(spu, mgmt);
        } else {
            mgmt->traceMsgCount = mgmt->spurs->m.traceBuffer->count[mgmt->spuNum];
        }

        mgmt->traceBuffer = mgmt->spurs->m.traceBuffer.addr() + (mgmt->spurs->m.traceStartIndex[mgmt->spuNum] << 4);
        mgmt->traceMaxCount = mgmt->spurs->m.traceStartIndex[1] - mgmt->spurs->m.traceStartIndex[0];
        if (mgmt->traceBuffer == 0) {
            mgmt->traceMsgCount = 0;
        }
    }

    if (notify) {
        // TODO: sys_spu_thread_send_event(mgmt->spurs->m.spuPort, 2, 0);
    }
}

/// Update events in CellSpurs
void spursSysServiceUpdateEvent(SPUThread & spu, SpursKernelMgmtData * mgmt, u32 wklShutdownBitSet) {
    // Mark the workloads in wklShutdownBitSet as completed and also generate a bit set of the completed
    // workloads that have a shutdown completion hook registered
    u32 wklNotifyBitSet = 0;
    for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
        if (wklShutdownBitSet & (0x80000000u >> i)) {
            mgmt->spurs->m.wklEvent1[i] |= 0x01;
            if (mgmt->spurs->m.wklEvent1[i] & 0x02 || mgmt->spurs->m.wklEvent1[i] & 0x10) {
                wklNotifyBitSet |= 0x80000000u >> i;
            }
        }

        if (wklShutdownBitSet & (0x8000 >> i)) {
            mgmt->spurs->m.wklEvent2[i] |= 0x01;
            if (mgmt->spurs->m.wklEvent2[i] & 0x02 || mgmt->spurs->m.wklEvent2[i] & 0x10) {
                wklNotifyBitSet |= 0x8000 >> i;
            }
        }
    }

    if (wklNotifyBitSet) {
        // TODO: sys_spu_thread_send_event(mgmt->spurs->m.spuPort, 0, wklNotifyBitSet);
    }
}
/// Update workload information in the SPU from CellSpurs
void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt) {
    u32 wklShutdownBitSet = 0;
    mgmt->wklRunnable1 = 0;
    mgmt->wklRunnable2 = 0;
    for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
        // Copy the priority of the workload for this SPU and its unique id to the LS
        mgmt->priority[i] = mgmt->spurs->m.wklInfo1[i].priority[mgmt->spuNum] == 0 ? 0 : 0x10 - mgmt->spurs->m.wklInfo1[i].priority[mgmt->spuNum];
        mgmt->wklUniqueId[i] = mgmt->spurs->m.wklInfo1[i].uniqueId.read_relaxed();

        // Update workload status and runnable flag based on the workload state
        auto wklStatus = mgmt->spurs->m.wklStatus1[i];
        if (mgmt->spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) {
            mgmt->spurs->m.wklStatus1[i] |= 1 << mgmt->spuNum;
            mgmt->wklRunnable1 |= 0x8000 >> i;
        } else {
            mgmt->spurs->m.wklStatus1[i] &= ~(1 << mgmt->spuNum);
        }

        // If the workload is shutting down and if this is the last SPU from which it is being removed then
        // add it to the shutdown bit set
        if (mgmt->spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) {
            if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.wklStatus1[i] == 0)) {
                mgmt->spurs->m.wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE);
                wklShutdownBitSet |= 0x80000000u >> i;
            }
        }

        if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) {
            // Copy the priority of the workload for this SPU to the LS
            if (mgmt->spurs->m.wklInfo2[i].priority[mgmt->spuNum]) {
                mgmt->priority[i] |= (0x10 - mgmt->spurs->m.wklInfo2[i].priority[mgmt->spuNum]) << 4;
            }

            // Update workload status and runnable flag based on the workload state
            wklStatus = mgmt->spurs->m.wklStatus2[i];
            if (mgmt->spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) {
                mgmt->spurs->m.wklStatus2[i] |= 1 << mgmt->spuNum;
                mgmt->wklRunnable2 |= 0x8000 >> i;
            } else {
                mgmt->spurs->m.wklStatus2[i] &= ~(1 << mgmt->spuNum);
            }

            // If the workload is shutting down and if this is the last SPU from which it is being removed then
            // add it to the shutdown bit set
            if (mgmt->spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) {
                if (((wklStatus & (1 << mgmt->spuNum)) != 0) && (mgmt->spurs->m.wklStatus2[i] == 0)) {
                    mgmt->spurs->m.wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE);
                    wklShutdownBitSet |= 0x8000 >> i;
                }
            }
        }
    }

    if (wklShutdownBitSet) {
        spursSysServiceUpdateEvent(spu, mgmt, wklShutdownBitSet);
    }
}

/// Process any messages
void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt) {
    LV2_LOCK(0);

    // Process update workload message
    if (mgmt->spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << mgmt->spuNum)) {
        mgmt->spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << mgmt->spuNum);
        spursSysServiceUpdateWorkload(spu, mgmt);
    }

    // Process update trace message
    if (mgmt->spurs->m.sysSrvMsgUpdateTrace & (1 << mgmt->spuNum)) {
        spursSysServiceUpdateTrace(spu, mgmt, 1, 0, 0);
    }

    // Process terminate request
    if (mgmt->spurs->m.sysSrvMsgTerminate & (1 << mgmt->spuNum)) {
        mgmt->spurs->m.sysSrvOnSpu &= ~(1 << mgmt->spuNum);
        // TODO: Rest of the terminate processing
    }
}

/// Wait for an external event or exit the SPURS thread group if no workloads can be scheduled
void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt) {
    while (true) {
        Emu.GetCoreMutex().lock();

        // Find the number of SPUs that are idling in this SPURS instance
        u32 nIdlingSpus = 0;
        for (u32 i = 0; i < 8; i++) {
            if (mgmt->spurs->m.spuIdling & (1 << i)) {
                nIdlingSpus++;
            }
        }

        bool allSpusIdle = nIdlingSpus == mgmt->spurs->m.nSpus ? true : false;
        bool exitIfNoWork = mgmt->spurs->m.flags1 & SF1_EXIT_IF_NO_WORK ? true : false;

        // Check if any workloads can be scheduled
        bool foundReadyWorkload = false;
        if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) {
            foundReadyWorkload = true;
        } else {
            if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) {
                for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) {
                    u32 j = i & 0x0F;
                    u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j);
                    u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4;
                    u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : mgmt->spurs->m.wklMaxContention[j].read_relaxed() >> 4;
                    u8 contention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklCurrentContention[j] & 0x0F : mgmt->spurs->m.wklCurrentContention[j] >> 4;
                    u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : mgmt->spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j);
                    u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
                    u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklReadyCount1[j].read_relaxed() : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed();

                    if (runnable && priority > 0 && maxContention > contention) {
                        if (wklFlag || wklSignal || readyCount > contention) {
                            foundReadyWorkload = true;
                            break;
                        }
                    }
                }
            } else {
                for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
                    u16 runnable = mgmt->wklRunnable1 & (0x8000 >> i);
                    u16 wklSignal = mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i);
                    u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
                    u8 readyCount = mgmt->spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklReadyCount1[i].read_relaxed();
                    u8 idleSpuCount = mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed();
                    u8 requestCount = readyCount + idleSpuCount;

                    if (runnable && mgmt->priority[i] != 0 && mgmt->spurs->m.wklMaxContention[i].read_relaxed() > mgmt->spurs->m.wklCurrentContention[i]) {
                        if (wklFlag || wklSignal || (readyCount != 0 && requestCount > mgmt->spurs->m.wklCurrentContention[i])) {
                            foundReadyWorkload = true;
                            break;
                        }
                    }
                }
            }
        }

        // If all SPUs are idling and the exit_if_no_work flag is set then the SPU thread group must exit.
        // Otherwise wait for external events.
        if ((mgmt->spurs->m.spuIdling & (1 << mgmt->spuNum)) && (allSpusIdle == false || exitIfNoWork == false) && foundReadyWorkload == false) {
            // The system service blocks by making a reservation and waiting on the reservation lost event.
            // This is unfortunately not yet completely implemented in rpcs3. So we busy wait here.
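            // A faithful implementation would block roughly as follows (hypothetical
            // sketch in SPU/MFC terms, not current rpcs3 API; it assumes working
            // lock-line reservation support):
            //
            //     GETLLAR  lockLine           // acquire a reservation on the line holding spuIdling
            //     WRCH     SPU_WrEventMask, MFC_LLR_LOST_EVENT
            //     RDCH     SPU_RdEventStat    // blocks until another SPU writes the line,
            //                                 // i.e. until the reservation is lost
            //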
            //u128 r;
            //spu.ReadChannel(r, 0);
            Emu.GetCoreMutex().unlock();
            std::this_thread::sleep_for(std::chrono::milliseconds(1));
            Emu.GetCoreMutex().lock();
        }

        if ((allSpusIdle == true && exitIfNoWork == true) || foundReadyWorkload == false) {
            mgmt->spurs->m.spuIdling |= 1 << mgmt->spuNum;
        } else {
            mgmt->spurs->m.spuIdling &= ~(1 << mgmt->spuNum);
        }

        Emu.GetCoreMutex().unlock();

        if (allSpusIdle == false || exitIfNoWork == false) {
            if (foundReadyWorkload == true) {
                return;
            }
        } else {
            // TODO: exit spu thread group
        }
    }
}

/// Main function for the system service workload
void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus) {
    auto mgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + 0x100);

    if (mgmt->spurs.addr() % CellSpurs::align) {
        assert(0);
    }

    // Initialise the system service if this is the first time it is being started on this SPU
    if (mgmt->sysSrvInitialised == 0) {
        mgmt->sysSrvInitialised = 1;

        LV2_LOCK(0);
        if (mgmt->spurs->m.sysSrvOnSpu & (1 << mgmt->spuNum)) {
            assert(0);
        }

        mgmt->spurs->m.sysSrvOnSpu |= 1 << mgmt->spuNum;
        mgmt->traceBuffer = 0;
        mgmt->traceMsgCount = -1;

        spursSysServiceUpdateTrace(spu, mgmt, 1, 1, 0);
        spursSysServiceCleanupAfterPreemption(spu, mgmt);

        // Trace - SERVICE: INIT
        CellSpursTracePacket pkt;
        memset(&pkt, 0, sizeof(pkt));
        pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE;
        pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_INIT;
        cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);
    }

    // Trace - START: Module='SYS '
    CellSpursTracePacket pkt;
    memset(&pkt, 0, sizeof(pkt));
    pkt.header.tag = CELL_SPURS_TRACE_TAG_START;
    memcpy(pkt.data.start.module, "SYS ", 4);
    pkt.data.start.level = 1; // Policy module
    pkt.data.start.ls = 0xA00 >> 2;
    cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);

    while (true) {
        // Process messages for the system service workload
        spursSysServiceProcessMessages(spu, mgmt);

poll:
        if (cellSpursModulePollStatus(spu, nullptr)) {
            // Trace - SERVICE: EXIT
            CellSpursTracePacket pkt;
            memset(&pkt, 0, sizeof(pkt));
            pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE;
            pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_EXIT;
            cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);

            // Trace - STOP: GUID
            memset(&pkt, 0, sizeof(pkt));
            pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP;
            pkt.data.stop = SPURS_GUID_SYS_WKL;
            cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);
            break;
        }

        // If we reach here it means that either there are more system service messages to be processed
        // or there are no workloads that can be scheduled.

        // If the SPU is not idling then process the remaining system service messages
        if (mgmt->spuIdling == 0) {
            continue;
        }

        // If we reach here it means that the SPU is idling

        // Trace - SERVICE: WAIT
        CellSpursTracePacket pkt;
        memset(&pkt, 0, sizeof(pkt));
        pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE;
        pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_WAIT;
        cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);

        spursSysServiceWaitOrExit(spu, mgmt);
        goto poll;
    }
}

/// Entry point of the system service workload
void spursSysServiceWorkloadEntry(SPUThread & spu) {
    auto mgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + spu.GPR[3]._u32[3]);
    auto arg = spu.GPR[4]._u64[1];
    auto pollStatus = spu.GPR[5]._u32[3];

    spu.GPR[1]._u32[3] = 0x3FFD0;
    *(vm::ptr<u32>::make(spu.GPR[1]._u32[3])) = 0x3FFF0;
    memset(vm::get_ptr<void>(spu.ls_offset + 0x3FFE0), 0, 32);

    if (mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
        spursSysServiceWorkloadMain(spu, pollStatus);
    } else {
        // TODO: If we reach here it means the current workload was preempted to start the
        // system service workload. Need to implement this.
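        // Presumably the preempted workload's id would be recorded in
        // spurs->m.sysSrvWorkload[spuNum] before running the system service, so that
        // spursSysServiceCleanupAfterPreemption (above) can restore the preempted
        // workload's contention and ready count once the system service is done.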
    }

    // TODO: Ensure that this function always returns to the SPURS kernel
    return;
}

//////////////////////////////////////////////////////////////////////////////
// SPURS taskset policy module functions
//////////////////////////////////////////////////////////////////////////////

bool spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting) {
    auto kernelMgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + 0x100);
    auto mgmt = vm::get_ptr<SpursTasksetPmMgmtData>(spu.ls_offset + 0x2700);

    // Verify taskset state is valid
    for (auto i = 0; i < 4; i++) {
        if ((mgmt->taskset->m.waiting_set[i] & mgmt->taskset->m.running_set[i]) ||
            (mgmt->taskset->m.ready_set[i] & mgmt->taskset->m.ready2_set[i]) ||
            ((mgmt->taskset->m.running_set[i] | mgmt->taskset->m.ready_set[i] | mgmt->taskset->m.ready2_set[i] |
              mgmt->taskset->m.signal_received_set[i] | mgmt->taskset->m.waiting_set[i]) & ~mgmt->taskset->m.enabled_set[i])) {
            assert(0);
        }
    }

    // TODO: Implement cases
    s32 delta = 0;
    switch (request + 1) {
    case 0:
        break;
    case 1:
        break;
    case 2:
        break;
    case 3:
        break;
    case 4:
        break;
    case 5:
        break;
    case 6:
        break;
    default:
        assert(0);
        break;
    }

    // Set the ready count of the workload to the number of ready tasks
    do {
        s32 readyCount = kernelMgmt->wklCurrentId >= CELL_SPURS_MAX_WORKLOAD ?
                         kernelMgmt->spurs->m.wklIdleSpuCountOrReadyCount2[kernelMgmt->wklCurrentId & 0x0F].read_relaxed() :
                         kernelMgmt->spurs->m.wklReadyCount1[kernelMgmt->wklCurrentId].read_relaxed();

        auto newReadyCount = readyCount + delta > 0xFF ? 0xFF : readyCount + delta < 0 ? 0 : readyCount + delta;

        if (kernelMgmt->wklCurrentId >= CELL_SPURS_MAX_WORKLOAD) {
            kernelMgmt->spurs->m.wklIdleSpuCountOrReadyCount2[kernelMgmt->wklCurrentId & 0x0F].write_relaxed(newReadyCount);
        } else {
            kernelMgmt->spurs->m.wklReadyCount1[kernelMgmt->wklCurrentId].write_relaxed(newReadyCount);
        }

        delta += readyCount;
    } while (delta > 0);

    // TODO: Implement return
    return false;
}

void spursTasksetDispatch() {
}

void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus) {
    if (pollStatus & CELL_SPURS_MODULE_POLL_STATUS_FLAG) {
        spursTasksetProcessRequest(spu, 6, nullptr, nullptr);
    }
}

bool spursTasksetShouldYield(SPUThread & spu) {
    u32 pollStatus;

    if (cellSpursModulePollStatus(spu, &pollStatus)) {
        return true;
    }

    spursTasksetProcessPollStatus(spu, pollStatus);
    return false;
}

void spursTasksetInit(SPUThread & spu, u32 pollStatus) {
    auto mgmt = vm::get_ptr<SpursTasksetPmMgmtData>(spu.ls_offset + 0x2700);
    auto kernelMgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + 0x100);

    kernelMgmt->moduleId[0] = 'T';
    kernelMgmt->moduleId[1] = 'K';

    // Trace - START: Module='TKST'
    CellSpursTracePacket pkt;
    memset(&pkt, 0, sizeof(pkt));
    pkt.header.tag = 0x52; // It's not clear what this tag means exactly but it seems similar to CELL_SPURS_TRACE_TAG_START
    memcpy(pkt.data.start.module, "TKST", 4);
    pkt.data.start.level = 2;
    pkt.data.start.ls = 0xA00 >> 2;
    cellSpursModulePutTrace(&pkt, mgmt->dmaTagId);

    spursTasksetProcessPollStatus(spu, pollStatus);
}

void spursTasksetEntry(SPUThread & spu) {
    auto mgmt = vm::get_ptr<SpursTasksetPmMgmtData>(spu.ls_offset + 0x2700);

    // Check if the function was invoked by the SPURS kernel or because of a syscall
    if (spu.PC != 0xA70) {
        // Called from kernel
        auto kernelMgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + spu.GPR[3]._u32[3]);
        auto arg = spu.GPR[4]._u64[1];
        auto pollStatus = spu.GPR[5]._u32[3];

        memset(mgmt, 0, sizeof(*mgmt));
        mgmt->taskset.set(arg);
        memcpy(mgmt->moduleId, "SPURSTASK MODULE", 16);
        mgmt->kernelMgmt = spu.GPR[3]._u32[3];
        mgmt->yieldAddr = 0xA70;
        mgmt->spuNum = kernelMgmt->spuNum;
        mgmt->dmaTagId = kernelMgmt->dmaTagId;
        mgmt->taskId = 0xFFFFFFFF;

        spursTasksetInit(spu, pollStatus);
        // TODO: Dispatch
    }

    mgmt->contextSaveArea[0] = spu.GPR[0];
    mgmt->contextSaveArea[1] = spu.GPR[1];
    for (auto i = 0; i < 48; i++) {
        mgmt->contextSaveArea[i + 2] = spu.GPR[80 + i];
    }

    // TODO: Process syscall
}