gpu: implement compute queue

This commit is contained in:
DH 2024-10-15 18:35:17 +03:00
parent 1f28918fc9
commit 4fe857485c
12 changed files with 836 additions and 483 deletions

View file

@ -181,6 +181,7 @@ public:
Ref<RcBase> blockpoolDevice;
shared_mutex gpuDeviceMtx;
Ref<RcBase> gpuDevice;
Ref<RcBase> dceDevice;
uint sdkVersion{};
uint fwSdkVersion{};
uint safeMode{};

View file

@ -236,7 +236,7 @@ Device::Device() : vkContext(createVkContext(this)) {
for (int i = 0; i < kGfxPipeCount; ++i) {
graphicsPipes[i].setDeQueue(
Queue{
Ring{
.base = mainGfxRings[i],
.size = sizeof(mainGfxRings[i]) / sizeof(mainGfxRings[i][0]),
.rptr = mainGfxRings[i],
@ -474,7 +474,7 @@ void Device::start() {
}
}
void Device::submitCommand(Queue &ring,
void Device::submitCommand(Ring &ring,
std::span<const std::uint32_t> command) {
std::scoped_lock lock(writeCommandMtx);
if (ring.wptr + command.size() > ring.base + ring.size) {
@ -599,12 +599,12 @@ void Device::onCommandBuffer(std::uint32_t pid, int cmdHeader,
auto op = rx::getBits(cmdHeader, 15, 8);
if (op == gnm::IT_INDIRECT_BUFFER_CNST) {
graphicsPipes[0].setCeQueue(Queue::createFromRange(
graphicsPipes[0].setCeQueue(Ring::createFromRange(
process.vmId, memory.getPointer<std::uint32_t>(address),
size / sizeof(std::uint32_t)));
} else if (op == gnm::IT_INDIRECT_BUFFER) {
graphicsPipes[0].setDeQueue(
Queue::createFromRange(process.vmId,
Ring::createFromRange(process.vmId,
memory.getPointer<std::uint32_t>(address),
size / sizeof(std::uint32_t)),
1);

View file

@ -112,7 +112,7 @@ struct Device : orbis::RcBase, DeviceContext {
return caches[vmId].createComputeTag(scheduler);
}
void submitCommand(Queue &ring, std::span<const std::uint32_t> command);
void submitCommand(Ring &ring, std::span<const std::uint32_t> command);
void submitGfxCommand(int gfxPipe, std::span<const std::uint32_t> command);
void mapProcess(std::uint32_t pid, int vmId);

View file

@ -117,5 +117,49 @@ void DeviceCtl::registerBufferAttribute(std::uint32_t pid,
process.bufferAttributes[attr.attrId] = attr;
}
/// Binds a guest-provided command ring to queue `queueId` of compute pipe
/// `pipeId`.
///
/// @param vmId            VM id used to build the RemoteMemory view through
///                        which the ring, doorbell and read-pointer report
///                        addresses are translated.
/// @param meId            Micro-engine id; only 1 is accepted, anything else
///                        terminates via rx::die.
/// @param pipeId          Index into mDevice->computePipes.
/// @param queueId         Queue slot inside the pipe; must be smaller than the
///                        number of per-queue mutexes the pipe declares.
/// @param vqueueId        Currently unused by this function.
/// @param ringBaseAddress Guest address of the first ring entry.
/// @param readPtrAddress  Guest address stored as Ring::rptrReportLocation.
/// @param doorbell        Guest address of the doorbell.
/// @param ringSize        Value stored as Ring::size.
void DeviceCtl::mapComputeQueue(int vmId, std::uint32_t meId,
                                std::uint32_t pipeId, std::uint32_t queueId,
                                std::uint32_t vqueueId,
                                orbis::uint64_t ringBaseAddress,
                                orbis::uint64_t readPtrAddress,
                                orbis::uint64_t doorbell,
                                orbis::uint64_t ringSize) {
  if (meId != 1) {
    // meId is unsigned, so it is printed with %u rather than %d.
    rx::die("unexpected ME %u", meId);
  }

  // NOTE(review): pipeId is not range-checked; the extent of computePipes is
  // not visible from here - confirm callers can only pass valid pipe indices.
  auto &pipe = mDevice->computePipes[pipeId];

  // queueId originates from an ioctl argument and is used to index the pipe's
  // per-queue mutex array, so reject out-of-range values before touching it.
  const auto queueCount = sizeof(pipe.queueMtx) / sizeof(pipe.queueMtx[0]);
  if (queueId >= queueCount) {
    rx::die("unexpected queue %u", queueId);
  }

  // Hold the queue lock for the whole remap; mapQueue receives the lock.
  auto lock = pipe.lockQueue(queueId);
  auto memory = RemoteMemory{vmId};
  auto base = memory.getPointer<std::uint32_t>(ringBaseAddress);

  // A freshly mapped ring starts empty: read and write pointers both sit at
  // the ring base.
  pipe.mapQueue(queueId,
                Ring{
                    .vmId = vmId,
                    .indirectLevel = 0,
                    .doorbell = memory.getPointer<std::uint32_t>(doorbell),
                    .base = base,
                    .size = ringSize,
                    .rptr = base,
                    .wptr = base,
                    .rptrReportLocation =
                        memory.getPointer<std::uint32_t>(readPtrAddress),
                },
                lock);

  // NOTE(review): the doorbell address is reinterpreted directly here, whereas
  // the Ring above uses the RemoteMemory-translated pointer - confirm the raw
  // address is valid in this process.
  auto config = std::bit_cast<amdgpu::Registers::ComputeConfig *>(doorbell);
  config->state = 1;
}
/// Forwards a compute doorbell to queue `queueId` of compute pipe `pipeId`.
///
/// @param meId    Micro-engine id; only 1 is accepted, anything else
///                terminates via rx::die.
/// @param pipeId  Index into mDevice->computePipes.
/// @param queueId Queue slot inside the pipe.
/// @param offset  Offset handed to ComputePipe::submit (the ioctl caller
///                passes its nextStartOffsetInDw field).
void DeviceCtl::submitComputeQueue(std::uint32_t meId, std::uint32_t pipeId,
                                   std::uint32_t queueId,
                                   std::uint64_t offset) {
  if (meId != 1) {
    // meId is unsigned, so it is printed with %u rather than %d.
    rx::die("unexpected ME %u", meId);
  }

  // NOTE(review): pipeId is not range-checked; the extent of computePipes is
  // not visible from here - confirm callers can only pass valid pipe indices.
  auto &pipe = mDevice->computePipes[pipeId];

  // queueId originates from an ioctl argument. ComputePipe declares a fixed
  // number of per-queue mutexes; submit's implementation is not visible here,
  // so out-of-range ids are rejected before being handed over.
  const auto queueCount = sizeof(pipe.queueMtx) / sizeof(pipe.queueMtx[0]);
  if (queueId >= queueCount) {
    rx::die("unexpected queue %u", queueId);
  }

  // ComputePipe::submit takes a 32-bit offset; make the narrowing explicit.
  pipe.submit(queueId, static_cast<std::uint32_t>(offset));
}
// Starts the wrapped device by delegating to mDevice->start().
void DeviceCtl::start() {
  mDevice->start();
}
// Delegates to mDevice->waitForIdle().
void DeviceCtl::waitForIdle() {
  mDevice->waitForIdle();
}

View file

@ -1,6 +1,7 @@
#pragma once
#include "DeviceContext.hpp"
#include "orbis-config.hpp"
#include "orbis/utils/Rc.hpp"
#include <cstdint>
#include <span>
@ -40,6 +41,14 @@ public:
std::uint64_t address, std::uint64_t size, int prot);
void registerBuffer(std::uint32_t pid, Buffer buffer);
void registerBufferAttribute(std::uint32_t pid, BufferAttribute attr);
void mapComputeQueue(int vmId, std::uint32_t meId, std::uint32_t pipeId,
std::uint32_t queueId, std::uint32_t vqueueId,
orbis::uint64_t ringBaseAddress,
orbis::uint64_t readPtrAddress, orbis::uint64_t doorbell,
orbis::uint64_t ringSize);
void submitComputeQueue(std::uint32_t meId, std::uint32_t pipeId,
std::uint32_t queueId, std::uint64_t offset);
void start();
void waitForIdle();

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,7 @@
#pragma once
#include "Registers.hpp"
#include "Scheduler.hpp"
#include "orbis/utils/SharedMutex.hpp"
#include <cstdint>
#include <vulkan/vulkan_core.h>
@ -8,7 +9,7 @@
namespace amdgpu {
struct Device;
struct Queue {
struct Ring {
int vmId = -1;
int indirectLevel = -1;
std::uint32_t *doorbell{};
@ -16,11 +17,12 @@ struct Queue {
std::uint64_t size{};
std::uint32_t *rptr{};
std::uint32_t *wptr{};
std::uint32_t *rptrReportLocation{};
static Queue createFromRange(int vmId, std::uint32_t *base,
std::uint64_t size, int indirectLevel = 0,
std::uint32_t *doorbell = nullptr) {
Queue result;
static Ring createFromRange(int vmId, std::uint32_t *base, std::uint64_t size,
int indirectLevel = 0,
std::uint32_t *doorbell = nullptr) {
Ring result;
result.vmId = vmId;
result.indirectLevel = indirectLevel;
result.doorbell = doorbell;
@ -36,20 +38,35 @@ struct ComputePipe {
Device *device;
Scheduler scheduler;
using CommandHandler = bool (ComputePipe::*)(Queue &);
using CommandHandler = bool (ComputePipe::*)(Ring &);
CommandHandler commandHandlers[255];
Queue queues[8];
Registers::ComputeConfig computeConfig;
orbis::shared_mutex queueMtx[8];
int index;
Ring queues[2][8];
std::uint64_t drawIndexIndirPatchBase = 0;
ComputePipe(int index);
bool processAllRings();
void processRing(Queue &queue);
void mapQueue(int queueId, Queue queue);
bool processRing(Ring &ring);
void mapQueue(int queueId, Ring ring, std::unique_lock<orbis::shared_mutex> &lock);
void waitForIdle(int queueId, std::unique_lock<orbis::shared_mutex> &lock);
void submit(int queueId, std::uint32_t offset);
bool setShReg(Queue &queue);
bool unknownPacket(Queue &queue);
bool handleNop(Queue &queue);
// Locks the mutex guarding queue `queueId` and returns the owning lock so the
// caller can pass it on to mapQueue()/waitForIdle(). `queueId` is used as a
// direct index into queueMtx and is not range-checked here.
std::unique_lock<orbis::shared_mutex> lockQueue(int queueId) {
  auto &mutex = queueMtx[queueId];
  return std::unique_lock<orbis::shared_mutex>{mutex};
}
bool setShReg(Ring &ring);
bool dispatchDirect(Ring &ring);
bool dispatchIndirect(Ring &ring);
bool releaseMem(Ring &ring);
bool waitRegMem(Ring &ring);
bool writeData(Ring &ring);
bool unknownPacket(Ring &ring);
bool handleNop(Ring &ring);
std::uint32_t *getMmRegister(Ring &ring, std::uint32_t dwAddress);
};
struct GraphicsPipe {
@ -71,75 +88,75 @@ struct GraphicsPipe {
Registers::Context context;
Registers::UConfig uConfig;
Queue deQueues[3];
Queue ceQueue;
Ring deQueues[3];
Ring ceQueue;
using CommandHandler = bool (GraphicsPipe::*)(Queue &);
using CommandHandler = bool (GraphicsPipe::*)(Ring &);
CommandHandler commandHandlers[4][255];
GraphicsPipe(int index);
void setCeQueue(Queue queue);
void setDeQueue(Queue queue, int ring);
void setCeQueue(Ring ring);
void setDeQueue(Ring ring, int indirectLevel);
bool processAllRings();
void processRing(Queue &queue);
void processRing(Ring &ring);
bool drawPreamble(Queue &queue);
bool indexBufferSize(Queue &queue);
bool handleNop(Queue &queue);
bool contextControl(Queue &queue);
bool acquireMem(Queue &queue);
bool releaseMem(Queue &queue);
bool dispatchDirect(Queue &queue);
bool dispatchIndirect(Queue &queue);
bool writeData(Queue &queue);
bool memSemaphore(Queue &queue);
bool waitRegMem(Queue &queue);
bool indirectBufferConst(Queue &queue);
bool indirectBuffer(Queue &queue);
bool condWrite(Queue &queue);
bool eventWrite(Queue &queue);
bool eventWriteEop(Queue &queue);
bool eventWriteEos(Queue &queue);
bool dmaData(Queue &queue);
bool setBase(Queue &queue);
bool clearState(Queue &queue);
bool setPredication(Queue &queue);
bool drawIndirect(Queue &queue);
bool drawIndexIndirect(Queue &queue);
bool indexBase(Queue &queue);
bool drawIndex2(Queue &queue);
bool indexType(Queue &queue);
bool drawIndexAuto(Queue &queue);
bool numInstances(Queue &queue);
bool drawIndexMultiAuto(Queue &queue);
bool drawIndexOffset2(Queue &queue);
bool pfpSyncMe(Queue &queue);
bool setCeDeCounters(Queue &queue);
bool waitOnCeCounter(Queue &queue);
bool waitOnDeCounterDiff(Queue &queue);
bool incrementCeCounter(Queue &queue);
bool incrementDeCounter(Queue &queue);
bool loadConstRam(Queue &queue);
bool writeConstRam(Queue &queue);
bool dumpConstRam(Queue &queue);
bool setConfigReg(Queue &queue);
bool setShReg(Queue &queue);
bool setUConfigReg(Queue &queue);
bool setContextReg(Queue &queue);
bool drawPreamble(Ring &ring);
bool indexBufferSize(Ring &ring);
bool handleNop(Ring &ring);
bool contextControl(Ring &ring);
bool acquireMem(Ring &ring);
bool releaseMem(Ring &ring);
bool dispatchDirect(Ring &ring);
bool dispatchIndirect(Ring &ring);
bool writeData(Ring &ring);
bool memSemaphore(Ring &ring);
bool waitRegMem(Ring &ring);
bool indirectBufferConst(Ring &ring);
bool indirectBuffer(Ring &ring);
bool condWrite(Ring &ring);
bool eventWrite(Ring &ring);
bool eventWriteEop(Ring &ring);
bool eventWriteEos(Ring &ring);
bool dmaData(Ring &ring);
bool setBase(Ring &ring);
bool clearState(Ring &ring);
bool setPredication(Ring &ring);
bool drawIndirect(Ring &ring);
bool drawIndexIndirect(Ring &ring);
bool indexBase(Ring &ring);
bool drawIndex2(Ring &ring);
bool indexType(Ring &ring);
bool drawIndexAuto(Ring &ring);
bool numInstances(Ring &ring);
bool drawIndexMultiAuto(Ring &ring);
bool drawIndexOffset2(Ring &ring);
bool pfpSyncMe(Ring &ring);
bool setCeDeCounters(Ring &ring);
bool waitOnCeCounter(Ring &ring);
bool waitOnDeCounterDiff(Ring &ring);
bool incrementCeCounter(Ring &ring);
bool incrementDeCounter(Ring &ring);
bool loadConstRam(Ring &ring);
bool writeConstRam(Ring &ring);
bool dumpConstRam(Ring &ring);
bool setConfigReg(Ring &ring);
bool setShReg(Ring &ring);
bool setUConfigReg(Ring &ring);
bool setContextReg(Ring &ring);
bool unknownPacket(Queue &queue);
bool unknownPacket(Ring &ring);
bool switchBuffer(Queue &queue);
bool mapProcess(Queue &queue);
bool mapQueues(Queue &queue);
bool unmapQueues(Queue &queue);
bool mapMemory(Queue &queue);
bool unmapMemory(Queue &queue);
bool protectMemory(Queue &queue);
bool unmapProcess(Queue &queue);
bool flip(Queue &queue);
bool switchBuffer(Ring &ring);
bool mapProcess(Ring &ring);
bool mapQueues(Ring &ring);
bool unmapQueues(Ring &ring);
bool mapMemory(Ring &ring);
bool unmapMemory(Ring &ring);
bool protectMemory(Ring &ring);
bool unmapProcess(Ring &ring);
bool flip(Ring &ring);
std::uint32_t *getMmRegister(std::uint32_t dwAddress);
};

View file

@ -399,10 +399,10 @@ struct DbDepthSize {
std::uint32_t raw;
};
std::uint32_t getPitch() const {
[[nodiscard]] std::uint32_t getPitch() const {
return (pitchTileMax + 1) * 8;
}
std::uint32_t getHeight() const {
[[nodiscard]] std::uint32_t getHeight() const {
return (heightTileMax + 1) * 8;
}
};
@ -591,8 +591,12 @@ struct Registers {
};
};
std::uint8_t getVGprCount() const { return (vgprs + 1) * 4; }
std::uint8_t getSGprCount() const { return (sgprs + 1) * 8; }
[[nodiscard]] std::uint8_t getVGprCount() const {
return (vgprs + 1) * 4;
}
[[nodiscard]] std::uint8_t getSGprCount() const {
return (sgprs + 1) * 8;
}
} rsrc1;
struct {
union {
@ -613,7 +617,9 @@ struct Registers {
};
};
std::uint32_t getLdsDwordsCount() const { return ldsSize * 64; }
[[nodiscard]] std::uint32_t getLdsDwordsCount() const {
return ldsSize * 64;
}
} rsrc2;
std::uint32_t _pad3[1];
@ -624,20 +630,25 @@ struct Registers {
std::uint32_t wavesPerSh : 6;
std::uint32_t : 6;
std::uint32_t tgPerCu : 4;
std::uint32_t lockThreshold: 6;
std::uint32_t lockThreshold : 6;
std::uint32_t simdDestCntl : 1;
};
};
std::uint32_t getWavesPerSh() const { return wavesPerSh << 4; }
[[nodiscard]] std::uint32_t getWavesPerSh() const {
return wavesPerSh << 4;
}
} resourceLimits;
std::uint32_t staticThreadMgmtSe0;
std::uint32_t staticThreadMgmtSe1;
std::uint32_t tmpRingSize;
std::uint32_t _pad4[39];
std::uint32_t _unk0[5];
std::uint32_t state;
std::uint32_t _unk1[33];
std::array<std::uint32_t, 16> userData;
};
static_assert(sizeof(ComputeConfig) == 320);
struct ShaderConfig {
static constexpr auto kMmioOffset = 0x2c00;

View file

@ -1,3 +1,4 @@
#include "dce.hpp"
#include "gpu/DeviceCtl.hpp"
#include "io-device.hpp"
#include "iodev/dmem.hpp"
@ -8,7 +9,6 @@
#include "orbis/thread/Process.hpp"
#include "orbis/thread/Thread.hpp"
#include "orbis/utils/Logs.hpp"
#include "orbis/utils/SharedMutex.hpp"
#include "rx/mem.hpp"
#include "rx/watchdog.hpp"
#include "vm.hpp"
@ -192,32 +192,21 @@ static void runBridge(int vmId) {
}}.detach();
}
static constexpr auto kVmIdCount = 6;
struct DceFile : public orbis::File {};
struct DceDevice : IoDevice {
orbis::shared_mutex mtx;
std::uint32_t freeVmIds = (1 << (kVmIdCount + 1)) - 1;
orbis::uint64_t dmemOffset = ~static_cast<std::uint64_t>(0);
int DceDevice::allocateVmId() {
int id = std::countr_zero(freeVmIds);
orbis::ErrorCode open(orbis::Ref<orbis::File> *file, const char *path,
std::uint32_t flags, std::uint32_t mode,
orbis::Thread *thread) override;
if (id >= kVmIdCount) {
std::println(stderr, "out of vm slots");
std::abort();
}
int allocateVmId() {
int id = std::countr_zero(freeVmIds);
freeVmIds &= ~(1 << id);
return id;
}
if (id >= kVmIdCount) {
std::println(stderr, "out of vm slots");
std::abort();
}
freeVmIds &= ~(1 << id);
return id;
};
void deallocateVmId(int vmId) { freeVmIds |= (1 << vmId); };
};
void DceDevice::deallocateVmId(int vmId) { freeVmIds |= (1 << vmId); }
static void initDceMemory(DceDevice *device) {
if (device->dmemOffset + 1) {
@ -466,21 +455,24 @@ orbis::ErrorCode DceDevice::open(orbis::Ref<orbis::File> *file,
newFile->device = this;
newFile->ops = &ops;
*file = newFile;
initializeProcess(thread->tproc);
return {};
}
if (thread->tproc->vmId == -1) {
void DceDevice::initializeProcess(orbis::Process *process) {
if (process->vmId == -1) {
createGpu();
auto vmId = allocateVmId();
std::lock_guard lock(orbis::g_context.gpuDeviceMtx);
{
auto gpu = amdgpu::DeviceCtl{orbis::g_context.gpuDevice};
gpu.submitMapProcess(thread->tproc->gfxRing, thread->tproc->pid, vmId);
thread->tproc->vmId = vmId;
gpu.submitMapProcess(process->gfxRing, process->pid, vmId);
process->vmId = vmId;
}
runBridge(vmId);
}
return {};
}
IoDevice *createDceCharacterDevice() { return orbis::knew<DceDevice>(); }

25
rpcsx/iodev/dce.hpp Normal file
View file

@ -0,0 +1,25 @@
#pragma once
#include "io-device.hpp"
#include "orbis-config.hpp"
#include "orbis/error/ErrorCode.hpp"
#include "orbis/file.hpp"
#include "orbis/thread/Process.hpp"
#include "orbis/utils/Rc.hpp"
#include "orbis/utils/SharedMutex.hpp"
// Exclusive upper bound on the VM ids DceDevice::allocateVmId() hands out:
// allocation aborts once the lowest free id is >= this value.
static constexpr auto kVmIdCount = 6;
struct DceDevice : IoDevice {
orbis::shared_mutex mtx;
std::uint32_t freeVmIds = (1 << (kVmIdCount + 1)) - 1;
orbis::uint64_t dmemOffset = ~static_cast<std::uint64_t>(0);
orbis::ErrorCode open(orbis::Ref<orbis::File> *file, const char *path,
std::uint32_t flags, std::uint32_t mode,
orbis::Thread *thread) override;
int allocateVmId();
void deallocateVmId(int vmId);
void initializeProcess(orbis::Process *process);
};

View file

@ -1,5 +1,6 @@
#include "gpu/DeviceCtl.hpp"
#include "io-device.hpp"
#include "iodev/dce.hpp"
#include "iodev/dmem.hpp"
#include "orbis/KernelAllocator.hpp"
#include "orbis/KernelContext.hpp"
@ -87,11 +88,11 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
if (auto gpu = amdgpu::DeviceCtl{orbis::g_context.gpuDevice}) {
for (unsigned i = 0; i < args->count; ++i) {
gpu.submitGfxCommand(gcFile->gfxPipe,
orbis::g_currentThread->tproc->vmId,
{args->cmds + i * 4, 4});
orbis::g_currentThread->tproc->vmId,
{args->cmds + i * 4, 4});
}
} else {
return orbis::ErrorCode::INVAL;
return orbis::ErrorCode::BUSY;
}
break;
}
@ -106,7 +107,7 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
if (auto gpu = amdgpu::DeviceCtl{orbis::g_context.gpuDevice}) {
gpu.submitSwitchBuffer(orbis::g_currentThread->tproc->vmId);
} else {
return orbis::ErrorCode::INVAL;
return orbis::ErrorCode::BUSY;
}
// ORBIS_LOG_ERROR("gc ioctl 0xc0088101", args->arg0, args->arg1);
@ -127,11 +128,11 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
if (auto gpu = amdgpu::DeviceCtl{orbis::g_context.gpuDevice}) {
for (unsigned i = 0; i < args->count; ++i) {
gpu.submitGfxCommand(gcFile->gfxPipe,
orbis::g_currentThread->tproc->vmId,
{args->cmds + i * 4, 4});
orbis::g_currentThread->tproc->vmId,
{args->cmds + i * 4, 4});
}
} else {
return orbis::ErrorCode::INVAL;
return orbis::ErrorCode::BUSY;
}
// orbis::bridge.sendDoFlip();
@ -142,7 +143,7 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
if (auto gpu = amdgpu::DeviceCtl{orbis::g_context.gpuDevice}) {
gpu.waitForIdle();
} else {
return orbis::ErrorCode::INVAL;
return orbis::ErrorCode::BUSY;
}
break;
}
@ -193,64 +194,53 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
case 0xc030810d: { // map compute queue
struct Args {
std::uint32_t pipeHi;
std::uint32_t pipeLo;
std::uint32_t queueId;
std::uint32_t offset;
std::uint64_t ringBaseAddress;
std::uint64_t readPtrAddress;
std::uint64_t dingDongPtr;
std::uint32_t lenLog2;
orbis::uint32_t meId;
orbis::uint32_t pipeId;
orbis::uint32_t queueId;
orbis::uint32_t vqueueId;
orbis::uintptr_t ringBaseAddress;
orbis::uintptr_t readPtrAddress;
orbis::uintptr_t doorbell;
orbis::uint32_t ringSize;
};
auto args = reinterpret_cast<Args *>(argp);
ORBIS_LOG_ERROR("gc ioctl map compute queue", args->pipeHi, args->pipeLo,
args->queueId, args->offset, args->ringBaseAddress,
args->readPtrAddress, args->dingDongPtr, args->lenLog2);
ORBIS_LOG_ERROR("gc ioctl map compute queue", args->meId, args->pipeId,
args->queueId, args->vqueueId, args->ringBaseAddress,
args->readPtrAddress, args->doorbell, args->ringSize);
rx::die("gc ioctl map compute queue");
if (auto gpu = amdgpu::DeviceCtl{orbis::g_context.gpuDevice}) {
gpu.mapComputeQueue(thread->tproc->vmId, args->meId, args->pipeId,
args->queueId, args->vqueueId, args->ringBaseAddress,
args->readPtrAddress, args->doorbell,
static_cast<std::uint64_t>(1) << args->ringSize);
// auto id = ((args->pipeHi * 4) + args->pipeLo) * 8 + args->queueId;
// device->computeQueues[id] = {
// .ringBaseAddress = args->ringBaseAddress,
// .readPtrAddress = args->readPtrAddress,
// .dingDongPtr = args->dingDongPtr,
// .len = static_cast<std::uint64_t>(1) << args->lenLog2,
// };
// args->pipeHi = 0x769c766;
// args->pipeLo = 0x72e8e3c1;
// args->queueId = -0x248d50d8;
// args->offset = 0xd245ed58;
// ((std::uint64_t *)args->dingDongPtr)[0xf0 / sizeof(std::uint64_t)] = 1;
} else {
return orbis::ErrorCode::BUSY;
}
break;
}
case 0xc010811c: {
// ding dong for workload
struct Args {
std::uint32_t pipeHi;
std::uint32_t pipeLo;
std::uint32_t meId;
std::uint32_t pipeId;
std::uint32_t queueId;
std::uint32_t nextStartOffsetInDw;
};
auto args = reinterpret_cast<Args *>(argp);
ORBIS_LOG_ERROR("gc ioctl ding dong for workload", args->pipeHi,
args->pipeLo, args->queueId, args->nextStartOffsetInDw);
rx::die("gc ioctl ding dong for workload");
ORBIS_LOG_ERROR("gc ioctl ding dong for workload", args->meId, args->pipeId,
args->queueId, args->nextStartOffsetInDw);
// auto id = ((args->pipeHi * 4) + args->pipeLo) * 8 + args->queueId;
// auto queue = device->computeQueues.at(id);
// auto address = (queue.ringBaseAddress + queue.offset);
// auto endOffset = static_cast<std::uint64_t>(args->nextStartOffsetInDw) <<
// 2; auto size = endOffset - queue.offset;
// rx::bridge.sendCommandBuffer(thread->tproc->pid, id, address, size);
// queue.offset = endOffset;
if (auto gpu = amdgpu::DeviceCtl{orbis::g_context.gpuDevice}) {
gpu.submitComputeQueue(args->meId, args->pipeId, args->queueId,
args->nextStartOffsetInDw);
} else {
return orbis::ErrorCode::BUSY;
}
break;
}
@ -336,6 +326,9 @@ orbis::ErrorCode GcDevice::open(orbis::Ref<orbis::File> *file, const char *path,
}
void GcDevice::addClient(orbis::Process *process) {
auto dce = orbis::g_context.dceDevice.rawStaticCast<DceDevice>();
dce->initializeProcess(process);
std::lock_guard lock(mtx);
auto &client = clients[process->pid];
++client;

View file

@ -321,6 +321,9 @@ static void ps4InitDev() {
auto dmem1 = createDmemCharacterDevice(1);
orbis::g_context.dmemDevice = dmem1;
auto dce = createDceCharacterDevice();
orbis::g_context.dceDevice = dce;
auto ttyFd = ::open("tty.txt", O_CREAT | O_TRUNC | O_WRONLY, 0666);
auto consoleDev = createConsoleCharacterDevice(STDIN_FILENO, ttyFd);
auto mbus = static_cast<MBusDevice *>(createMBusCharacterDevice());
@ -357,7 +360,7 @@ static void ps4InitDev() {
vfs::addDevice("zero", createZeroCharacterDevice());
vfs::addDevice("null", createNullCharacterDevice());
vfs::addDevice("dipsw", createDipswCharacterDevice());
vfs::addDevice("dce", createDceCharacterDevice());
vfs::addDevice("dce", dce);
vfs::addDevice("hmd_cmd", createHmdCmdCharacterDevice());
vfs::addDevice("hmd_snsr", createHmdSnsrCharacterDevice());
vfs::addDevice("hmd_3da", createHmd3daCharacterDevice());