mirror of
https://github.com/RPCSX/rpcsx.git
synced 2026-01-07 17:20:14 +01:00
gpu: implement compute queue
This commit is contained in:
parent
1f28918fc9
commit
4fe857485c
|
|
@ -181,6 +181,7 @@ public:
|
|||
Ref<RcBase> blockpoolDevice;
|
||||
shared_mutex gpuDeviceMtx;
|
||||
Ref<RcBase> gpuDevice;
|
||||
Ref<RcBase> dceDevice;
|
||||
uint sdkVersion{};
|
||||
uint fwSdkVersion{};
|
||||
uint safeMode{};
|
||||
|
|
|
|||
|
|
@ -236,7 +236,7 @@ Device::Device() : vkContext(createVkContext(this)) {
|
|||
|
||||
for (int i = 0; i < kGfxPipeCount; ++i) {
|
||||
graphicsPipes[i].setDeQueue(
|
||||
Queue{
|
||||
Ring{
|
||||
.base = mainGfxRings[i],
|
||||
.size = sizeof(mainGfxRings[i]) / sizeof(mainGfxRings[i][0]),
|
||||
.rptr = mainGfxRings[i],
|
||||
|
|
@ -474,7 +474,7 @@ void Device::start() {
|
|||
}
|
||||
}
|
||||
|
||||
void Device::submitCommand(Queue &ring,
|
||||
void Device::submitCommand(Ring &ring,
|
||||
std::span<const std::uint32_t> command) {
|
||||
std::scoped_lock lock(writeCommandMtx);
|
||||
if (ring.wptr + command.size() > ring.base + ring.size) {
|
||||
|
|
@ -599,12 +599,12 @@ void Device::onCommandBuffer(std::uint32_t pid, int cmdHeader,
|
|||
auto op = rx::getBits(cmdHeader, 15, 8);
|
||||
|
||||
if (op == gnm::IT_INDIRECT_BUFFER_CNST) {
|
||||
graphicsPipes[0].setCeQueue(Queue::createFromRange(
|
||||
graphicsPipes[0].setCeQueue(Ring::createFromRange(
|
||||
process.vmId, memory.getPointer<std::uint32_t>(address),
|
||||
size / sizeof(std::uint32_t)));
|
||||
} else if (op == gnm::IT_INDIRECT_BUFFER) {
|
||||
graphicsPipes[0].setDeQueue(
|
||||
Queue::createFromRange(process.vmId,
|
||||
Ring::createFromRange(process.vmId,
|
||||
memory.getPointer<std::uint32_t>(address),
|
||||
size / sizeof(std::uint32_t)),
|
||||
1);
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ struct Device : orbis::RcBase, DeviceContext {
|
|||
return caches[vmId].createComputeTag(scheduler);
|
||||
}
|
||||
|
||||
void submitCommand(Queue &ring, std::span<const std::uint32_t> command);
|
||||
void submitCommand(Ring &ring, std::span<const std::uint32_t> command);
|
||||
void submitGfxCommand(int gfxPipe, std::span<const std::uint32_t> command);
|
||||
|
||||
void mapProcess(std::uint32_t pid, int vmId);
|
||||
|
|
|
|||
|
|
@ -117,5 +117,49 @@ void DeviceCtl::registerBufferAttribute(std::uint32_t pid,
|
|||
process.bufferAttributes[attr.attrId] = attr;
|
||||
}
|
||||
|
||||
void DeviceCtl::mapComputeQueue(int vmId, std::uint32_t meId,
|
||||
std::uint32_t pipeId, std::uint32_t queueId,
|
||||
std::uint32_t vqueueId,
|
||||
orbis::uint64_t ringBaseAddress,
|
||||
orbis::uint64_t readPtrAddress,
|
||||
orbis::uint64_t doorbell,
|
||||
orbis::uint64_t ringSize) {
|
||||
if (meId != 1) {
|
||||
rx::die("unexpected ME %d", meId);
|
||||
}
|
||||
|
||||
auto &pipe = mDevice->computePipes[pipeId];
|
||||
auto lock = pipe.lockQueue(queueId);
|
||||
auto memory = RemoteMemory{vmId};
|
||||
auto base = memory.getPointer<std::uint32_t>(ringBaseAddress);
|
||||
pipe.mapQueue(queueId,
|
||||
Ring{
|
||||
.vmId = vmId,
|
||||
.indirectLevel = 0,
|
||||
.doorbell = memory.getPointer<std::uint32_t>(doorbell),
|
||||
.base = base,
|
||||
.size = ringSize,
|
||||
.rptr = base,
|
||||
.wptr = base,
|
||||
.rptrReportLocation =
|
||||
memory.getPointer<std::uint32_t>(readPtrAddress),
|
||||
},
|
||||
lock);
|
||||
|
||||
auto config = std::bit_cast<amdgpu::Registers::ComputeConfig *>(doorbell);
|
||||
config->state = 1;
|
||||
}
|
||||
|
||||
void DeviceCtl::submitComputeQueue(std::uint32_t meId, std::uint32_t pipeId,
|
||||
std::uint32_t queueId,
|
||||
std::uint64_t offset) {
|
||||
if (meId != 1) {
|
||||
rx::die("unexpected ME %d", meId);
|
||||
}
|
||||
|
||||
auto &pipe = mDevice->computePipes[pipeId];
|
||||
pipe.submit(queueId, offset);
|
||||
}
|
||||
|
||||
void DeviceCtl::start() { mDevice->start(); }
|
||||
void DeviceCtl::waitForIdle() { mDevice->waitForIdle(); }
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "DeviceContext.hpp"
|
||||
#include "orbis-config.hpp"
|
||||
#include "orbis/utils/Rc.hpp"
|
||||
#include <cstdint>
|
||||
#include <span>
|
||||
|
|
@ -40,6 +41,14 @@ public:
|
|||
std::uint64_t address, std::uint64_t size, int prot);
|
||||
void registerBuffer(std::uint32_t pid, Buffer buffer);
|
||||
void registerBufferAttribute(std::uint32_t pid, BufferAttribute attr);
|
||||
|
||||
void mapComputeQueue(int vmId, std::uint32_t meId, std::uint32_t pipeId,
|
||||
std::uint32_t queueId, std::uint32_t vqueueId,
|
||||
orbis::uint64_t ringBaseAddress,
|
||||
orbis::uint64_t readPtrAddress, orbis::uint64_t doorbell,
|
||||
orbis::uint64_t ringSize);
|
||||
void submitComputeQueue(std::uint32_t meId, std::uint32_t pipeId,
|
||||
std::uint32_t queueId, std::uint64_t offset);
|
||||
void start();
|
||||
void waitForIdle();
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
#include "Registers.hpp"
|
||||
#include "Scheduler.hpp"
|
||||
#include "orbis/utils/SharedMutex.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
|
@ -8,7 +9,7 @@
|
|||
namespace amdgpu {
|
||||
struct Device;
|
||||
|
||||
struct Queue {
|
||||
struct Ring {
|
||||
int vmId = -1;
|
||||
int indirectLevel = -1;
|
||||
std::uint32_t *doorbell{};
|
||||
|
|
@ -16,11 +17,12 @@ struct Queue {
|
|||
std::uint64_t size{};
|
||||
std::uint32_t *rptr{};
|
||||
std::uint32_t *wptr{};
|
||||
std::uint32_t *rptrReportLocation{};
|
||||
|
||||
static Queue createFromRange(int vmId, std::uint32_t *base,
|
||||
std::uint64_t size, int indirectLevel = 0,
|
||||
std::uint32_t *doorbell = nullptr) {
|
||||
Queue result;
|
||||
static Ring createFromRange(int vmId, std::uint32_t *base, std::uint64_t size,
|
||||
int indirectLevel = 0,
|
||||
std::uint32_t *doorbell = nullptr) {
|
||||
Ring result;
|
||||
result.vmId = vmId;
|
||||
result.indirectLevel = indirectLevel;
|
||||
result.doorbell = doorbell;
|
||||
|
|
@ -36,20 +38,35 @@ struct ComputePipe {
|
|||
Device *device;
|
||||
Scheduler scheduler;
|
||||
|
||||
using CommandHandler = bool (ComputePipe::*)(Queue &);
|
||||
using CommandHandler = bool (ComputePipe::*)(Ring &);
|
||||
CommandHandler commandHandlers[255];
|
||||
Queue queues[8];
|
||||
Registers::ComputeConfig computeConfig;
|
||||
orbis::shared_mutex queueMtx[8];
|
||||
int index;
|
||||
Ring queues[2][8];
|
||||
std::uint64_t drawIndexIndirPatchBase = 0;
|
||||
|
||||
ComputePipe(int index);
|
||||
|
||||
bool processAllRings();
|
||||
void processRing(Queue &queue);
|
||||
void mapQueue(int queueId, Queue queue);
|
||||
bool processRing(Ring &ring);
|
||||
void mapQueue(int queueId, Ring ring, std::unique_lock<orbis::shared_mutex> &lock);
|
||||
void waitForIdle(int queueId, std::unique_lock<orbis::shared_mutex> &lock);
|
||||
void submit(int queueId, std::uint32_t offset);
|
||||
|
||||
bool setShReg(Queue &queue);
|
||||
bool unknownPacket(Queue &queue);
|
||||
bool handleNop(Queue &queue);
|
||||
std::unique_lock<orbis::shared_mutex> lockQueue(int queueId) {
|
||||
return std::unique_lock<orbis::shared_mutex>(queueMtx[queueId]);
|
||||
}
|
||||
|
||||
bool setShReg(Ring &ring);
|
||||
bool dispatchDirect(Ring &ring);
|
||||
bool dispatchIndirect(Ring &ring);
|
||||
bool releaseMem(Ring &ring);
|
||||
bool waitRegMem(Ring &ring);
|
||||
bool writeData(Ring &ring);
|
||||
bool unknownPacket(Ring &ring);
|
||||
bool handleNop(Ring &ring);
|
||||
|
||||
std::uint32_t *getMmRegister(Ring &ring, std::uint32_t dwAddress);
|
||||
};
|
||||
|
||||
struct GraphicsPipe {
|
||||
|
|
@ -71,75 +88,75 @@ struct GraphicsPipe {
|
|||
Registers::Context context;
|
||||
Registers::UConfig uConfig;
|
||||
|
||||
Queue deQueues[3];
|
||||
Queue ceQueue;
|
||||
Ring deQueues[3];
|
||||
Ring ceQueue;
|
||||
|
||||
using CommandHandler = bool (GraphicsPipe::*)(Queue &);
|
||||
using CommandHandler = bool (GraphicsPipe::*)(Ring &);
|
||||
CommandHandler commandHandlers[4][255];
|
||||
|
||||
GraphicsPipe(int index);
|
||||
|
||||
void setCeQueue(Queue queue);
|
||||
void setDeQueue(Queue queue, int ring);
|
||||
void setCeQueue(Ring ring);
|
||||
void setDeQueue(Ring ring, int indirectLevel);
|
||||
|
||||
bool processAllRings();
|
||||
void processRing(Queue &queue);
|
||||
void processRing(Ring &ring);
|
||||
|
||||
bool drawPreamble(Queue &queue);
|
||||
bool indexBufferSize(Queue &queue);
|
||||
bool handleNop(Queue &queue);
|
||||
bool contextControl(Queue &queue);
|
||||
bool acquireMem(Queue &queue);
|
||||
bool releaseMem(Queue &queue);
|
||||
bool dispatchDirect(Queue &queue);
|
||||
bool dispatchIndirect(Queue &queue);
|
||||
bool writeData(Queue &queue);
|
||||
bool memSemaphore(Queue &queue);
|
||||
bool waitRegMem(Queue &queue);
|
||||
bool indirectBufferConst(Queue &queue);
|
||||
bool indirectBuffer(Queue &queue);
|
||||
bool condWrite(Queue &queue);
|
||||
bool eventWrite(Queue &queue);
|
||||
bool eventWriteEop(Queue &queue);
|
||||
bool eventWriteEos(Queue &queue);
|
||||
bool dmaData(Queue &queue);
|
||||
bool setBase(Queue &queue);
|
||||
bool clearState(Queue &queue);
|
||||
bool setPredication(Queue &queue);
|
||||
bool drawIndirect(Queue &queue);
|
||||
bool drawIndexIndirect(Queue &queue);
|
||||
bool indexBase(Queue &queue);
|
||||
bool drawIndex2(Queue &queue);
|
||||
bool indexType(Queue &queue);
|
||||
bool drawIndexAuto(Queue &queue);
|
||||
bool numInstances(Queue &queue);
|
||||
bool drawIndexMultiAuto(Queue &queue);
|
||||
bool drawIndexOffset2(Queue &queue);
|
||||
bool pfpSyncMe(Queue &queue);
|
||||
bool setCeDeCounters(Queue &queue);
|
||||
bool waitOnCeCounter(Queue &queue);
|
||||
bool waitOnDeCounterDiff(Queue &queue);
|
||||
bool incrementCeCounter(Queue &queue);
|
||||
bool incrementDeCounter(Queue &queue);
|
||||
bool loadConstRam(Queue &queue);
|
||||
bool writeConstRam(Queue &queue);
|
||||
bool dumpConstRam(Queue &queue);
|
||||
bool setConfigReg(Queue &queue);
|
||||
bool setShReg(Queue &queue);
|
||||
bool setUConfigReg(Queue &queue);
|
||||
bool setContextReg(Queue &queue);
|
||||
bool drawPreamble(Ring &ring);
|
||||
bool indexBufferSize(Ring &ring);
|
||||
bool handleNop(Ring &ring);
|
||||
bool contextControl(Ring &ring);
|
||||
bool acquireMem(Ring &ring);
|
||||
bool releaseMem(Ring &ring);
|
||||
bool dispatchDirect(Ring &ring);
|
||||
bool dispatchIndirect(Ring &ring);
|
||||
bool writeData(Ring &ring);
|
||||
bool memSemaphore(Ring &ring);
|
||||
bool waitRegMem(Ring &ring);
|
||||
bool indirectBufferConst(Ring &ring);
|
||||
bool indirectBuffer(Ring &ring);
|
||||
bool condWrite(Ring &ring);
|
||||
bool eventWrite(Ring &ring);
|
||||
bool eventWriteEop(Ring &ring);
|
||||
bool eventWriteEos(Ring &ring);
|
||||
bool dmaData(Ring &ring);
|
||||
bool setBase(Ring &ring);
|
||||
bool clearState(Ring &ring);
|
||||
bool setPredication(Ring &ring);
|
||||
bool drawIndirect(Ring &ring);
|
||||
bool drawIndexIndirect(Ring &ring);
|
||||
bool indexBase(Ring &ring);
|
||||
bool drawIndex2(Ring &ring);
|
||||
bool indexType(Ring &ring);
|
||||
bool drawIndexAuto(Ring &ring);
|
||||
bool numInstances(Ring &ring);
|
||||
bool drawIndexMultiAuto(Ring &ring);
|
||||
bool drawIndexOffset2(Ring &ring);
|
||||
bool pfpSyncMe(Ring &ring);
|
||||
bool setCeDeCounters(Ring &ring);
|
||||
bool waitOnCeCounter(Ring &ring);
|
||||
bool waitOnDeCounterDiff(Ring &ring);
|
||||
bool incrementCeCounter(Ring &ring);
|
||||
bool incrementDeCounter(Ring &ring);
|
||||
bool loadConstRam(Ring &ring);
|
||||
bool writeConstRam(Ring &ring);
|
||||
bool dumpConstRam(Ring &ring);
|
||||
bool setConfigReg(Ring &ring);
|
||||
bool setShReg(Ring &ring);
|
||||
bool setUConfigReg(Ring &ring);
|
||||
bool setContextReg(Ring &ring);
|
||||
|
||||
bool unknownPacket(Queue &queue);
|
||||
bool unknownPacket(Ring &ring);
|
||||
|
||||
bool switchBuffer(Queue &queue);
|
||||
bool mapProcess(Queue &queue);
|
||||
bool mapQueues(Queue &queue);
|
||||
bool unmapQueues(Queue &queue);
|
||||
bool mapMemory(Queue &queue);
|
||||
bool unmapMemory(Queue &queue);
|
||||
bool protectMemory(Queue &queue);
|
||||
bool unmapProcess(Queue &queue);
|
||||
bool flip(Queue &queue);
|
||||
bool switchBuffer(Ring &ring);
|
||||
bool mapProcess(Ring &ring);
|
||||
bool mapQueues(Ring &ring);
|
||||
bool unmapQueues(Ring &ring);
|
||||
bool mapMemory(Ring &ring);
|
||||
bool unmapMemory(Ring &ring);
|
||||
bool protectMemory(Ring &ring);
|
||||
bool unmapProcess(Ring &ring);
|
||||
bool flip(Ring &ring);
|
||||
|
||||
std::uint32_t *getMmRegister(std::uint32_t dwAddress);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -399,10 +399,10 @@ struct DbDepthSize {
|
|||
std::uint32_t raw;
|
||||
};
|
||||
|
||||
std::uint32_t getPitch() const {
|
||||
[[nodiscard]] std::uint32_t getPitch() const {
|
||||
return (pitchTileMax + 1) * 8;
|
||||
}
|
||||
std::uint32_t getHeight() const {
|
||||
[[nodiscard]] std::uint32_t getHeight() const {
|
||||
return (heightTileMax + 1) * 8;
|
||||
}
|
||||
};
|
||||
|
|
@ -591,8 +591,12 @@ struct Registers {
|
|||
};
|
||||
};
|
||||
|
||||
std::uint8_t getVGprCount() const { return (vgprs + 1) * 4; }
|
||||
std::uint8_t getSGprCount() const { return (sgprs + 1) * 8; }
|
||||
[[nodiscard]] std::uint8_t getVGprCount() const {
|
||||
return (vgprs + 1) * 4;
|
||||
}
|
||||
[[nodiscard]] std::uint8_t getSGprCount() const {
|
||||
return (sgprs + 1) * 8;
|
||||
}
|
||||
} rsrc1;
|
||||
struct {
|
||||
union {
|
||||
|
|
@ -613,7 +617,9 @@ struct Registers {
|
|||
};
|
||||
};
|
||||
|
||||
std::uint32_t getLdsDwordsCount() const { return ldsSize * 64; }
|
||||
[[nodiscard]] std::uint32_t getLdsDwordsCount() const {
|
||||
return ldsSize * 64;
|
||||
}
|
||||
} rsrc2;
|
||||
std::uint32_t _pad3[1];
|
||||
|
||||
|
|
@ -624,20 +630,25 @@ struct Registers {
|
|||
std::uint32_t wavesPerSh : 6;
|
||||
std::uint32_t : 6;
|
||||
std::uint32_t tgPerCu : 4;
|
||||
std::uint32_t lockThreshold: 6;
|
||||
std::uint32_t lockThreshold : 6;
|
||||
std::uint32_t simdDestCntl : 1;
|
||||
};
|
||||
|
||||
};
|
||||
std::uint32_t getWavesPerSh() const { return wavesPerSh << 4; }
|
||||
[[nodiscard]] std::uint32_t getWavesPerSh() const {
|
||||
return wavesPerSh << 4;
|
||||
}
|
||||
} resourceLimits;
|
||||
std::uint32_t staticThreadMgmtSe0;
|
||||
std::uint32_t staticThreadMgmtSe1;
|
||||
std::uint32_t tmpRingSize;
|
||||
std::uint32_t _pad4[39];
|
||||
std::uint32_t _unk0[5];
|
||||
std::uint32_t state;
|
||||
std::uint32_t _unk1[33];
|
||||
std::array<std::uint32_t, 16> userData;
|
||||
};
|
||||
|
||||
static_assert(sizeof(ComputeConfig) == 320);
|
||||
|
||||
struct ShaderConfig {
|
||||
static constexpr auto kMmioOffset = 0x2c00;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
#include "dce.hpp"
|
||||
#include "gpu/DeviceCtl.hpp"
|
||||
#include "io-device.hpp"
|
||||
#include "iodev/dmem.hpp"
|
||||
|
|
@ -8,7 +9,6 @@
|
|||
#include "orbis/thread/Process.hpp"
|
||||
#include "orbis/thread/Thread.hpp"
|
||||
#include "orbis/utils/Logs.hpp"
|
||||
#include "orbis/utils/SharedMutex.hpp"
|
||||
#include "rx/mem.hpp"
|
||||
#include "rx/watchdog.hpp"
|
||||
#include "vm.hpp"
|
||||
|
|
@ -192,32 +192,21 @@ static void runBridge(int vmId) {
|
|||
}}.detach();
|
||||
}
|
||||
|
||||
static constexpr auto kVmIdCount = 6;
|
||||
struct DceFile : public orbis::File {};
|
||||
|
||||
struct DceDevice : IoDevice {
|
||||
orbis::shared_mutex mtx;
|
||||
std::uint32_t freeVmIds = (1 << (kVmIdCount + 1)) - 1;
|
||||
orbis::uint64_t dmemOffset = ~static_cast<std::uint64_t>(0);
|
||||
int DceDevice::allocateVmId() {
|
||||
int id = std::countr_zero(freeVmIds);
|
||||
|
||||
orbis::ErrorCode open(orbis::Ref<orbis::File> *file, const char *path,
|
||||
std::uint32_t flags, std::uint32_t mode,
|
||||
orbis::Thread *thread) override;
|
||||
if (id >= kVmIdCount) {
|
||||
std::println(stderr, "out of vm slots");
|
||||
std::abort();
|
||||
}
|
||||
|
||||
int allocateVmId() {
|
||||
int id = std::countr_zero(freeVmIds);
|
||||
freeVmIds &= ~(1 << id);
|
||||
return id;
|
||||
}
|
||||
|
||||
if (id >= kVmIdCount) {
|
||||
std::println(stderr, "out of vm slots");
|
||||
std::abort();
|
||||
}
|
||||
|
||||
freeVmIds &= ~(1 << id);
|
||||
return id;
|
||||
};
|
||||
|
||||
void deallocateVmId(int vmId) { freeVmIds |= (1 << vmId); };
|
||||
};
|
||||
void DceDevice::deallocateVmId(int vmId) { freeVmIds |= (1 << vmId); }
|
||||
|
||||
static void initDceMemory(DceDevice *device) {
|
||||
if (device->dmemOffset + 1) {
|
||||
|
|
@ -466,21 +455,24 @@ orbis::ErrorCode DceDevice::open(orbis::Ref<orbis::File> *file,
|
|||
newFile->device = this;
|
||||
newFile->ops = &ops;
|
||||
*file = newFile;
|
||||
initializeProcess(thread->tproc);
|
||||
return {};
|
||||
}
|
||||
|
||||
if (thread->tproc->vmId == -1) {
|
||||
void DceDevice::initializeProcess(orbis::Process *process) {
|
||||
if (process->vmId == -1) {
|
||||
createGpu();
|
||||
auto vmId = allocateVmId();
|
||||
|
||||
std::lock_guard lock(orbis::g_context.gpuDeviceMtx);
|
||||
{
|
||||
auto gpu = amdgpu::DeviceCtl{orbis::g_context.gpuDevice};
|
||||
gpu.submitMapProcess(thread->tproc->gfxRing, thread->tproc->pid, vmId);
|
||||
thread->tproc->vmId = vmId;
|
||||
gpu.submitMapProcess(process->gfxRing, process->pid, vmId);
|
||||
process->vmId = vmId;
|
||||
}
|
||||
|
||||
runBridge(vmId);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
IoDevice *createDceCharacterDevice() { return orbis::knew<DceDevice>(); }
|
||||
|
|
|
|||
25
rpcsx/iodev/dce.hpp
Normal file
25
rpcsx/iodev/dce.hpp
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
#pragma once
|
||||
|
||||
#include "io-device.hpp"
|
||||
#include "orbis-config.hpp"
|
||||
#include "orbis/error/ErrorCode.hpp"
|
||||
#include "orbis/file.hpp"
|
||||
#include "orbis/thread/Process.hpp"
|
||||
#include "orbis/utils/Rc.hpp"
|
||||
#include "orbis/utils/SharedMutex.hpp"
|
||||
|
||||
static constexpr auto kVmIdCount = 6;
|
||||
|
||||
struct DceDevice : IoDevice {
|
||||
orbis::shared_mutex mtx;
|
||||
std::uint32_t freeVmIds = (1 << (kVmIdCount + 1)) - 1;
|
||||
orbis::uint64_t dmemOffset = ~static_cast<std::uint64_t>(0);
|
||||
|
||||
orbis::ErrorCode open(orbis::Ref<orbis::File> *file, const char *path,
|
||||
std::uint32_t flags, std::uint32_t mode,
|
||||
orbis::Thread *thread) override;
|
||||
|
||||
int allocateVmId();
|
||||
void deallocateVmId(int vmId);
|
||||
void initializeProcess(orbis::Process *process);
|
||||
};
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
#include "gpu/DeviceCtl.hpp"
|
||||
#include "io-device.hpp"
|
||||
#include "iodev/dce.hpp"
|
||||
#include "iodev/dmem.hpp"
|
||||
#include "orbis/KernelAllocator.hpp"
|
||||
#include "orbis/KernelContext.hpp"
|
||||
|
|
@ -87,11 +88,11 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
|
|||
if (auto gpu = amdgpu::DeviceCtl{orbis::g_context.gpuDevice}) {
|
||||
for (unsigned i = 0; i < args->count; ++i) {
|
||||
gpu.submitGfxCommand(gcFile->gfxPipe,
|
||||
orbis::g_currentThread->tproc->vmId,
|
||||
{args->cmds + i * 4, 4});
|
||||
orbis::g_currentThread->tproc->vmId,
|
||||
{args->cmds + i * 4, 4});
|
||||
}
|
||||
} else {
|
||||
return orbis::ErrorCode::INVAL;
|
||||
return orbis::ErrorCode::BUSY;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
@ -106,7 +107,7 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
|
|||
if (auto gpu = amdgpu::DeviceCtl{orbis::g_context.gpuDevice}) {
|
||||
gpu.submitSwitchBuffer(orbis::g_currentThread->tproc->vmId);
|
||||
} else {
|
||||
return orbis::ErrorCode::INVAL;
|
||||
return orbis::ErrorCode::BUSY;
|
||||
}
|
||||
|
||||
// ORBIS_LOG_ERROR("gc ioctl 0xc0088101", args->arg0, args->arg1);
|
||||
|
|
@ -127,11 +128,11 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
|
|||
if (auto gpu = amdgpu::DeviceCtl{orbis::g_context.gpuDevice}) {
|
||||
for (unsigned i = 0; i < args->count; ++i) {
|
||||
gpu.submitGfxCommand(gcFile->gfxPipe,
|
||||
orbis::g_currentThread->tproc->vmId,
|
||||
{args->cmds + i * 4, 4});
|
||||
orbis::g_currentThread->tproc->vmId,
|
||||
{args->cmds + i * 4, 4});
|
||||
}
|
||||
} else {
|
||||
return orbis::ErrorCode::INVAL;
|
||||
return orbis::ErrorCode::BUSY;
|
||||
}
|
||||
|
||||
// orbis::bridge.sendDoFlip();
|
||||
|
|
@ -142,7 +143,7 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
|
|||
if (auto gpu = amdgpu::DeviceCtl{orbis::g_context.gpuDevice}) {
|
||||
gpu.waitForIdle();
|
||||
} else {
|
||||
return orbis::ErrorCode::INVAL;
|
||||
return orbis::ErrorCode::BUSY;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
@ -193,64 +194,53 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
|
|||
|
||||
case 0xc030810d: { // map compute queue
|
||||
struct Args {
|
||||
std::uint32_t pipeHi;
|
||||
std::uint32_t pipeLo;
|
||||
std::uint32_t queueId;
|
||||
std::uint32_t offset;
|
||||
std::uint64_t ringBaseAddress;
|
||||
std::uint64_t readPtrAddress;
|
||||
std::uint64_t dingDongPtr;
|
||||
std::uint32_t lenLog2;
|
||||
orbis::uint32_t meId;
|
||||
orbis::uint32_t pipeId;
|
||||
orbis::uint32_t queueId;
|
||||
orbis::uint32_t vqueueId;
|
||||
orbis::uintptr_t ringBaseAddress;
|
||||
orbis::uintptr_t readPtrAddress;
|
||||
orbis::uintptr_t doorbell;
|
||||
orbis::uint32_t ringSize;
|
||||
};
|
||||
|
||||
auto args = reinterpret_cast<Args *>(argp);
|
||||
|
||||
ORBIS_LOG_ERROR("gc ioctl map compute queue", args->pipeHi, args->pipeLo,
|
||||
args->queueId, args->offset, args->ringBaseAddress,
|
||||
args->readPtrAddress, args->dingDongPtr, args->lenLog2);
|
||||
ORBIS_LOG_ERROR("gc ioctl map compute queue", args->meId, args->pipeId,
|
||||
args->queueId, args->vqueueId, args->ringBaseAddress,
|
||||
args->readPtrAddress, args->doorbell, args->ringSize);
|
||||
|
||||
rx::die("gc ioctl map compute queue");
|
||||
if (auto gpu = amdgpu::DeviceCtl{orbis::g_context.gpuDevice}) {
|
||||
gpu.mapComputeQueue(thread->tproc->vmId, args->meId, args->pipeId,
|
||||
args->queueId, args->vqueueId, args->ringBaseAddress,
|
||||
args->readPtrAddress, args->doorbell,
|
||||
static_cast<std::uint64_t>(1) << args->ringSize);
|
||||
|
||||
// auto id = ((args->pipeHi * 4) + args->pipeLo) * 8 + args->queueId;
|
||||
// device->computeQueues[id] = {
|
||||
// .ringBaseAddress = args->ringBaseAddress,
|
||||
// .readPtrAddress = args->readPtrAddress,
|
||||
// .dingDongPtr = args->dingDongPtr,
|
||||
// .len = static_cast<std::uint64_t>(1) << args->lenLog2,
|
||||
// };
|
||||
// args->pipeHi = 0x769c766;
|
||||
// args->pipeLo = 0x72e8e3c1;
|
||||
// args->queueId = -0x248d50d8;
|
||||
// args->offset = 0xd245ed58;
|
||||
|
||||
// ((std::uint64_t *)args->dingDongPtr)[0xf0 / sizeof(std::uint64_t)] = 1;
|
||||
} else {
|
||||
return orbis::ErrorCode::BUSY;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case 0xc010811c: {
|
||||
// ding dong for workload
|
||||
struct Args {
|
||||
std::uint32_t pipeHi;
|
||||
std::uint32_t pipeLo;
|
||||
std::uint32_t meId;
|
||||
std::uint32_t pipeId;
|
||||
std::uint32_t queueId;
|
||||
std::uint32_t nextStartOffsetInDw;
|
||||
};
|
||||
|
||||
auto args = reinterpret_cast<Args *>(argp);
|
||||
ORBIS_LOG_ERROR("gc ioctl ding dong for workload", args->pipeHi,
|
||||
args->pipeLo, args->queueId, args->nextStartOffsetInDw);
|
||||
rx::die("gc ioctl ding dong for workload");
|
||||
ORBIS_LOG_ERROR("gc ioctl ding dong for workload", args->meId, args->pipeId,
|
||||
args->queueId, args->nextStartOffsetInDw);
|
||||
|
||||
// auto id = ((args->pipeHi * 4) + args->pipeLo) * 8 + args->queueId;
|
||||
|
||||
// auto queue = device->computeQueues.at(id);
|
||||
// auto address = (queue.ringBaseAddress + queue.offset);
|
||||
// auto endOffset = static_cast<std::uint64_t>(args->nextStartOffsetInDw) <<
|
||||
// 2; auto size = endOffset - queue.offset;
|
||||
|
||||
// rx::bridge.sendCommandBuffer(thread->tproc->pid, id, address, size);
|
||||
|
||||
// queue.offset = endOffset;
|
||||
if (auto gpu = amdgpu::DeviceCtl{orbis::g_context.gpuDevice}) {
|
||||
gpu.submitComputeQueue(args->meId, args->pipeId, args->queueId,
|
||||
args->nextStartOffsetInDw);
|
||||
} else {
|
||||
return orbis::ErrorCode::BUSY;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -336,6 +326,9 @@ orbis::ErrorCode GcDevice::open(orbis::Ref<orbis::File> *file, const char *path,
|
|||
}
|
||||
|
||||
void GcDevice::addClient(orbis::Process *process) {
|
||||
auto dce = orbis::g_context.dceDevice.rawStaticCast<DceDevice>();
|
||||
dce->initializeProcess(process);
|
||||
|
||||
std::lock_guard lock(mtx);
|
||||
auto &client = clients[process->pid];
|
||||
++client;
|
||||
|
|
|
|||
|
|
@ -321,6 +321,9 @@ static void ps4InitDev() {
|
|||
auto dmem1 = createDmemCharacterDevice(1);
|
||||
orbis::g_context.dmemDevice = dmem1;
|
||||
|
||||
auto dce = createDceCharacterDevice();
|
||||
orbis::g_context.dceDevice = dce;
|
||||
|
||||
auto ttyFd = ::open("tty.txt", O_CREAT | O_TRUNC | O_WRONLY, 0666);
|
||||
auto consoleDev = createConsoleCharacterDevice(STDIN_FILENO, ttyFd);
|
||||
auto mbus = static_cast<MBusDevice *>(createMBusCharacterDevice());
|
||||
|
|
@ -357,7 +360,7 @@ static void ps4InitDev() {
|
|||
vfs::addDevice("zero", createZeroCharacterDevice());
|
||||
vfs::addDevice("null", createNullCharacterDevice());
|
||||
vfs::addDevice("dipsw", createDipswCharacterDevice());
|
||||
vfs::addDevice("dce", createDceCharacterDevice());
|
||||
vfs::addDevice("dce", dce);
|
||||
vfs::addDevice("hmd_cmd", createHmdCmdCharacterDevice());
|
||||
vfs::addDevice("hmd_snsr", createHmdSnsrCharacterDevice());
|
||||
vfs::addDevice("hmd_3da", createHmd3daCharacterDevice());
|
||||
|
|
|
|||
Loading…
Reference in a new issue