From b85c6e6acc23b72b80eb0d87f86e5fdf8acbd04d Mon Sep 17 00:00:00 2001 From: DH Date: Wed, 13 Nov 2024 23:53:08 +0300 Subject: [PATCH] gpu: fixed command ring corruption in release build add IT_DMA_DATA for compute pipe implement 3 indirection gfx ring commands --- rpcsx/gpu/Device.cpp | 13 ++- rpcsx/gpu/Pipe.cpp | 192 +++++++++++++++++++++++++++++++++++++------ rpcsx/gpu/Pipe.hpp | 5 +- 3 files changed, 179 insertions(+), 31 deletions(-) diff --git a/rpcsx/gpu/Device.cpp b/rpcsx/gpu/Device.cpp index 69b1ea30c..2832bea82 100644 --- a/rpcsx/gpu/Device.cpp +++ b/rpcsx/gpu/Device.cpp @@ -532,7 +532,8 @@ void Device::submitCommand(Ring &ring, std::span command) { ring.wptr = ring.base; } - std::memcpy(ring.wptr, command.data(), command.size_bytes()); + std::memcpy(const_cast(ring.wptr), command.data(), + command.size_bytes()); ring.wptr += command.size(); } @@ -594,6 +595,10 @@ void Device::unmapProcess(std::uint32_t pid) { auto &process = processInfo[pid]; auto startAddress = static_cast(process.vmId) << 40; auto size = static_cast(1) << 40; + + startAddress += orbis::kMinAddress; + size -= orbis::kMinAddress; + rx::mem::reserve(reinterpret_cast(startAddress), size); ::close(process.vmFd); @@ -607,7 +612,7 @@ void Device::protectMemory(std::uint32_t pid, std::uint64_t address, auto vmSlotIt = process.vmTable.queryArea(address); if (vmSlotIt == process.vmTable.end()) { - std::abort(); + return; } auto vmSlot = (*vmSlotIt).payload; @@ -1027,7 +1032,9 @@ static void notifyPageChanges(Device *device, int vmId, std::uint32_t firstPage, 1, std::memory_order::release); device->cpuCacheCommandsIdle[vmId].notify_one(); - while (device->cpuCacheCommands[vmId][i].load(std::memory_order::acquire) != 0) {} + while (device->cpuCacheCommands[vmId][i].load( + std::memory_order::acquire) != 0) { + } return; } } diff --git a/rpcsx/gpu/Pipe.cpp b/rpcsx/gpu/Pipe.cpp index 1e08cb278..57036b532 100644 --- a/rpcsx/gpu/Pipe.cpp +++ b/rpcsx/gpu/Pipe.cpp @@ -97,6 +97,7 @@ ComputePipe::ComputePipe(int index) commandHandlers[gnm::IT_WRITE_DATA] = &ComputePipe::writeData; commandHandlers[gnm::IT_INDIRECT_BUFFER] = &ComputePipe::indirectBuffer; commandHandlers[gnm::IT_ACQUIRE_MEM] = &ComputePipe::acquireMem; + commandHandlers[gnm::IT_DMA_DATA] = &ComputePipe::dmaData; } bool ComputePipe::processAllRings() { @@ -186,8 +187,8 @@ void ComputePipe::setIndirectRing(int queueId, int indirectLevel, Ring ring) { } ring.indirectLevel = indirectLevel; - std::println("mapQueue: {}, {}, {}", (void *)ring.base, (void *)ring.wptr, - ring.size); + std::println(stderr, "mapQueue: {}, {}, {}", (void *)ring.base, + (void *)ring.wptr, ring.size); queues[1 - ring.indirectLevel][queueId] = ring; } @@ -202,8 +203,8 @@ void ComputePipe::mapQueue(int queueId, Ring ring, waitForIdle(queueId, lock); } - std::println("mapQueue: {}, {}, {}, {}", (void *)ring.base, (void *)ring.wptr, - ring.size, (void *)ring.doorbell); + std::println(stderr, "mapQueue: {}, {}, {}, {}", (void *)ring.base, + (void *)ring.wptr, ring.size, (void *)ring.doorbell); queues[1 - ring.indirectLevel][queueId] = ring; } @@ -260,7 +261,7 @@ bool ComputePipe::setShReg(Ring &ring) { data[i]); } - std::memcpy(ring.doorbell + offset, data, sizeof(std::uint32_t) * len); + std::memcpy(ring.doorbell + offset, const_cast(data), sizeof(std::uint32_t) * len); return true; } @@ -404,7 +405,7 @@ bool ComputePipe::writeData(Ring &ring) { *dstPointer = data[i]; } } else { - std::memcpy(dstPointer, data, len * sizeof(std::uint32_t)); + std::memcpy(dstPointer, const_cast(data), len * sizeof(std::uint32_t)); } return true; @@ -430,6 +431,138 @@ bool ComputePipe::indirectBuffer(Ring &ring) { bool ComputePipe::acquireMem(Ring &ring) { return true; } +bool ComputePipe::dmaData(Ring &ring) { + auto control = ring.rptr[1]; + auto srcAddressLo = ring.rptr[2]; + auto data = srcAddressLo; + auto srcAddressHi = ring.rptr[3]; + auto dstAddressLo = ring.rptr[4]; + auto dstAddressHi = ring.rptr[5]; + auto cmdSize = ring.rptr[6]; + auto size = rx::getBits(cmdSize, 20, 0); + + auto engine = rx::getBit(control, 0); + auto srcVolatile = rx::getBit(control, 15); + + // 0 - dstAddress using das + // 1 - gds + // 3 - dstAddress using L2 + auto dstSel = rx::getBits(control, 21, 20); + + // 0 - LRU + // 1 - Stream + // 2 - Bypass + auto dstCachePolicy = rx::getBits(control, 26, 25); + + auto dstVolatile = rx::getBit(control, 27); + + // 0 - srcAddress using sas + // 1 - gds + // 2 - data + // 3 - srcAddress using L2 + auto srcSel = rx::getBits(control, 30, 29); + + auto cpSync = rx::getBit(control, 31); + + auto dataDisWc = rx::getBit(cmdSize, 21); + + // 0 - none + // 1 - 8 in 16 + // 2 - 8 in 32 + // 3 - 8 in 64 + auto dstSwap = rx::getBits(cmdSize, 25, 24); + + // 0 - memory + // 1 - register + auto sas = rx::getBit(cmdSize, 26); + + // 0 - memory + // 1 - register + auto das = rx::getBit(cmdSize, 27); + + auto saic = rx::getBit(cmdSize, 28); + auto daic = rx::getBit(cmdSize, 29); + auto rawWait = rx::getBit(cmdSize, 30); + + void *dst = nullptr; + switch (dstSel) { + case 3: + case 0: + if (dstSel == 3 || das == 0) { + auto dstAddress = + dstAddressLo | (static_cast(dstAddressHi) << 32); + dst = amdgpu::RemoteMemory{ring.vmId}.getPointer(dstAddress); + device->caches[ring.vmId].invalidate( + scheduler, rx::AddressRange::fromBeginSize(dstAddress, size)); + } else { + dst = getMmRegister(ring, dstAddressLo / sizeof(std::uint32_t)); + } + break; + + case 1: + dst = device->caches[ring.vmId].getGdsBuffer().getData() + dstAddressLo; + break; + + default: + rx::die("IT_DMA_DATA: unexpected dstSel %u", dstSel); + } + + void *src = nullptr; + std::uint32_t srcSize = 0; + switch (srcSel) { + case 3: + case 0: + if (srcSel == 3 || sas == 0) { + auto srcAddress = + srcAddressLo | (static_cast(srcAddressHi) << 32); + src = amdgpu::RemoteMemory{ring.vmId}.getPointer(srcAddress); + device->caches[ring.vmId].flush( + scheduler, rx::AddressRange::fromBeginSize(srcAddress, size)); + } else { + src = getMmRegister(ring, srcAddressLo / sizeof(std::uint32_t)); + } + + srcSize = ~0; + break; + case 1: + src = device->caches[ring.vmId].getGdsBuffer().getData() + srcAddressLo; + srcSize = ~0; + break; + + case 2: + src = &data; + srcSize = sizeof(data); + saic = 1; + break; + + default: + rx::die("IT_DMA_DATA: unexpected srcSel %u", srcSel); + } + + rx::dieIf(size > srcSize && saic == 0, + "IT_DMA_DATA: out of source size srcSel %u, dstSel %u, size %u", + srcSel, dstSel, size); + + if (saic != 0) { + if (daic != 0 && dstSel == 0 && das == 1) { + std::memcpy(dst, src, sizeof(std::uint32_t)); + } else { + for (std::uint32_t i = 0; i < size / sizeof(std::uint32_t); ++i) { + std::memcpy(std::bit_cast(dst) + i, src, + sizeof(std::uint32_t)); + } + } + } else if (daic != 0 && dstSel == 0 && das == 1) { + for (std::uint32_t i = 0; i < size / sizeof(std::uint32_t); ++i) { + std::memcpy(dst, std::bit_cast(src) + i, + sizeof(std::uint32_t)); + } + } else { + std::memcpy(dst, src, size); + } + return true; +} + bool ComputePipe::unknownPacket(Ring &ring) { auto op = rx::getBits(ring.rptr[0], 15, 8); @@ -650,11 +783,15 @@ bool GraphicsPipe::processAllRings() { } void GraphicsPipe::processRing(Ring &ring) { - int cp; + int cp = 1; if (ring.indirectLevel < 0) { cp = 0; } else { cp = ring.indirectLevel + 1; + + if (ring.indirectLevel == 2) { + cp = 2; + } } while (ring.rptr != ring.wptr) { @@ -685,7 +822,8 @@ void GraphicsPipe::processRing(Ring &ring) { return; } - ring.rptr += len; + ring.rptr += + std::min(ring.size - (ring.rptr - ring.base), len); if (op == gnm::IT_INDIRECT_BUFFER || op == gnm::IT_INDIRECT_BUFFER_CNST) { break; @@ -956,7 +1094,7 @@ bool GraphicsPipe::drawIndexAuto(Ring &ring) { return true; } bool GraphicsPipe::numInstances(Ring &ring) { - uConfig.vgtNumInstances = std::max(ring.rptr[1], 1u); + uConfig.vgtNumInstances = std::max(std::uint32_t(ring.rptr[1]), 1u); return true; } bool GraphicsPipe::drawIndexMultiAuto(Ring &ring) { @@ -1024,7 +1162,7 @@ bool GraphicsPipe::writeData(Ring &ring) { *dstPointer = data[i]; } } else { - std::memcpy(dstPointer, data, len * sizeof(std::uint32_t)); + std::memcpy(dstPointer, const_cast(data), len * sizeof(std::uint32_t)); } return true; @@ -1264,7 +1402,7 @@ bool GraphicsPipe::eventWriteEos(Ring &ring) { case 1: { // store GDS data to memory auto sizeDw = rx::getBits(dataInfo, 31, 16); auto gdsIndexDw = rx::getBits(dataInfo, 15, 0); - std::println("event write eos: gds data {:x}-{:x}", gdsIndexDw, + std::println(stderr, "event write eos: gds data {:x}-{:x}", gdsIndexDw, gdsIndexDw + sizeDw); auto size = sizeof(std::uint32_t) * sizeDw; @@ -1431,7 +1569,8 @@ bool GraphicsPipe::setConfigReg(Ring &ring) { if (contextOffset + len <= sizeof(context)) { std::memcpy(reinterpret_cast(&context) + contextOffset, - data, sizeof(std::uint32_t) * len); + const_cast(data), + sizeof(std::uint32_t) * len); return true; } } @@ -1441,8 +1580,8 @@ bool GraphicsPipe::setConfigReg(Ring &ring) { "out of Config regs, offset: %x, count %u, %s\n", offset, len, gnm::mmio::registerName(decltype(device->config)::kMmioOffset + offset)); - std::memcpy(reinterpret_cast(&device->config) + offset, data, - sizeof(std::uint32_t) * len); + std::memcpy(reinterpret_cast(&device->config) + offset, + const_cast(data), sizeof(std::uint32_t) * len); return true; } @@ -1457,8 +1596,8 @@ bool GraphicsPipe::setShReg(Ring &ring) { "out of SH regs, offset: %x, count %u, %s\n", offset, len, gnm::mmio::registerName(decltype(sh)::kMmioOffset + offset)); - std::memcpy(reinterpret_cast(&sh) + offset, data, - sizeof(std::uint32_t) * len); + std::memcpy(reinterpret_cast(&sh) + offset, + const_cast(data), sizeof(std::uint32_t) * len); // for (std::size_t i = 0; i < len; ++i) { // std::fprintf( // stderr, "writing to %s value %x\n", @@ -1487,9 +1626,9 @@ bool GraphicsPipe::setUConfigReg(Ring &ring) { for (std::size_t i = 0; i < len; ++i) { auto id = decltype(uConfig)::kMmioOffset + offset + i; if (auto regName = gnm::mmio::registerName(id)) { - std::println(stderr, "writing to {} value {:x}", regName, data[i]); + std::println(stderr, "writing to {} value {:x}", regName, uint32_t(data[i])); } else { - std::println(stderr, "writing to {:x} value {:x}", id, data[i]); + std::println(stderr, "writing to {:x} value {:x}", id, uint32_t(data[i])); } } } @@ -1498,8 +1637,8 @@ bool GraphicsPipe::setUConfigReg(Ring &ring) { "out of UConfig regs, offset: %u, count %u, %s\n", offset, len, gnm::mmio::registerName(decltype(uConfig)::kMmioOffset + offset)); - std::memcpy(reinterpret_cast(&uConfig) + offset, data, - sizeof(std::uint32_t) * len); + std::memcpy(reinterpret_cast(&uConfig) + offset, + const_cast(data), sizeof(std::uint32_t) * len); // for (std::size_t i = 0; i < len; ++i) { // std::fprintf( // stderr, "writing to %s value %x\n", @@ -1528,9 +1667,9 @@ bool GraphicsPipe::setContextReg(Ring &ring) { for (std::size_t i = 0; i < len; ++i) { auto id = decltype(context)::kMmioOffset + offset + i; if (auto regName = gnm::mmio::registerName(id)) { - std::println(stderr, "writing to {} value {:x}", regName, data[i]); + std::println(stderr, "writing to {} value {:x}", regName, uint32_t(data[i])); } else { - std::println(stderr, "writing to {:x} value {:x}", id, data[i]); + std::println(stderr, "writing to {:x} value {:x}", id, uint32_t(data[i])); } } } @@ -1539,8 +1678,8 @@ bool GraphicsPipe::setContextReg(Ring &ring) { "out of Context regs, offset: %u, count %u, %s\n", offset, len, gnm::mmio::registerName(decltype(context)::kMmioOffset + offset)); - std::memcpy(reinterpret_cast(&context) + offset, data, - sizeof(std::uint32_t) * len); + std::memcpy(reinterpret_cast(&context) + offset, + const_cast(data), sizeof(std::uint32_t) * len); // for (std::size_t i = 0; i < len; ++i) { // std::fprintf( @@ -1687,8 +1826,9 @@ void CommandPipe::processRing(Ring &ring) { rx::die("cmd pipe: unexpected pm4 packet type %u, ring %u, header %u, rptr " "%p, wptr " - "%p, base %p", - type, ring.indirectLevel, header, ring.rptr, ring.wptr, ring.base); + "%p, base %p, end %p", + type, ring.indirectLevel, header, ring.rptr, ring.wptr, ring.base, + ring.base + ring.size); } } diff --git a/rpcsx/gpu/Pipe.hpp b/rpcsx/gpu/Pipe.hpp index bc3238dcc..e512b2ffa 100644 --- a/rpcsx/gpu/Pipe.hpp +++ b/rpcsx/gpu/Pipe.hpp @@ -15,8 +15,8 @@ struct Ring { std::uint32_t *doorbell{}; std::uint32_t *base{}; std::uint64_t size{}; - std::uint32_t *rptr{}; - std::uint32_t *wptr{}; + volatile std::uint32_t *volatile rptr{}; + volatile std::uint32_t *volatile wptr{}; std::uint32_t *rptrReportLocation{}; static Ring createFromRange(int vmId, std::uint32_t *base, std::uint64_t size, @@ -70,6 +70,7 @@ struct ComputePipe { bool writeData(Ring &ring); bool indirectBuffer(Ring &ring); bool acquireMem(Ring &ring); + bool dmaData(Ring &ring); bool unknownPacket(Ring &ring); bool handleNop(Ring &ring);