From 9bf53364780bd0101409e5d595c3d649ddfc631f Mon Sep 17 00:00:00 2001 From: DH Date: Sun, 12 Nov 2023 01:29:14 +0300 Subject: [PATCH] [rpcsx-gpu] hacks for gnm compositor --- .../bridge/include/amdgpu/bridge/bridge.hpp | 36 ++++--- hw/amdgpu/device/src/device.cpp | 10 +- hw/amdgpu/shader/src/Fragment.cpp | 97 ++++++++++++++++--- rpcsx-gpu/main.cpp | 12 ++- rpcsx-os/iodev/dce.cpp | 4 +- rpcsx-os/iodev/gc.cpp | 7 +- rpcsx-os/ops.cpp | 11 --- rpcsx-os/vm.cpp | 23 ++++- 8 files changed, 150 insertions(+), 50 deletions(-) diff --git a/hw/amdgpu/bridge/include/amdgpu/bridge/bridge.hpp b/hw/amdgpu/bridge/include/amdgpu/bridge/bridge.hpp index 270bc4187..0c8de8ccf 100644 --- a/hw/amdgpu/bridge/include/amdgpu/bridge/bridge.hpp +++ b/hw/amdgpu/bridge/include/amdgpu/bridge/bridge.hpp @@ -51,12 +51,14 @@ struct CmdMemoryProt { std::uint64_t address; std::uint64_t size; std::uint32_t prot; + std::uint32_t pid; }; struct CmdCommandBuffer { std::uint64_t queue; std::uint64_t address; - std::uint64_t size; + std::uint32_t size; + std::uint32_t pid; }; struct CmdBuffer { @@ -69,6 +71,7 @@ struct CmdBuffer { }; struct CmdFlip { + std::uint32_t pid; std::uint32_t bufferIndex; std::uint64_t arg; }; @@ -144,18 +147,18 @@ struct BridgePusher { header->flags | static_cast(BridgeFlags::VmConfigured); } - void sendMemoryProtect(std::uint64_t address, std::uint64_t size, + void sendMemoryProtect(std::uint32_t pid, std::uint64_t address, std::uint64_t size, std::uint32_t prot) { - sendCommand(CommandId::ProtectMemory, {address, size, prot}); + sendCommand(CommandId::ProtectMemory, {pid, address, size, prot}); } - void sendCommandBuffer(std::uint64_t queue, std::uint64_t address, + void sendCommandBuffer(std::uint32_t pid, std::uint64_t queue, std::uint64_t address, std::uint64_t size) { - sendCommand(CommandId::CommandBuffer, {queue, address, size}); + sendCommand(CommandId::CommandBuffer, {pid, queue, address, size}); } - void sendFlip(std::uint32_t bufferIndex, std::uint64_t arg) { - 
sendCommand(CommandId::Flip, {bufferIndex, arg}); + void sendFlip(std::uint32_t pid, std::uint32_t bufferIndex, std::uint64_t arg) { + sendCommand(CommandId::Flip, {pid, bufferIndex, arg}); } void wait() { @@ -250,20 +253,23 @@ private: return result; case CommandId::ProtectMemory: - result.memoryProt.address = args[0]; - result.memoryProt.size = args[1]; - result.memoryProt.prot = args[2]; + result.memoryProt.pid = args[0]; + result.memoryProt.address = args[1]; + result.memoryProt.size = args[2]; + result.memoryProt.prot = args[3]; return result; case CommandId::CommandBuffer: - result.commandBuffer.queue = args[0]; - result.commandBuffer.address = args[1]; - result.commandBuffer.size = args[2]; + result.commandBuffer.pid = args[0]; + result.commandBuffer.queue = args[1]; + result.commandBuffer.address = args[2]; + result.commandBuffer.size = args[3]; return result; case CommandId::Flip: - result.flip.bufferIndex = args[0]; - result.flip.arg = args[1]; + result.flip.pid = args[0]; + result.flip.bufferIndex = args[1]; + result.flip.arg = args[2]; return result; } diff --git a/hw/amdgpu/device/src/device.cpp b/hw/amdgpu/device/src/device.cpp index 3bb16416f..d48555ee7 100644 --- a/hw/amdgpu/device/src/device.cpp +++ b/hw/amdgpu/device/src/device.cpp @@ -1697,7 +1697,7 @@ static bool isPrimRequiresConversion(PrimitiveType primType) { return true; default: - util::unreachable(); + util::unreachable("prim type: %u\n", (unsigned)primType); } } @@ -3731,6 +3731,12 @@ static void draw(TaskChain &taskSet, QueueRegisters ®s, std::uint32_t count, return; } + auto primType = static_cast(regs.vgtPrimitiveType); + + if (primType == PrimitiveType::kPrimitiveTypeNone) { + return; + } + regs.depthClearEnable = true; auto resources = Ref(new GpuActionResources()); @@ -3750,8 +3756,6 @@ static void draw(TaskChain &taskSet, QueueRegisters ®s, std::uint32_t count, shaderLoadTaskSet.schedule(); shaderLoadTaskSet.wait(); - auto primType = static_cast(regs.vgtPrimitiveType); - 
std::vector colorAttachments; std::vector colorBlendEnable; diff --git a/hw/amdgpu/shader/src/Fragment.cpp b/hw/amdgpu/shader/src/Fragment.cpp index 696593c87..3b5ce6bf4 100644 --- a/hw/amdgpu/shader/src/Fragment.cpp +++ b/hw/amdgpu/shader/src/Fragment.cpp @@ -1243,20 +1243,77 @@ void convertSop2(Fragment &fragment, Sop2 inst) { }; switch (inst.op) { + case Sop2::Op::S_ADDC_U32: { + auto src0 = fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value; + auto src1 = fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value; + auto uintT = fragment.context->getType(TypeId::UInt32); + auto scc = fragment.getScc(); + + auto src0Value = fragment.context->findUint32Value(src0); + auto src1Value = fragment.context->findUint32Value(src1); + + if (src0Value && src1Value && + (scc == context->getTrue() || scc == context->getFalse())) { + std::uint64_t result = *src0Value; + result += *src1Value; + result += (scc == context->getTrue() ? 1 : 0); + + std::fprintf(stderr, "saddc result: %lx\n", result); + + fragment.setScalarOperand(inst.sdst, + {uintT, fragment.context->getUInt32(result)}); + fragment.setScc( + {uintT, fragment.context->getUInt32(result > 0xffff'ffff ? 
1 : 0)}); + } else { + auto resultStruct = + fragment.context->getStructType(std::array{uintT, uintT}); + auto tmpResult = + fragment.builder.createIAddCarry(resultStruct, src0, src1); + auto tmpVal = + fragment.builder.createCompositeExtract(uintT, tmpResult, {{0u}}); + auto tmpCarry = + fragment.builder.createCompositeExtract(uintT, tmpResult, {{1u}}); + auto result = fragment.builder.createIAddCarry(resultStruct, tmpVal, scc); + + auto value = + fragment.builder.createCompositeExtract(uintT, result, {{0u}}); + auto carry = + fragment.builder.createCompositeExtract(uintT, result, {{1u}}); + + fragment.setScalarOperand(inst.sdst, {uintT, value}); + fragment.setScc({uintT, builder.createBitwiseOr(uintT, tmpCarry, carry)}); + } + break; + } case Sop2::Op::S_ADD_U32: { auto src0 = fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value; auto src1 = fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value; auto uintT = fragment.context->getType(TypeId::UInt32); - auto resultStruct = - fragment.context->getStructType(std::array{uintT, uintT}); - auto result = fragment.builder.createIAddCarry(resultStruct, src0, src1); - fragment.setScalarOperand( - inst.sdst, - {uintT, fragment.builder.createCompositeExtract( - uintT, result, {{static_cast(0)}})}); - fragment.setScc( - {uintT, fragment.builder.createCompositeExtract( - uintT, result, {{static_cast(1)}})}); + + auto src0Value = fragment.context->findUint32Value(src0); + auto src1Value = fragment.context->findUint32Value(src1); + if (src0Value && src1Value) { + std::uint64_t result = *src0Value; + result += *src1Value; + + std::fprintf(stderr, "sadd result: %lx\n", result); + + fragment.setScalarOperand(inst.sdst, + {uintT, fragment.context->getUInt32(result)}); + fragment.setScc( + {uintT, fragment.context->getUInt32(result > 0xffff'ffff ? 
1 : 0)}); + } else { + auto resultStruct = + fragment.context->getStructType(std::array{uintT, uintT}); + auto result = fragment.builder.createIAddCarry(resultStruct, src0, src1); + fragment.setScalarOperand( + inst.sdst, + {uintT, fragment.builder.createCompositeExtract( + uintT, result, {{static_cast(0)}})}); + fragment.setScc( + {uintT, fragment.builder.createCompositeExtract( + uintT, result, {{static_cast(1)}})}); + } break; } case Sop2::Op::S_ADD_I32: { @@ -3407,6 +3464,7 @@ void convertMtbuf(Fragment &fragment, Mtbuf inst) { *optVBuffer2Value, *optVBuffer3Value}; auto vbuffer = reinterpret_cast(vBufferData); + std::fprintf(stderr, "address0: %lx\n", vbuffer->getAddress()); auto base = spirv::cast( fragment.getScalarOperand(inst.soffset, TypeId::UInt32).value); @@ -5052,6 +5110,17 @@ void convertSop1(Fragment &fragment, Sop1 inst) { } return; + case Sop1::Op::S_GETPC_B64: { + auto pc = fragment.registers->pc; + std::fprintf(stderr, "getpc result: %lx\n", pc); + fragment.setScalarOperand(inst.sdst, {fragment.context->getUInt32Type(), + fragment.context->getUInt32(pc)}); + fragment.setScalarOperand(inst.sdst + 1, + {fragment.context->getUInt32Type(), + fragment.context->getUInt32(pc >> 32)}); + return; + } + case Sop1::Op::S_SWAPPC_B64: { if (auto ssrc0 = fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32), ssrc1 = fragment.getScalarOperand(inst.ssrc0 + 1, TypeId::UInt32); @@ -5769,8 +5838,12 @@ void Fragment::setOperand(RegisterId id, Value value) { auto boolT = context->getBoolType(); if (value.type != boolT) { if (value.type == context->getUInt32Type()) { - value.value = - builder.createINotEqual(boolT, value.value, context->getUInt32(0)); + if (auto imm = context->findUint32Value(value.value)) { + value.value = *imm ? 
context->getTrue() : context->getFalse(); + } else { + value.value = builder.createINotEqual(boolT, value.value, + context->getUInt32(0)); + } } else if (value.type == context->getSint32Type()) { value.value = builder.createINotEqual(boolT, value.value, context->getSInt32(0)); diff --git a/rpcsx-gpu/main.cpp b/rpcsx-gpu/main.cpp index 7f5cfcf52..e3a70b80d 100644 --- a/rpcsx-gpu/main.cpp +++ b/rpcsx-gpu/main.cpp @@ -92,7 +92,7 @@ int main(int argc, const char *argv[]) { } const char *cmdBridgeName = "/rpcsx-gpu-cmds"; - const char *shmName = "/rpcsx-os-memory"; + const char *shmName = "/rpcsx-os-memory-50001"; unsigned long gpuIndex = 0; auto presenter = PresenterMode::Window; bool enableValidation = false; @@ -945,15 +945,25 @@ int main(int argc, const char *argv[]) { for (auto cmd : std::span(commandsBuffer, pulledCount)) { switch (cmd.id) { case amdgpu::bridge::CommandId::ProtectMemory: + if (cmd.memoryProt.pid != 50001) { + continue; + } device.handleProtectMemory(cmd.memoryProt.address, cmd.memoryProt.size, cmd.memoryProt.prot); break; case amdgpu::bridge::CommandId::CommandBuffer: + if (cmd.commandBuffer.pid != 50001) { + continue; + } device.handleCommandBuffer(cmd.commandBuffer.queue, cmd.commandBuffer.address, cmd.commandBuffer.size); break; case amdgpu::bridge::CommandId::Flip: { + if (cmd.flip.pid != 50001) { + continue; + } + if (!isImageAcquired) { Verify() << vkAcquireNextImageKHR(vkDevice, swapchain, UINT64_MAX, presentCompleteSemaphore, nullptr, diff --git a/rpcsx-os/iodev/dce.cpp b/rpcsx-os/iodev/dce.cpp index 834251f34..273bb139d 100644 --- a/rpcsx-os/iodev/dce.cpp +++ b/rpcsx-os/iodev/dce.cpp @@ -5,6 +5,8 @@ #include "orbis/file.hpp" #include "orbis/utils/Logs.hpp" #include "orbis/utils/SharedMutex.hpp" +#include "orbis/thread/Thread.hpp" +#include "orbis/thread/Process.hpp" #include "vm.hpp" #include #include @@ -258,7 +260,7 @@ static orbis::ErrorCode dce_ioctl(orbis::File *file, std::uint64_t request, // args->displayBufferIndex, 
args->flipMode, args->flipArg, // args->arg5, args->arg6, args->arg7, args->arg8); - rx::bridge.sendFlip(args->displayBufferIndex, + rx::bridge.sendFlip(thread->tproc->pid, args->displayBufferIndex, /*args->flipMode,*/ args->flipArg); return {}; } diff --git a/rpcsx-os/iodev/gc.cpp b/rpcsx-os/iodev/gc.cpp index f958bf8ec..54d8f0fc9 100644 --- a/rpcsx-os/iodev/gc.cpp +++ b/rpcsx-os/iodev/gc.cpp @@ -2,6 +2,7 @@ #include "io-device.hpp" #include "orbis/KernelAllocator.hpp" #include "orbis/file.hpp" +#include "orbis/thread/Process.hpp" #include "orbis/thread/Thread.hpp" #include "orbis/utils/Logs.hpp" #include "orbis/utils/SharedMutex.hpp" @@ -72,7 +73,7 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request, // std::fprintf(stderr, " unkPreservedVal = %lx\n", unkPreservedVal); // std::fprintf(stderr, " size = %lu\n", size); - rx::bridge.sendCommandBuffer(cmdId, address, size); + rx::bridge.sendCommandBuffer(thread->tproc->pid, cmdId, address, size); } funlockfile(stderr); @@ -123,7 +124,7 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request, // std::fprintf(stderr, " unkPreservedVal = %lx\n", unkPreservedVal); // std::fprintf(stderr, " size = %lu\n", size); - rx::bridge.sendCommandBuffer(cmdId, address, size); + rx::bridge.sendCommandBuffer(thread->tproc->pid, cmdId, address, size); } funlockfile(stderr); @@ -229,7 +230,7 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request, auto endOffset = static_cast(args->nextStartOffsetInDw) << 2; auto size = endOffset - queue.offset; - rx::bridge.sendCommandBuffer(id, address, size); + rx::bridge.sendCommandBuffer(thread->tproc->pid, id, address, size); queue.offset = endOffset; break; diff --git a/rpcsx-os/ops.cpp b/rpcsx-os/ops.cpp index 1abdb02ba..e71abcf00 100644 --- a/rpcsx-os/ops.cpp +++ b/rpcsx-os/ops.cpp @@ -799,27 +799,18 @@ SysResult execve(Thread *thread, ptr fname, ptr> argv, } } - ORBIS_LOG_ERROR(__FUNCTION__, __LINE__); - rx::vm::reset(); - 
ORBIS_LOG_ERROR(__FUNCTION__, __LINE__); - thread->tproc->nextTlsSlot = 1; for (auto [id, mod] : thread->tproc->modulesMap) { thread->tproc->modulesMap.close(id); } - ORBIS_LOG_ERROR(__FUNCTION__, __LINE__); - auto executableModule = rx::linker::loadModuleFile(path, thread); - ORBIS_LOG_ERROR(__FUNCTION__, __LINE__); - executableModule->id = thread->tproc->modulesMap.insert(executableModule); thread->tproc->processParam = executableModule->processParam; thread->tproc->processParamSize = executableModule->processParamSize; - ORBIS_LOG_ERROR(__FUNCTION__, __LINE__); auto name = path; if (auto slashP = name.rfind('/'); slashP != std::string::npos) { @@ -830,8 +821,6 @@ SysResult execve(Thread *thread, ptr fname, ptr> argv, name.resize(15); } - ORBIS_LOG_ERROR(__FUNCTION__, __LINE__); - pthread_setname_np(pthread_self(), name.c_str()); ORBIS_LOG_ERROR(__FUNCTION__, "done"); diff --git a/rpcsx-os/vm.cpp b/rpcsx-os/vm.cpp index 6aad562dd..6547aec16 100644 --- a/rpcsx-os/vm.cpp +++ b/rpcsx-os/vm.cpp @@ -3,6 +3,8 @@ #include "bridge.hpp" #include "io-device.hpp" #include "iodev/dmem.hpp" +#include "orbis/thread/Thread.hpp" +#include "orbis/thread/Process.hpp" #include "orbis/utils/Logs.hpp" #include "orbis/utils/Rc.hpp" #include @@ -908,7 +910,12 @@ void *rx::vm::map(void *addr, std::uint64_t len, std::int32_t prot, } } - rx::bridge.sendMemoryProtect(address, len, prot); + if (auto thr = orbis::g_currentThread) { + std::fprintf(stderr, "sending mapping %lx-%lx, pid %lx\n", address, address + len, thr->tproc->pid); + rx::bridge.sendMemoryProtect(thr->tproc->pid, address, len, prot); + } else { + std::fprintf(stderr, "ignoring mapping %lx-%lx\n", address, address + len); + } return result; } @@ -938,7 +945,11 @@ bool rx::vm::unmap(void *addr, std::uint64_t size) { std::lock_guard lock(g_mtx); gBlocks[(address >> kBlockShift) - kFirstBlock].removeFlags( (address & kBlockMask) >> kPageShift, pages, ~0); - rx::bridge.sendMemoryProtect(reinterpret_cast(addr), size, 0); + if 
(auto thr = orbis::g_currentThread) { + rx::bridge.sendMemoryProtect(thr->tproc->pid, reinterpret_cast(addr), size, 0); + } else { + std::fprintf(stderr, "ignoring mapping %lx-%lx\n", address, address + size); + } return utils::unmap(addr, size); } @@ -970,8 +981,12 @@ bool rx::vm::protect(void *addr, std::uint64_t size, std::int32_t prot) { (address & kBlockMask) >> kPageShift, pages, kAllocated | (prot & (kMapProtCpuAll | kMapProtGpuAll))); - rx::bridge.sendMemoryProtect(reinterpret_cast(addr), size, - prot); + if (auto thr = orbis::g_currentThread) { + rx::bridge.sendMemoryProtect(thr->tproc->pid, reinterpret_cast(addr), size, + prot); + } else { + std::fprintf(stderr, "ignoring mapping %lx-%lx\n", address, address + size); + } return ::mprotect(addr, size, prot & kMapProtCpuAll) == 0; }