From 62465b900114b3a451266dcccf7bb9d024966c62 Mon Sep 17 00:00:00 2001 From: DH Date: Mon, 5 Jan 2026 06:18:04 +0300 Subject: [PATCH] gpu & pmem: add direct guest memory resource --- kernel/orbis/include/orbis/pmem.hpp | 2 +- kernel/orbis/src/pmem.cpp | 34 ++++-- rpcsx/gpu/Device.cpp | 142 ++++++++++++----------- rpcsx/gpu/Device.hpp | 3 +- rpcsx/gpu/DeviceCtl.cpp | 9 +- rpcsx/gpu/DeviceCtl.hpp | 2 +- rpcsx/gpu/lib/vk/include/vk.hpp | 174 ++++++++++++++++++++-------- rpcsx/gpu/lib/vk/src/vk.cpp | 2 + rpcsx/main.cpp | 87 ++++++++++++-- rx/include/rx/Mappable.hpp | 3 + rx/src/Mappable.cpp | 49 ++++++++ 11 files changed, 364 insertions(+), 143 deletions(-) diff --git a/kernel/orbis/include/orbis/pmem.hpp b/kernel/orbis/include/orbis/pmem.hpp index f0994007b..bc641d966 100644 --- a/kernel/orbis/include/orbis/pmem.hpp +++ b/kernel/orbis/include/orbis/pmem.hpp @@ -13,7 +13,7 @@ struct File; } // namespace orbis namespace orbis::pmem { -ErrorCode initialize(std::uint64_t size); +ErrorCode initialize(rx::Mappable mappable, std::uint64_t size); void destroy(); std::pair allocate(std::uint64_t addressHint, std::uint64_t size, diff --git a/kernel/orbis/src/pmem.cpp b/kernel/orbis/src/pmem.cpp index d7abc9bc1..256b6889e 100644 --- a/kernel/orbis/src/pmem.cpp +++ b/kernel/orbis/src/pmem.cpp @@ -33,14 +33,28 @@ struct PhysicalMemoryAllocation { bool operator==(const PhysicalMemoryAllocation &) const = default; }; -using MappableMemoryResource = - kernel::MappableResource; +struct PhysicalMemoryResource + : kernel::AllocableResource { + std::size_t size; + rx::Mappable mappable; -using PhysicalMemoryResource = - kernel::AllocableResource; + std::errc create(rx::Mappable mappable, std::size_t size) { + if (size == 0 || !mappable) { + return std::errc::invalid_argument; + } + + if (auto errc = + BaseResource::create(rx::AddressRange::fromBeginSize(0, size)); + errc != std::errc{}) { + return errc; + } + + this->size = size; + this->mappable = std::move(mappable); + return {}; + } +}; static auto g_pmemInstance = orbis::createGlobalObject< kernel::LockableKernelObject>(); @@ -76,12 +90,12 @@ struct PhysicalMemory : orbis::IoDevice { static auto g_phyMemory = orbis::createGlobalObject(); -orbis::ErrorCode orbis::pmem::initialize(std::uint64_t size) { +orbis::ErrorCode orbis::pmem::initialize(rx::Mappable mappable, + std::uint64_t size) { std::lock_guard lock(*g_pmemInstance); rx::println("pmem: {:x}", size); - return toErrorCode( - g_pmemInstance->create(rx::AddressRange::fromBeginSize(0, size))); + return toErrorCode(g_pmemInstance->create(std::move(mappable), size)); } void orbis::pmem::destroy() { diff --git a/rpcsx/gpu/Device.cpp b/rpcsx/gpu/Device.cpp index 0236e68fd..35dfae9e6 100644 --- a/rpcsx/gpu/Device.cpp +++ b/rpcsx/gpu/Device.cpp @@ -63,19 +63,10 @@ makeDisplayEvent(DisplayEvent id, std::uint16_t unk0 = 0, return result; } -static vk::Context createVkContext(Device *device) { +static vk::Context createVkContext(Device *device, std::size_t dmemSize) { std::vector optionalLayers; bool enableValidation = rx::g_config.validateGpu; - for (std::size_t process = 0; process < 6; ++process) { - auto range = rx::AddressRange::fromBeginSize( - 0x40'0000 + 0x100'0000'0000 * process, 0x100'0000'0000 - 0x40'0000); - if (auto errc = rx::mem::reserve(range); errc != std::errc{}) { - rx::die("failed to reserve userspace memory: {} {:x}-{:x}", (int)errc, - range.beginAddress(), range.endAddress()); - } - } - auto createWindow = [=] { glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API); device->window = glfwCreateWindow(1920, 1080, "RPCSX", nullptr, nullptr); @@ -142,17 +133,22 @@ static vk::Context createVkContext(Device *device) { result.createDevice(device->surface, rx::g_config.gpuIndex, { - // VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, - // VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME, - // VK_EXT_INLINE_UNIFORM_BLOCK_EXTENSION_NAME, - // VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, - // VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, - // VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, + +#ifdef _WIN32 + VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, +#else + VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, +#endif + VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, VK_EXT_SEPARATE_STENCIL_USAGE_EXTENSION_NAME, VK_KHR_SWAPCHAIN_EXTENSION_NAME, VK_EXT_SHADER_OBJECT_EXTENSION_NAME, VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME, + // VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, + // VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME, + // VK_EXT_INLINE_UNIFORM_BLOCK_EXTENSION_NAME, + // VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, }, { VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME, @@ -180,18 +176,70 @@ static vk::Context createVkContext(Device *device) { getTotalMemorySize(VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); - vk::getHostVisibleMemory().initHostVisible( - std::min(hostVisibleMemoryTotalSize / 2, 1ul * 1024 * 1024 * 1024)); + vk::getDirectMemory().initHostDirect(dmemSize); + + vk::getHostVisibleMemory().initHostVisible(512 * 1024 * + 1024); // FIXME: reduce RAM usage vk::getDeviceLocalMemory().initDeviceLocal( std::min(localMemoryTotalSize / 4, 4ul * 1024 * 1024 * 1024)); - vk::context = &device->vkContext; return result; } const auto kCachePageSize = 0x100'0000'0000 / rx::mem::pageSize; -Device::Device() : vkContext(createVkContext(this)) { +Device::Device(std::size_t dmemSize) + : vkContext(createVkContext(this, dmemSize)) { + for (auto &pipe : graphicsPipes) { + pipe.device = this; + } + + for (auto &cachePage : cachePages) { + cachePage = static_cast *>( + orbis::kalloc(kCachePageSize, 1)); + std::memset(cachePage, 0, kCachePageSize); + } + + commandPipe.device = this; + commandPipe.ring = { + .base = std::data(cmdRing), + .size = std::size(cmdRing), + .rptr = std::data(cmdRing), + .wptr = std::data(cmdRing), + }; + + for (auto &pipe : computePipes) { + pipe.device = this; + } + + for (int i = 0; i < kGfxPipeCount; ++i) { + graphicsPipes[i].setDeQueue( + Ring{ + .base = mainGfxRings[i], + .size = std::size(mainGfxRings[i]), + .rptr = mainGfxRings[i], + .wptr = mainGfxRings[i], + }, + 0); + } +} + +Device::~Device() { + vkDeviceWaitIdle(vk::context->device); + + if (debugMessenger != VK_NULL_HANDLE) { + vk::DestroyDebugUtilsMessengerEXT(vk::context->instance, debugMessenger, + vk::context->allocator); + } + + for (auto &cachePage : cachePages) { + orbis::kfree(cachePage, kCachePageSize); + } +} + +void Device::initialize() { + vk::context = &vkContext; + if (!shader::spv::validate(g_rdna_semantic_spirv)) { shader::spv::dump(g_rdna_semantic_spirv, true); rx::die("builtin semantic validation failed"); @@ -209,16 +257,6 @@ Device::Device() : vkContext(createVkContext(this)) { rx::die("failed to deserialize builtin semantics\n"); } - for (auto &pipe : graphicsPipes) { - pipe.device = this; - } - - for (auto &cachePage : cachePages) { - cachePage = static_cast *>( - orbis::kalloc(kCachePageSize, 1)); - std::memset(cachePage, 0, kCachePageSize); - } - cacheUpdateThread = std::jthread([this](const std::stop_token &stopToken) { auto &sched = graphicsPipes[0].scheduler; std::uint32_t prevIdleValue = 0; @@ -262,42 +300,6 @@ Device::Device() : vkContext(createVkContext(this)) { } } }); - - commandPipe.device = this; - commandPipe.ring = { - .base = std::data(cmdRing), - .size = std::size(cmdRing), - .rptr = std::data(cmdRing), - .wptr = std::data(cmdRing), - }; - - for (auto &pipe : computePipes) { - pipe.device = this; - } - - for (int i = 0; i < kGfxPipeCount; ++i) { - graphicsPipes[i].setDeQueue( - Ring{ - .base = mainGfxRings[i], - .size = std::size(mainGfxRings[i]), - .rptr = mainGfxRings[i], - .wptr = mainGfxRings[i], - }, - 0); - } -} - -Device::~Device() { - vkDeviceWaitIdle(vk::context->device); - - if (debugMessenger != VK_NULL_HANDLE) { - vk::DestroyDebugUtilsMessengerEXT(vk::context->instance, debugMessenger, - vk::context->allocator); - } - - for (auto &cachePage : cachePages) { - orbis::kfree(cachePage, kCachePageSize); - } } void Device::start() { @@ -984,10 +986,10 @@ void Device::mapMemory(std::uint32_t pid, rx::AddressRange virtualRange, auto memory = amdgpu::RemoteMemory{process.vmId}; auto vmemAddress = memory.getVirtualAddress(virtualRange.beginAddress()); - auto errc = orbis::pmem::map(vmemAddress, - rx::AddressRange::fromBeginSize( - physicalOffset, virtualRange.size()), - orbis::vmem::toGpuProtection(prot)); + auto errc = orbis::pmem::map( + vmemAddress, + rx::AddressRange::fromBeginSize(physicalOffset, virtualRange.size()), + orbis::vmem::toGpuProtection(prot)); if (errc != orbis::ErrorCode{}) { rx::die("failed to map process {} memory, address {:x}-{:x}, type {}, " "offset {:x}, prot {}, error {}", diff --git a/rpcsx/gpu/Device.hpp b/rpcsx/gpu/Device.hpp index 200e5d990..d3eece143 100644 --- a/rpcsx/gpu/Device.hpp +++ b/rpcsx/gpu/Device.hpp @@ -103,9 +103,10 @@ struct Device : rx::RcBase, DeviceContext { std::uint32_t mainGfxRings[kGfxPipeCount][0x4000 / sizeof(std::uint32_t)]; std::uint32_t cmdRing[0x4000 / sizeof(std::uint32_t)]; - Device(); + Device(std::size_t dmemSize); ~Device(); + void initialize(); void start(); Cache::Tag getCacheTag(int vmId, Scheduler &scheduler) { diff --git a/rpcsx/gpu/DeviceCtl.cpp b/rpcsx/gpu/DeviceCtl.cpp index 4750fc600..04a8d28a7 100644 --- a/rpcsx/gpu/DeviceCtl.cpp +++ b/rpcsx/gpu/DeviceCtl.cpp @@ -21,9 +21,9 @@ DeviceCtl &DeviceCtl::operator=(const DeviceCtl &) = default; DeviceCtl::~DeviceCtl() = default; -DeviceCtl DeviceCtl::createDevice() { +DeviceCtl DeviceCtl::createDevice(std::uint64_t dmemSize) { DeviceCtl result; - result.mDevice = orbis::knew(); + result.mDevice = orbis::knew(dmemSize); return result; } @@ -229,7 +229,10 @@ void DeviceCtl::submitComputeQueue(std::uint32_t meId, std::uint32_t pipeId, pipe.submit(queueId, offset); } -void DeviceCtl::start() { mDevice->start(); } +void DeviceCtl::start() { + mDevice->initialize(); + mDevice->start(); +} void DeviceCtl::waitForIdle() { mDevice->waitForIdle(); } void amdgpu::mapMemory(std::uint32_t pid, rx::AddressRange virtualRange, diff --git a/rpcsx/gpu/DeviceCtl.hpp b/rpcsx/gpu/DeviceCtl.hpp index e3b41ea88..751577e42 100644 --- a/rpcsx/gpu/DeviceCtl.hpp +++ b/rpcsx/gpu/DeviceCtl.hpp @@ -25,7 +25,7 @@ public: DeviceCtl &operator=(const DeviceCtl &); ~DeviceCtl(); - static DeviceCtl createDevice(); + static DeviceCtl createDevice(std::uint64_t dmemSize); DeviceContext &getContext(); rx::Ref getOpaque(); diff --git a/rpcsx/gpu/lib/vk/include/vk.hpp b/rpcsx/gpu/lib/vk/include/vk.hpp index 7a6c9530f..b75f9db01 100644 --- a/rpcsx/gpu/lib/vk/include/vk.hpp +++ b/rpcsx/gpu/lib/vk/include/vk.hpp @@ -1,7 +1,10 @@ #pragma once -#include "rx/MemoryTable.hpp" -#include "rx/die.hpp" +#include +#include +#include +#include +#include #include #include @@ -157,6 +160,12 @@ public: DeviceMemory(DeviceMemory &&other) noexcept { *this = std::move(other); } DeviceMemory() = default; +#ifdef _WIN32 + using NativeHandle = void *; +#else + using NativeHandle = int; +#endif + ~DeviceMemory() { if (mDeviceMemory != nullptr) { vkFreeMemory(context->device, mDeviceMemory, context->allocator); @@ -176,9 +185,20 @@ public: [[nodiscard]] unsigned getMemoryTypeIndex() const { return mMemoryTypeIndex; } static DeviceMemory AllocateFromType(std::size_t size, - unsigned memoryTypeIndex) { + unsigned memoryTypeIndex, + bool withExportSupport = false) { + VkExportMemoryAllocateInfo exportInfo = { + .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO, +#ifdef _WIN32 + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT, +#else + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, +#endif + }; + VkMemoryAllocateFlagsInfo flags{ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, + .pNext = withExportSupport ? &exportInfo : nullptr, .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, }; @@ -198,16 +218,20 @@ public: } static DeviceMemory Allocate(std::size_t size, unsigned memoryTypeBits, - VkMemoryPropertyFlags properties) { + VkMemoryPropertyFlags properties, + bool withExportSupport = false) { return AllocateFromType( - size, context->findPhysicalMemoryTypeIndex(memoryTypeBits, properties)); + size, context->findPhysicalMemoryTypeIndex(memoryTypeBits, properties), + withExportSupport); } static DeviceMemory Allocate(VkMemoryRequirements requirements, - VkMemoryPropertyFlags properties) { + VkMemoryPropertyFlags properties, + bool withExportSupport = false) { return AllocateFromType(requirements.size, context->findPhysicalMemoryTypeIndex( - requirements.memoryTypeBits, properties)); + requirements.memoryTypeBits, properties), + withExportSupport); } static DeviceMemory CreateExternalFd(int fd, std::size_t size, @@ -315,6 +339,33 @@ public: } void unmap() { vkUnmapMemory(context->device, mDeviceMemory); } + + VkResult getNativeHandle(NativeHandle &handle) const { +#ifdef _WIN32 + VkMemoryGetWin32HandleInfoKHR info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR, + .memory = mDeviceMemory, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR, + }; + + auto vkGetMemoryWin32HandleKHR = + (PFN_vkGetMemoryWin32HandleKHR)vkGetDeviceProcAddr( + context->device, "vkGetMemoryWin32HandleKHR"); + + return vkGetMemoryWin32HandleKHR(context->device, &info, &handle); +#else + VkMemoryGetFdInfoKHR info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, + .memory = mDeviceMemory, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, + }; + + auto vkGetMemoryFdKHR = (PFN_vkGetMemoryFdKHR)vkGetDeviceProcAddr( + context->device, "vkGetMemoryFdKHR"); + + return vkGetMemoryFdKHR(context->device, &info, &handle); +#endif + } }; struct DeviceMemoryRef { @@ -331,7 +382,7 @@ class MemoryResource { DeviceMemory mMemory; char *mData = nullptr; rx::MemoryAreaTable<> table; - // const char *debugName = ""; + const char *debugName = ""; std::mutex mMtx; @@ -339,6 +390,8 @@ public: MemoryResource() = default; ~MemoryResource() { clear(); } + using NativeHandle = DeviceMemory::NativeHandle; + void clear() { if (mMemory.getHandle() != nullptr && mData != nullptr) { vkUnmapMemory(context->device, mMemory.getHandle()); @@ -356,7 +409,7 @@ public: VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; mMemory = DeviceMemory::CreateExternalFd(fd, size, properties); table.map(rx::AddressRange::fromBeginSize(0, size)); - // debugName = "fd-direct"; + debugName = "fd-direct"; } void initFromHost(void *data, std::size_t size) { @@ -365,7 +418,16 @@ public: VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; mMemory = DeviceMemory::CreateExternalHostMemory(data, size, properties); table.map(rx::AddressRange::fromBeginSize(0, size)); - // debugName = "direct"; + debugName = "imported-direct"; + } + + void initHostDirect(std::size_t size) { + assert(mMemory.getHandle() == nullptr); + auto properties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + + mMemory = DeviceMemory::Allocate(size, ~0, properties, true); + debugName = "direct"; } void initHostVisible(std::size_t size) { @@ -381,7 +443,7 @@ public: mMemory = std::move(memory); table.map(rx::AddressRange::fromBeginSize(0, size)); mData = reinterpret_cast(data); - // debugName = "host"; + debugName = "host"; } void initDeviceLocal(std::size_t size) { @@ -390,13 +452,14 @@ public: mMemory = DeviceMemory::Allocate(size, ~0, properties); table.map(rx::AddressRange::fromBeginSize(0, size)); - // debugName = "local"; + debugName = "local"; } DeviceMemoryRef allocate(VkMemoryRequirements requirements) { if ((requirements.memoryTypeBits & (1 << mMemory.getMemoryTypeIndex())) == 0) { - std::abort(); + rx::die("unexpected requirements for {} memory, {}", debugName, + requirements); } std::lock_guard lock(mMtx); @@ -415,26 +478,23 @@ public: continue; } - // if (debugName == std::string_view{"local"}) { - // std::printf("memory: allocation %s memory %lx-%lx\n", debugName, - // offset, - // offset + requirements.size); - // } - table.unmap(offset, offset + requirements.size); - return {.deviceMemory = mMemory.getHandle(), - .offset = offset, - .size = requirements.size, - .data = mData, - .allocator = this, - .release = [](DeviceMemoryRef &memoryRef) { + return { + .deviceMemory = mMemory.getHandle(), + .offset = offset, + .size = requirements.size, + .data = mData, + .allocator = this, + .release = + [](DeviceMemoryRef &memoryRef) { auto self = reinterpret_cast(memoryRef.allocator); self->deallocate(memoryRef); - }}; + }, + }; } - std::abort(); + return {}; } void deallocate(DeviceMemoryRef memory) { @@ -445,13 +505,16 @@ public: void dump() { std::lock_guard lock(mMtx); + rx::ScopedFileLock errLock(stderr); + rx::println(stderr, "{} resource\n", debugName); for (auto elem : table) { - std::fprintf(stderr, "%zu - %zu\n", elem.beginAddress, elem.endAddress); + rx::println(stderr, " {:#x} - {:#x}\n", elem.beginAddress, + elem.endAddress); } } - DeviceMemoryRef getFromOffset(std::uint64_t offset, std::size_t size) { - return {mMemory.getHandle(), offset, size, nullptr, nullptr, nullptr}; + VkResult getNativeHandle(NativeHandle &handle) const { + return mMemory.getNativeHandle(handle); } explicit operator bool() const { return mMemory.getHandle() != nullptr; } @@ -479,8 +542,11 @@ public: static Semaphore Create(std::uint64_t initialValue = 0) { VkSemaphoreTypeCreateInfo typeCreateInfo = { - VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr, - VK_SEMAPHORE_TYPE_TIMELINE, initialValue}; + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, + .pNext = nullptr, + .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, + .initialValue = initialValue, + }; VkSemaphoreCreateInfo createInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, &typeCreateInfo, 0}; @@ -492,19 +558,25 @@ public: } VkResult wait(std::uint64_t value, uint64_t timeout) const { - VkSemaphoreWaitInfo waitInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, - nullptr, - VK_SEMAPHORE_WAIT_ANY_BIT, - 1, - &mSemaphore, - &value}; + VkSemaphoreWaitInfo waitInfo = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, + .pNext = nullptr, + .flags = VK_SEMAPHORE_WAIT_ANY_BIT, + .semaphoreCount = 1, + .pSemaphores = &mSemaphore, + .pValues = &value, + }; return vkWaitSemaphores(context->device, &waitInfo, timeout); } void signal(std::uint64_t value) { - VkSemaphoreSignalInfo signalInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO, - nullptr, mSemaphore, value}; + VkSemaphoreSignalInfo signalInfo = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO, + .pNext = nullptr, + .semaphore = mSemaphore, + .value = value, + }; VK_VERIFY(vkSignalSemaphore(context->device, &signalInfo)); } @@ -543,11 +615,17 @@ public: static BinSemaphore Create() { VkSemaphoreTypeCreateInfo typeCreateInfo = { - VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr, - VK_SEMAPHORE_TYPE_BINARY, 0}; + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, + .pNext = nullptr, + .semaphoreType = VK_SEMAPHORE_TYPE_BINARY, + .initialValue = 0, + }; - VkSemaphoreCreateInfo createInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, - &typeCreateInfo, 0}; + VkSemaphoreCreateInfo createInfo = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = &typeCreateInfo, + .flags = 0, + }; BinSemaphore result; VK_VERIFY(vkCreateSemaphore(context->device, &createInfo, nullptr, @@ -581,8 +659,11 @@ public: } static Fence Create() { - VkFenceCreateInfo fenceCreateInfo = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, - nullptr, 0}; + VkFenceCreateInfo fenceCreateInfo = { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }; Fence result; VK_VERIFY(vkCreateFence(context->device, &fenceCreateInfo, nullptr, &result.mFence)); @@ -1027,6 +1108,7 @@ public: vk::MemoryResource &getHostVisibleMemory(); vk::MemoryResource &getDeviceLocalMemory(); +vk::MemoryResource &getDirectMemory(); VkResult CreateShadersEXT(VkDevice device, uint32_t createInfoCount, const VkShaderCreateInfoEXT *pCreateInfos, diff --git a/rpcsx/gpu/lib/vk/src/vk.cpp b/rpcsx/gpu/lib/vk/src/vk.cpp index 70e088752..86d5babef 100644 --- a/rpcsx/gpu/lib/vk/src/vk.cpp +++ b/rpcsx/gpu/lib/vk/src/vk.cpp @@ -9,6 +9,7 @@ vk::Context *vk::context; static vk::MemoryResource g_hostVisibleMemory; static vk::MemoryResource g_deviceLocalMemory; +static vk::MemoryResource g_directMemory; void vk::verifyFailed(VkResult result, const char *message) { std::fprintf(stderr, "vk verification failed: %s\n", message); @@ -782,6 +783,7 @@ vk::Context::findPhysicalMemoryTypeIndex(std::uint32_t typeBits, vk::MemoryResource &vk::getHostVisibleMemory() { return g_hostVisibleMemory; } vk::MemoryResource &vk::getDeviceLocalMemory() { return g_deviceLocalMemory; } +vk::MemoryResource &vk::getDirectMemory() { return g_directMemory; } static auto importDeviceVkProc(VkDevice device, const char *name) { auto result = vkGetDeviceProcAddr(device, name); diff --git a/rpcsx/main.cpp b/rpcsx/main.cpp index 34dfbc705..dd44f590b 100644 --- a/rpcsx/main.cpp +++ b/rpcsx/main.cpp @@ -16,6 +16,7 @@ #include "orbis/vmem.hpp" #include "rx/Config.hpp" #include "rx/FileLock.hpp" +#include "rx/Mappable.hpp" #include "rx/die.hpp" #include "rx/format.hpp" #include "rx/mem.hpp" @@ -23,6 +24,7 @@ #include "rx/watchdog.hpp" #include "thread.hpp" #include "vfs.hpp" +#include "vk.hpp" #include "xbyak/xbyak.h" #include #include @@ -1034,16 +1036,84 @@ int main(int argc, const char *argv[]) { rx::println(stderr, "RPCSX v{}", rx::getVersion().toString()); - setupSigHandlers(); + // FIXME: determine mode by reading elf file orbis::constructAllGlobals(); + + setupSigHandlers(); + rx::startWatchdog(); + + orbis::allocatePid(); + auto initProcess = orbis::createProcess(nullptr, asRoot ? 1 : 10); + orbis::vmem::initialize(initProcess); + + auto pmemSize = 9ull * 1024 * 1024 * 1024; + orbis::g_context->gpuDevice = + amdgpu::DeviceCtl::createDevice(pmemSize).getOpaque(); + orbis::g_context->deviceEventEmitter = orbis::knew(); - // FIXME: determine mode by reading elf file - orbis::pmem::initialize(10ull * 1024 * 1024 * 1024); - orbis::dmem::initialize(); - orbis::fmem::initialize(2ull * 1024 * 1024 * 1024); + vk::DeviceMemory::NativeHandle handle; + VK_VERIFY(vk::getDirectMemory().getNativeHandle(handle)); + auto mappable = rx::Mappable::CreateFromNativeHandle(handle); + rx::AddressRange importedVkMemory; + if (mappable.map(rx::AddressRange::fromBeginSize(orbis::kMinAddress, + orbis::vmem::kPageSize), + 0, rx::mem::Protection::R, + orbis::vmem::kPageSize) != std::errc{}) { + rx::println(stderr, "warning: failed to use Vulkan exported memory, " + "switching to imported memory"); + + vk::getDirectMemory().free(); + auto [cpuMappable, errc] = rx::Mappable::CreateMemory(pmemSize); + + rx::dieIf(errc != std::errc{}, + "failed to allocate physical memory, errc {}", errc); + mappable = std::move(cpuMappable); + auto [addr, mapErrc] = mappable.map( + pmemSize, 0, rx::mem::Protection::R | rx::mem::Protection::W); + rx::dieIf(mapErrc != std::errc{}, "failed to map physical memory, errc {}", + mapErrc); + vk::getDirectMemory().initFromHost(addr, pmemSize); + importedVkMemory = rx::AddressRange::fromBeginSize( + std::bit_cast(addr), pmemSize); + } else { + rx::mem::release(rx::AddressRange::fromBeginSize(orbis::kMinAddress, + orbis::vmem::kPageSize), + orbis::vmem::kPageSize); + } + + if (auto errc = orbis::pmem::initialize(std::move(mappable), pmemSize); + errc != orbis::ErrorCode{}) { + rx::die("pmem initialization failed, {}", errc); + } + if (auto errc = orbis::dmem::initialize(); errc != orbis::ErrorCode{}) { + rx::die("dmem initialization failed, {}", errc); + } + if (auto errc = orbis::fmem::initialize(2ull * 1024 * 1024 * 1024); + errc != orbis::ErrorCode{}) { + rx::die("fmem initialization failed, {}", errc); + } + + if (::fork() != 0) { + rx::attachGpuProcess(::getpid()); + pthread_setname_np(pthread_self(), "rpcsx-gpu"); + + int logFd = + ::open("log-gpu.txt", O_CREAT | O_RDWR | O_TRUNC, S_IRUSR | S_IWUSR); + dup2(logFd, 1); + dup2(logFd, 2); + ::close(logFd); + + amdgpu::DeviceCtl{orbis::g_context->gpuDevice}.start(); + return 0; + } + + rx::attachProcess(::getpid()); + + if (importedVkMemory.isValid()) { + rx::mem::release(importedVkMemory, 0); + } - rx::startWatchdog(); vfs::initialize(); std::vector guestArgv(argv + argIndex, argv + argc); @@ -1055,11 +1125,6 @@ int main(int argc, const char *argv[]) { rx::thread::initialize(); - // vm::printHostStats(); - orbis::allocatePid(); - auto initProcess = orbis::createProcess(nullptr, asRoot ? 1 : 10); - orbis::vmem::initialize(initProcess); - // pthread_setname_np(pthread_self(), "10.MAINTHREAD"); int status = 0; diff --git a/rx/include/rx/Mappable.hpp b/rx/include/rx/Mappable.hpp index 61761a723..236a38e0e 100644 --- a/rx/include/rx/Mappable.hpp +++ b/rx/include/rx/Mappable.hpp @@ -43,6 +43,9 @@ public: rx::EnumBitSet protection, std::size_t alignment); + std::pair map(std::size_t size, std::size_t offset, + rx::EnumBitSet protection); + [[nodiscard]] NativeHandle release() { return std::exchange(m_handle, kInvalidHandle); } diff --git a/rx/src/Mappable.cpp b/rx/src/Mappable.cpp index 8da960d3d..972449b04 100644 --- a/rx/src/Mappable.cpp +++ b/rx/src/Mappable.cpp @@ -169,6 +169,55 @@ std::errc rx::Mappable::map(rx::AddressRange virtualRange, std::size_t offset, return {}; } +std::pair +rx::Mappable::map(std::size_t size, std::size_t offset, + rx::EnumBitSet protection) { +#ifdef _WIN32 + static const DWORD protTable[] = { + PAGE_NOACCESS, // 0 + PAGE_READONLY, // R + PAGE_EXECUTE_READWRITE, // W + PAGE_EXECUTE_READWRITE, // RW + PAGE_EXECUTE, // X + PAGE_EXECUTE_READWRITE, // XR + PAGE_EXECUTE_READWRITE, // XW + PAGE_EXECUTE_READWRITE, // XRW + }; + + auto prot = protTable[(protection & (mem::Protection::R | mem::Protection::W | + mem::Protection::X)) + .toUnderlying()]; + + auto result = MapViewOfFile3((HANDLE)m_handle, nullptr, nullptr, offset, size, + 0, prot, nullptr, 0); + if (!result) { + return {{}, std::errc::invalid_argument}; + } + + return {}; +#else + int prot = 0; + + if (protection & mem::Protection::R) { + prot |= PROT_READ; + } + if (protection & mem::Protection::W) { + prot |= PROT_READ | PROT_WRITE; + } + if (protection & mem::Protection::X) { + prot |= PROT_EXEC; + } + + auto result = ::mmap(nullptr, size, prot, MAP_SHARED, m_handle, offset); + + if (result == MAP_FAILED) { + return {{}, std::errc{errno}}; + } +#endif + + return {result, {}}; +} + void rx::Mappable::destroy() { #ifdef _WIN32 CloseHandle((HANDLE)m_handle);