mirror of
https://github.com/RPCSX/rpcsx.git
synced 2026-04-08 07:55:35 +00:00
rpcsx-gpu: add multiprocess support
This commit is contained in:
parent
f77376c1e3
commit
2c781626d3
14 changed files with 746 additions and 450 deletions
|
|
@ -1,7 +1,9 @@
|
|||
#include "amdgpu/RemoteMemory.hpp"
|
||||
#include "amdgpu/device/gpu-scheduler.hpp"
|
||||
#include "amdgpu/device/vk.hpp"
|
||||
#include "rx/MemoryTable.hpp"
|
||||
#include "rx/Version.hpp"
|
||||
#include "rx/mem.hpp"
|
||||
#include "util/unreachable.hpp"
|
||||
#include <algorithm>
|
||||
#include <amdgpu/bridge/bridge.hpp>
|
||||
|
|
@ -16,18 +18,14 @@
|
|||
#include <sys/stat.h>
|
||||
#include <thread>
|
||||
#include <unistd.h>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <util/VerifyVulkan.hpp>
|
||||
#include <vulkan/vulkan.h>
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
#include <GLFW/glfw3.h> // TODO: make in optional
|
||||
|
||||
// TODO
|
||||
// extern void *g_rwMemory;
|
||||
extern std::size_t g_memorySize;
|
||||
extern std::uint64_t g_memoryBase;
|
||||
extern amdgpu::RemoteMemory g_hostMemory;
|
||||
|
||||
static void usage(std::FILE *out, const char *argv0) {
|
||||
std::fprintf(out, "usage: %s [options...]\n", argv0);
|
||||
std::fprintf(out, " options:\n");
|
||||
|
|
@ -159,6 +157,11 @@ int main(int argc, const char *argv[]) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
if (!rx::mem::reserve((void *)0x40000, 0x60000000000 - 0x40000)) {
|
||||
std::fprintf(stderr, "failed to reserve virtual memory\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
glfwInit();
|
||||
glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API);
|
||||
auto window = glfwCreateWindow(1280, 720, "RPCSX", nullptr, nullptr);
|
||||
|
|
@ -725,20 +728,6 @@ int main(int argc, const char *argv[]) {
|
|||
amdgpu::bridge::BridgePuller bridgePuller{bridge};
|
||||
amdgpu::bridge::Command commandsBuffer[1];
|
||||
|
||||
if (!std::filesystem::exists(std::string("/dev/shm") + shmName)) {
|
||||
std::printf("Waiting for OS\n");
|
||||
while (!std::filesystem::exists(std::string("/dev/shm") + shmName)) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(300));
|
||||
}
|
||||
}
|
||||
|
||||
int memoryFd = ::shm_open(shmName, O_RDWR, S_IRUSR | S_IWUSR);
|
||||
|
||||
if (memoryFd < 0) {
|
||||
std::printf("failed to open shared memory\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
int dmemFd[3];
|
||||
|
||||
for (std::size_t i = 0; i < std::size(dmemFd); ++i) {
|
||||
|
|
@ -759,26 +748,80 @@ int main(int argc, const char *argv[]) {
|
|||
}
|
||||
}
|
||||
|
||||
struct stat memoryStat;
|
||||
::fstat(memoryFd, &memoryStat);
|
||||
amdgpu::RemoteMemory memory{(char *)::mmap(
|
||||
nullptr, memoryStat.st_size, PROT_NONE, MAP_SHARED, memoryFd, 0)};
|
||||
|
||||
// extern void *g_rwMemory;
|
||||
g_memorySize = memoryStat.st_size;
|
||||
g_memoryBase = 0x40000;
|
||||
// g_rwMemory = ::mmap(nullptr, g_memorySize, PROT_READ | PROT_WRITE, MAP_SHARED,
|
||||
// memoryFd, 0);
|
||||
|
||||
g_hostMemory = memory;
|
||||
|
||||
{
|
||||
amdgpu::device::AmdgpuDevice device(bridgePuller.header);
|
||||
|
||||
for (std::uint32_t end = bridge->memoryAreaCount, i = 0; i < end; ++i) {
|
||||
auto area = bridge->memoryAreas[i];
|
||||
device.handleProtectMemory(area.address, area.size, area.prot);
|
||||
}
|
||||
struct VmMapSlot {
|
||||
int memoryType;
|
||||
int prot;
|
||||
std::int64_t offset;
|
||||
std::uint64_t baseAddress;
|
||||
|
||||
auto operator<=>(const VmMapSlot &) const = default;
|
||||
};
|
||||
|
||||
struct ProcessInfo {
|
||||
int vmId = -1;
|
||||
int vmFd = -1;
|
||||
rx::MemoryTableWithPayload<VmMapSlot> vmTable;
|
||||
};
|
||||
|
||||
auto mapProcess = [&](std::int64_t pid, int vmId, ProcessInfo &process) {
|
||||
process.vmId = vmId;
|
||||
|
||||
auto memory = amdgpu::RemoteMemory{vmId};
|
||||
|
||||
std::string pidVmName = shmName;
|
||||
pidVmName += '-';
|
||||
pidVmName += std::to_string(pid);
|
||||
int memoryFd = ::shm_open(pidVmName.c_str(), O_RDWR, S_IRUSR | S_IWUSR);
|
||||
process.vmFd = memoryFd;
|
||||
|
||||
if (memoryFd < 0) {
|
||||
std::printf("failed to process %x shared memory\n", (int)pid);
|
||||
std::abort();
|
||||
}
|
||||
|
||||
for (auto [startAddress, endAddress, slot] : process.vmTable) {
|
||||
auto gpuProt = slot.prot >> 4;
|
||||
if (gpuProt == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto devOffset = slot.offset + startAddress - slot.baseAddress;
|
||||
int mapFd = memoryFd;
|
||||
|
||||
if (slot.memoryType >= 0) {
|
||||
mapFd = dmemFd[slot.memoryType];
|
||||
}
|
||||
|
||||
auto mmapResult =
|
||||
::mmap(memory.getPointer(startAddress), endAddress - startAddress,
|
||||
gpuProt, MAP_FIXED | MAP_SHARED, mapFd, devOffset);
|
||||
|
||||
if (mmapResult == MAP_FAILED) {
|
||||
std::printf(
|
||||
"failed to map process %x memory, address %lx-%lx, type %x\n",
|
||||
(int)pid, startAddress, endAddress, slot.memoryType);
|
||||
std::abort();
|
||||
}
|
||||
|
||||
device.handleProtectMemory(memory, startAddress,
|
||||
endAddress - startAddress, slot.prot);
|
||||
}
|
||||
};
|
||||
|
||||
auto unmapProcess = [&](ProcessInfo &process) {
|
||||
auto startAddress = static_cast<std::uint64_t>(process.vmId) << 40;
|
||||
auto size = static_cast<std::uint64_t>(1) << 40;
|
||||
rx::mem::reserve(reinterpret_cast<void *>(startAddress), size);
|
||||
|
||||
::close(process.vmFd);
|
||||
process.vmFd = -1;
|
||||
process.vmId = -1;
|
||||
};
|
||||
|
||||
std::unordered_map<std::int64_t, ProcessInfo> processInfo;
|
||||
|
||||
std::vector<VkCommandBuffer> presentCmdBuffers(swapchainImages.size());
|
||||
|
||||
|
|
@ -966,66 +1009,141 @@ int main(int argc, const char *argv[]) {
|
|||
|
||||
for (auto cmd : std::span(commandsBuffer, pulledCount)) {
|
||||
switch (cmd.id) {
|
||||
case amdgpu::bridge::CommandId::ProtectMemory:
|
||||
device.handleProtectMemory(cmd.memoryProt.address,
|
||||
cmd.memoryProt.size, cmd.memoryProt.prot);
|
||||
break;
|
||||
case amdgpu::bridge::CommandId::CommandBuffer:
|
||||
device.handleCommandBuffer(cmd.commandBuffer.queue,
|
||||
cmd.commandBuffer.address,
|
||||
cmd.commandBuffer.size);
|
||||
break;
|
||||
case amdgpu::bridge::CommandId::Flip: {
|
||||
if (!isImageAcquired) {
|
||||
Verify() << vkAcquireNextImageKHR(vkDevice, swapchain, UINT64_MAX,
|
||||
presentCompleteSemaphore, nullptr,
|
||||
&imageIndex);
|
||||
case amdgpu::bridge::CommandId::ProtectMemory: {
|
||||
auto &process = processInfo[cmd.memoryProt.pid];
|
||||
|
||||
vkWaitForFences(vkDevice, 1, &inFlightFences[imageIndex], VK_TRUE,
|
||||
UINT64_MAX);
|
||||
vkResetFences(vkDevice, 1, &inFlightFences[imageIndex]);
|
||||
auto vmSlotIt = process.vmTable.queryArea(cmd.memoryProt.address);
|
||||
if (vmSlotIt == process.vmTable.end()) {
|
||||
std::abort();
|
||||
}
|
||||
|
||||
isImageAcquired = false;
|
||||
auto vmSlot = (*vmSlotIt).payload;
|
||||
|
||||
vkResetCommandBuffer(presentCmdBuffers[imageIndex], 0);
|
||||
VkCommandBufferBeginInfo beginInfo{};
|
||||
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
||||
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
|
||||
process.vmTable.map(cmd.memoryProt.address,
|
||||
cmd.memoryProt.address + cmd.memoryProt.size,
|
||||
VmMapSlot{
|
||||
.memoryType = vmSlot.memoryType,
|
||||
.prot = static_cast<int>(cmd.memoryProt.prot),
|
||||
.offset = vmSlot.offset,
|
||||
.baseAddress = vmSlot.baseAddress,
|
||||
});
|
||||
|
||||
vkBeginCommandBuffer(presentCmdBuffers[imageIndex], &beginInfo);
|
||||
if (process.vmId >= 0) {
|
||||
auto memory = amdgpu::RemoteMemory{process.vmId};
|
||||
rx::mem::protect(memory.getPointer(cmd.memoryProt.address),
|
||||
cmd.memoryProt.size, cmd.memoryProt.prot >> 4);
|
||||
device.handleProtectMemory(memory, cmd.mapMemory.address,
|
||||
cmd.mapMemory.size, cmd.mapMemory.prot);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case amdgpu::bridge::CommandId::CommandBuffer: {
|
||||
auto &process = processInfo[cmd.commandBuffer.pid];
|
||||
if (process.vmId >= 0) {
|
||||
device.handleCommandBuffer(
|
||||
amdgpu::RemoteMemory{process.vmId}, cmd.commandBuffer.queue,
|
||||
cmd.commandBuffer.address, cmd.commandBuffer.size);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case amdgpu::bridge::CommandId::Flip: {
|
||||
auto &process = processInfo[cmd.flip.pid];
|
||||
|
||||
if (device.handleFlip(
|
||||
presentQueue, presentCmdBuffers[imageIndex],
|
||||
*flipTaskChain[imageIndex].get(), cmd.flip.bufferIndex,
|
||||
cmd.flip.arg, swapchainImages[imageIndex], swapchainExtent,
|
||||
presentCompleteSemaphore, renderCompleteSemaphore,
|
||||
inFlightFences[imageIndex])) {
|
||||
VkPresentInfoKHR presentInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
|
||||
.waitSemaphoreCount = 1,
|
||||
.pWaitSemaphores = &renderCompleteSemaphore,
|
||||
.swapchainCount = 1,
|
||||
.pSwapchains = &swapchain,
|
||||
.pImageIndices = &imageIndex,
|
||||
};
|
||||
if (vkQueuePresentKHR(presentQueue, &presentInfo) != VK_SUCCESS) {
|
||||
std::printf("swapchain was invalidated\n");
|
||||
createSwapchain();
|
||||
if (process.vmId >= 0) {
|
||||
if (!isImageAcquired) {
|
||||
Verify() << vkAcquireNextImageKHR(vkDevice, swapchain, UINT64_MAX,
|
||||
presentCompleteSemaphore,
|
||||
nullptr, &imageIndex);
|
||||
|
||||
vkWaitForFences(vkDevice, 1, &inFlightFences[imageIndex], VK_TRUE,
|
||||
UINT64_MAX);
|
||||
vkResetFences(vkDevice, 1, &inFlightFences[imageIndex]);
|
||||
}
|
||||
|
||||
isImageAcquired = false;
|
||||
|
||||
vkResetCommandBuffer(presentCmdBuffers[imageIndex], 0);
|
||||
VkCommandBufferBeginInfo beginInfo{};
|
||||
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
||||
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
|
||||
|
||||
vkBeginCommandBuffer(presentCmdBuffers[imageIndex], &beginInfo);
|
||||
|
||||
if (device.handleFlip(
|
||||
amdgpu::RemoteMemory{process.vmId}, presentQueue,
|
||||
presentCmdBuffers[imageIndex],
|
||||
*flipTaskChain[imageIndex].get(), cmd.flip.bufferIndex,
|
||||
cmd.flip.arg, swapchainImages[imageIndex], swapchainExtent,
|
||||
presentCompleteSemaphore, renderCompleteSemaphore,
|
||||
inFlightFences[imageIndex])) {
|
||||
VkPresentInfoKHR presentInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
|
||||
.waitSemaphoreCount = 1,
|
||||
.pWaitSemaphores = &renderCompleteSemaphore,
|
||||
.swapchainCount = 1,
|
||||
.pSwapchains = &swapchain,
|
||||
.pImageIndices = &imageIndex,
|
||||
};
|
||||
if (vkQueuePresentKHR(presentQueue, &presentInfo) != VK_SUCCESS) {
|
||||
std::printf("swapchain was invalidated\n");
|
||||
createSwapchain();
|
||||
}
|
||||
} else {
|
||||
isImageAcquired = true;
|
||||
}
|
||||
} else {
|
||||
isImageAcquired = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case amdgpu::bridge::CommandId::MapDmem: {
|
||||
auto addr = g_hostMemory.getPointer(cmd.mapDmem.address);
|
||||
auto mapping = ::mmap(addr, cmd.mapDmem.size,
|
||||
PROT_READ | PROT_WRITE /*TODO: cmd.mapDmem.prot >> 4*/,
|
||||
MAP_FIXED | MAP_SHARED, dmemFd[cmd.mapDmem.dmemIndex],
|
||||
cmd.mapDmem.offset);
|
||||
device.handleProtectMemory(cmd.mapDmem.address, cmd.mapDmem.size, 0x33 /*TODO: cmd.mapDmem.prot*/);
|
||||
case amdgpu::bridge::CommandId::MapProcess: {
|
||||
mapProcess(cmd.mapProcess.pid, cmd.mapProcess.vmId, processInfo[cmd.mapProcess.pid]);
|
||||
break;
|
||||
}
|
||||
case amdgpu::bridge::CommandId::UnmapProcess: {
|
||||
unmapProcess(processInfo[cmd.mapProcess.pid]);
|
||||
break;
|
||||
}
|
||||
|
||||
case amdgpu::bridge::CommandId::MapMemory: {
|
||||
auto &process = processInfo[cmd.mapMemory.pid];
|
||||
|
||||
process.vmTable.map(
|
||||
cmd.mapMemory.address, cmd.mapMemory.address + cmd.mapMemory.size,
|
||||
VmMapSlot{
|
||||
.memoryType = static_cast<int>(cmd.mapMemory.memoryType >= 0
|
||||
? cmd.mapMemory.dmemIndex
|
||||
: -1),
|
||||
.prot = static_cast<int>(cmd.mapMemory.prot),
|
||||
.offset = cmd.mapMemory.offset,
|
||||
.baseAddress = cmd.mapMemory.address,
|
||||
});
|
||||
|
||||
if (process.vmId >= 0) {
|
||||
auto memory = amdgpu::RemoteMemory{process.vmId};
|
||||
|
||||
int mapFd = process.vmFd;
|
||||
|
||||
if (cmd.mapMemory.memoryType >= 0) {
|
||||
mapFd = dmemFd[cmd.mapMemory.dmemIndex];
|
||||
}
|
||||
|
||||
auto mmapResult =
|
||||
::mmap(memory.getPointer(cmd.mapMemory.address),
|
||||
cmd.mapMemory.size, cmd.mapMemory.prot >> 4,
|
||||
MAP_FIXED | MAP_SHARED, mapFd, cmd.mapMemory.offset);
|
||||
|
||||
if (mmapResult == MAP_FAILED) {
|
||||
std::printf(
|
||||
"failed to map process %x memory, address %lx-%lx, type %x\n",
|
||||
(int)cmd.mapMemory.pid, cmd.mapMemory.address,
|
||||
cmd.mapMemory.address + cmd.mapMemory.size,
|
||||
cmd.mapMemory.memoryType);
|
||||
std::abort();
|
||||
}
|
||||
|
||||
device.handleProtectMemory(memory, cmd.mapMemory.address,
|
||||
cmd.mapMemory.size, cmd.mapMemory.prot);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue