rpcsx-gpu: add multiprocess support

This commit is contained in:
DH 2024-09-01 17:43:45 +03:00
parent f77376c1e3
commit 2c781626d3
14 changed files with 746 additions and 450 deletions

View file

@ -1,14 +1,12 @@
#pragma once
#include <orbis/utils/SharedMutex.hpp>
#include <atomic>
#include <cstdint>
#include <cstring>
#include <initializer_list>
#include <orbis/utils/SharedMutex.hpp>
namespace amdgpu::bridge {
extern std::uint32_t expGpuPid;
struct PadState {
std::uint64_t timestamp;
std::uint32_t unk;
@ -47,7 +45,9 @@ enum class CommandId : std::uint32_t {
ProtectMemory,
CommandBuffer,
Flip,
MapDmem,
MapMemory,
MapProcess,
UnmapProcess,
};
struct CmdMemoryProt {
@ -79,15 +79,25 @@ struct CmdFlip {
std::uint64_t arg;
};
struct CmdMapDmem {
std::uint64_t offset;
// Bridge command payload for CommandId::MapMemory: describes a guest memory
// mapping that the GPU process must mirror. Filled by the command decoder
// from the argument list packed by BridgePusher::sendMapMemory.
struct CmdMapMemory {
// Signed offset into the backing store; presumably negative means "no
// backing offset" — TODO confirm against the receiver.
std::int64_t offset;
std::uint64_t address; // guest virtual address of the mapping
std::uint64_t size;    // mapping length in bytes
std::uint32_t prot;    // protection flags; encoding not visible in this header
std::uint32_t pid;     // guest process the mapping belongs to
std::int32_t memoryType; // platform memory type; semantics not visible here
std::uint32_t dmemIndex; // direct-memory (dmem) area index
};
// Bridge command payload for CommandId::MapProcess: attaches a guest process
// (pid) to a GPU-side virtual-memory slot (vmId).
struct CmdMapProcess {
std::uint64_t pid;
// GPU-side VM slot index. NOTE(review): sendMapProcess passes `unsigned`
// while this field is signed `int` — confirm the intended range.
int vmId;
};
// Bridge command payload for CommandId::UnmapProcess: detaches a guest
// process previously registered via MapProcess.
struct CmdUnmapProcess {
std::uint64_t pid;
};
enum {
kPageWriteWatch = 1 << 0,
kPageReadWriteLock = 1 << 1,
@ -112,17 +122,15 @@ struct BridgeHeader {
volatile std::uint64_t flipArg;
volatile std::uint64_t flipCount;
volatile std::uint64_t bufferInUseAddress;
std::uint32_t memoryAreaCount;
std::uint32_t commandBufferCount;
std::uint32_t bufferCount;
CmdMemoryProt memoryAreas[512];
CmdCommandBuffer commandBuffers[32];
CmdBuffer buffers[10];
// orbis::shared_mutex cacheCommandMtx;
// orbis::shared_cv cacheCommandCv;
std::atomic<std::uint64_t> cacheCommands[4];
std::atomic<std::uint32_t> gpuCacheCommand;
std::atomic<std::uint8_t> cachePages[0x100'0000'0000 / kHostPageSize];
std::atomic<std::uint64_t> cacheCommands[6][4];
std::atomic<std::uint32_t> gpuCacheCommand[6];
std::atomic<std::uint8_t> cachePages[6][0x100'0000'0000 / kHostPageSize];
volatile std::uint64_t pull;
volatile std::uint64_t push;
@ -137,7 +145,9 @@ struct Command {
CmdCommandBuffer commandBuffer;
CmdBuffer buffer;
CmdFlip flip;
CmdMapDmem mapDmem;
CmdMapMemory mapMemory;
CmdMapProcess mapProcess;
CmdUnmapProcess unmapProcess;
};
};
@ -160,29 +170,32 @@ struct BridgePusher {
void sendMemoryProtect(std::uint32_t pid, std::uint64_t address,
std::uint64_t size, std::uint32_t prot) {
if (pid == expGpuPid) {
sendCommand(CommandId::ProtectMemory, {pid, address, size, prot});
}
sendCommand(CommandId::ProtectMemory, {pid, address, size, prot});
}
void sendMapDmem(std::uint32_t pid, std::uint32_t dmemIndex, std::uint64_t address, std::uint64_t size, std::uint32_t prot, std::uint64_t offset) {
// if (pid == expGpuPid) {
sendCommand(CommandId::MapDmem, {pid, dmemIndex, address, size, prot, offset});
// }
// Enqueue a MapMemory command on the bridge ring. Arguments are flattened
// into the uint64 argument list in exactly the order the receiving decoder
// reads them back: pid, memoryType, dmemIndex, address, size, prot, offset.
void sendMapMemory(std::uint32_t pid, std::uint32_t memoryType,
std::uint32_t dmemIndex, std::uint64_t address,
std::uint64_t size, std::uint32_t prot,
std::uint64_t offset) {
sendCommand(CommandId::MapMemory,
{pid, memoryType, dmemIndex, address, size, prot, offset});
}
void sendCommandBuffer(std::uint32_t pid, std::uint64_t queue,
std::uint64_t address, std::uint64_t size) {
// if (pid == expGpuPid) {
sendCommand(CommandId::CommandBuffer, {pid, queue, address, size});
// }
sendCommand(CommandId::CommandBuffer, {pid, queue, address, size});
}
void sendFlip(std::uint32_t pid, std::uint32_t bufferIndex,
std::uint64_t arg) {
// if (pid == expGpuPid) {
sendCommand(CommandId::Flip, {pid, bufferIndex, arg});
// }
sendCommand(CommandId::Flip, {pid, bufferIndex, arg});
}
// Enqueue a MapProcess command binding guest process `pid` to GPU VM slot
// `vmId`; the decoder unpacks the arguments in this order (pid, vmId).
void sendMapProcess(std::uint32_t pid, unsigned vmId) {
sendCommand(CommandId::MapProcess, {pid, vmId});
}
// Enqueue an UnmapProcess command releasing whatever VM slot was assigned
// to guest process `pid` by a prior MapProcess.
void sendUnmapProcess(std::uint32_t pid) {
sendCommand(CommandId::UnmapProcess, {pid});
}
void wait() {
@ -198,7 +211,8 @@ private:
void sendCommand(CommandId id, std::initializer_list<std::uint64_t> args) {
std::uint64_t exp = 0;
while (!header->lock.compare_exchange_weak(exp, 1, std::memory_order::acquire, std::memory_order::relaxed)) {
while (!header->lock.compare_exchange_weak(
exp, 1, std::memory_order::acquire, std::memory_order::relaxed)) {
exp = 0;
}
@ -303,13 +317,23 @@ private:
result.flip.arg = args[2];
return result;
case CommandId::MapDmem:
result.mapDmem.pid = args[0];
result.mapDmem.dmemIndex = args[1];
result.mapDmem.address = args[2];
result.mapDmem.size = args[3];
result.mapDmem.prot = args[4];
result.mapDmem.offset = args[5];
case CommandId::MapMemory:
result.mapMemory.pid = args[0];
result.mapMemory.memoryType = args[1];
result.mapMemory.dmemIndex = args[2];
result.mapMemory.address = args[3];
result.mapMemory.size = args[4];
result.mapMemory.prot = args[5];
result.mapMemory.offset = args[6];
return result;
case CommandId::MapProcess:
result.mapProcess.pid = args[0];
result.mapProcess.vmId = args[1];
return result;
case CommandId::UnmapProcess:
result.unmapProcess.pid = args[0];
return result;
}

View file

@ -8,8 +8,6 @@
static int gShmFd = -1;
static constexpr std::size_t kShmSize = sizeof(amdgpu::bridge::BridgeHeader) +
(sizeof(std::uint64_t) * 256);
std::uint32_t amdgpu::bridge::expGpuPid = 0;
amdgpu::bridge::BridgeHeader *
amdgpu::bridge::createShmCommandBuffer(const char *name) {
if (gShmFd != -1) {

View file

@ -1,5 +1,6 @@
#pragma once
#include "amdgpu/RemoteMemory.hpp"
#include "amdgpu/bridge/bridge.hpp"
#include "amdgpu/shader/Instruction.hpp"
#include "gpu-scheduler.hpp"
@ -1259,6 +1260,42 @@ struct GnmTBuffer {
static_assert(sizeof(GnmTBuffer) == sizeof(std::uint64_t) * 4);
// GNM sampler state descriptor: four 32-bit words of packed bitfields
// (size pinned to 16 bytes by the static_assert below).
// NOTE(review): the widths appear to mirror a hardware sampler register
// layout — do not reorder or repack fields. Comparisons are presumably used
// to deduplicate/cache sampler states — confirm at call sites.
struct GnmSSampler {
// --- word 0 (bits sum to 32) ---
int32_t clamp_x : 3;
int32_t clamp_y : 3;
int32_t clamp_z : 3;
int32_t max_aniso_ratio : 3;
int32_t depth_compare_func : 3;
int32_t force_unorm_coords : 1;
int32_t aniso_threshold : 3;
int32_t mc_coord_trunc : 1;
int32_t force_degamma : 1;
int32_t aniso_bias : 6;
int32_t trunc_coord : 1;
int32_t disable_cube_wrap : 1;
int32_t filter_mode : 2;
int32_t : 1; // reserved/padding
// --- word 1 ---
int32_t min_lod : 12;
int32_t max_lod : 12;
int32_t perf_mip : 4;
int32_t perf_z : 4;
// --- word 2 ---
int32_t lod_bias : 14;
int32_t lod_bias_sec : 6;
int32_t xy_mag_filter : 2;
int32_t xy_min_filter : 2;
int32_t z_filter : 2;
int32_t mip_filter : 2;
int32_t : 4; // reserved/padding
// --- word 3 ---
int32_t border_color_ptr : 12;
int32_t : 18; // reserved/padding
int32_t border_color_type : 2;
// Defaulted comparisons: member-wise ordering/equality over all fields.
auto operator<=>(const GnmSSampler &) const = default;
bool operator==(const GnmSSampler &) const = default;
};
static_assert(sizeof(GnmSSampler) == sizeof(std::uint32_t) * 4);
constexpr auto kPageSize = 0x4000;
void setVkDevice(VkDevice device,
@ -1266,11 +1303,11 @@ void setVkDevice(VkDevice device,
VkPhysicalDeviceProperties devProperties);
struct AmdgpuDevice {
void handleProtectMemory(std::uint64_t address, std::uint64_t size,
std::uint32_t prot);
void handleCommandBuffer(std::uint64_t queueId, std::uint64_t address,
std::uint64_t size);
bool handleFlip(VkQueue queue, VkCommandBuffer cmdBuffer,
void handleProtectMemory(RemoteMemory memory, std::uint64_t address,
std::uint64_t size, std::uint32_t prot);
void handleCommandBuffer(RemoteMemory memory, std::uint64_t queueId,
std::uint64_t address, std::uint64_t size);
bool handleFlip(RemoteMemory memory, VkQueue queue, VkCommandBuffer cmdBuffer,
TaskChain &initTaskChain, std::uint32_t bufferIndex,
std::uint64_t arg, VkImage targetImage,
VkExtent2D targetExtent, VkSemaphore waitSemaphore,

File diff suppressed because it is too large Load diff

View file

@ -3,10 +3,11 @@
namespace amdgpu {
struct RemoteMemory {
char *shmPointer;
int vmId;
template <typename T = void> T *getPointer(std::uint64_t address) const {
return address ? reinterpret_cast<T *>(shmPointer + address - 0x40000)
return address ? reinterpret_cast<T *>(
static_cast<std::uint64_t>(vmId) << 40 | address)
: nullptr;
}
};