rpcsx-gpu: add multiprocess support

This commit is contained in:
DH 2024-09-01 17:43:45 +03:00
parent f77376c1e3
commit 2c781626d3
14 changed files with 746 additions and 450 deletions

View file

@ -1,14 +1,12 @@
#pragma once
#include <orbis/utils/SharedMutex.hpp>
#include <atomic>
#include <cstdint>
#include <cstring>
#include <initializer_list>
#include <orbis/utils/SharedMutex.hpp>
namespace amdgpu::bridge {
extern std::uint32_t expGpuPid;
struct PadState {
std::uint64_t timestamp;
std::uint32_t unk;
@ -47,7 +45,9 @@ enum class CommandId : std::uint32_t {
ProtectMemory,
CommandBuffer,
Flip,
MapDmem,
MapMemory,
MapProcess,
UnmapProcess,
};
struct CmdMemoryProt {
@ -79,15 +79,25 @@ struct CmdFlip {
std::uint64_t arg;
};
struct CmdMapDmem {
std::uint64_t offset;
// Request to map a region of guest memory into the GPU process.
// Produced by the OS side (sendMapMemory) and consumed by the rpcsx-gpu
// MapMemory command handler, which mmaps the backing fd at the guest address.
struct CmdMapMemory {
std::int64_t offset; // offset into the backing fd (dmem or per-process shm)
std::uint64_t address; // guest virtual address of the mapping
std::uint64_t size; // length of the mapping in bytes
std::uint32_t prot; // protection flags; GPU bits are taken from the high nibble (prot >> 4)
std::uint32_t pid; // guest process that owns the mapping
std::int32_t memoryType; // backing type; negative means process shm rather than dmem
std::uint32_t dmemIndex; // dmem device index, consulted when memoryType >= 0
};
// Notifies the GPU process that a guest process was assigned a VM slot,
// so its shared-memory view can be mapped into the slot's address window.
struct CmdMapProcess {
std::uint64_t pid; // guest process id
int vmId; // assigned VM slot index (selects the per-process bridge arrays)
};
// Notifies the GPU process that a guest process released its VM slot;
// the handler unmaps the slot's window and closes the per-process shm fd.
struct CmdUnmapProcess {
std::uint64_t pid; // guest process id whose slot is being released
};
enum {
kPageWriteWatch = 1 << 0,
kPageReadWriteLock = 1 << 1,
@ -112,17 +122,15 @@ struct BridgeHeader {
volatile std::uint64_t flipArg;
volatile std::uint64_t flipCount;
volatile std::uint64_t bufferInUseAddress;
std::uint32_t memoryAreaCount;
std::uint32_t commandBufferCount;
std::uint32_t bufferCount;
CmdMemoryProt memoryAreas[512];
CmdCommandBuffer commandBuffers[32];
CmdBuffer buffers[10];
// orbis::shared_mutex cacheCommandMtx;
// orbis::shared_cv cacheCommandCv;
std::atomic<std::uint64_t> cacheCommands[4];
std::atomic<std::uint32_t> gpuCacheCommand;
std::atomic<std::uint8_t> cachePages[0x100'0000'0000 / kHostPageSize];
std::atomic<std::uint64_t> cacheCommands[6][4];
std::atomic<std::uint32_t> gpuCacheCommand[6];
std::atomic<std::uint8_t> cachePages[6][0x100'0000'0000 / kHostPageSize];
volatile std::uint64_t pull;
volatile std::uint64_t push;
@ -137,7 +145,9 @@ struct Command {
CmdCommandBuffer commandBuffer;
CmdBuffer buffer;
CmdFlip flip;
CmdMapDmem mapDmem;
CmdMapMemory mapMemory;
CmdMapProcess mapProcess;
CmdUnmapProcess unmapProcess;
};
};
@ -160,29 +170,32 @@ struct BridgePusher {
void sendMemoryProtect(std::uint32_t pid, std::uint64_t address,
std::uint64_t size, std::uint32_t prot) {
if (pid == expGpuPid) {
sendCommand(CommandId::ProtectMemory, {pid, address, size, prot});
}
sendCommand(CommandId::ProtectMemory, {pid, address, size, prot});
}
void sendMapDmem(std::uint32_t pid, std::uint32_t dmemIndex, std::uint64_t address, std::uint64_t size, std::uint32_t prot, std::uint64_t offset) {
// if (pid == expGpuPid) {
sendCommand(CommandId::MapDmem, {pid, dmemIndex, address, size, prot, offset});
// }
// Queues a MapMemory command describing a new guest mapping.
// The argument order must match the unpacking in the CommandId::MapMemory
// decoder: pid, memoryType, dmemIndex, address, size, prot, offset.
void sendMapMemory(std::uint32_t pid, std::uint32_t memoryType,
std::uint32_t dmemIndex, std::uint64_t address,
std::uint64_t size, std::uint32_t prot,
std::uint64_t offset) {
sendCommand(CommandId::MapMemory,
{pid, memoryType, dmemIndex, address, size, prot, offset});
}
void sendCommandBuffer(std::uint32_t pid, std::uint64_t queue,
std::uint64_t address, std::uint64_t size) {
// if (pid == expGpuPid) {
sendCommand(CommandId::CommandBuffer, {pid, queue, address, size});
// }
sendCommand(CommandId::CommandBuffer, {pid, queue, address, size});
}
void sendFlip(std::uint32_t pid, std::uint32_t bufferIndex,
std::uint64_t arg) {
// if (pid == expGpuPid) {
sendCommand(CommandId::Flip, {pid, bufferIndex, arg});
// }
sendCommand(CommandId::Flip, {pid, bufferIndex, arg});
}
// Announces that `pid` acquired VM slot `vmId` (see CmdMapProcess).
void sendMapProcess(std::uint32_t pid, unsigned vmId) {
sendCommand(CommandId::MapProcess, {pid, vmId});
}
// Announces that `pid` released its VM slot (see CmdUnmapProcess).
void sendUnmapProcess(std::uint32_t pid) {
sendCommand(CommandId::UnmapProcess, {pid});
}
void wait() {
@ -198,7 +211,8 @@ private:
void sendCommand(CommandId id, std::initializer_list<std::uint64_t> args) {
std::uint64_t exp = 0;
while (!header->lock.compare_exchange_weak(exp, 1, std::memory_order::acquire, std::memory_order::relaxed)) {
while (!header->lock.compare_exchange_weak(
exp, 1, std::memory_order::acquire, std::memory_order::relaxed)) {
exp = 0;
}
@ -303,13 +317,23 @@ private:
result.flip.arg = args[2];
return result;
case CommandId::MapDmem:
result.mapDmem.pid = args[0];
result.mapDmem.dmemIndex = args[1];
result.mapDmem.address = args[2];
result.mapDmem.size = args[3];
result.mapDmem.prot = args[4];
result.mapDmem.offset = args[5];
case CommandId::MapMemory:
result.mapMemory.pid = args[0];
result.mapMemory.memoryType = args[1];
result.mapMemory.dmemIndex = args[2];
result.mapMemory.address = args[3];
result.mapMemory.size = args[4];
result.mapMemory.prot = args[5];
result.mapMemory.offset = args[6];
return result;
case CommandId::MapProcess:
result.mapProcess.pid = args[0];
result.mapProcess.vmId = args[1];
return result;
case CommandId::UnmapProcess:
result.unmapProcess.pid = args[0];
return result;
}

View file

@ -8,8 +8,6 @@
static int gShmFd = -1;
static constexpr std::size_t kShmSize = sizeof(amdgpu::bridge::BridgeHeader) +
(sizeof(std::uint64_t) * 256);
std::uint32_t amdgpu::bridge::expGpuPid = 0;
amdgpu::bridge::BridgeHeader *
amdgpu::bridge::createShmCommandBuffer(const char *name) {
if (gShmFd != -1) {

View file

@ -1,5 +1,6 @@
#pragma once
#include "amdgpu/RemoteMemory.hpp"
#include "amdgpu/bridge/bridge.hpp"
#include "amdgpu/shader/Instruction.hpp"
#include "gpu-scheduler.hpp"
@ -1259,6 +1260,42 @@ struct GnmTBuffer {
static_assert(sizeof(GnmTBuffer) == sizeof(std::uint64_t) * 4);
// GCN S# sampler descriptor, 4 dwords wide (enforced by the static_assert
// below). The bitfield layout appears to mirror the hardware sampler
// register words — TODO confirm against the AMD GCN/Sea Islands ISA docs.
struct GnmSSampler {
// word 0: addressing, anisotropy and filtering mode control
int32_t clamp_x : 3;
int32_t clamp_y : 3;
int32_t clamp_z : 3;
int32_t max_aniso_ratio : 3;
int32_t depth_compare_func : 3;
int32_t force_unorm_coords : 1;
int32_t aniso_threshold : 3;
int32_t mc_coord_trunc : 1;
int32_t force_degamma : 1;
int32_t aniso_bias : 6;
int32_t trunc_coord : 1;
int32_t disable_cube_wrap : 1;
int32_t filter_mode : 2;
int32_t : 1; // reserved
// word 1: LOD clamping range plus performance tuning fields
int32_t min_lod : 12;
int32_t max_lod : 12;
int32_t perf_mip : 4;
int32_t perf_z : 4;
// word 2: LOD bias and the per-axis/mip filter selectors
int32_t lod_bias : 14;
int32_t lod_bias_sec : 6;
int32_t xy_mag_filter : 2;
int32_t xy_min_filter : 2;
int32_t z_filter : 2;
int32_t mip_filter : 2;
int32_t : 4; // reserved
// word 3: border color selection
int32_t border_color_ptr : 12;
int32_t : 18; // reserved
int32_t border_color_type : 2;
// Defaulted comparisons let whole descriptors be compared bitwise-by-field,
// e.g. for sampler-state deduplication/caching.
auto operator<=>(const GnmSSampler &) const = default;
bool operator==(const GnmSSampler &) const = default;
};
static_assert(sizeof(GnmSSampler) == sizeof(std::uint32_t) * 4);
constexpr auto kPageSize = 0x4000;
void setVkDevice(VkDevice device,
@ -1266,11 +1303,11 @@ void setVkDevice(VkDevice device,
VkPhysicalDeviceProperties devProperties);
struct AmdgpuDevice {
void handleProtectMemory(std::uint64_t address, std::uint64_t size,
std::uint32_t prot);
void handleCommandBuffer(std::uint64_t queueId, std::uint64_t address,
std::uint64_t size);
bool handleFlip(VkQueue queue, VkCommandBuffer cmdBuffer,
void handleProtectMemory(RemoteMemory memory, std::uint64_t address,
std::uint64_t size, std::uint32_t prot);
void handleCommandBuffer(RemoteMemory memory, std::uint64_t queueId,
std::uint64_t address, std::uint64_t size);
bool handleFlip(RemoteMemory memory, VkQueue queue, VkCommandBuffer cmdBuffer,
TaskChain &initTaskChain, std::uint32_t bufferIndex,
std::uint64_t arg, VkImage targetImage,
VkExtent2D targetExtent, VkSemaphore waitSemaphore,

File diff suppressed because it is too large Load diff

View file

@ -3,10 +3,11 @@
namespace amdgpu {
struct RemoteMemory {
char *shmPointer;
int vmId;
template <typename T = void> T *getPointer(std::uint64_t address) const {
return address ? reinterpret_cast<T *>(shmPointer + address - 0x40000)
return address ? reinterpret_cast<T *>(
static_cast<std::uint64_t>(vmId) << 40 | address)
: nullptr;
}
};

View file

@ -7,7 +7,14 @@ struct AuthInfo {
uint64_t unk0;
uint64_t caps[4];
uint64_t attrs[4];
uint64_t unk[8];
uint64_t ucred[8];
bool hasUseHp3dPipeCapability() const {
return ucred[2] == 0x3800000000000009;
}
bool hasMmapSelfCapability() const { return ((ucred[4] >> 0x3a) & 1) != 1; }
bool hasSystemCapability() const { return ((ucred[3] >> 0x3e) & 1) != 0; }
bool hasSceProgramAttribute() const { return ((ucred[3] >> 0x1f) & 1) != 0; }
};
static_assert(sizeof(AuthInfo) == 136);

View file

@ -51,6 +51,7 @@ struct Process final {
ProcessState state = ProcessState::NEW;
Process *parentProcess = nullptr;
shared_mutex mtx;
int vmId = -1;
void (*onSysEnter)(Thread *thread, int id, uint64_t *args,
int argsCount) = nullptr;
void (*onSysExit)(Thread *thread, int id, uint64_t *args, int argsCount,

View file

@ -8,4 +8,5 @@ add_executable(rpcsx-gpu
target_include_directories(rpcsx-gpu PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(rpcsx-gpu PUBLIC amdgpu::bridge amdgpu::device glfw Vulkan::Vulkan rx)
set_target_properties(rpcsx-gpu PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
# NOTE(review): this sets link options on rpcsx-os from within the rpcsx-gpu
# CMakeLists — verify the target name. rpcsx-gpu reserves guest VA
# [0x40000, 0x60000000000) at runtime, so placing an executable's text segment
# above that range is what this option appears intended for.
target_link_options(rpcsx-os PUBLIC "LINKER:-Ttext-segment,0x0000060000000000")
install(TARGETS rpcsx-gpu RUNTIME DESTINATION bin)

View file

@ -1,7 +1,9 @@
#include "amdgpu/RemoteMemory.hpp"
#include "amdgpu/device/gpu-scheduler.hpp"
#include "amdgpu/device/vk.hpp"
#include "rx/MemoryTable.hpp"
#include "rx/Version.hpp"
#include "rx/mem.hpp"
#include "util/unreachable.hpp"
#include <algorithm>
#include <amdgpu/bridge/bridge.hpp>
@ -16,18 +18,14 @@
#include <sys/stat.h>
#include <thread>
#include <unistd.h>
#include <unordered_map>
#include <unordered_set>
#include <util/VerifyVulkan.hpp>
#include <vulkan/vulkan.h>
#include <vulkan/vulkan_core.h>
#include <GLFW/glfw3.h> // TODO: make in optional
// TODO
// extern void *g_rwMemory;
extern std::size_t g_memorySize;
extern std::uint64_t g_memoryBase;
extern amdgpu::RemoteMemory g_hostMemory;
static void usage(std::FILE *out, const char *argv0) {
std::fprintf(out, "usage: %s [options...]\n", argv0);
std::fprintf(out, " options:\n");
@ -159,6 +157,11 @@ int main(int argc, const char *argv[]) {
return 1;
}
if (!rx::mem::reserve((void *)0x40000, 0x60000000000 - 0x40000)) {
std::fprintf(stderr, "failed to reserve virtual memory\n");
return 1;
}
glfwInit();
glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API);
auto window = glfwCreateWindow(1280, 720, "RPCSX", nullptr, nullptr);
@ -725,20 +728,6 @@ int main(int argc, const char *argv[]) {
amdgpu::bridge::BridgePuller bridgePuller{bridge};
amdgpu::bridge::Command commandsBuffer[1];
if (!std::filesystem::exists(std::string("/dev/shm") + shmName)) {
std::printf("Waiting for OS\n");
while (!std::filesystem::exists(std::string("/dev/shm") + shmName)) {
std::this_thread::sleep_for(std::chrono::milliseconds(300));
}
}
int memoryFd = ::shm_open(shmName, O_RDWR, S_IRUSR | S_IWUSR);
if (memoryFd < 0) {
std::printf("failed to open shared memory\n");
return 1;
}
int dmemFd[3];
for (std::size_t i = 0; i < std::size(dmemFd); ++i) {
@ -759,26 +748,80 @@ int main(int argc, const char *argv[]) {
}
}
struct stat memoryStat;
::fstat(memoryFd, &memoryStat);
amdgpu::RemoteMemory memory{(char *)::mmap(
nullptr, memoryStat.st_size, PROT_NONE, MAP_SHARED, memoryFd, 0)};
// extern void *g_rwMemory;
g_memorySize = memoryStat.st_size;
g_memoryBase = 0x40000;
// g_rwMemory = ::mmap(nullptr, g_memorySize, PROT_READ | PROT_WRITE, MAP_SHARED,
// memoryFd, 0);
g_hostMemory = memory;
{
amdgpu::device::AmdgpuDevice device(bridgePuller.header);
for (std::uint32_t end = bridge->memoryAreaCount, i = 0; i < end; ++i) {
auto area = bridge->memoryAreas[i];
device.handleProtectMemory(area.address, area.size, area.prot);
}
// Bookkeeping record for one mapped range of a guest process's VM,
// stored as the payload of the per-process MemoryTable.
struct VmMapSlot {
int memoryType; // negative: backed by the process shm fd; >= 0: dmem-backed
int prot; // guest protection; GPU bits live in the high nibble (prot >> 4)
std::int64_t offset; // fd offset recorded at baseAddress
std::uint64_t baseAddress; // guest address the offset corresponds to
auto operator<=>(const VmMapSlot &) const = default;
};
// Per-guest-process state tracked by the GPU process.
struct ProcessInfo {
int vmId = -1; // assigned VM slot; -1 while the process is not mapped
int vmFd = -1; // fd of the process's shared-memory region; -1 when closed
rx::MemoryTableWithPayload<VmMapSlot> vmTable; // known guest mappings
};
// Attaches a guest process to VM slot `vmId`: opens its per-pid shared
// memory ("<shmName>-<pid>") and replays every recorded mapping that has GPU
// protection bits into the slot's address window, notifying the device.
auto mapProcess = [&](std::int64_t pid, int vmId, ProcessInfo &process) {
process.vmId = vmId;
auto memory = amdgpu::RemoteMemory{vmId};
std::string pidVmName = shmName;
pidVmName += '-';
pidVmName += std::to_string(pid);
int memoryFd = ::shm_open(pidVmName.c_str(), O_RDWR, S_IRUSR | S_IWUSR);
process.vmFd = memoryFd;
if (memoryFd < 0) {
// NOTE(review): message likely meant "failed to open process %x shared memory".
std::printf("failed to process %x shared memory\n", (int)pid);
std::abort();
}
for (auto [startAddress, endAddress, slot] : process.vmTable) {
// Skip ranges the GPU was never granted access to (high-nibble bits).
auto gpuProt = slot.prot >> 4;
if (gpuProt == 0) {
continue;
}
// Recompute the fd offset for this sub-range of the original mapping.
auto devOffset = slot.offset + startAddress - slot.baseAddress;
int mapFd = memoryFd;
if (slot.memoryType >= 0) {
mapFd = dmemFd[slot.memoryType];
}
auto mmapResult =
::mmap(memory.getPointer(startAddress), endAddress - startAddress,
gpuProt, MAP_FIXED | MAP_SHARED, mapFd, devOffset);
if (mmapResult == MAP_FAILED) {
std::printf(
"failed to map process %x memory, address %lx-%lx, type %x\n",
(int)pid, startAddress, endAddress, slot.memoryType);
std::abort();
}
device.handleProtectMemory(memory, startAddress,
endAddress - startAddress, slot.prot);
}
};
// Detaches a guest process from its VM slot: re-reserves the slot's whole
// 1 TiB address window (dropping all mappings inside it) and closes the
// per-process shared-memory fd.
auto unmapProcess = [&](ProcessInfo &process) {
auto startAddress = static_cast<std::uint64_t>(process.vmId) << 40;
auto size = static_cast<std::uint64_t>(1) << 40;
rx::mem::reserve(reinterpret_cast<void *>(startAddress), size);
::close(process.vmFd);
process.vmFd = -1;
process.vmId = -1;
};
std::unordered_map<std::int64_t, ProcessInfo> processInfo;
std::vector<VkCommandBuffer> presentCmdBuffers(swapchainImages.size());
@ -966,66 +1009,141 @@ int main(int argc, const char *argv[]) {
for (auto cmd : std::span(commandsBuffer, pulledCount)) {
switch (cmd.id) {
case amdgpu::bridge::CommandId::ProtectMemory:
device.handleProtectMemory(cmd.memoryProt.address,
cmd.memoryProt.size, cmd.memoryProt.prot);
break;
case amdgpu::bridge::CommandId::CommandBuffer:
device.handleCommandBuffer(cmd.commandBuffer.queue,
cmd.commandBuffer.address,
cmd.commandBuffer.size);
break;
case amdgpu::bridge::CommandId::Flip: {
if (!isImageAcquired) {
Verify() << vkAcquireNextImageKHR(vkDevice, swapchain, UINT64_MAX,
presentCompleteSemaphore, nullptr,
&imageIndex);
case amdgpu::bridge::CommandId::ProtectMemory: {
auto &process = processInfo[cmd.memoryProt.pid];
vkWaitForFences(vkDevice, 1, &inFlightFences[imageIndex], VK_TRUE,
UINT64_MAX);
vkResetFences(vkDevice, 1, &inFlightFences[imageIndex]);
auto vmSlotIt = process.vmTable.queryArea(cmd.memoryProt.address);
if (vmSlotIt == process.vmTable.end()) {
std::abort();
}
isImageAcquired = false;
auto vmSlot = (*vmSlotIt).payload;
vkResetCommandBuffer(presentCmdBuffers[imageIndex], 0);
VkCommandBufferBeginInfo beginInfo{};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
process.vmTable.map(cmd.memoryProt.address,
cmd.memoryProt.address + cmd.memoryProt.size,
VmMapSlot{
.memoryType = vmSlot.memoryType,
.prot = static_cast<int>(cmd.memoryProt.prot),
.offset = vmSlot.offset,
.baseAddress = vmSlot.baseAddress,
});
vkBeginCommandBuffer(presentCmdBuffers[imageIndex], &beginInfo);
if (process.vmId >= 0) {
auto memory = amdgpu::RemoteMemory{process.vmId};
rx::mem::protect(memory.getPointer(cmd.memoryProt.address),
cmd.memoryProt.size, cmd.memoryProt.prot >> 4);
device.handleProtectMemory(memory, cmd.mapMemory.address,
cmd.mapMemory.size, cmd.mapMemory.prot);
}
break;
}
case amdgpu::bridge::CommandId::CommandBuffer: {
auto &process = processInfo[cmd.commandBuffer.pid];
if (process.vmId >= 0) {
device.handleCommandBuffer(
amdgpu::RemoteMemory{process.vmId}, cmd.commandBuffer.queue,
cmd.commandBuffer.address, cmd.commandBuffer.size);
}
break;
}
case amdgpu::bridge::CommandId::Flip: {
auto &process = processInfo[cmd.flip.pid];
if (device.handleFlip(
presentQueue, presentCmdBuffers[imageIndex],
*flipTaskChain[imageIndex].get(), cmd.flip.bufferIndex,
cmd.flip.arg, swapchainImages[imageIndex], swapchainExtent,
presentCompleteSemaphore, renderCompleteSemaphore,
inFlightFences[imageIndex])) {
VkPresentInfoKHR presentInfo{
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &renderCompleteSemaphore,
.swapchainCount = 1,
.pSwapchains = &swapchain,
.pImageIndices = &imageIndex,
};
if (vkQueuePresentKHR(presentQueue, &presentInfo) != VK_SUCCESS) {
std::printf("swapchain was invalidated\n");
createSwapchain();
if (process.vmId >= 0) {
if (!isImageAcquired) {
Verify() << vkAcquireNextImageKHR(vkDevice, swapchain, UINT64_MAX,
presentCompleteSemaphore,
nullptr, &imageIndex);
vkWaitForFences(vkDevice, 1, &inFlightFences[imageIndex], VK_TRUE,
UINT64_MAX);
vkResetFences(vkDevice, 1, &inFlightFences[imageIndex]);
}
isImageAcquired = false;
vkResetCommandBuffer(presentCmdBuffers[imageIndex], 0);
VkCommandBufferBeginInfo beginInfo{};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
vkBeginCommandBuffer(presentCmdBuffers[imageIndex], &beginInfo);
if (device.handleFlip(
amdgpu::RemoteMemory{process.vmId}, presentQueue,
presentCmdBuffers[imageIndex],
*flipTaskChain[imageIndex].get(), cmd.flip.bufferIndex,
cmd.flip.arg, swapchainImages[imageIndex], swapchainExtent,
presentCompleteSemaphore, renderCompleteSemaphore,
inFlightFences[imageIndex])) {
VkPresentInfoKHR presentInfo{
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &renderCompleteSemaphore,
.swapchainCount = 1,
.pSwapchains = &swapchain,
.pImageIndices = &imageIndex,
};
if (vkQueuePresentKHR(presentQueue, &presentInfo) != VK_SUCCESS) {
std::printf("swapchain was invalidated\n");
createSwapchain();
}
} else {
isImageAcquired = true;
}
} else {
isImageAcquired = true;
}
break;
}
case amdgpu::bridge::CommandId::MapDmem: {
auto addr = g_hostMemory.getPointer(cmd.mapDmem.address);
auto mapping = ::mmap(addr, cmd.mapDmem.size,
PROT_READ | PROT_WRITE /*TODO: cmd.mapDmem.prot >> 4*/,
MAP_FIXED | MAP_SHARED, dmemFd[cmd.mapDmem.dmemIndex],
cmd.mapDmem.offset);
device.handleProtectMemory(cmd.mapDmem.address, cmd.mapDmem.size, 0x33 /*TODO: cmd.mapDmem.prot*/);
case amdgpu::bridge::CommandId::MapProcess: {
mapProcess(cmd.mapProcess.pid, cmd.mapProcess.vmId, processInfo[cmd.mapProcess.pid]);
break;
}
case amdgpu::bridge::CommandId::UnmapProcess: {
  // Read the pid from the UnmapProcess payload. The previous code read
  // cmd.mapProcess.pid, which only worked because both union members
  // happen to place pid at offset 0.
  unmapProcess(processInfo[cmd.unmapProcess.pid]);
  break;
}
case amdgpu::bridge::CommandId::MapMemory: {
auto &process = processInfo[cmd.mapMemory.pid];
// Record the mapping so it can be replayed later by mapProcess.
process.vmTable.map(
cmd.mapMemory.address, cmd.mapMemory.address + cmd.mapMemory.size,
VmMapSlot{
// The slot's memoryType field stores the dmem index (or -1 for
// shm-backed memory), matching its use as a dmemFd[] index below
// and in mapProcess.
.memoryType = static_cast<int>(cmd.mapMemory.memoryType >= 0
? cmd.mapMemory.dmemIndex
: -1),
.prot = static_cast<int>(cmd.mapMemory.prot),
.offset = cmd.mapMemory.offset,
.baseAddress = cmd.mapMemory.address,
});
// If the process is already attached to a VM slot, materialize the
// mapping immediately; otherwise mapProcess will replay it on attach.
if (process.vmId >= 0) {
auto memory = amdgpu::RemoteMemory{process.vmId};
int mapFd = process.vmFd;
if (cmd.mapMemory.memoryType >= 0) {
mapFd = dmemFd[cmd.mapMemory.dmemIndex];
}
auto mmapResult =
::mmap(memory.getPointer(cmd.mapMemory.address),
cmd.mapMemory.size, cmd.mapMemory.prot >> 4,
MAP_FIXED | MAP_SHARED, mapFd, cmd.mapMemory.offset);
if (mmapResult == MAP_FAILED) {
std::printf(
"failed to map process %x memory, address %lx-%lx, type %x\n",
(int)cmd.mapMemory.pid, cmd.mapMemory.address,
cmd.mapMemory.address + cmd.mapMemory.size,
cmd.mapMemory.memoryType);
std::abort();
}
device.handleProtectMemory(memory, cmd.mapMemory.address,
cmd.mapMemory.size, cmd.mapMemory.prot);
}
break;
}

View file

@ -44,6 +44,12 @@ orbis::ErrorCode DmemDevice::mmap(void **address, std::uint64_t len,
rx::vm::kMapProtGpuAll;
}
auto allocationInfoIt = allocations.queryArea(directMemoryStart);
if (allocationInfoIt == allocations.end()) {
std::abort();
}
auto allocationInfo = *allocationInfoIt;
auto result =
rx::vm::map(*address, len, prot, flags, rx::vm::kMapInternalReserveOnly,
this, directMemoryStart);
@ -60,9 +66,10 @@ orbis::ErrorCode DmemDevice::mmap(void **address, std::uint64_t len,
return orbis::ErrorCode::INVAL;
}
rx::bridge.sendMapDmem(orbis::g_currentThread->tproc->pid, index,
reinterpret_cast<std::uint64_t>(result), len, prot,
directMemoryStart);
rx::bridge.sendMapMemory(orbis::g_currentThread->tproc->pid,
allocationInfo.payload.memoryType, index,
reinterpret_cast<std::uint64_t>(result), len, prot,
directMemoryStart);
*address = result;

View file

@ -10,6 +10,7 @@
#include <cstdio>
#include <mutex>
#include <sys/mman.h>
#include <unordered_map>
struct ComputeQueue {
std::uint64_t ringBaseAddress{};
@ -19,14 +20,104 @@ struct ComputeQueue {
std::uint64_t len{};
};
// Starts a detached worker thread that services GPU cache commands for one
// VM slot: it polls the bridge's per-vmId command mailboxes and applies the
// requested host page-protection changes (write-watch / read-write lock)
// on top of the guest's own protection.
// NOTE(review): the loop busy-spins with no backoff when idle, pinning a
// core per vmId; the thread also never terminates — confirm this is intended.
static void runBridge(int vmId) {
std::thread{[=] {
pthread_setname_np(pthread_self(), "Bridge");
auto bridge = rx::bridge.header;
std::vector<std::uint64_t> fetchedCommands;
fetchedCommands.reserve(std::size(bridge->cacheCommands));
while (true) {
// Drain this vmId's mailbox in each command slot; 0 means "empty".
for (auto &command : bridge->cacheCommands) {
std::uint64_t value = command[vmId].load(std::memory_order::relaxed);
if (value != 0) {
fetchedCommands.push_back(value);
command[vmId].store(0, std::memory_order::relaxed);
}
}
if (fetchedCommands.empty()) {
continue;
}
for (auto command : fetchedCommands) {
// Command encoding: low 32 bits = first page index,
// high 32 bits = page count minus one.
auto page = static_cast<std::uint32_t>(command);
auto count = static_cast<std::uint32_t>(command >> 32) + 1;
auto pageFlags =
bridge->cachePages[vmId][page].load(std::memory_order::relaxed);
auto address =
static_cast<std::uint64_t>(page) * amdgpu::bridge::kHostPageSize;
// Start from the guest's CPU protection for the page...
auto origVmProt = rx::vm::getPageProtection(address);
int prot = 0;
if (origVmProt & rx::vm::kMapProtCpuRead) {
prot |= PROT_READ;
}
if (origVmProt & rx::vm::kMapProtCpuWrite) {
prot |= PROT_WRITE;
}
if (origVmProt & rx::vm::kMapProtCpuExec) {
prot |= PROT_EXEC;
}
// ...then strip access bits per the GPU's cache-tracking request, so
// the next guest access faults and is handled by the signal handler.
if (pageFlags & amdgpu::bridge::kPageReadWriteLock) {
prot &= ~(PROT_READ | PROT_WRITE);
} else if (pageFlags & amdgpu::bridge::kPageWriteWatch) {
prot &= ~PROT_WRITE;
}
// std::fprintf(stderr, "protection %lx-%lx\n", address,
// address + amdgpu::bridge::kHostPageSize * count);
if (::mprotect(reinterpret_cast<void *>(address),
amdgpu::bridge::kHostPageSize * count, prot)) {
perror("protection failed");
std::abort();
}
}
fetchedCommands.clear();
}
}}.detach();
}
static constexpr auto kVmIdCount = 6;
// /dev/gc character device: tracks client processes and hands out the GPU
// VM slots (vmId) that select the per-process views of the bridge shared
// memory.
struct GcDevice : public IoDevice {
  // Bitmask of free VM slots, one bit per id in [0, kVmIdCount).
  // Fixed: was (1 << (kVmIdCount + 1)) - 1, which set kVmIdCount + 1 bits
  // and so advertised one slot more than the bridge's per-vmId arrays hold
  // (allocateVmId only rejected the extra id via its abort path).
  std::uint32_t freeVmIds = (1 << kVmIdCount) - 1;
  orbis::shared_mutex mtx; // guards clients and the vm id pool
  orbis::kmap<orbis::pid_t, int> clients; // pid -> open-handle refcount
  orbis::kmap<std::uint64_t, ComputeQueue> computeQueues;
  orbis::ErrorCode open(orbis::Ref<orbis::File> *file, const char *path,
                        std::uint32_t flags, std::uint32_t mode,
                        orbis::Thread *thread) override;
  void addClient(orbis::Process *process);
  void removeClient(orbis::Process *process);

  // Returns the lowest free vm id; aborts when all slots are taken
  // (countr_zero of 0 is 32 >= kVmIdCount). Caller must hold mtx.
  int allocateVmId() {
    int id = std::countr_zero(freeVmIds);
    if (id >= kVmIdCount) {
      std::fprintf(stderr, "out of vm slots\n");
      std::abort();
    }
    freeVmIds &= ~(1u << id);
    return id;
  }

  // Returns a previously allocated vm id to the pool. Caller must hold mtx.
  void deallocateVmId(int vmId) { freeVmIds |= (1u << vmId); }
};
struct GcFile : public orbis::File {};
// Per-open file object for /dev/gc. When the last reference to the handle
// dies, the owning process's client registration is dropped, which may
// release its VM slot (see GcDevice::removeClient).
struct GcFile : public orbis::File {
  orbis::Process *process = nullptr;

  ~GcFile() {
    // Guard: a GcFile destroyed before open() assigned `process` would
    // otherwise dereference null inside removeClient.
    if (process != nullptr) {
      device.staticCast<GcDevice>()->removeClient(process);
    }
  }
};
static std::uint64_t g_submitDoneFlag;
static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
@ -34,7 +125,7 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
// 0xc00c8110
// 0xc0848119
auto device = static_cast<GcDevice *>(file->device.get());
auto device = file->device.staticCast<GcDevice>();
std::lock_guard lock(device->mtx);
switch (request) {
@ -55,7 +146,7 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
// flockfile(stderr);
// if (thread->tproc->pid != amdgpu::bridge::expGpuPid) {
// ORBIS_LOG_ERROR("gc ioctl submit", args->arg0, args->count, args->cmds);
// ORBIS_LOG_ERROR("gc ioctl submit", args->arg0, args->count, args->cmds);
// }
for (unsigned i = 0; i < args->count; ++i) {
@ -172,14 +263,20 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
break;
}
case 0xc010810b: { // something like stats masks?
case 0xc010810b: { // get cu masks param
struct Args {
std::uint64_t arg1;
std::uint64_t arg2;
std::uint32_t se0sh0;
std::uint32_t se0sh1;
std::uint32_t se1sh0;
std::uint32_t se1sh1;
};
auto args = reinterpret_cast<Args *>(argp);
ORBIS_LOG_ERROR("gc ioctl stats mask", args->arg1, args->arg2);
// ORBIS_LOG_ERROR("gc ioctl stats mask", args->arg1, args->arg2);
args->se0sh0 = ~0;
args->se0sh1 = ~0;
args->se1sh0 = ~0;
args->se1sh1 = ~0;
break;
}
@ -265,8 +362,14 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
}
case 0xc0048113: {
// get client number
*(std::uint32_t *)argp = 0;
// get num clients
struct Args {
std::uint32_t numClients;
};
auto *args = reinterpret_cast<Args *>(argp);
args->numClients = device->clients.size();
break;
}
@ -312,8 +415,38 @@ orbis::ErrorCode GcDevice::open(orbis::Ref<orbis::File> *file, const char *path,
auto newFile = orbis::knew<GcFile>();
newFile->device = this;
newFile->ops = &ops;
newFile->process = thread->tproc;
addClient(thread->tproc);
*file = newFile;
return {};
}
// Registers an open /dev/gc handle for `process`. The first handle of a
// process allocates a VM slot, announces it to the GPU process, and starts
// the per-slot bridge worker thread.
// NOTE(review): runBridge threads never terminate, so when a vm id is
// reused after deallocateVmId a second worker for the same slot is spawned
// alongside the old one — verify this is handled elsewhere.
void GcDevice::addClient(orbis::Process *process) {
std::lock_guard lock(mtx);
auto &client = clients[process->pid]; // value-initialized to 0 on first use
++client;
if (client == 1) {
auto vmId = allocateVmId();
rx::bridge.sendMapProcess(process->pid, vmId);
process->vmId = vmId;
runBridge(vmId);
}
}
// Drops one /dev/gc handle reference for `process`. When the last handle
// goes away the process is unregistered, the GPU process is told to unmap
// it, and its VM slot returns to the pool.
void GcDevice::removeClient(orbis::Process *process) {
std::lock_guard lock(mtx);
auto clientIt = clients.find(process->pid);
assert(clientIt != clients.end());
assert(clientIt->second != 0);
--clientIt->second;
if (clientIt->second == 0) {
clients.erase(clientIt);
rx::bridge.sendUnmapProcess(process->pid);
deallocateVmId(process->vmId);
process->vmId = -1;
}
}
IoDevice *createGcCharacterDevice() { return orbis::knew<GcDevice>(); }

View file

@ -41,71 +41,6 @@
#include <unordered_map>
static int g_gpuPid;
void runBridge() {
std::thread{[] {
pthread_setname_np(pthread_self(), "Bridge");
auto bridge = rx::bridge.header;
std::vector<std::uint64_t> fetchedCommands;
fetchedCommands.reserve(std::size(bridge->cacheCommands));
while (true) {
for (auto &command : bridge->cacheCommands) {
std::uint64_t value = command.load(std::memory_order::relaxed);
if (value != 0) {
fetchedCommands.push_back(value);
command.store(0, std::memory_order::relaxed);
}
}
if (fetchedCommands.empty()) {
continue;
}
for (auto command : fetchedCommands) {
auto page = static_cast<std::uint32_t>(command);
auto count = static_cast<std::uint32_t>(command >> 32) + 1;
auto pageFlags =
bridge->cachePages[page].load(std::memory_order::relaxed);
auto address =
static_cast<std::uint64_t>(page) * amdgpu::bridge::kHostPageSize;
auto origVmProt = rx::vm::getPageProtection(address);
int prot = 0;
if (origVmProt & rx::vm::kMapProtCpuRead) {
prot |= PROT_READ;
}
if (origVmProt & rx::vm::kMapProtCpuWrite) {
prot |= PROT_WRITE;
}
if (origVmProt & rx::vm::kMapProtCpuExec) {
prot |= PROT_EXEC;
}
if (pageFlags & amdgpu::bridge::kPageReadWriteLock) {
prot &= ~(PROT_READ | PROT_WRITE);
} else if (pageFlags & amdgpu::bridge::kPageWriteWatch) {
prot &= ~PROT_WRITE;
}
// std::fprintf(stderr, "protection %lx-%lx\n", address,
// address + amdgpu::bridge::kHostPageSize * count);
if (::mprotect(reinterpret_cast<void *>(address),
amdgpu::bridge::kHostPageSize * count, prot)) {
perror("protection failed");
std::abort();
}
}
fetchedCommands.clear();
}
}}.detach();
}
extern bool allowMonoDebug;
__attribute__((no_stack_protector)) static void
@ -116,8 +51,9 @@ handle_signal(int sig, siginfo_t *info, void *ucontext) {
auto signalAddress = reinterpret_cast<std::uintptr_t>(info->si_addr);
if (orbis::g_currentThread != nullptr && sig == SIGSEGV &&
if (orbis::g_currentThread != nullptr && orbis::g_currentThread->tproc->vmId >= 0 && sig == SIGSEGV &&
signalAddress >= 0x40000 && signalAddress < 0x100'0000'0000) {
auto vmid = orbis::g_currentThread->tproc->vmId;
auto ctx = reinterpret_cast<ucontext_t *>(ucontext);
bool isWrite = (ctx->uc_mcontext.gregs[REG_ERR] & 0x2) != 0;
auto origVmProt = rx::vm::getPageProtection(signalAddress);
@ -138,17 +74,17 @@ handle_signal(int sig, siginfo_t *info, void *ucontext) {
auto bridge = rx::bridge.header;
while (true) {
auto flags = bridge->cachePages[page].load(std::memory_order::relaxed);
auto flags = bridge->cachePages[vmid][page].load(std::memory_order::relaxed);
if ((flags & amdgpu::bridge::kPageReadWriteLock) != 0) {
if ((flags & amdgpu::bridge::kPageLazyLock) != 0) {
if (std::uint32_t gpuCommand = 0;
!bridge->gpuCacheCommand.compare_exchange_weak(gpuCommand,
!bridge->gpuCacheCommand[vmid].compare_exchange_weak(gpuCommand,
page)) {
continue;
}
while (!bridge->cachePages[page].compare_exchange_weak(
while (!bridge->cachePages[vmid][page].compare_exchange_weak(
flags, flags & ~amdgpu::bridge::kPageLazyLock,
std::memory_order::relaxed)) {
}
@ -165,7 +101,7 @@ handle_signal(int sig, siginfo_t *info, void *ucontext) {
break;
}
if (bridge->cachePages[page].compare_exchange_weak(
if (bridge->cachePages[vmid][page].compare_exchange_weak(
flags, amdgpu::bridge::kPageInvalidated,
std::memory_order::relaxed)) {
break;
@ -188,6 +124,7 @@ handle_signal(int sig, siginfo_t *info, void *ucontext) {
}
if (orbis::g_currentThread != nullptr) {
orbis::g_currentThread->tproc->exitStatus = sig;
orbis::g_currentThread->tproc->event.emit(orbis::kEvFiltProc,
orbis::kNoteExit, sig);
}
@ -1640,29 +1577,34 @@ int main(int argc, const char *argv[]) {
};
if (isSystem) {
amdgpu::bridge::expGpuPid = isSafeMode ? 20001 : 60001;
orbis::g_context.safeMode = isSafeMode ? 1 : 0;
initProcess->authInfo = {
.unk0 = 0x380000000000000f,
.caps =
{
-1ul,
-1ul,
-1ul,
-1ul,
},
.attrs =
{
0x4000400040000000,
0x4000000000000000,
0x0080000000000002,
0xF0000000FFFF4000,
},
};
initProcess->authInfo = {.unk0 = 0x380000000000000f,
.caps =
{
-1ul,
-1ul,
-1ul,
-1ul,
},
.attrs =
{
0x4000400040000000,
0x4000000000000000,
0x0080000000000002,
0xF0000000FFFF4000,
},
.ucred = {
-1ul,
-1ul,
0x3800000000000022,
-1ul,
(1ul << 0x3a),
-1ul,
-1ul,
}};
initProcess->budgetId = 0;
initProcess->isInSandbox = false;
} else {
amdgpu::bridge::expGpuPid = initProcess->pid;
initProcess->authInfo = {
.unk0 = 0x3100000000000001,
.caps =
@ -1788,7 +1730,6 @@ int main(int argc, const char *argv[]) {
launchDaemon(mainThread, "/system/sys/orbis_audiod.elf",
{"/system/sys/orbis_audiod.elf"}, {});
runBridge();
status = ps4Exec(mainThread, execEnv, std::move(executableModule),
ps4Argv, {});
}

View file

@ -43,7 +43,6 @@ using namespace orbis;
extern bool allowMonoDebug;
extern "C" void __register_frame(const void *);
void runBridge();
void setupSigHandlers();
int ps4Exec(orbis::Thread *mainThread,
orbis::utils::Ref<orbis::Module> executableModule,
@ -828,9 +827,6 @@ SysResult fork(Thread *thread, slong flags) {
dup2(logFd, 1);
dup2(logFd, 2);
if (childPid == amdgpu::bridge::expGpuPid) {
runBridge();
}
return {};
}

View file

@ -958,11 +958,8 @@ void *rx::vm::map(void *addr, std::uint64_t len, std::int32_t prot,
}
if (auto thr = orbis::g_currentThread) {
// std::fprintf(stderr, "sending mapping %lx-%lx, pid %lx\n", address,
// address + len, thr->tproc->pid);
// if (!noOverwrite) {
// rx::bridge.sendMemoryProtect(thr->tproc->pid, address, len, prot);
// }
rx::bridge.sendMapMemory(thr->tproc->pid, -1, -1, address, len, prot,
address - kMinAddress);
} else {
std::fprintf(stderr, "ignoring mapping %lx-%lx\n", address, address + len);
}