mirror of
https://github.com/RPCSX/rpcsx.git
synced 2025-12-06 07:12:14 +01:00
rpcsx-gpu: add multiprocess support
This commit is contained in:
parent
f77376c1e3
commit
2c781626d3
|
|
@ -1,14 +1,12 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <orbis/utils/SharedMutex.hpp>
|
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <initializer_list>
|
#include <initializer_list>
|
||||||
|
#include <orbis/utils/SharedMutex.hpp>
|
||||||
|
|
||||||
namespace amdgpu::bridge {
|
namespace amdgpu::bridge {
|
||||||
extern std::uint32_t expGpuPid;
|
|
||||||
|
|
||||||
struct PadState {
|
struct PadState {
|
||||||
std::uint64_t timestamp;
|
std::uint64_t timestamp;
|
||||||
std::uint32_t unk;
|
std::uint32_t unk;
|
||||||
|
|
@ -47,7 +45,9 @@ enum class CommandId : std::uint32_t {
|
||||||
ProtectMemory,
|
ProtectMemory,
|
||||||
CommandBuffer,
|
CommandBuffer,
|
||||||
Flip,
|
Flip,
|
||||||
MapDmem,
|
MapMemory,
|
||||||
|
MapProcess,
|
||||||
|
UnmapProcess,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct CmdMemoryProt {
|
struct CmdMemoryProt {
|
||||||
|
|
@ -79,15 +79,25 @@ struct CmdFlip {
|
||||||
std::uint64_t arg;
|
std::uint64_t arg;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct CmdMapDmem {
|
struct CmdMapMemory {
|
||||||
std::uint64_t offset;
|
std::int64_t offset;
|
||||||
std::uint64_t address;
|
std::uint64_t address;
|
||||||
std::uint64_t size;
|
std::uint64_t size;
|
||||||
std::uint32_t prot;
|
std::uint32_t prot;
|
||||||
std::uint32_t pid;
|
std::uint32_t pid;
|
||||||
|
std::int32_t memoryType;
|
||||||
std::uint32_t dmemIndex;
|
std::uint32_t dmemIndex;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Payload of CommandId::MapProcess: asks the GPU side to attach a process to
// a GPU address-space slot.
struct CmdMapProcess {
|
||||||
|
// Identifier of the process being attached (filled from args[0] when the
// command is decoded).
std::uint64_t pid;
|
||||||
|
// Address-space slot assigned to the process (filled from args[1]).
int vmId;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Payload of CommandId::UnmapProcess: asks the GPU side to detach a process.
struct CmdUnmapProcess {
|
||||||
|
// Identifier of the process being detached (filled from args[0] when the
// command is decoded).
std::uint64_t pid;
|
||||||
|
};
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
kPageWriteWatch = 1 << 0,
|
kPageWriteWatch = 1 << 0,
|
||||||
kPageReadWriteLock = 1 << 1,
|
kPageReadWriteLock = 1 << 1,
|
||||||
|
|
@ -112,17 +122,15 @@ struct BridgeHeader {
|
||||||
volatile std::uint64_t flipArg;
|
volatile std::uint64_t flipArg;
|
||||||
volatile std::uint64_t flipCount;
|
volatile std::uint64_t flipCount;
|
||||||
volatile std::uint64_t bufferInUseAddress;
|
volatile std::uint64_t bufferInUseAddress;
|
||||||
std::uint32_t memoryAreaCount;
|
|
||||||
std::uint32_t commandBufferCount;
|
std::uint32_t commandBufferCount;
|
||||||
std::uint32_t bufferCount;
|
std::uint32_t bufferCount;
|
||||||
CmdMemoryProt memoryAreas[512];
|
|
||||||
CmdCommandBuffer commandBuffers[32];
|
CmdCommandBuffer commandBuffers[32];
|
||||||
CmdBuffer buffers[10];
|
CmdBuffer buffers[10];
|
||||||
// orbis::shared_mutex cacheCommandMtx;
|
// orbis::shared_mutex cacheCommandMtx;
|
||||||
// orbis::shared_cv cacheCommandCv;
|
// orbis::shared_cv cacheCommandCv;
|
||||||
std::atomic<std::uint64_t> cacheCommands[4];
|
std::atomic<std::uint64_t> cacheCommands[6][4];
|
||||||
std::atomic<std::uint32_t> gpuCacheCommand;
|
std::atomic<std::uint32_t> gpuCacheCommand[6];
|
||||||
std::atomic<std::uint8_t> cachePages[0x100'0000'0000 / kHostPageSize];
|
std::atomic<std::uint8_t> cachePages[6][0x100'0000'0000 / kHostPageSize];
|
||||||
|
|
||||||
volatile std::uint64_t pull;
|
volatile std::uint64_t pull;
|
||||||
volatile std::uint64_t push;
|
volatile std::uint64_t push;
|
||||||
|
|
@ -137,7 +145,9 @@ struct Command {
|
||||||
CmdCommandBuffer commandBuffer;
|
CmdCommandBuffer commandBuffer;
|
||||||
CmdBuffer buffer;
|
CmdBuffer buffer;
|
||||||
CmdFlip flip;
|
CmdFlip flip;
|
||||||
CmdMapDmem mapDmem;
|
CmdMapMemory mapMemory;
|
||||||
|
CmdMapProcess mapProcess;
|
||||||
|
CmdUnmapProcess unmapProcess;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -160,29 +170,32 @@ struct BridgePusher {
|
||||||
|
|
||||||
void sendMemoryProtect(std::uint32_t pid, std::uint64_t address,
|
void sendMemoryProtect(std::uint32_t pid, std::uint64_t address,
|
||||||
std::uint64_t size, std::uint32_t prot) {
|
std::uint64_t size, std::uint32_t prot) {
|
||||||
if (pid == expGpuPid) {
|
sendCommand(CommandId::ProtectMemory, {pid, address, size, prot});
|
||||||
sendCommand(CommandId::ProtectMemory, {pid, address, size, prot});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void sendMapDmem(std::uint32_t pid, std::uint32_t dmemIndex, std::uint64_t address, std::uint64_t size, std::uint32_t prot, std::uint64_t offset) {
|
void sendMapMemory(std::uint32_t pid, std::uint32_t memoryType,
|
||||||
// if (pid == expGpuPid) {
|
std::uint32_t dmemIndex, std::uint64_t address,
|
||||||
sendCommand(CommandId::MapDmem, {pid, dmemIndex, address, size, prot, offset});
|
std::uint64_t size, std::uint32_t prot,
|
||||||
// }
|
std::uint64_t offset) {
|
||||||
|
sendCommand(CommandId::MapMemory,
|
||||||
|
{pid, memoryType, dmemIndex, address, size, prot, offset});
|
||||||
}
|
}
|
||||||
|
|
||||||
void sendCommandBuffer(std::uint32_t pid, std::uint64_t queue,
|
void sendCommandBuffer(std::uint32_t pid, std::uint64_t queue,
|
||||||
std::uint64_t address, std::uint64_t size) {
|
std::uint64_t address, std::uint64_t size) {
|
||||||
// if (pid == expGpuPid) {
|
sendCommand(CommandId::CommandBuffer, {pid, queue, address, size});
|
||||||
sendCommand(CommandId::CommandBuffer, {pid, queue, address, size});
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void sendFlip(std::uint32_t pid, std::uint32_t bufferIndex,
|
void sendFlip(std::uint32_t pid, std::uint32_t bufferIndex,
|
||||||
std::uint64_t arg) {
|
std::uint64_t arg) {
|
||||||
// if (pid == expGpuPid) {
|
sendCommand(CommandId::Flip, {pid, bufferIndex, arg});
|
||||||
sendCommand(CommandId::Flip, {pid, bufferIndex, arg});
|
}
|
||||||
// }
|
|
||||||
|
// Queues a CommandId::MapProcess command carrying {pid, vmId}; both values are
// widened to std::uint64_t by the initializer list that sendCommand() takes.
void sendMapProcess(std::uint32_t pid, unsigned vmId) {
|
||||||
|
sendCommand(CommandId::MapProcess, {pid, vmId});
|
||||||
|
}
|
||||||
|
void sendUnmapProcess(std::uint32_t pid) {
|
||||||
|
sendCommand(CommandId::UnmapProcess, {pid});
|
||||||
}
|
}
|
||||||
|
|
||||||
void wait() {
|
void wait() {
|
||||||
|
|
@ -198,7 +211,8 @@ private:
|
||||||
|
|
||||||
void sendCommand(CommandId id, std::initializer_list<std::uint64_t> args) {
|
void sendCommand(CommandId id, std::initializer_list<std::uint64_t> args) {
|
||||||
std::uint64_t exp = 0;
|
std::uint64_t exp = 0;
|
||||||
while (!header->lock.compare_exchange_weak(exp, 1, std::memory_order::acquire, std::memory_order::relaxed)) {
|
while (!header->lock.compare_exchange_weak(
|
||||||
|
exp, 1, std::memory_order::acquire, std::memory_order::relaxed)) {
|
||||||
exp = 0;
|
exp = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -303,13 +317,23 @@ private:
|
||||||
result.flip.arg = args[2];
|
result.flip.arg = args[2];
|
||||||
return result;
|
return result;
|
||||||
|
|
||||||
case CommandId::MapDmem:
|
case CommandId::MapMemory:
|
||||||
result.mapDmem.pid = args[0];
|
result.mapMemory.pid = args[0];
|
||||||
result.mapDmem.dmemIndex = args[1];
|
result.mapMemory.memoryType = args[1];
|
||||||
result.mapDmem.address = args[2];
|
result.mapMemory.dmemIndex = args[2];
|
||||||
result.mapDmem.size = args[3];
|
result.mapMemory.address = args[3];
|
||||||
result.mapDmem.prot = args[4];
|
result.mapMemory.size = args[4];
|
||||||
result.mapDmem.offset = args[5];
|
result.mapMemory.prot = args[5];
|
||||||
|
result.mapMemory.offset = args[6];
|
||||||
|
return result;
|
||||||
|
|
||||||
|
case CommandId::MapProcess:
|
||||||
|
result.mapProcess.pid = args[0];
|
||||||
|
result.mapProcess.vmId = args[1];
|
||||||
|
return result;
|
||||||
|
|
||||||
|
case CommandId::UnmapProcess:
|
||||||
|
result.unmapProcess.pid = args[0];
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,8 +8,6 @@
|
||||||
static int gShmFd = -1;
|
static int gShmFd = -1;
|
||||||
static constexpr std::size_t kShmSize = sizeof(amdgpu::bridge::BridgeHeader) +
|
static constexpr std::size_t kShmSize = sizeof(amdgpu::bridge::BridgeHeader) +
|
||||||
(sizeof(std::uint64_t) * 256);
|
(sizeof(std::uint64_t) * 256);
|
||||||
std::uint32_t amdgpu::bridge::expGpuPid = 0;
|
|
||||||
|
|
||||||
amdgpu::bridge::BridgeHeader *
|
amdgpu::bridge::BridgeHeader *
|
||||||
amdgpu::bridge::createShmCommandBuffer(const char *name) {
|
amdgpu::bridge::createShmCommandBuffer(const char *name) {
|
||||||
if (gShmFd != -1) {
|
if (gShmFd != -1) {
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include "amdgpu/RemoteMemory.hpp"
|
||||||
#include "amdgpu/bridge/bridge.hpp"
|
#include "amdgpu/bridge/bridge.hpp"
|
||||||
#include "amdgpu/shader/Instruction.hpp"
|
#include "amdgpu/shader/Instruction.hpp"
|
||||||
#include "gpu-scheduler.hpp"
|
#include "gpu-scheduler.hpp"
|
||||||
|
|
@ -1259,6 +1260,42 @@ struct GnmTBuffer {
|
||||||
|
|
||||||
static_assert(sizeof(GnmTBuffer) == sizeof(std::uint64_t) * 4);
|
static_assert(sizeof(GnmTBuffer) == sizeof(std::uint64_t) * 4);
|
||||||
|
|
||||||
|
// Bit-field view of a sampler descriptor. The declared widths add up to four
// groups of 32 bits each (128 bits total); the group boundaries are marked
// below. All fields are declared with the signed type int32_t.
struct GnmSSampler {
|
||||||
|
// ---- first group: widths below sum to 32 bits ----
int32_t clamp_x : 3;
|
||||||
|
int32_t clamp_y : 3;
|
||||||
|
int32_t clamp_z : 3;
|
||||||
|
int32_t max_aniso_ratio : 3;
|
||||||
|
int32_t depth_compare_func : 3;
|
||||||
|
int32_t force_unorm_coords : 1;
|
||||||
|
int32_t aniso_threshold : 3;
|
||||||
|
int32_t mc_coord_trunc : 1;
|
||||||
|
int32_t force_degamma : 1;
|
||||||
|
int32_t aniso_bias : 6;
|
||||||
|
int32_t trunc_coord : 1;
|
||||||
|
int32_t disable_cube_wrap : 1;
|
||||||
|
int32_t filter_mode : 2;
|
||||||
|
// Unnamed 1-bit padding that completes the first group.
int32_t : 1;
|
||||||
|
// ---- second group: 12 + 12 + 4 + 4 = 32 bits ----
int32_t min_lod : 12;
|
||||||
|
int32_t max_lod : 12;
|
||||||
|
int32_t perf_mip : 4;
|
||||||
|
int32_t perf_z : 4;
|
||||||
|
// ---- third group: 14 + 6 + 2 + 2 + 2 + 2 + 4 = 32 bits ----
int32_t lod_bias : 14;
|
||||||
|
int32_t lod_bias_sec : 6;
|
||||||
|
int32_t xy_mag_filter : 2;
|
||||||
|
int32_t xy_min_filter : 2;
|
||||||
|
int32_t z_filter : 2;
|
||||||
|
int32_t mip_filter : 2;
|
||||||
|
// Unnamed 4-bit padding that completes the third group.
int32_t : 4;
|
||||||
|
// ---- fourth group: 12 + 18 + 2 = 32 bits ----
int32_t border_color_ptr : 12;
|
||||||
|
// Unnamed 18-bit padding between border_color_ptr and border_color_type.
int32_t : 18;
|
||||||
|
int32_t border_color_type : 2;
|
||||||
|
||||||
|
|
||||||
|
// Member-wise ordering and equality over the named fields.
// NOTE(review): a defaulted operator<=> already implies a defaulted
// operator==, so the explicit declaration below looks redundant (harmless).
auto operator<=>(const GnmSSampler &) const = default;
|
||||||
|
bool operator==(const GnmSSampler &) const = default;
|
||||||
|
};
|
||||||
|
|
||||||
|
static_assert(sizeof(GnmSSampler) == sizeof(std::uint32_t) * 4);
|
||||||
|
|
||||||
constexpr auto kPageSize = 0x4000;
|
constexpr auto kPageSize = 0x4000;
|
||||||
|
|
||||||
void setVkDevice(VkDevice device,
|
void setVkDevice(VkDevice device,
|
||||||
|
|
@ -1266,11 +1303,11 @@ void setVkDevice(VkDevice device,
|
||||||
VkPhysicalDeviceProperties devProperties);
|
VkPhysicalDeviceProperties devProperties);
|
||||||
|
|
||||||
struct AmdgpuDevice {
|
struct AmdgpuDevice {
|
||||||
void handleProtectMemory(std::uint64_t address, std::uint64_t size,
|
void handleProtectMemory(RemoteMemory memory, std::uint64_t address,
|
||||||
std::uint32_t prot);
|
std::uint64_t size, std::uint32_t prot);
|
||||||
void handleCommandBuffer(std::uint64_t queueId, std::uint64_t address,
|
void handleCommandBuffer(RemoteMemory memory, std::uint64_t queueId,
|
||||||
std::uint64_t size);
|
std::uint64_t address, std::uint64_t size);
|
||||||
bool handleFlip(VkQueue queue, VkCommandBuffer cmdBuffer,
|
bool handleFlip(RemoteMemory memory, VkQueue queue, VkCommandBuffer cmdBuffer,
|
||||||
TaskChain &initTaskChain, std::uint32_t bufferIndex,
|
TaskChain &initTaskChain, std::uint32_t bufferIndex,
|
||||||
std::uint64_t arg, VkImage targetImage,
|
std::uint64_t arg, VkImage targetImage,
|
||||||
VkExtent2D targetExtent, VkSemaphore waitSemaphore,
|
VkExtent2D targetExtent, VkSemaphore waitSemaphore,
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -3,10 +3,11 @@
|
||||||
|
|
||||||
namespace amdgpu {
|
namespace amdgpu {
|
||||||
struct RemoteMemory {
|
struct RemoteMemory {
|
||||||
char *shmPointer;
|
int vmId;
|
||||||
|
|
||||||
template <typename T = void> T *getPointer(std::uint64_t address) const {
|
template <typename T = void> T *getPointer(std::uint64_t address) const {
|
||||||
return address ? reinterpret_cast<T *>(shmPointer + address - 0x40000)
|
return address ? reinterpret_cast<T *>(
|
||||||
|
static_cast<std::uint64_t>(vmId) << 40 | address)
|
||||||
: nullptr;
|
: nullptr;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,14 @@ struct AuthInfo {
|
||||||
uint64_t unk0;
|
uint64_t unk0;
|
||||||
uint64_t caps[4];
|
uint64_t caps[4];
|
||||||
uint64_t attrs[4];
|
uint64_t attrs[4];
|
||||||
uint64_t unk[8];
|
uint64_t ucred[8];
|
||||||
|
|
||||||
|
// Returns true when ucred[2] is exactly 0x3800000000000009.
// NOTE(review): the meaning of this constant is not visible here — presumably
// an identifier of the holder that is granted the capability; confirm.
bool hasUseHp3dPipeCapability() const {
|
||||||
|
return ucred[2] == 0x3800000000000009;
|
||||||
|
}
|
||||||
|
// Returns true when bit 0x3a (58) of ucred[4] is NOT set.
// NOTE(review): the polarity is the opposite of hasSystemCapability() and
// hasSceProgramAttribute(), which return true for a set bit — confirm that
// the `!= 1` comparison is intentional and not a typo for `!= 0`.
bool hasMmapSelfCapability() const { return ((ucred[4] >> 0x3a) & 1) != 1; }
|
||||||
|
// Returns true when bit 0x3e (62) of ucred[3] is set.
bool hasSystemCapability() const { return ((ucred[3] >> 0x3e) & 1) != 0; }
|
||||||
|
// Returns true when bit 0x1f (31) of ucred[3] is set.
bool hasSceProgramAttribute() const { return ((ucred[3] >> 0x1f) & 1) != 0; }
|
||||||
};
|
};
|
||||||
|
|
||||||
static_assert(sizeof(AuthInfo) == 136);
|
static_assert(sizeof(AuthInfo) == 136);
|
||||||
|
|
|
||||||
|
|
@ -51,6 +51,7 @@ struct Process final {
|
||||||
ProcessState state = ProcessState::NEW;
|
ProcessState state = ProcessState::NEW;
|
||||||
Process *parentProcess = nullptr;
|
Process *parentProcess = nullptr;
|
||||||
shared_mutex mtx;
|
shared_mutex mtx;
|
||||||
|
int vmId = -1;
|
||||||
void (*onSysEnter)(Thread *thread, int id, uint64_t *args,
|
void (*onSysEnter)(Thread *thread, int id, uint64_t *args,
|
||||||
int argsCount) = nullptr;
|
int argsCount) = nullptr;
|
||||||
void (*onSysExit)(Thread *thread, int id, uint64_t *args, int argsCount,
|
void (*onSysExit)(Thread *thread, int id, uint64_t *args, int argsCount,
|
||||||
|
|
|
||||||
|
|
@ -8,4 +8,5 @@ add_executable(rpcsx-gpu
|
||||||
target_include_directories(rpcsx-gpu PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
target_include_directories(rpcsx-gpu PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
target_link_libraries(rpcsx-gpu PUBLIC amdgpu::bridge amdgpu::device glfw Vulkan::Vulkan rx)
|
target_link_libraries(rpcsx-gpu PUBLIC amdgpu::bridge amdgpu::device glfw Vulkan::Vulkan rx)
|
||||||
set_target_properties(rpcsx-gpu PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
set_target_properties(rpcsx-gpu PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||||
|
# Place the rpcsx-gpu image at 0x60000000000. main() reserves the range
# [0x40000, 0x60000000000) for guest address spaces, so the executable itself
# must be linked above it. The option has to be attached to the target defined
# in this file (rpcsx-gpu), not to rpcsx-os.
target_link_options(rpcsx-gpu PUBLIC "LINKER:-Ttext-segment,0x0000060000000000")
|
||||||
install(TARGETS rpcsx-gpu RUNTIME DESTINATION bin)
|
install(TARGETS rpcsx-gpu RUNTIME DESTINATION bin)
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,9 @@
|
||||||
#include "amdgpu/RemoteMemory.hpp"
|
#include "amdgpu/RemoteMemory.hpp"
|
||||||
#include "amdgpu/device/gpu-scheduler.hpp"
|
#include "amdgpu/device/gpu-scheduler.hpp"
|
||||||
#include "amdgpu/device/vk.hpp"
|
#include "amdgpu/device/vk.hpp"
|
||||||
|
#include "rx/MemoryTable.hpp"
|
||||||
#include "rx/Version.hpp"
|
#include "rx/Version.hpp"
|
||||||
|
#include "rx/mem.hpp"
|
||||||
#include "util/unreachable.hpp"
|
#include "util/unreachable.hpp"
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <amdgpu/bridge/bridge.hpp>
|
#include <amdgpu/bridge/bridge.hpp>
|
||||||
|
|
@ -16,18 +18,14 @@
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <unordered_set>
|
||||||
#include <util/VerifyVulkan.hpp>
|
#include <util/VerifyVulkan.hpp>
|
||||||
#include <vulkan/vulkan.h>
|
#include <vulkan/vulkan.h>
|
||||||
#include <vulkan/vulkan_core.h>
|
#include <vulkan/vulkan_core.h>
|
||||||
|
|
||||||
#include <GLFW/glfw3.h> // TODO: make in optional
|
#include <GLFW/glfw3.h> // TODO: make in optional
|
||||||
|
|
||||||
// TODO
|
|
||||||
// extern void *g_rwMemory;
|
|
||||||
extern std::size_t g_memorySize;
|
|
||||||
extern std::uint64_t g_memoryBase;
|
|
||||||
extern amdgpu::RemoteMemory g_hostMemory;
|
|
||||||
|
|
||||||
static void usage(std::FILE *out, const char *argv0) {
|
static void usage(std::FILE *out, const char *argv0) {
|
||||||
std::fprintf(out, "usage: %s [options...]\n", argv0);
|
std::fprintf(out, "usage: %s [options...]\n", argv0);
|
||||||
std::fprintf(out, " options:\n");
|
std::fprintf(out, " options:\n");
|
||||||
|
|
@ -159,6 +157,11 @@ int main(int argc, const char *argv[]) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!rx::mem::reserve((void *)0x40000, 0x60000000000 - 0x40000)) {
|
||||||
|
std::fprintf(stderr, "failed to reserve virtual memory\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
glfwInit();
|
glfwInit();
|
||||||
glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API);
|
glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API);
|
||||||
auto window = glfwCreateWindow(1280, 720, "RPCSX", nullptr, nullptr);
|
auto window = glfwCreateWindow(1280, 720, "RPCSX", nullptr, nullptr);
|
||||||
|
|
@ -725,20 +728,6 @@ int main(int argc, const char *argv[]) {
|
||||||
amdgpu::bridge::BridgePuller bridgePuller{bridge};
|
amdgpu::bridge::BridgePuller bridgePuller{bridge};
|
||||||
amdgpu::bridge::Command commandsBuffer[1];
|
amdgpu::bridge::Command commandsBuffer[1];
|
||||||
|
|
||||||
if (!std::filesystem::exists(std::string("/dev/shm") + shmName)) {
|
|
||||||
std::printf("Waiting for OS\n");
|
|
||||||
while (!std::filesystem::exists(std::string("/dev/shm") + shmName)) {
|
|
||||||
std::this_thread::sleep_for(std::chrono::milliseconds(300));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int memoryFd = ::shm_open(shmName, O_RDWR, S_IRUSR | S_IWUSR);
|
|
||||||
|
|
||||||
if (memoryFd < 0) {
|
|
||||||
std::printf("failed to open shared memory\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int dmemFd[3];
|
int dmemFd[3];
|
||||||
|
|
||||||
for (std::size_t i = 0; i < std::size(dmemFd); ++i) {
|
for (std::size_t i = 0; i < std::size(dmemFd); ++i) {
|
||||||
|
|
@ -759,26 +748,80 @@ int main(int argc, const char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct stat memoryStat;
|
|
||||||
::fstat(memoryFd, &memoryStat);
|
|
||||||
amdgpu::RemoteMemory memory{(char *)::mmap(
|
|
||||||
nullptr, memoryStat.st_size, PROT_NONE, MAP_SHARED, memoryFd, 0)};
|
|
||||||
|
|
||||||
// extern void *g_rwMemory;
|
|
||||||
g_memorySize = memoryStat.st_size;
|
|
||||||
g_memoryBase = 0x40000;
|
|
||||||
// g_rwMemory = ::mmap(nullptr, g_memorySize, PROT_READ | PROT_WRITE, MAP_SHARED,
|
|
||||||
// memoryFd, 0);
|
|
||||||
|
|
||||||
g_hostMemory = memory;
|
|
||||||
|
|
||||||
{
|
{
|
||||||
amdgpu::device::AmdgpuDevice device(bridgePuller.header);
|
amdgpu::device::AmdgpuDevice device(bridgePuller.header);
|
||||||
|
|
||||||
for (std::uint32_t end = bridge->memoryAreaCount, i = 0; i < end; ++i) {
|
struct VmMapSlot {
|
||||||
auto area = bridge->memoryAreas[i];
|
int memoryType;
|
||||||
device.handleProtectMemory(area.address, area.size, area.prot);
|
int prot;
|
||||||
}
|
std::int64_t offset;
|
||||||
|
std::uint64_t baseAddress;
|
||||||
|
|
||||||
|
auto operator<=>(const VmMapSlot &) const = default;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Per-process bookkeeping kept by the GPU host for every pid it has seen.
struct ProcessInfo {
|
||||||
|
// GPU address-space slot the process is attached to; -1 while unattached.
int vmId = -1;
|
||||||
|
// Descriptor of the process' shared memory object; -1 while not opened.
int vmFd = -1;
|
||||||
|
// Address ranges recorded for the process, each tagged with a VmMapSlot.
rx::MemoryTableWithPayload<VmMapSlot> vmTable;
|
||||||
|
};
|
||||||
|
|
||||||
|
auto mapProcess = [&](std::int64_t pid, int vmId, ProcessInfo &process) {
|
||||||
|
process.vmId = vmId;
|
||||||
|
|
||||||
|
auto memory = amdgpu::RemoteMemory{vmId};
|
||||||
|
|
||||||
|
std::string pidVmName = shmName;
|
||||||
|
pidVmName += '-';
|
||||||
|
pidVmName += std::to_string(pid);
|
||||||
|
int memoryFd = ::shm_open(pidVmName.c_str(), O_RDWR, S_IRUSR | S_IWUSR);
|
||||||
|
process.vmFd = memoryFd;
|
||||||
|
|
||||||
|
if (memoryFd < 0) {
|
||||||
|
std::printf("failed to process %x shared memory\n", (int)pid);
|
||||||
|
std::abort();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto [startAddress, endAddress, slot] : process.vmTable) {
|
||||||
|
auto gpuProt = slot.prot >> 4;
|
||||||
|
if (gpuProt == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto devOffset = slot.offset + startAddress - slot.baseAddress;
|
||||||
|
int mapFd = memoryFd;
|
||||||
|
|
||||||
|
if (slot.memoryType >= 0) {
|
||||||
|
mapFd = dmemFd[slot.memoryType];
|
||||||
|
}
|
||||||
|
|
||||||
|
auto mmapResult =
|
||||||
|
::mmap(memory.getPointer(startAddress), endAddress - startAddress,
|
||||||
|
gpuProt, MAP_FIXED | MAP_SHARED, mapFd, devOffset);
|
||||||
|
|
||||||
|
if (mmapResult == MAP_FAILED) {
|
||||||
|
std::printf(
|
||||||
|
"failed to map process %x memory, address %lx-%lx, type %x\n",
|
||||||
|
(int)pid, startAddress, endAddress, slot.memoryType);
|
||||||
|
std::abort();
|
||||||
|
}
|
||||||
|
|
||||||
|
device.handleProtectMemory(memory, startAddress,
|
||||||
|
endAddress - startAddress, slot.prot);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Detaches a process from its GPU address-space slot: the 1 TiB window that
// belongs to the slot (starting at vmId << 40) is returned to the reserved
// state, the shared memory descriptor is closed and the bookkeeping is reset
// to "unattached".
//
// Calling this for a process that was never attached (vmId / vmFd still -1)
// is a no-op apart from the reset; without the guards below a vmId of -1
// would be turned into a bogus window address and -1 would be passed to
// close().
auto unmapProcess = [&](ProcessInfo &process) {
  if (process.vmId >= 0) {
    auto startAddress = static_cast<std::uint64_t>(process.vmId) << 40;
    auto size = static_cast<std::uint64_t>(1) << 40;

    if (!rx::mem::reserve(reinterpret_cast<void *>(startAddress), size)) {
      // Not fatal: report it so stale mappings in the slot can be diagnosed.
      std::fprintf(stderr, "failed to reserve virtual memory of vm %d\n",
                   process.vmId);
    }
  }

  if (process.vmFd >= 0) {
    ::close(process.vmFd);
  }

  process.vmFd = -1;
  process.vmId = -1;
};
|
||||||
|
|
||||||
|
std::unordered_map<std::int64_t, ProcessInfo> processInfo;
|
||||||
|
|
||||||
std::vector<VkCommandBuffer> presentCmdBuffers(swapchainImages.size());
|
std::vector<VkCommandBuffer> presentCmdBuffers(swapchainImages.size());
|
||||||
|
|
||||||
|
|
@ -966,66 +1009,141 @@ int main(int argc, const char *argv[]) {
|
||||||
|
|
||||||
for (auto cmd : std::span(commandsBuffer, pulledCount)) {
|
for (auto cmd : std::span(commandsBuffer, pulledCount)) {
|
||||||
switch (cmd.id) {
|
switch (cmd.id) {
|
||||||
case amdgpu::bridge::CommandId::ProtectMemory:
|
case amdgpu::bridge::CommandId::ProtectMemory: {
|
||||||
device.handleProtectMemory(cmd.memoryProt.address,
|
auto &process = processInfo[cmd.memoryProt.pid];
|
||||||
cmd.memoryProt.size, cmd.memoryProt.prot);
|
|
||||||
break;
|
|
||||||
case amdgpu::bridge::CommandId::CommandBuffer:
|
|
||||||
device.handleCommandBuffer(cmd.commandBuffer.queue,
|
|
||||||
cmd.commandBuffer.address,
|
|
||||||
cmd.commandBuffer.size);
|
|
||||||
break;
|
|
||||||
case amdgpu::bridge::CommandId::Flip: {
|
|
||||||
if (!isImageAcquired) {
|
|
||||||
Verify() << vkAcquireNextImageKHR(vkDevice, swapchain, UINT64_MAX,
|
|
||||||
presentCompleteSemaphore, nullptr,
|
|
||||||
&imageIndex);
|
|
||||||
|
|
||||||
vkWaitForFences(vkDevice, 1, &inFlightFences[imageIndex], VK_TRUE,
|
auto vmSlotIt = process.vmTable.queryArea(cmd.memoryProt.address);
|
||||||
UINT64_MAX);
|
if (vmSlotIt == process.vmTable.end()) {
|
||||||
vkResetFences(vkDevice, 1, &inFlightFences[imageIndex]);
|
std::abort();
|
||||||
}
|
}
|
||||||
|
|
||||||
isImageAcquired = false;
|
auto vmSlot = (*vmSlotIt).payload;
|
||||||
|
|
||||||
vkResetCommandBuffer(presentCmdBuffers[imageIndex], 0);
|
process.vmTable.map(cmd.memoryProt.address,
|
||||||
VkCommandBufferBeginInfo beginInfo{};
|
cmd.memoryProt.address + cmd.memoryProt.size,
|
||||||
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
VmMapSlot{
|
||||||
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
|
.memoryType = vmSlot.memoryType,
|
||||||
|
.prot = static_cast<int>(cmd.memoryProt.prot),
|
||||||
|
.offset = vmSlot.offset,
|
||||||
|
.baseAddress = vmSlot.baseAddress,
|
||||||
|
});
|
||||||
|
|
||||||
vkBeginCommandBuffer(presentCmdBuffers[imageIndex], &beginInfo);
|
if (process.vmId >= 0) {
|
||||||
|
auto memory = amdgpu::RemoteMemory{process.vmId};
|
||||||
|
rx::mem::protect(memory.getPointer(cmd.memoryProt.address),
|
||||||
|
cmd.memoryProt.size, cmd.memoryProt.prot >> 4);
|
||||||
|
device.handleProtectMemory(memory, cmd.mapMemory.address,
|
||||||
|
cmd.mapMemory.size, cmd.mapMemory.prot);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case amdgpu::bridge::CommandId::CommandBuffer: {
|
||||||
|
auto &process = processInfo[cmd.commandBuffer.pid];
|
||||||
|
if (process.vmId >= 0) {
|
||||||
|
device.handleCommandBuffer(
|
||||||
|
amdgpu::RemoteMemory{process.vmId}, cmd.commandBuffer.queue,
|
||||||
|
cmd.commandBuffer.address, cmd.commandBuffer.size);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case amdgpu::bridge::CommandId::Flip: {
|
||||||
|
auto &process = processInfo[cmd.flip.pid];
|
||||||
|
|
||||||
if (device.handleFlip(
|
if (process.vmId >= 0) {
|
||||||
presentQueue, presentCmdBuffers[imageIndex],
|
if (!isImageAcquired) {
|
||||||
*flipTaskChain[imageIndex].get(), cmd.flip.bufferIndex,
|
Verify() << vkAcquireNextImageKHR(vkDevice, swapchain, UINT64_MAX,
|
||||||
cmd.flip.arg, swapchainImages[imageIndex], swapchainExtent,
|
presentCompleteSemaphore,
|
||||||
presentCompleteSemaphore, renderCompleteSemaphore,
|
nullptr, &imageIndex);
|
||||||
inFlightFences[imageIndex])) {
|
|
||||||
VkPresentInfoKHR presentInfo{
|
vkWaitForFences(vkDevice, 1, &inFlightFences[imageIndex], VK_TRUE,
|
||||||
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
|
UINT64_MAX);
|
||||||
.waitSemaphoreCount = 1,
|
vkResetFences(vkDevice, 1, &inFlightFences[imageIndex]);
|
||||||
.pWaitSemaphores = &renderCompleteSemaphore,
|
}
|
||||||
.swapchainCount = 1,
|
|
||||||
.pSwapchains = &swapchain,
|
isImageAcquired = false;
|
||||||
.pImageIndices = &imageIndex,
|
|
||||||
};
|
vkResetCommandBuffer(presentCmdBuffers[imageIndex], 0);
|
||||||
if (vkQueuePresentKHR(presentQueue, &presentInfo) != VK_SUCCESS) {
|
VkCommandBufferBeginInfo beginInfo{};
|
||||||
std::printf("swapchain was invalidated\n");
|
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
||||||
createSwapchain();
|
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
|
||||||
|
|
||||||
|
vkBeginCommandBuffer(presentCmdBuffers[imageIndex], &beginInfo);
|
||||||
|
|
||||||
|
if (device.handleFlip(
|
||||||
|
amdgpu::RemoteMemory{process.vmId}, presentQueue,
|
||||||
|
presentCmdBuffers[imageIndex],
|
||||||
|
*flipTaskChain[imageIndex].get(), cmd.flip.bufferIndex,
|
||||||
|
cmd.flip.arg, swapchainImages[imageIndex], swapchainExtent,
|
||||||
|
presentCompleteSemaphore, renderCompleteSemaphore,
|
||||||
|
inFlightFences[imageIndex])) {
|
||||||
|
VkPresentInfoKHR presentInfo{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
|
||||||
|
.waitSemaphoreCount = 1,
|
||||||
|
.pWaitSemaphores = &renderCompleteSemaphore,
|
||||||
|
.swapchainCount = 1,
|
||||||
|
.pSwapchains = &swapchain,
|
||||||
|
.pImageIndices = &imageIndex,
|
||||||
|
};
|
||||||
|
if (vkQueuePresentKHR(presentQueue, &presentInfo) != VK_SUCCESS) {
|
||||||
|
std::printf("swapchain was invalidated\n");
|
||||||
|
createSwapchain();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
isImageAcquired = true;
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
isImageAcquired = true;
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case amdgpu::bridge::CommandId::MapDmem: {
|
case amdgpu::bridge::CommandId::MapProcess: {
|
||||||
auto addr = g_hostMemory.getPointer(cmd.mapDmem.address);
|
mapProcess(cmd.mapProcess.pid, cmd.mapProcess.vmId, processInfo[cmd.mapProcess.pid]);
|
||||||
auto mapping = ::mmap(addr, cmd.mapDmem.size,
|
break;
|
||||||
PROT_READ | PROT_WRITE /*TODO: cmd.mapDmem.prot >> 4*/,
|
}
|
||||||
MAP_FIXED | MAP_SHARED, dmemFd[cmd.mapDmem.dmemIndex],
|
case amdgpu::bridge::CommandId::UnmapProcess: {
|
||||||
cmd.mapDmem.offset);
|
unmapProcess(processInfo[cmd.mapProcess.pid]);
|
||||||
device.handleProtectMemory(cmd.mapDmem.address, cmd.mapDmem.size, 0x33 /*TODO: cmd.mapDmem.prot*/);
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case amdgpu::bridge::CommandId::MapMemory: {
|
||||||
|
auto &process = processInfo[cmd.mapMemory.pid];
|
||||||
|
|
||||||
|
process.vmTable.map(
|
||||||
|
cmd.mapMemory.address, cmd.mapMemory.address + cmd.mapMemory.size,
|
||||||
|
VmMapSlot{
|
||||||
|
.memoryType = static_cast<int>(cmd.mapMemory.memoryType >= 0
|
||||||
|
? cmd.mapMemory.dmemIndex
|
||||||
|
: -1),
|
||||||
|
.prot = static_cast<int>(cmd.mapMemory.prot),
|
||||||
|
.offset = cmd.mapMemory.offset,
|
||||||
|
.baseAddress = cmd.mapMemory.address,
|
||||||
|
});
|
||||||
|
|
||||||
|
if (process.vmId >= 0) {
|
||||||
|
auto memory = amdgpu::RemoteMemory{process.vmId};
|
||||||
|
|
||||||
|
int mapFd = process.vmFd;
|
||||||
|
|
||||||
|
if (cmd.mapMemory.memoryType >= 0) {
|
||||||
|
mapFd = dmemFd[cmd.mapMemory.dmemIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
auto mmapResult =
|
||||||
|
::mmap(memory.getPointer(cmd.mapMemory.address),
|
||||||
|
cmd.mapMemory.size, cmd.mapMemory.prot >> 4,
|
||||||
|
MAP_FIXED | MAP_SHARED, mapFd, cmd.mapMemory.offset);
|
||||||
|
|
||||||
|
if (mmapResult == MAP_FAILED) {
|
||||||
|
std::printf(
|
||||||
|
"failed to map process %x memory, address %lx-%lx, type %x\n",
|
||||||
|
(int)cmd.mapMemory.pid, cmd.mapMemory.address,
|
||||||
|
cmd.mapMemory.address + cmd.mapMemory.size,
|
||||||
|
cmd.mapMemory.memoryType);
|
||||||
|
std::abort();
|
||||||
|
}
|
||||||
|
|
||||||
|
device.handleProtectMemory(memory, cmd.mapMemory.address,
|
||||||
|
cmd.mapMemory.size, cmd.mapMemory.prot);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -44,6 +44,12 @@ orbis::ErrorCode DmemDevice::mmap(void **address, std::uint64_t len,
|
||||||
rx::vm::kMapProtGpuAll;
|
rx::vm::kMapProtGpuAll;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto allocationInfoIt = allocations.queryArea(directMemoryStart);
|
||||||
|
if (allocationInfoIt == allocations.end()) {
|
||||||
|
std::abort();
|
||||||
|
}
|
||||||
|
auto allocationInfo = *allocationInfoIt;
|
||||||
|
|
||||||
auto result =
|
auto result =
|
||||||
rx::vm::map(*address, len, prot, flags, rx::vm::kMapInternalReserveOnly,
|
rx::vm::map(*address, len, prot, flags, rx::vm::kMapInternalReserveOnly,
|
||||||
this, directMemoryStart);
|
this, directMemoryStart);
|
||||||
|
|
@ -60,9 +66,10 @@ orbis::ErrorCode DmemDevice::mmap(void **address, std::uint64_t len,
|
||||||
return orbis::ErrorCode::INVAL;
|
return orbis::ErrorCode::INVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
rx::bridge.sendMapDmem(orbis::g_currentThread->tproc->pid, index,
|
rx::bridge.sendMapMemory(orbis::g_currentThread->tproc->pid,
|
||||||
reinterpret_cast<std::uint64_t>(result), len, prot,
|
allocationInfo.payload.memoryType, index,
|
||||||
directMemoryStart);
|
reinterpret_cast<std::uint64_t>(result), len, prot,
|
||||||
|
directMemoryStart);
|
||||||
|
|
||||||
*address = result;
|
*address = result;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <sys/mman.h>
|
#include <sys/mman.h>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
struct ComputeQueue {
|
struct ComputeQueue {
|
||||||
std::uint64_t ringBaseAddress{};
|
std::uint64_t ringBaseAddress{};
|
||||||
|
|
@ -19,14 +20,104 @@ struct ComputeQueue {
|
||||||
std::uint64_t len{};
|
std::uint64_t len{};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static void runBridge(int vmId) {
|
||||||
|
std::thread{[=] {
|
||||||
|
pthread_setname_np(pthread_self(), "Bridge");
|
||||||
|
auto bridge = rx::bridge.header;
|
||||||
|
|
||||||
|
std::vector<std::uint64_t> fetchedCommands;
|
||||||
|
fetchedCommands.reserve(std::size(bridge->cacheCommands));
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
for (auto &command : bridge->cacheCommands) {
|
||||||
|
std::uint64_t value = command[vmId].load(std::memory_order::relaxed);
|
||||||
|
|
||||||
|
if (value != 0) {
|
||||||
|
fetchedCommands.push_back(value);
|
||||||
|
command[vmId].store(0, std::memory_order::relaxed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fetchedCommands.empty()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto command : fetchedCommands) {
|
||||||
|
auto page = static_cast<std::uint32_t>(command);
|
||||||
|
auto count = static_cast<std::uint32_t>(command >> 32) + 1;
|
||||||
|
|
||||||
|
auto pageFlags =
|
||||||
|
bridge->cachePages[vmId][page].load(std::memory_order::relaxed);
|
||||||
|
|
||||||
|
auto address =
|
||||||
|
static_cast<std::uint64_t>(page) * amdgpu::bridge::kHostPageSize;
|
||||||
|
auto origVmProt = rx::vm::getPageProtection(address);
|
||||||
|
int prot = 0;
|
||||||
|
|
||||||
|
if (origVmProt & rx::vm::kMapProtCpuRead) {
|
||||||
|
prot |= PROT_READ;
|
||||||
|
}
|
||||||
|
if (origVmProt & rx::vm::kMapProtCpuWrite) {
|
||||||
|
prot |= PROT_WRITE;
|
||||||
|
}
|
||||||
|
if (origVmProt & rx::vm::kMapProtCpuExec) {
|
||||||
|
prot |= PROT_EXEC;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pageFlags & amdgpu::bridge::kPageReadWriteLock) {
|
||||||
|
prot &= ~(PROT_READ | PROT_WRITE);
|
||||||
|
} else if (pageFlags & amdgpu::bridge::kPageWriteWatch) {
|
||||||
|
prot &= ~PROT_WRITE;
|
||||||
|
}
|
||||||
|
|
||||||
|
// std::fprintf(stderr, "protection %lx-%lx\n", address,
|
||||||
|
// address + amdgpu::bridge::kHostPageSize * count);
|
||||||
|
if (::mprotect(reinterpret_cast<void *>(address),
|
||||||
|
amdgpu::bridge::kHostPageSize * count, prot)) {
|
||||||
|
perror("protection failed");
|
||||||
|
std::abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fetchedCommands.clear();
|
||||||
|
}
|
||||||
|
}}.detach();
|
||||||
|
}
|
||||||
|
|
||||||
|
static constexpr auto kVmIdCount = 6;
|
||||||
|
|
||||||
struct GcDevice : public IoDevice {
|
struct GcDevice : public IoDevice {
|
||||||
|
std::uint32_t freeVmIds = (1 << (kVmIdCount + 1)) - 1;
|
||||||
orbis::shared_mutex mtx;
|
orbis::shared_mutex mtx;
|
||||||
|
orbis::kmap<orbis::pid_t, int> clients;
|
||||||
orbis::kmap<std::uint64_t, ComputeQueue> computeQueues;
|
orbis::kmap<std::uint64_t, ComputeQueue> computeQueues;
|
||||||
orbis::ErrorCode open(orbis::Ref<orbis::File> *file, const char *path,
|
orbis::ErrorCode open(orbis::Ref<orbis::File> *file, const char *path,
|
||||||
std::uint32_t flags, std::uint32_t mode,
|
std::uint32_t flags, std::uint32_t mode,
|
||||||
orbis::Thread *thread) override;
|
orbis::Thread *thread) override;
|
||||||
|
|
||||||
|
void addClient(orbis::Process *process);
|
||||||
|
void removeClient(orbis::Process *process);
|
||||||
|
|
||||||
|
int allocateVmId() {
|
||||||
|
int id = std::countr_zero(freeVmIds);
|
||||||
|
|
||||||
|
if (id >= kVmIdCount) {
|
||||||
|
std::fprintf(stderr, "out of vm slots\n");
|
||||||
|
std::abort();
|
||||||
|
}
|
||||||
|
|
||||||
|
freeVmIds &= ~(1 << id);
|
||||||
|
return id;
|
||||||
|
};
|
||||||
|
|
||||||
|
void deallocateVmId(int vmId) { freeVmIds |= (1 << vmId); };
|
||||||
};
|
};
|
||||||
struct GcFile : public orbis::File {};
|
|
||||||
|
struct GcFile : public orbis::File {
|
||||||
|
orbis::Process *process = nullptr;
|
||||||
|
~GcFile() { device.staticCast<GcDevice>()->removeClient(process); }
|
||||||
|
};
|
||||||
|
|
||||||
static std::uint64_t g_submitDoneFlag;
|
static std::uint64_t g_submitDoneFlag;
|
||||||
|
|
||||||
static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
|
static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
|
||||||
|
|
@ -34,7 +125,7 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
|
||||||
// 0xc00c8110
|
// 0xc00c8110
|
||||||
// 0xc0848119
|
// 0xc0848119
|
||||||
|
|
||||||
auto device = static_cast<GcDevice *>(file->device.get());
|
auto device = file->device.staticCast<GcDevice>();
|
||||||
std::lock_guard lock(device->mtx);
|
std::lock_guard lock(device->mtx);
|
||||||
|
|
||||||
switch (request) {
|
switch (request) {
|
||||||
|
|
@ -55,7 +146,7 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
|
||||||
|
|
||||||
// flockfile(stderr);
|
// flockfile(stderr);
|
||||||
// if (thread->tproc->pid != amdgpu::bridge::expGpuPid) {
|
// if (thread->tproc->pid != amdgpu::bridge::expGpuPid) {
|
||||||
// ORBIS_LOG_ERROR("gc ioctl submit", args->arg0, args->count, args->cmds);
|
// ORBIS_LOG_ERROR("gc ioctl submit", args->arg0, args->count, args->cmds);
|
||||||
// }
|
// }
|
||||||
|
|
||||||
for (unsigned i = 0; i < args->count; ++i) {
|
for (unsigned i = 0; i < args->count; ++i) {
|
||||||
|
|
@ -172,14 +263,20 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case 0xc010810b: { // something like stats masks?
|
case 0xc010810b: { // get cu masks param
|
||||||
struct Args {
|
struct Args {
|
||||||
std::uint64_t arg1;
|
std::uint32_t se0sh0;
|
||||||
std::uint64_t arg2;
|
std::uint32_t se0sh1;
|
||||||
|
std::uint32_t se1sh0;
|
||||||
|
std::uint32_t se1sh1;
|
||||||
};
|
};
|
||||||
|
|
||||||
auto args = reinterpret_cast<Args *>(argp);
|
auto args = reinterpret_cast<Args *>(argp);
|
||||||
ORBIS_LOG_ERROR("gc ioctl stats mask", args->arg1, args->arg2);
|
// ORBIS_LOG_ERROR("gc ioctl stats mask", args->arg1, args->arg2);
|
||||||
|
args->se0sh0 = ~0;
|
||||||
|
args->se0sh1 = ~0;
|
||||||
|
args->se1sh0 = ~0;
|
||||||
|
args->se1sh1 = ~0;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -265,8 +362,14 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
|
||||||
}
|
}
|
||||||
|
|
||||||
case 0xc0048113: {
|
case 0xc0048113: {
|
||||||
// get client number
|
// get num clients
|
||||||
*(std::uint32_t *)argp = 0;
|
|
||||||
|
struct Args {
|
||||||
|
std::uint32_t numClients;
|
||||||
|
};
|
||||||
|
|
||||||
|
auto *args = reinterpret_cast<Args *>(argp);
|
||||||
|
args->numClients = device->clients.size();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -312,8 +415,38 @@ orbis::ErrorCode GcDevice::open(orbis::Ref<orbis::File> *file, const char *path,
|
||||||
auto newFile = orbis::knew<GcFile>();
|
auto newFile = orbis::knew<GcFile>();
|
||||||
newFile->device = this;
|
newFile->device = this;
|
||||||
newFile->ops = &ops;
|
newFile->ops = &ops;
|
||||||
|
newFile->process = thread->tproc;
|
||||||
|
addClient(thread->tproc);
|
||||||
*file = newFile;
|
*file = newFile;
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Registers one more open handle for `process`.
//
// The per-pid handle count is incremented under mtx. Only when this is the
// first handle of the process is a VM id allocated, announced through
// rx::bridge.sendMapProcess(), stored in process->vmId, and a bridge thread
// started for that id.
void GcDevice::addClient(orbis::Process *process) {
  std::lock_guard guard(mtx);

  // operator[] creates the entry with a zero count on first use.
  if (++clients[process->pid] != 1) {
    return;
  }

  const auto slot = allocateVmId();
  rx::bridge.sendMapProcess(process->pid, slot);
  process->vmId = slot;

  runBridge(slot);
}
|
||||||
|
|
||||||
|
// Drops one open handle of `process`.
//
// The per-pid handle count is decremented under mtx. When it reaches zero the
// pid is removed from `clients`, rx::bridge.sendUnmapProcess() is sent, the
// process's VM id is returned to the free mask and process->vmId is reset to
// -1.
//
// A null `process` (the default value of GcFile::process) or a pid that was
// never registered is ignored instead of being dereferenced; the asserts are
// kept so debug builds still stop on the inconsistency.
void GcDevice::removeClient(orbis::Process *process) {
  if (process == nullptr) {
    return;
  }

  std::lock_guard lock(mtx);
  auto clientIt = clients.find(process->pid);
  assert(clientIt != clients.end());

  // With NDEBUG the assert above compiles away; never touch end().
  if (clientIt == clients.end()) {
    std::fprintf(stderr, "gc: attempt to remove unknown client %ld\n",
                 static_cast<long>(process->pid));
    return;
  }

  assert(clientIt->second != 0);
  --clientIt->second;

  if (clientIt->second == 0) {
    clients.erase(clientIt);
    rx::bridge.sendUnmapProcess(process->pid);
    deallocateVmId(process->vmId);
    process->vmId = -1;
  }
}
|
||||||
|
|
||||||
// Creates a new GcDevice with orbis::knew and returns it as an IoDevice.
IoDevice *createGcCharacterDevice() {
  auto device = orbis::knew<GcDevice>();
  return device;
}
|
||||||
|
|
|
||||||
|
|
@ -41,71 +41,6 @@
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
static int g_gpuPid;
|
static int g_gpuPid;
|
||||||
|
|
||||||
void runBridge() {
|
|
||||||
std::thread{[] {
|
|
||||||
pthread_setname_np(pthread_self(), "Bridge");
|
|
||||||
auto bridge = rx::bridge.header;
|
|
||||||
|
|
||||||
std::vector<std::uint64_t> fetchedCommands;
|
|
||||||
fetchedCommands.reserve(std::size(bridge->cacheCommands));
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
for (auto &command : bridge->cacheCommands) {
|
|
||||||
std::uint64_t value = command.load(std::memory_order::relaxed);
|
|
||||||
|
|
||||||
if (value != 0) {
|
|
||||||
fetchedCommands.push_back(value);
|
|
||||||
command.store(0, std::memory_order::relaxed);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fetchedCommands.empty()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (auto command : fetchedCommands) {
|
|
||||||
auto page = static_cast<std::uint32_t>(command);
|
|
||||||
auto count = static_cast<std::uint32_t>(command >> 32) + 1;
|
|
||||||
|
|
||||||
auto pageFlags =
|
|
||||||
bridge->cachePages[page].load(std::memory_order::relaxed);
|
|
||||||
|
|
||||||
auto address =
|
|
||||||
static_cast<std::uint64_t>(page) * amdgpu::bridge::kHostPageSize;
|
|
||||||
auto origVmProt = rx::vm::getPageProtection(address);
|
|
||||||
int prot = 0;
|
|
||||||
|
|
||||||
if (origVmProt & rx::vm::kMapProtCpuRead) {
|
|
||||||
prot |= PROT_READ;
|
|
||||||
}
|
|
||||||
if (origVmProt & rx::vm::kMapProtCpuWrite) {
|
|
||||||
prot |= PROT_WRITE;
|
|
||||||
}
|
|
||||||
if (origVmProt & rx::vm::kMapProtCpuExec) {
|
|
||||||
prot |= PROT_EXEC;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pageFlags & amdgpu::bridge::kPageReadWriteLock) {
|
|
||||||
prot &= ~(PROT_READ | PROT_WRITE);
|
|
||||||
} else if (pageFlags & amdgpu::bridge::kPageWriteWatch) {
|
|
||||||
prot &= ~PROT_WRITE;
|
|
||||||
}
|
|
||||||
|
|
||||||
// std::fprintf(stderr, "protection %lx-%lx\n", address,
|
|
||||||
// address + amdgpu::bridge::kHostPageSize * count);
|
|
||||||
if (::mprotect(reinterpret_cast<void *>(address),
|
|
||||||
amdgpu::bridge::kHostPageSize * count, prot)) {
|
|
||||||
perror("protection failed");
|
|
||||||
std::abort();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fetchedCommands.clear();
|
|
||||||
}
|
|
||||||
}}.detach();
|
|
||||||
}
|
|
||||||
|
|
||||||
extern bool allowMonoDebug;
|
extern bool allowMonoDebug;
|
||||||
|
|
||||||
__attribute__((no_stack_protector)) static void
|
__attribute__((no_stack_protector)) static void
|
||||||
|
|
@ -116,8 +51,9 @@ handle_signal(int sig, siginfo_t *info, void *ucontext) {
|
||||||
|
|
||||||
auto signalAddress = reinterpret_cast<std::uintptr_t>(info->si_addr);
|
auto signalAddress = reinterpret_cast<std::uintptr_t>(info->si_addr);
|
||||||
|
|
||||||
if (orbis::g_currentThread != nullptr && sig == SIGSEGV &&
|
if (orbis::g_currentThread != nullptr && orbis::g_currentThread->tproc->vmId >= 0 && sig == SIGSEGV &&
|
||||||
signalAddress >= 0x40000 && signalAddress < 0x100'0000'0000) {
|
signalAddress >= 0x40000 && signalAddress < 0x100'0000'0000) {
|
||||||
|
auto vmid = orbis::g_currentThread->tproc->vmId;
|
||||||
auto ctx = reinterpret_cast<ucontext_t *>(ucontext);
|
auto ctx = reinterpret_cast<ucontext_t *>(ucontext);
|
||||||
bool isWrite = (ctx->uc_mcontext.gregs[REG_ERR] & 0x2) != 0;
|
bool isWrite = (ctx->uc_mcontext.gregs[REG_ERR] & 0x2) != 0;
|
||||||
auto origVmProt = rx::vm::getPageProtection(signalAddress);
|
auto origVmProt = rx::vm::getPageProtection(signalAddress);
|
||||||
|
|
@ -138,17 +74,17 @@ handle_signal(int sig, siginfo_t *info, void *ucontext) {
|
||||||
auto bridge = rx::bridge.header;
|
auto bridge = rx::bridge.header;
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
auto flags = bridge->cachePages[page].load(std::memory_order::relaxed);
|
auto flags = bridge->cachePages[vmid][page].load(std::memory_order::relaxed);
|
||||||
|
|
||||||
if ((flags & amdgpu::bridge::kPageReadWriteLock) != 0) {
|
if ((flags & amdgpu::bridge::kPageReadWriteLock) != 0) {
|
||||||
if ((flags & amdgpu::bridge::kPageLazyLock) != 0) {
|
if ((flags & amdgpu::bridge::kPageLazyLock) != 0) {
|
||||||
if (std::uint32_t gpuCommand = 0;
|
if (std::uint32_t gpuCommand = 0;
|
||||||
!bridge->gpuCacheCommand.compare_exchange_weak(gpuCommand,
|
!bridge->gpuCacheCommand[vmid].compare_exchange_weak(gpuCommand,
|
||||||
page)) {
|
page)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (!bridge->cachePages[page].compare_exchange_weak(
|
while (!bridge->cachePages[vmid][page].compare_exchange_weak(
|
||||||
flags, flags & ~amdgpu::bridge::kPageLazyLock,
|
flags, flags & ~amdgpu::bridge::kPageLazyLock,
|
||||||
std::memory_order::relaxed)) {
|
std::memory_order::relaxed)) {
|
||||||
}
|
}
|
||||||
|
|
@ -165,7 +101,7 @@ handle_signal(int sig, siginfo_t *info, void *ucontext) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bridge->cachePages[page].compare_exchange_weak(
|
if (bridge->cachePages[vmid][page].compare_exchange_weak(
|
||||||
flags, amdgpu::bridge::kPageInvalidated,
|
flags, amdgpu::bridge::kPageInvalidated,
|
||||||
std::memory_order::relaxed)) {
|
std::memory_order::relaxed)) {
|
||||||
break;
|
break;
|
||||||
|
|
@ -188,6 +124,7 @@ handle_signal(int sig, siginfo_t *info, void *ucontext) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (orbis::g_currentThread != nullptr) {
|
if (orbis::g_currentThread != nullptr) {
|
||||||
|
orbis::g_currentThread->tproc->exitStatus = sig;
|
||||||
orbis::g_currentThread->tproc->event.emit(orbis::kEvFiltProc,
|
orbis::g_currentThread->tproc->event.emit(orbis::kEvFiltProc,
|
||||||
orbis::kNoteExit, sig);
|
orbis::kNoteExit, sig);
|
||||||
}
|
}
|
||||||
|
|
@ -1640,29 +1577,34 @@ int main(int argc, const char *argv[]) {
|
||||||
};
|
};
|
||||||
|
|
||||||
if (isSystem) {
|
if (isSystem) {
|
||||||
amdgpu::bridge::expGpuPid = isSafeMode ? 20001 : 60001;
|
|
||||||
orbis::g_context.safeMode = isSafeMode ? 1 : 0;
|
orbis::g_context.safeMode = isSafeMode ? 1 : 0;
|
||||||
initProcess->authInfo = {
|
initProcess->authInfo = {.unk0 = 0x380000000000000f,
|
||||||
.unk0 = 0x380000000000000f,
|
.caps =
|
||||||
.caps =
|
{
|
||||||
{
|
-1ul,
|
||||||
-1ul,
|
-1ul,
|
||||||
-1ul,
|
-1ul,
|
||||||
-1ul,
|
-1ul,
|
||||||
-1ul,
|
},
|
||||||
},
|
.attrs =
|
||||||
.attrs =
|
{
|
||||||
{
|
0x4000400040000000,
|
||||||
0x4000400040000000,
|
0x4000000000000000,
|
||||||
0x4000000000000000,
|
0x0080000000000002,
|
||||||
0x0080000000000002,
|
0xF0000000FFFF4000,
|
||||||
0xF0000000FFFF4000,
|
},
|
||||||
},
|
.ucred = {
|
||||||
};
|
-1ul,
|
||||||
|
-1ul,
|
||||||
|
0x3800000000000022,
|
||||||
|
-1ul,
|
||||||
|
(1ul << 0x3a),
|
||||||
|
-1ul,
|
||||||
|
-1ul,
|
||||||
|
}};
|
||||||
initProcess->budgetId = 0;
|
initProcess->budgetId = 0;
|
||||||
initProcess->isInSandbox = false;
|
initProcess->isInSandbox = false;
|
||||||
} else {
|
} else {
|
||||||
amdgpu::bridge::expGpuPid = initProcess->pid;
|
|
||||||
initProcess->authInfo = {
|
initProcess->authInfo = {
|
||||||
.unk0 = 0x3100000000000001,
|
.unk0 = 0x3100000000000001,
|
||||||
.caps =
|
.caps =
|
||||||
|
|
@ -1788,7 +1730,6 @@ int main(int argc, const char *argv[]) {
|
||||||
|
|
||||||
launchDaemon(mainThread, "/system/sys/orbis_audiod.elf",
|
launchDaemon(mainThread, "/system/sys/orbis_audiod.elf",
|
||||||
{"/system/sys/orbis_audiod.elf"}, {});
|
{"/system/sys/orbis_audiod.elf"}, {});
|
||||||
runBridge();
|
|
||||||
status = ps4Exec(mainThread, execEnv, std::move(executableModule),
|
status = ps4Exec(mainThread, execEnv, std::move(executableModule),
|
||||||
ps4Argv, {});
|
ps4Argv, {});
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,6 @@ using namespace orbis;
|
||||||
extern bool allowMonoDebug;
|
extern bool allowMonoDebug;
|
||||||
|
|
||||||
extern "C" void __register_frame(const void *);
|
extern "C" void __register_frame(const void *);
|
||||||
void runBridge();
|
|
||||||
void setupSigHandlers();
|
void setupSigHandlers();
|
||||||
int ps4Exec(orbis::Thread *mainThread,
|
int ps4Exec(orbis::Thread *mainThread,
|
||||||
orbis::utils::Ref<orbis::Module> executableModule,
|
orbis::utils::Ref<orbis::Module> executableModule,
|
||||||
|
|
@ -828,9 +827,6 @@ SysResult fork(Thread *thread, slong flags) {
|
||||||
dup2(logFd, 1);
|
dup2(logFd, 1);
|
||||||
dup2(logFd, 2);
|
dup2(logFd, 2);
|
||||||
|
|
||||||
if (childPid == amdgpu::bridge::expGpuPid) {
|
|
||||||
runBridge();
|
|
||||||
}
|
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -958,11 +958,8 @@ void *rx::vm::map(void *addr, std::uint64_t len, std::int32_t prot,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (auto thr = orbis::g_currentThread) {
|
if (auto thr = orbis::g_currentThread) {
|
||||||
// std::fprintf(stderr, "sending mapping %lx-%lx, pid %lx\n", address,
|
rx::bridge.sendMapMemory(thr->tproc->pid, -1, -1, address, len, prot,
|
||||||
// address + len, thr->tproc->pid);
|
address - kMinAddress);
|
||||||
// if (!noOverwrite) {
|
|
||||||
// rx::bridge.sendMemoryProtect(thr->tproc->pid, address, len, prot);
|
|
||||||
// }
|
|
||||||
} else {
|
} else {
|
||||||
std::fprintf(stderr, "ignoring mapping %lx-%lx\n", address, address + len);
|
std::fprintf(stderr, "ignoring mapping %lx-%lx\n", address, address + len);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue