merge rpcsx-gpu and rpcsx-os

initial watchdog implementation
implement gpu -> os events
implement main gfx queue
DH 2024-10-12 05:24:58 +03:00
parent 8e9711e0f6
commit 0c16e294d4
236 changed files with 4649 additions and 4669 deletions

.github/BUILDING.md

@@ -39,25 +39,8 @@ git clone https://github.com/KhronosGroup/SPIRV-Cross && cd SPIRV-Cross && mkdir
 ```
 git clone --recursive https://github.com/RPCSX/rpcsx && cd rpcsx
 ```
-```
-git submodule update --init --recursive
-```
 ## How to compile the emulator
 ```
-mkdir -p build && cd build && cmake .. && cmake --build .
-```
-## How to create a Virtual HDD
-> The PS4 has a case-insensitive filesystem. To create the Virtual HDD, do the following:
-```
-truncate -s 512M ps4-hdd.exfat
-mkfs.exfat -n PS4-HDD ./ps4-hdd.exfat
-mkdir ps4-fs
-sudo mount -t exfat -o uid=`id -u`,gid=`id -g` ./ps4-hdd.exfat ./ps4-fs
+cmake -B build && cmake --build build -j$(nproc)
 ```

.github/USAGE.md

@@ -4,17 +4,17 @@
 You will need firmware 5.05 dumped via PS4 FTP; it must be fully decrypted, and we do not provide the firmware.
-See the Commands of `rpcsx-os` (`-h` argument), or join the [Discord](https://discord.gg/t6dzA4wUdG) for help.
+See the Commands of `rpcsx` (`-h` argument), or join the [Discord](https://discord.gg/t6dzA4wUdG) for help.
 You can run the emulator with some samples using this command:
 ```sh
-rm -f /dev/shm/rpcsx-* && ./rpcsx-os --mount "<path to fw>/system" "/system" --mount "<path to 'game' root>" /app0 /app0/some-test-sample.elf [<args for test elf>...]
+./rpcsx --mount "<path to fw>/system" "/system" --mount "<path to 'game' root>" /app0 /app0/some-test-sample.elf [<args for test elf>...]
 ```
 ### You can now enter safe mode
 ```sh
-./rpcsx-os --system --safemode --mount $PATH_TO_YOUR_FW_ROOT / /mini-syscore.elf
+./rpcsx --system --safemode --mount $PATH_TO_YOUR_FW_ROOT / /mini-syscore.elf
 ```
 Drop ```--safemode``` to run in normal mode (not expected to produce graphics yet).
 ## Creating a log

CMakeLists.txt

@@ -57,9 +57,7 @@ endfunction()
 add_subdirectory(tools)
 add_subdirectory(orbis-kernel)
-add_subdirectory(rpcsx-os)
-add_subdirectory(rpcsx-gpu)
-add_subdirectory(hw/amdgpu)
+add_subdirectory(rpcsx)
 add_subdirectory(rx)
 
 target_compile_definitions(rx PRIVATE

hw/amdgpu/CMakeLists.txt

@@ -1,7 +0,0 @@
cmake_minimum_required(VERSION 3.10)
set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_EXTENSIONS off)
add_subdirectory(bridge)

hw/amdgpu/bridge/CMakeLists.txt

@@ -1,17 +0,0 @@
project(libamdgpu-bridge)
set(PROJECT_PATH amdgpu/bridge)
set(INCLUDE
include/${PROJECT_PATH}/bridge.hpp
)
set(SRC
src/bridge.cpp
)
add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC})
target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH})
target_link_libraries(${PROJECT_NAME} PUBLIC orbis::utils::ipc)
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "")
add_library(amdgpu::bridge ALIAS ${PROJECT_NAME})
set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON)

hw/amdgpu/bridge/include/amdgpu/bridge/bridge.hpp

@@ -1,402 +0,0 @@
#pragma once
#include <atomic>
#include <cstdint>
#include <cstring>
#include <initializer_list>
#include <orbis/utils/SharedMutex.hpp>
namespace amdgpu::bridge {
struct PadState {
std::uint64_t timestamp;
std::uint32_t unk;
std::uint32_t buttons;
std::uint8_t leftStickX;
std::uint8_t leftStickY;
std::uint8_t rightStickX;
std::uint8_t rightStickY;
std::uint8_t l2;
std::uint8_t r2;
};
enum {
kPadBtnL3 = 1 << 1,
kPadBtnR3 = 1 << 2,
kPadBtnOptions = 1 << 3,
kPadBtnUp = 1 << 4,
kPadBtnRight = 1 << 5,
kPadBtnDown = 1 << 6,
kPadBtnLeft = 1 << 7,
kPadBtnL2 = 1 << 8,
kPadBtnR2 = 1 << 9,
kPadBtnL1 = 1 << 10,
kPadBtnR1 = 1 << 11,
kPadBtnTriangle = 1 << 12,
kPadBtnCircle = 1 << 13,
kPadBtnCross = 1 << 14,
kPadBtnSquare = 1 << 15,
kPadBtnPs = 1 << 16,
kPadBtnTouchPad = 1 << 20,
kPadBtnIntercepted = 1 << 31,
};
enum class CommandId : std::uint32_t {
Nop,
ProtectMemory,
CommandBuffer,
Flip,
MapMemory,
MapProcess,
UnmapProcess,
RegisterBuffer,
RegisterBufferAttribute,
};
struct CmdMemoryProt {
std::uint64_t address;
std::uint64_t size;
std::uint32_t prot;
std::uint32_t pid;
};
struct CmdCommandBuffer {
std::uint64_t queue;
std::uint64_t address;
std::uint32_t size;
std::uint32_t pid;
};
struct CmdBufferAttribute {
std::uint32_t pid;
std::uint8_t attrId;
std::uint8_t submit;
std::uint64_t canary;
std::uint32_t pixelFormat;
std::uint32_t tilingMode;
std::uint32_t pitch;
std::uint32_t width;
std::uint32_t height;
};
struct CmdBuffer {
std::uint64_t canary;
std::uint32_t index;
std::uint32_t attrId;
std::uint64_t address;
std::uint64_t address2;
std::uint32_t pid;
};
struct CmdFlip {
std::uint32_t pid;
std::uint32_t bufferIndex;
std::uint64_t arg;
};
struct CmdMapMemory {
std::int64_t offset;
std::uint64_t address;
std::uint64_t size;
std::uint32_t prot;
std::uint32_t pid;
std::int32_t memoryType;
std::uint32_t dmemIndex;
};
struct CmdMapProcess {
std::uint64_t pid;
int vmId;
};
struct CmdUnmapProcess {
std::uint64_t pid;
};
enum {
kPageWriteWatch = 1 << 0,
kPageReadWriteLock = 1 << 1,
kPageInvalidated = 1 << 2,
kPageLazyLock = 1 << 3
};
static constexpr auto kHostPageSize = 0x1000;
struct BridgeHeader {
std::uint64_t size;
std::uint64_t info;
std::uint32_t pullerPid;
std::uint32_t pusherPid;
std::atomic<std::uint64_t> lock;
volatile std::uint64_t flags;
std::uint64_t vmAddress;
std::uint64_t vmSize;
char vmName[32];
PadState kbPadState;
volatile std::uint32_t flipBuffer[6];
volatile std::uint64_t flipArg[6];
volatile std::uint64_t flipCount[6];
volatile std::uint64_t bufferInUseAddress[6];
std::uint32_t commandBufferCount;
std::uint32_t bufferCount;
CmdCommandBuffer commandBuffers[32];
// CmdBuffer buffers[10];
// orbis::shared_mutex cacheCommandMtx;
// orbis::shared_cv cacheCommandCv;
std::atomic<std::uint64_t> cacheCommands[6][4];
std::atomic<std::uint32_t> gpuCacheCommand[6];
std::atomic<std::uint8_t> cachePages[6][0x100'0000'0000 / kHostPageSize];
volatile std::uint64_t pull;
volatile std::uint64_t push;
std::uint64_t commands[];
};
struct Command {
CommandId id;
union {
CmdMemoryProt memoryProt;
CmdCommandBuffer commandBuffer;
CmdBuffer buffer;
CmdBufferAttribute bufferAttribute;
CmdFlip flip;
CmdMapMemory mapMemory;
CmdMapProcess mapProcess;
CmdUnmapProcess unmapProcess;
};
};
enum class BridgeFlags {
VmConfigured = 1 << 0,
PushLock = 1 << 1,
PullLock = 1 << 2,
};
struct BridgePusher {
BridgeHeader *header = nullptr;
void setVm(std::uint64_t address, std::uint64_t size, const char *name) {
header->vmAddress = address;
header->vmSize = size;
std::strncpy(header->vmName, name, sizeof(header->vmName));
header->flags =
header->flags | static_cast<std::uint64_t>(BridgeFlags::VmConfigured);
}
void sendMemoryProtect(std::uint32_t pid, std::uint64_t address,
std::uint64_t size, std::uint32_t prot) {
sendCommand(CommandId::ProtectMemory, {pid, address, size, prot});
}
void sendMapMemory(std::uint32_t pid, std::uint32_t memoryType,
std::uint32_t dmemIndex, std::uint64_t address,
std::uint64_t size, std::uint32_t prot,
std::uint64_t offset) {
sendCommand(CommandId::MapMemory,
{pid, memoryType, dmemIndex, address, size, prot, offset});
}
void sendRegisterBuffer(std::uint32_t pid, std::uint64_t canary,
std::uint32_t index, std::uint32_t attrId,
std::uint64_t address, std::uint64_t address2) {
sendCommand(CommandId::RegisterBuffer,
{pid, canary, index, attrId, address, address2});
}
void sendRegisterBufferAttribute(std::uint32_t pid, std::uint8_t attrId,
std::uint8_t submit, std::uint64_t canary,
std::uint32_t pixelFormat,
std::uint32_t tilingMode,
std::uint32_t pitch, std::uint32_t width,
std::uint32_t height) {
sendCommand(CommandId::RegisterBufferAttribute,
{pid, attrId, submit, canary, pixelFormat, tilingMode, pitch,
width, height});
}
void sendCommandBuffer(std::uint32_t pid, std::uint64_t queue,
std::uint64_t address, std::uint64_t size) {
sendCommand(CommandId::CommandBuffer, {pid, queue, address, size});
}
void sendFlip(std::uint32_t pid, std::uint32_t bufferIndex,
std::uint64_t arg) {
sendCommand(CommandId::Flip, {pid, bufferIndex, arg});
}
void sendMapProcess(std::uint32_t pid, unsigned vmId) {
sendCommand(CommandId::MapProcess, {pid, vmId});
}
void sendUnmapProcess(std::uint32_t pid) {
sendCommand(CommandId::UnmapProcess, {pid});
}
void wait() {
while (header->pull != header->push)
;
}
private:
static std::uint64_t makeCommandHeader(CommandId id, std::size_t cmdSize) {
return static_cast<std::uint64_t>(id) |
(static_cast<std::uint64_t>(cmdSize - 1) << 32);
}
void sendCommand(CommandId id, std::initializer_list<std::uint64_t> args) {
std::uint64_t exp = 0;
while (!header->lock.compare_exchange_strong(
exp, 1, std::memory_order::acquire, std::memory_order::relaxed)) {
exp = 0;
}
std::size_t cmdSize = args.size() + 1;
std::uint64_t pos = getPushPosition(cmdSize);
header->commands[pos++] = makeCommandHeader(id, cmdSize);
for (auto arg : args) {
header->commands[pos++] = arg;
}
header->push = pos;
header->lock.store(0, std::memory_order::release);
}
std::uint64_t getPushPosition(std::uint64_t cmdSize) {
std::uint64_t position = header->push;
if (position + cmdSize > header->size) {
waitPuller(position);
if (position < header->size) {
header->commands[position] =
static_cast<std::uint64_t>(CommandId::Nop) |
((header->size - position + cmdSize) << 32);
}
position = 0;
header->push = position;
}
return position;
}
void waitPuller(std::uint64_t pullValue) {
while (header->pull != pullValue) {
;
}
}
};
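The ring protocol packs each command into a single header word, CommandId in the low 32 bits and the argument count (cmdSize - 1) in the high 32 bits, followed by the argument words; BridgePuller reverses the split. A minimal round-trip check of that packing (standalone sketch, not part of the diff):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // Same packing as BridgePusher::makeCommandHeader: a Flip command
  // (CommandId 3) carries three argument words, so cmdSize == 4.
  std::uint64_t id = 3, cmdSize = 4;
  std::uint64_t word = id | ((cmdSize - 1) << 32);

  // BridgePuller::pullCommands decodes the same word.
  assert(static_cast<std::uint32_t>(word) == id); // command id
  assert((word >> 32) == cmdSize - 1);            // argsCount
}
```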
struct BridgePuller {
BridgeHeader *header = nullptr;
BridgePuller() = default;
BridgePuller(BridgeHeader *header) : header(header) {}
std::size_t pullCommands(Command *commands, std::size_t maxCount) {
std::size_t processed = 0;
while (processed < maxCount) {
if (header->pull == header->push) {
break;
}
auto pos = header->pull;
if (pos >= header->size) {
header->pull = 0;
continue;
}
auto cmd = header->commands[pos];
CommandId cmdId = static_cast<CommandId>(cmd);
std::uint32_t argsCount = cmd >> 32;
if (cmdId != CommandId::Nop) {
commands[processed++] =
unpackCommand(cmdId, header->commands + pos + 1, argsCount);
}
header->pull = pos + argsCount + 1;
}
return processed;
}
private:
Command unpackCommand(CommandId command, const std::uint64_t *args,
std::uint32_t argsCount) {
Command result;
result.id = command;
switch (command) {
case CommandId::Nop:
return result;
case CommandId::ProtectMemory:
result.memoryProt.pid = args[0];
result.memoryProt.address = args[1];
result.memoryProt.size = args[2];
result.memoryProt.prot = args[3];
return result;
case CommandId::CommandBuffer:
result.commandBuffer.pid = args[0];
result.commandBuffer.queue = args[1];
result.commandBuffer.address = args[2];
result.commandBuffer.size = args[3];
return result;
case CommandId::Flip:
result.flip.pid = args[0];
result.flip.bufferIndex = args[1];
result.flip.arg = args[2];
return result;
case CommandId::MapMemory:
result.mapMemory.pid = args[0];
result.mapMemory.memoryType = args[1];
result.mapMemory.dmemIndex = args[2];
result.mapMemory.address = args[3];
result.mapMemory.size = args[4];
result.mapMemory.prot = args[5];
result.mapMemory.offset = args[6];
return result;
case CommandId::MapProcess:
result.mapProcess.pid = args[0];
result.mapProcess.vmId = args[1];
return result;
case CommandId::UnmapProcess:
result.unmapProcess.pid = args[0];
return result;
case CommandId::RegisterBufferAttribute:
result.bufferAttribute.pid = args[0];
result.bufferAttribute.attrId = args[1];
result.bufferAttribute.submit = args[2];
result.bufferAttribute.canary = args[3];
result.bufferAttribute.pixelFormat = args[4];
result.bufferAttribute.tilingMode = args[5];
result.bufferAttribute.pitch = args[6];
result.bufferAttribute.width = args[7];
result.bufferAttribute.height = args[8];
return result;
case CommandId::RegisterBuffer:
result.buffer.pid = args[0];
result.buffer.canary = args[1];
result.buffer.index = args[2];
result.buffer.attrId = args[3];
result.buffer.address = args[4];
result.buffer.address2 = args[5];
return result;
}
__builtin_trap();
}
};
BridgeHeader *createShmCommandBuffer(const char *name);
BridgeHeader *openShmCommandBuffer(const char *name);
void destroyShmCommandBuffer(BridgeHeader *buffer);
void unlinkShm(const char *name);
} // namespace amdgpu::bridge
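Taken together, bridge.hpp defined a single-producer/single-consumer command ring in POSIX shared memory: the OS process pushed commands through BridgePusher while the GPU process drained them with BridgePuller. A sketch of how the two ends paired up before this merge (error handling omitted; the shm name matches the default from the deleted rpcsx-gpu main.cpp):

```cpp
#include <amdgpu/bridge/bridge.hpp>
#include <cstddef>

void osSide() {
  // Producer: create the shared ring and push commands into it.
  auto *header = amdgpu::bridge::createShmCommandBuffer("/rpcsx-gpu-cmds");
  amdgpu::bridge::BridgePusher pusher{header};
  pusher.sendMapProcess(/*pid=*/1, /*vmId=*/0);
  pusher.sendFlip(/*pid=*/1, /*bufferIndex=*/0, /*arg=*/0);
  pusher.wait(); // spin until the GPU process has drained the ring
}

void gpuSide() {
  // Consumer: open the same ring and pull pending commands.
  auto *header = amdgpu::bridge::openShmCommandBuffer("/rpcsx-gpu-cmds");
  amdgpu::bridge::BridgePuller puller{header};
  amdgpu::bridge::Command commands[32];
  std::size_t count = puller.pullCommands(commands, 32);
  (void)count; // dispatch commands[0..count) to the device ...
}
```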

hw/amdgpu/bridge/src/bridge.cpp

@@ -1,87 +0,0 @@
#include "bridge.hpp"
#include <fcntl.h>
#include <new>
#include <sys/mman.h>
#include <unistd.h>
static int gShmFd = -1;
static constexpr std::size_t kShmSize = sizeof(amdgpu::bridge::BridgeHeader) +
(sizeof(std::uint64_t) * 1024);
amdgpu::bridge::BridgeHeader *
amdgpu::bridge::createShmCommandBuffer(const char *name) {
if (gShmFd != -1) {
return nullptr;
}
// unlinkShm(name);
int fd = ::shm_open(name, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
if (fd == -1) {
return nullptr;
}
if (ftruncate(fd, kShmSize) < 0) {
::close(fd);
return nullptr;
}
void *memory =
::mmap(nullptr, kShmSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (memory == MAP_FAILED) {
::close(fd);
return nullptr;
}
gShmFd = fd;
auto result = new (memory) amdgpu::bridge::BridgeHeader;
std::memset(result, 0, sizeof(*result));
result->size =
(kShmSize - sizeof(amdgpu::bridge::BridgeHeader)) / sizeof(std::uint64_t);
return result;
}
amdgpu::bridge::BridgeHeader *
amdgpu::bridge::openShmCommandBuffer(const char *name) {
if (gShmFd != -1) {
return nullptr;
}
int fd = ::shm_open(name, O_RDWR, S_IRUSR | S_IWUSR);
if (fd == -1) {
return nullptr;
}
if (ftruncate(fd, kShmSize) < 0) {
::close(fd);
return nullptr;
}
void *memory =
::mmap(nullptr, kShmSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (memory == MAP_FAILED) {
::close(fd);
return nullptr;
}
gShmFd = fd;
return new (memory) amdgpu::bridge::BridgeHeader;
}
void amdgpu::bridge::destroyShmCommandBuffer(
amdgpu::bridge::BridgeHeader *buffer) {
if (gShmFd == -1) {
__builtin_trap();
}
buffer->~BridgeHeader();
::close(gShmFd);
gShmFd = -1;
::munmap(buffer, kShmSize);
}
void amdgpu::bridge::unlinkShm(const char *name) { ::shm_unlink(name); }


@@ -2,6 +2,7 @@
 #include "KernelAllocator.hpp"
 #include "evf.hpp"
 #include "ipmi.hpp"
+#include "orbis/note.hpp"
 #include "osem.hpp"
 #include "thread/types.hpp"
 #include "utils/IdMap.hpp"
@@ -174,9 +175,12 @@ public:
     return getUmtxChainIndexed(1, t, flags, ptr);
   }
 
+  Ref<EventEmitter> deviceEventEmitter;
   Ref<RcBase> shmDevice;
   Ref<RcBase> dmemDevice;
   Ref<RcBase> blockpoolDevice;
+  shared_mutex gpuDeviceMtx;
+  Ref<RcBase> gpuDevice;
   uint sdkVersion{};
   uint fwSdkVersion{};
   uint safeMode{};


@@ -1,4 +1,5 @@
 #pragma once
+#include <compare>
 
 namespace orbis {
 enum class ErrorCode : int;
@@ -18,5 +19,13 @@ public:
   [[nodiscard]] int value() const { return mValue < 0 ? -mValue : mValue; }
   [[nodiscard]] bool isError() const { return mValue < 0; }
+
+  [[nodiscard]] auto operator<=>(ErrorCode ec) const {
+    return static_cast<ErrorCode>(value()) <=> ec;
+  }
+
+  [[nodiscard]] auto operator<=>(SysResult other) const {
+    return value() <=> other.value();
+  }
 };
 } // namespace orbis


@@ -2,8 +2,8 @@
 #include "KernelAllocator.hpp"
 #include "orbis-config.hpp"
+#include "orbis/utils/Rc.hpp"
 #include "utils/SharedMutex.hpp"
-#include <mutex>
 #include <set>
 
 namespace orbis {
@@ -71,6 +71,7 @@ struct KEvent {
   ptr<void> udata;
 };
 
+struct EventEmitter;
 struct KQueue;
 struct KNote {
   shared_mutex mutex;
@@ -80,6 +81,7 @@ struct KNote {
   bool enabled = true;
   bool triggered = false;
   void *linked = nullptr; // TODO: use Ref<>
+  kvector<Ref<EventEmitter>> emitters;
 
   ~KNote();
 };
@@ -88,6 +90,8 @@ struct EventEmitter : orbis::RcBase {
   shared_mutex mutex;
   std::set<KNote *, std::less<>, kallocator<KNote *>> notes;
 
-  void emit(uint filter, uint fflags = 0, intptr_t data = 0);
+  void emit(sshort filter, uint fflags = 0, intptr_t data = 0);
+  void subscribe(KNote *note);
+  void unsubscribe(KNote *note);
 };
 } // namespace orbis


@@ -46,6 +46,7 @@ struct NamedMemoryRange {
 struct Process final {
   KernelContext *context = nullptr;
   pid_t pid = -1;
+  int gfxRing = 0;
   std::uint64_t hostPid = -1;
   sysentvec *sysent = nullptr;
   ProcessState state = ProcessState::NEW;


@@ -3,7 +3,7 @@
 namespace orbis {
 using lwpid_t = int32_t;
-using pid_t = int64_t;
+using pid_t = int32_t;
 using uid_t = uint32_t;
 using gid_t = uint32_t;


@@ -49,11 +49,11 @@ template <typename T> class Ref {
 public:
   Ref() = default;
-  Ref(std::nullptr_t) {}
+  Ref(std::nullptr_t) noexcept {}
 
   template <typename OT>
     requires(std::is_base_of_v<T, OT>)
-  Ref(OT *ref) : m_ref(ref) {
+  Ref(OT *ref) noexcept : m_ref(ref) {
     if (m_ref != nullptr) {
       ref->incRef();
     }
@@ -61,7 +61,7 @@ public:
 
   template <typename OT>
     requires(std::is_base_of_v<T, OT>)
-  Ref(const Ref<OT> &other) : m_ref(other.get()) {
+  Ref(const Ref<OT> &other) noexcept : m_ref(other.get()) {
     if (m_ref != nullptr) {
       m_ref->incRef();
     }
@@ -69,42 +69,42 @@ public:
 
   template <typename OT>
     requires(std::is_base_of_v<T, OT>)
-  Ref(Ref<OT> &&other) : m_ref(other.release()) {}
+  Ref(Ref<OT> &&other) noexcept : m_ref(other.release()) {}
 
-  Ref(const Ref &other) : m_ref(other.get()) {
+  Ref(const Ref &other) noexcept : m_ref(other.get()) {
     if (m_ref != nullptr) {
       m_ref->incRef();
     }
   }
 
-  Ref(Ref &&other) : m_ref(other.release()) {}
+  Ref(Ref &&other) noexcept : m_ref(other.release()) {}
 
   template <typename OT>
     requires(std::is_base_of_v<T, OT>)
-  Ref &operator=(Ref<OT> &&other) {
+  Ref &operator=(Ref<OT> &&other) noexcept {
     other.template cast<T>().swap(*this);
     return *this;
   }
 
   template <typename OT>
     requires(std::is_base_of_v<T, OT>)
-  Ref &operator=(OT *other) {
+  Ref &operator=(OT *other) noexcept {
     *this = Ref(other);
     return *this;
   }
 
   template <typename OT>
     requires(std::is_base_of_v<T, OT>)
-  Ref &operator=(const Ref<OT> &other) {
+  Ref &operator=(const Ref<OT> &other) noexcept {
     *this = Ref(other);
     return *this;
   }
 
-  Ref &operator=(const Ref &other) {
+  Ref &operator=(const Ref &other) noexcept {
     *this = Ref(other);
     return *this;
   }
 
-  Ref &operator=(Ref &&other) {
+  Ref &operator=(Ref &&other) noexcept {
     other.swap(*this);
     return *this;
   }
@@ -115,7 +115,7 @@ public:
     }
   }
 
-  void swap(Ref<T> &other) { std::swap(m_ref, other.m_ref); }
+  void swap(Ref<T> &other) noexcept { std::swap(m_ref, other.m_ref); }
   T *get() const { return m_ref; }
   T *release() { return std::exchange(m_ref, nullptr); }
   T *operator->() const { return m_ref; }
@@ -126,10 +126,17 @@ public:
   auto operator<=>(const Ref &other) const = default;
 
   template <typename OtherT> Ref<OtherT> cast() {
-    return Ref<OtherT>(dynamic_cast<OtherT *>(m_ref));
+    return dynamic_cast<OtherT *>(m_ref);
   }
 
   template <typename OtherT> Ref<OtherT> staticCast() {
-    return Ref<OtherT>(static_cast<OtherT *>(m_ref));
+    return static_cast<OtherT *>(m_ref);
+  }
+
+  template <typename OtherT> OtherT *rawCast() {
+    return dynamic_cast<OtherT *>(m_ref);
+  }
+  template <typename OtherT> OtherT *rawStaticCast() {
+    return static_cast<OtherT *>(m_ref);
   }
 };
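One practical effect of the noexcept qualifiers added throughout Ref: standard containers only move elements during reallocation when the move constructor is noexcept, and otherwise fall back to copying, which for Ref means extra incRef/decRef traffic. A self-contained illustration of that rule:

```cpp
#include <type_traits>
#include <vector>

struct Plain {
  Plain() = default;
  Plain(Plain &&) {}        // move may throw, as far as vector knows
  Plain(const Plain &) = default;
};

struct Safe {
  Safe() = default;
  Safe(Safe &&) noexcept {} // eligible for move-on-reallocation
  Safe(const Safe &) = default;
};

// std::vector<Plain> copies on reallocation; std::vector<Safe> moves.
static_assert(!std::is_nothrow_move_constructible_v<Plain>);
static_assert(std::is_nothrow_move_constructible_v<Safe>);

int main() {
  std::vector<Safe> v(4);
  v.reserve(v.capacity() + 1); // elements are moved, not copied
}
```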


@@ -9,6 +9,8 @@
 #include <unistd.h>
 
 static const std::uint64_t g_allocProtWord = 0xDEADBEAFBADCAFE1;
+static constexpr auto kHeapBaseAddress = 0x600'0000'0000;
+static constexpr auto kHeapSize = 0x2'0000'0000;
 
 namespace orbis {
 thread_local Thread *g_currentThread;
@@ -16,7 +18,7 @@ thread_local Thread *g_currentThread;
 KernelContext &g_context = *[]() -> KernelContext * {
   // Allocate global shared kernel memory
   // TODO: randomize for hardening and reduce size
-  auto ptr = mmap(reinterpret_cast<void *>(0x200'0000'0000), 0x2'0000'0000,
+  auto ptr = mmap(reinterpret_cast<void *>(kHeapBaseAddress), kHeapSize,
                   PROT_READ | PROT_WRITE,
                   MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
   if (ptr == MAP_FAILED)
@@ -166,15 +168,32 @@ void *KernelContext::kalloc(std::size_t size, std::size_t align) {
   align = std::max<std::size_t>(align, __STDCPP_DEFAULT_NEW_ALIGNMENT__);
   auto heap = reinterpret_cast<std::uintptr_t>(m_heap_next);
   heap = (heap + (align - 1)) & ~(align - 1);
+
+  if (heap + size > kHeapBaseAddress + kHeapSize) {
+    std::fprintf(stderr, "out of kernel memory");
+    std::abort();
+  }
+
+  // Check overflow
+  if (heap + size < heap) {
+    std::fprintf(stderr, "too big allocation");
+    std::abort();
+  }
+
   auto result = reinterpret_cast<void *>(heap);
   std::memcpy(std::bit_cast<std::byte *>(result) + size, &g_allocProtWord,
               sizeof(g_allocProtWord));
   m_heap_next = reinterpret_cast<void *>(heap + size + sizeof(g_allocProtWord));
 
-  // Check overflow
-  if (heap + size < heap)
-    std::abort();
-  if (heap + size > (uintptr_t)&g_context + 0x1'0000'0000)
-    std::abort();
+  if (true) {
+    heap = reinterpret_cast<std::uintptr_t>(m_heap_next);
+    align = std::min<std::size_t>(align, 4096);
+    heap = (heap + (align - 1)) & ~(align - 1);
+    size = 4096;
+    ::mmap(reinterpret_cast<void *>(heap), size, PROT_NONE, MAP_FIXED, -1, 0);
+    m_heap_next = reinterpret_cast<void *>(heap + size);
+  }
 
   return result;
 }
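The replacement turns the canary-only check into a hardware trap: the page right after each allocation is mapped PROT_NONE, so a stray write past the block faults immediately instead of silently corrupting the next allocation. A standalone sketch of the same guard-page trick with plain Linux mmap (names hypothetical):

```cpp
// guard_page_demo.cpp: one usable page followed by one inaccessible page.
#include <cstddef>
#include <cstdio>
#include <sys/mman.h>
#include <unistd.h>

int main() {
  const std::size_t page = static_cast<std::size_t>(sysconf(_SC_PAGESIZE));

  // Reserve two pages up front, then revoke all access on the second one.
  void *mem = mmap(nullptr, 2 * page, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (mem == MAP_FAILED)
    return 1;
  char *base = static_cast<char *>(mem);
  if (mprotect(base + page, page, PROT_NONE) != 0)
    return 1;

  base[page - 1] = 42; // last usable byte: fine
  std::puts("in-bounds write ok");
  // base[page] = 42;  // first byte of the guard page: would raise SIGSEGV

  return munmap(base, 2 * page);
}
```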


@@ -1,7 +1,13 @@
 #include "event.hpp"
 #include "thread/Process.hpp"
+
+#include <algorithm>
 
 orbis::KNote::~KNote() {
+  while (!emitters.empty()) {
+    emitters.back()->unsubscribe(this);
+  }
+
   if (linked == nullptr) {
     return;
   }
@@ -14,7 +20,7 @@ orbis::KNote::~KNote() {
   }
 }
 
-void orbis::EventEmitter::emit(uint filter, uint fflags, intptr_t data) {
+void orbis::EventEmitter::emit(sshort filter, uint fflags, intptr_t data) {
   std::lock_guard lock(mutex);
 
   for (auto note : notes) {
@@ -40,3 +46,28 @@ void orbis::EventEmitter::emit(uint filter, uint fflags, intptr_t data) {
     note->queue->cv.notify_all(note->queue->mtx);
   }
 }
+
+void orbis::EventEmitter::subscribe(KNote *note) {
+  std::lock_guard lock(mutex);
+  notes.insert(note);
+  note->emitters.emplace_back(this);
+}
+
+void orbis::EventEmitter::unsubscribe(KNote *note) {
+  std::lock_guard lock(mutex);
+  notes.erase(note);
+
+  auto it = std::ranges::find(note->emitters, this);
+  if (it == note->emitters.end()) {
+    return;
+  }
+
+  std::size_t index = it - note->emitters.begin();
+  auto lastEmitter = note->emitters.size() - 1;
+  if (index != lastEmitter) {
+    std::swap(note->emitters[index], note->emitters[lastEmitter]);
+  }
+
+  note->emitters.pop_back();
+}
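unsubscribe removes the emitter from the note's vector with a swap-and-pop: O(1) removal at the cost of element order, which the emitters list never relies on. The idiom in isolation (hypothetical values):

```cpp
#include <algorithm>
#include <cassert>
#include <vector>

// Erase the first occurrence of `value` in O(1) by swapping it to the back.
template <typename T> void swapAndPop(std::vector<T> &v, const T &value) {
  auto it = std::ranges::find(v, value);
  if (it == v.end())
    return;
  if (it != v.end() - 1)
    std::iter_swap(it, v.end() - 1);
  v.pop_back();
}

int main() {
  std::vector<int> v{1, 2, 3, 4};
  swapAndPop(v, 2);
  assert(v.size() == 3); // v is now {1, 4, 3}: order not preserved
}
```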


@@ -223,6 +223,10 @@ orbis::SysResult orbis::sysIpmiServerReceivePacket(Thread *thread,
     ptr<uint> unk;
   };
 
+  if (paramsSz != sizeof(IpmiServerReceivePacketParams)) {
+    return orbis::ErrorCode::INVAL;
+  }
+
   IpmiServerReceivePacketParams _params;
 
   ORBIS_RET_ON_ERROR(
@@ -265,9 +269,6 @@
     auto asyncMessage = (IpmiAsyncMessageHeader *)_packet.message.data();
     ORBIS_LOG_ERROR(__FUNCTION__, server->name, asyncMessage->methodId,
                     asyncMessage->numInData, asyncMessage->pid);
-    ORBIS_LOG_ERROR(__FUNCTION__, server->name,
-                    *(std::uint64_t *)(*(long *)server->eventHandler + 0x18));
   }
 
   if (_params.bufferSize < _packet.message.size()) {
@@ -380,11 +381,13 @@ orbis::SysResult orbis::sysIpmiSessionRespondSync(Thread *thread,
     clientTid = session->server->tidToClientTid.at(thread->tid);
   }
 
+  ORBIS_LOG_ERROR(__FUNCTION__, session->client->name, _params.errorCode);
+
   if (_params.errorCode != 0) {
     ORBIS_LOG_ERROR(__FUNCTION__, session->client->name, _params.errorCode);
     thread->where();
 
-    // HACK: completely broken audio audio support should not be visible
+    // HACK: completely broken audio support should not be visible
     if (session->client->name == "SceSysAudioSystemIpc" &&
         _params.errorCode == -1) {
       _params.errorCode = 0;
@@ -1268,6 +1271,10 @@ orbis::SysResult orbis::sysIpmiClientWaitEventFlag(Thread *thread,
   static_assert(sizeof(IpmiWaitEventFlagParam) == 0x28);
 
+  if (paramsSz != sizeof(IpmiWaitEventFlagParam)) {
+    return ErrorCode::INVAL;
+  }
+
   IpmiWaitEventFlagParam _params;
   ORBIS_RET_ON_ERROR(uread(_params, ptr<IpmiWaitEventFlagParam>(params)));


@@ -113,17 +113,15 @@ static SysResult keventChange(KQueue *kq, KEvent &change, Thread *thread) {
         nodeIt->file = fd;
 
         if (auto eventEmitter = fd->event) {
-          std::unique_lock lock(eventEmitter->mutex);
-          // if (change.filter == kEvFiltWrite) {
-          //   nodeIt->triggered = true;
-          //   kq->cv.notify_all(kq->mtx);
-          // }
+          eventEmitter->subscribe(&*nodeIt);
           nodeIt->triggered = true;
-          eventEmitter->notes.insert(&*nodeIt);
           kq->cv.notify_all(kq->mtx);
         } else if (note.file->hostFd < 0) {
          ORBIS_LOG_ERROR("Unimplemented event emitter", change.ident);
         }
+      } else if (change.filter == kEvFiltGraphicsCore ||
+                 change.filter == kEvFiltDisplay) {
+        g_context.deviceEventEmitter->subscribe(&*nodeIt);
       }
     }
   }
@@ -172,19 +170,14 @@ static SysResult keventChange(KQueue *kq, KEvent &change, Thread *thread) {
       nodeIt->triggered = true;
       kq->cv.notify_all(kq->mtx);
     }
-  } else if (change.filter == kEvFiltGraphicsCore) {
-    nodeIt->triggered = true;
-    kq->cv.notify_all(kq->mtx);
-
-    if (change.ident == 0x84) {
-      // clock change event
-      nodeIt->event.data |= 1000ull << 16; // clock
-    }
-  } else if (change.filter == kEvFiltDisplay) {
-    if (change.ident != 0x51000100000000 && change.ident != 0x63010100000000) {
-      nodeIt->triggered = true;
-      kq->cv.notify_all(kq->mtx);
-    }
+  } else if (change.filter == kEvFiltDisplay && change.ident >> 48 == 0x6301) {
+    nodeIt->triggered = true;
+    kq->cv.notify_all(kq->mtx);
+  } else if (change.filter == kEvFiltGraphicsCore && change.ident == 0x84) {
+    nodeIt->triggered = true;
+    nodeIt->event.data |= 1000ull << 16; // clock
+    kq->cv.notify_all(kq->mtx);
   }
 
   return {};


@@ -307,8 +307,8 @@ SysResult kern_sysctl(Thread *thread, ptr<sint> name, uint namelen,
   case sysctl_ctl::unspec: {
     switch (name[1]) {
     case 3: {
-      std::fprintf(stderr, " unspec - get name of '%s'\n",
-                   std::string((char *)new_, newlen).c_str());
+      // std::fprintf(stderr, " unspec - get name of '%s'\n",
+      //              std::string((char *)new_, newlen).c_str());
       auto searchName = std::string_view((char *)new_, newlen);
       auto *dest = (std::uint32_t *)old;
       std::uint32_t count = 0;


@@ -305,7 +305,7 @@ orbis::ErrorCode orbis::umtx_cv_wait(Thread *thread, ptr<ucond> cv,
     ORBIS_LOG_FATAL("umtx_cv_wait: UNKNOWN wflags", wflags);
     return ErrorCode::INVAL;
   }
-  if ((wflags & kCvWaitClockId) != 0 && ut + 1) {
+  if ((wflags & kCvWaitClockId) != 0 && ut + 1 && cv->clockid != 0) {
     ORBIS_LOG_WARNING("umtx_cv_wait: CLOCK_ID", wflags, cv->clockid);
     // std::abort();
     return ErrorCode::NOSYS;


@@ -1,437 +0,0 @@
#include "Device.hpp"
#include "FlipPipeline.hpp"
#include "Renderer.hpp"
#include "amdgpu/tiler.hpp"
#include "gnm/constants.hpp"
#include "gnm/pm4.hpp"
#include "rx/bits.hpp"
#include "rx/die.hpp"
#include "rx/mem.hpp"
#include "shader/spv.hpp"
#include "shaders/rdna-semantic-spirv.hpp"
#include "vk.hpp"
#include <fcntl.h>
#include <print>
#include <sys/mman.h>
using namespace amdgpu;
Device::Device() {
if (!shader::spv::validate(g_rdna_semantic_spirv)) {
shader::spv::dump(g_rdna_semantic_spirv, true);
rx::die("builtin semantic validation failed");
}
if (auto sem = shader::spv::deserialize(
shaderSemanticContext, g_rdna_semantic_spirv,
shaderSemanticContext.getUnknownLocation())) {
auto shaderSemantic = *sem;
shader::gcn::canonicalizeSemantic(shaderSemanticContext, shaderSemantic);
shader::gcn::collectSemanticModuleInfo(gcnSemanticModuleInfo,
shaderSemantic);
gcnSemantic = shader::gcn::collectSemanticInfo(gcnSemanticModuleInfo);
} else {
rx::die("failed to deserialize builtin semantics\n");
}
for (auto &pipe : graphicsPipes) {
pipe.device = this;
}
// for (auto &pipe : computePipes) {
// pipe.device = this;
// }
}
Device::~Device() {
for (auto fd : dmemFd) {
if (fd >= 0) {
::close(fd);
}
}
for (auto &[pid, info] : processInfo) {
if (info.vmFd >= 0) {
::close(info.vmFd);
}
}
}
void Device::mapProcess(std::int64_t pid, int vmId, const char *shmName) {
auto &process = processInfo[pid];
process.vmId = vmId;
auto memory = amdgpu::RemoteMemory{vmId};
std::string pidVmName = shmName;
pidVmName += '-';
pidVmName += std::to_string(pid);
int memoryFd = ::shm_open(pidVmName.c_str(), O_RDWR, S_IRUSR | S_IWUSR);
process.vmFd = memoryFd;
if (memoryFd < 0) {
std::println("failed to process {:x} shared memory", (int)pid);
std::abort();
}
for (auto [startAddress, endAddress, slot] : process.vmTable) {
auto gpuProt = slot.prot >> 4;
if (gpuProt == 0) {
continue;
}
auto devOffset = slot.offset + startAddress - slot.baseAddress;
int mapFd = memoryFd;
if (slot.memoryType >= 0) {
mapFd = dmemFd[slot.memoryType];
}
auto mmapResult =
::mmap(memory.getPointer(startAddress), endAddress - startAddress,
gpuProt, MAP_FIXED | MAP_SHARED, mapFd, devOffset);
if (mmapResult == MAP_FAILED) {
std::println("failed to map process {:x} memory, address {:x}-{:x}, type {:x}",
(int)pid, startAddress, endAddress, slot.memoryType);
std::abort();
}
handleProtectChange(vmId, startAddress, endAddress - startAddress,
slot.prot);
}
}
void Device::unmapProcess(std::int64_t pid) {
auto &process = processInfo[pid];
auto startAddress = static_cast<std::uint64_t>(process.vmId) << 40;
auto size = static_cast<std::uint64_t>(1) << 40;
rx::mem::reserve(reinterpret_cast<void *>(startAddress), size);
::close(process.vmFd);
process.vmFd = -1;
process.vmId = -1;
}
void Device::protectMemory(int pid, std::uint64_t address, std::uint64_t size,
int prot) {
auto &process = processInfo[pid];
auto vmSlotIt = process.vmTable.queryArea(address);
if (vmSlotIt == process.vmTable.end()) {
std::abort();
}
auto vmSlot = (*vmSlotIt).payload;
process.vmTable.map(address, address + size,
VmMapSlot{
.memoryType = vmSlot.memoryType,
.prot = static_cast<int>(prot),
.offset = vmSlot.offset,
.baseAddress = vmSlot.baseAddress,
});
if (process.vmId >= 0) {
auto memory = amdgpu::RemoteMemory{process.vmId};
rx::mem::protect(memory.getPointer(address), size, prot >> 4);
handleProtectChange(process.vmId, address, size, prot);
}
}
void Device::onCommandBuffer(std::int64_t pid, int cmdHeader,
std::uint64_t address, std::uint64_t size) {
auto &process = processInfo[pid];
if (process.vmId < 0) {
return;
}
auto memory = RemoteMemory{process.vmId};
auto op = rx::getBits(cmdHeader, 15, 8);
if (op == gnm::IT_INDIRECT_BUFFER_CNST) {
graphicsPipes[0].setCeQueue(Queue::createFromRange(
process.vmId, memory.getPointer<std::uint32_t>(address),
size / sizeof(std::uint32_t)));
} else if (op == gnm::IT_INDIRECT_BUFFER) {
graphicsPipes[0].setDeQueue(
Queue::createFromRange(process.vmId,
memory.getPointer<std::uint32_t>(address),
size / sizeof(std::uint32_t)),
1);
} else {
rx::die("unimplemented command buffer %x", cmdHeader);
}
}
bool Device::processPipes() {
bool allProcessed = true;
// for (auto &pipe : computePipes) {
// if (!pipe.processAllRings()) {
// allProcessed = false;
// }
// }
for (auto &pipe : graphicsPipes) {
if (!pipe.processAllRings()) {
allProcessed = false;
}
}
return allProcessed;
}
static void
transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,
VkImageLayout oldLayout, VkImageLayout newLayout,
const VkImageSubresourceRange &subresourceRange) {
VkImageMemoryBarrier barrier{};
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.oldLayout = oldLayout;
barrier.newLayout = newLayout;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = image;
barrier.subresourceRange = subresourceRange;
auto layoutToStageAccess = [](VkImageLayout layout)
-> std::pair<VkPipelineStageFlags, VkAccessFlags> {
switch (layout) {
case VK_IMAGE_LAYOUT_UNDEFINED:
case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
case VK_IMAGE_LAYOUT_GENERAL:
return {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0};
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT};
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT};
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
return {VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT};
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
return {VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT};
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
return {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT};
default:
std::abort();
}
};
auto [sourceStage, sourceAccess] = layoutToStageAccess(oldLayout);
auto [destinationStage, destinationAccess] = layoutToStageAccess(newLayout);
barrier.srcAccessMask = sourceAccess;
barrier.dstAccessMask = destinationAccess;
vkCmdPipelineBarrier(commandBuffer, sourceStage, destinationStage, 0, 0,
nullptr, 0, nullptr, 1, &barrier);
}
bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
VkImage swapchainImage, VkImageView swapchainImageView) {
auto &pipe = graphicsPipes[0];
auto &scheduler = pipe.scheduler;
auto &process = processInfo[pid];
if (process.vmId < 0) {
return false;
}
if (bufferIndex < 0) {
bridge->flipBuffer[process.vmId] = bufferIndex;
bridge->flipArg[process.vmId] = arg;
bridge->flipCount[process.vmId] = bridge->flipCount[process.vmId] + 1;
return false;
}
auto &buffer = process.buffers[bufferIndex];
auto &bufferAttr = process.bufferAttributes[buffer.attrId];
gnm::DataFormat dfmt;
gnm::NumericFormat nfmt;
auto flipType = FlipType::Alt;
switch (bufferAttr.pixelFormat) {
case 0x80000000:
dfmt = gnm::kDataFormat8_8_8_8;
nfmt = gnm::kNumericFormatSrgb;
break;
case 0x80002200:
dfmt = gnm::kDataFormat8_8_8_8;
nfmt = gnm::kNumericFormatSrgb;
flipType = FlipType::Std;
break;
case 0x88740000:
case 0x88060000:
dfmt = gnm::kDataFormat2_10_10_10;
nfmt = gnm::kNumericFormatSNorm;
break;
case 0xc1060000:
dfmt = gnm::kDataFormat16_16_16_16;
nfmt = gnm::kNumericFormatFloat;
break;
default:
rx::die("unimplemented color buffer format %x", bufferAttr.pixelFormat);
}
// std::printf("displaying buffer %lx\n", buffer.address);
auto cacheTag = getCacheTag(process.vmId, scheduler);
auto &sched = cacheTag.getScheduler();
transitionImageLayout(sched.getCommandBuffer(), swapchainImage,
VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.levelCount = 1,
.layerCount = 1,
});
amdgpu::flip(
cacheTag, vk::context->swapchainExtent, buffer.address,
swapchainImageView, {bufferAttr.width, bufferAttr.height}, flipType,
getDefaultTileModes()[bufferAttr.tilingMode == 1 ? 10 : 8], dfmt, nfmt);
transitionImageLayout(sched.getCommandBuffer(), swapchainImage,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.levelCount = 1,
.layerCount = 1,
});
sched.submit();
auto submitCompleteTask = scheduler.createExternalSubmit();
{
VkSemaphoreSubmitInfo waitSemSubmitInfos[] = {
{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = vk::context->presentCompleteSemaphore,
.value = 1,
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
},
{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = scheduler.getSemaphoreHandle(),
.value = submitCompleteTask - 1,
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
},
};
VkSemaphoreSubmitInfo signalSemSubmitInfos[] = {
{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = vk::context->renderCompleteSemaphore,
.value = 1,
.stageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
},
{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = scheduler.getSemaphoreHandle(),
.value = submitCompleteTask,
.stageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
},
};
VkSubmitInfo2 submitInfo{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
.waitSemaphoreInfoCount = 2,
.pWaitSemaphoreInfos = waitSemSubmitInfos,
.signalSemaphoreInfoCount = 2,
.pSignalSemaphoreInfos = signalSemSubmitInfos,
};
vkQueueSubmit2(vk::context->presentQueue, 1, &submitInfo, VK_NULL_HANDLE);
}
scheduler.then([=, this, cacheTag = std::move(cacheTag)] {
bridge->flipBuffer[process.vmId] = bufferIndex;
bridge->flipArg[process.vmId] = arg;
bridge->flipCount[process.vmId] = bridge->flipCount[process.vmId] + 1;
auto mem = RemoteMemory{process.vmId};
auto bufferInUse =
mem.getPointer<std::uint64_t>(bridge->bufferInUseAddress[process.vmId]);
if (bufferInUse != nullptr) {
bufferInUse[bufferIndex] = 0;
}
});
return true;
}
void Device::mapMemory(std::int64_t pid, std::uint64_t address,
std::uint64_t size, int memoryType, int dmemIndex,
int prot, std::int64_t offset) {
auto &process = processInfo[pid];
process.vmTable.map(address, address + size,
VmMapSlot{
.memoryType = memoryType >= 0 ? dmemIndex : -1,
.prot = prot,
.offset = offset,
.baseAddress = address,
});
if (process.vmId < 0) {
return;
}
auto memory = amdgpu::RemoteMemory{process.vmId};
int mapFd = process.vmFd;
if (memoryType >= 0) {
mapFd = dmemFd[dmemIndex];
}
auto mmapResult = ::mmap(memory.getPointer(address), size, prot >> 4,
MAP_FIXED | MAP_SHARED, mapFd, offset);
if (mmapResult == MAP_FAILED) {
rx::die("failed to map process %x memory, address %lx-%lx, type %x",
(int)pid, address, address + size, memoryType);
}
handleProtectChange(process.vmId, address, size, prot);
}
void Device::registerBuffer(std::int64_t pid, bridge::CmdBuffer buffer) {
auto &process = processInfo[pid];
if (buffer.attrId >= 10 || buffer.index >= 10) {
rx::die("out of buffers %u, %u", buffer.attrId, buffer.index);
}
process.buffers[buffer.index] = buffer;
}
void Device::registerBufferAttribute(std::int64_t pid,
bridge::CmdBufferAttribute attr) {
auto &process = processInfo[pid];
if (attr.attrId >= 10) {
rx::die("out of buffer attributes %u", attr.attrId);
}
process.bufferAttributes[attr.attrId] = attr;
}
void Device::handleProtectChange(int vmId, std::uint64_t address,
std::uint64_t size, int prot) {}


@@ -1,98 +0,0 @@
#pragma once
#include "Cache.hpp"
#include "FlipPipeline.hpp"
#include "Pipe.hpp"
#include "amdgpu/bridge/bridge.hpp"
#include "amdgpu/tiler_vulkan.hpp"
#include "rx/MemoryTable.hpp"
#include "shader/SemanticInfo.hpp"
#include "shader/SpvConverter.hpp"
#include "shader/gcn.hpp"
#include <unordered_map>
#include <vulkan/vulkan_core.h>
namespace amdgpu {
struct VmMapSlot {
int memoryType;
int prot;
std::int64_t offset;
std::uint64_t baseAddress;
auto operator<=>(const VmMapSlot &) const = default;
};
struct ProcessInfo {
int vmId = -1;
int vmFd = -1;
amdgpu::bridge::CmdBufferAttribute bufferAttributes[10];
amdgpu::bridge::CmdBuffer buffers[10];
rx::MemoryTableWithPayload<VmMapSlot> vmTable;
};
struct RemoteMemory {
int vmId;
template <typename T = void> T *getPointer(std::uint64_t address) const {
return address ? reinterpret_cast<T *>(
static_cast<std::uint64_t>(vmId) << 40 | address)
: nullptr;
}
};
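RemoteMemory assigns each guest VM a 1 TiB slice of the host address space starting at vmId << 40; unmapProcess re-reserves the same slice on teardown. A quick check of the address arithmetic (standalone sketch):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // Same computation as RemoteMemory::getPointer.
  int vmId = 2;
  std::uint64_t guestAddress = 0x1000;
  std::uint64_t host = static_cast<std::uint64_t>(vmId) << 40 | guestAddress;
  assert(host == 0x200'0000'1000); // vm slice base + guest offset
}
```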
struct Device {
static constexpr auto kComputePipeCount = 8;
static constexpr auto kGfxPipeCount = 2;
shader::SemanticInfo gcnSemantic;
shader::spv::Context shaderSemanticContext;
shader::gcn::SemanticModuleInfo gcnSemanticModuleInfo;
amdgpu::bridge::BridgeHeader *bridge;
Registers::Config config;
GpuTiler tiler;
GraphicsPipe graphicsPipes[kGfxPipeCount]{0, 1};
// ComputePipe computePipes[kComputePipeCount]{0, 1, 2, 3, 4, 5, 6, 7};
FlipPipeline flipPipeline;
int dmemFd[3] = {-1, -1, -1};
std::unordered_map<std::int64_t, ProcessInfo> processInfo;
Cache caches[6]{
{this, 0}, {this, 1}, {this, 2}, {this, 3}, {this, 4}, {this, 5},
};
Device();
~Device();
Cache::Tag getCacheTag(int vmId, Scheduler &scheduler) {
return caches[vmId].createTag(scheduler);
}
Cache::GraphicsTag getGraphicsTag(int vmId, Scheduler &scheduler) {
return caches[vmId].createGraphicsTag(scheduler);
}
Cache::ComputeTag getComputeTag(int vmId, Scheduler &scheduler) {
return caches[vmId].createComputeTag(scheduler);
}
void mapProcess(std::int64_t pid, int vmId, const char *shmName);
void unmapProcess(std::int64_t pid);
void protectMemory(int pid, std::uint64_t address, std::uint64_t size,
int prot);
void onCommandBuffer(std::int64_t pid, int cmdHeader, std::uint64_t address,
std::uint64_t size);
bool processPipes();
bool flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
VkImage swapchainImage, VkImageView swapchainImageView);
void mapMemory(std::int64_t pid, std::uint64_t address, std::uint64_t size,
int memoryType, int dmemIndex, int prot, std::int64_t offset);
void registerBuffer(std::int64_t pid, bridge::CmdBuffer buffer);
void registerBufferAttribute(std::int64_t pid,
bridge::CmdBufferAttribute attr);
void handleProtectChange(int vmId, std::uint64_t address, std::uint64_t size,
int prot);
};
} // namespace amdgpu


@@ -1,646 +0,0 @@
#include "vk.hpp"
#include <amdgpu/bridge/bridge.hpp>
#include <print>
#include <rx/MemoryTable.hpp>
#include <rx/atScopeExit.hpp>
#include <rx/die.hpp>
#include <rx/mem.hpp>
#include <shader/gcn.hpp>
#include <shader/glsl.hpp>
#include <shader/spv.hpp>
#include <vulkan/vulkan.h>
#include <chrono>
#include <csignal>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <filesystem>
#include <print>
#include <span>
#include <thread>
#include <unordered_map>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <GLFW/glfw3.h>
#include <gnm/pm4.hpp>
#include <vulkan/vulkan_core.h>
#include <amdgpu/tiler.hpp>
#include <shaders/rdna-semantic-spirv.hpp>
#include "Device.hpp"
void transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,
VkImageLayout oldLayout, VkImageLayout newLayout,
const VkImageSubresourceRange &subresourceRange) {
VkImageMemoryBarrier barrier{};
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.oldLayout = oldLayout;
barrier.newLayout = newLayout;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = image;
barrier.subresourceRange = subresourceRange;
auto layoutToStageAccess = [](VkImageLayout layout)
-> std::pair<VkPipelineStageFlags, VkAccessFlags> {
switch (layout) {
case VK_IMAGE_LAYOUT_UNDEFINED:
case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
case VK_IMAGE_LAYOUT_GENERAL:
return {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0};
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT};
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT};
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
return {VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT};
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
return {VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT};
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
return {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT};
default:
std::abort();
}
};
auto [sourceStage, sourceAccess] = layoutToStageAccess(oldLayout);
auto [destinationStage, destinationAccess] = layoutToStageAccess(newLayout);
barrier.srcAccessMask = sourceAccess;
barrier.dstAccessMask = destinationAccess;
vkCmdPipelineBarrier(commandBuffer, sourceStage, destinationStage, 0, 0,
nullptr, 0, nullptr, 1, &barrier);
}
void transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,
VkImageAspectFlags aspectFlags,
VkImageLayout oldLayout, VkImageLayout newLayout) {
transitionImageLayout(commandBuffer, image, oldLayout, newLayout,
VkImageSubresourceRange{
.aspectMask = aspectFlags,
.levelCount = 1,
.layerCount = 1,
});
}
static void usage(std::FILE *out, const char *argv0) {
std::println(out, "usage: {} [options...]", argv0);
std::println(out, " options:");
std::println(out, " --version, -v - print version");
std::println(out,
" --cmd-bridge <name> - setup command queue bridge name");
std::println(out, " --shm <name> - setup shared memory name");
std::println(
out,
" --gpu <index> - specify physical gpu index to use, default is 0");
std::println(out,
" --presenter <presenter mode> - set flip engine target");
std::println(out, " --validate - enable validation layers");
std::println(out, " -h, --help - show this message");
std::println(out, "");
std::println(out, " presenter mode:");
std::println(out, " window - create and use native window (default)");
}
static VKAPI_ATTR VkBool32 VKAPI_CALL debugUtilsMessageCallback(
VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
VkDebugUtilsMessageTypeFlagsEXT messageType,
const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData,
void *pUserData) {
if (pCallbackData->pMessage) {
std::println("{}", pCallbackData->pMessage);
}
return VK_FALSE;
}
int main(int argc, const char *argv[]) {
const char *cmdBridgeName = "/rpcsx-gpu-cmds";
const char *shmName = "/rpcsx-os-memory";
unsigned long gpuIndex = 0;
// auto presenter = PresenterMode::Window;
bool enableValidation = false;
for (int i = 1; i < argc; ++i) {
if (argv[i] == std::string_view("--cmd-bridge")) {
if (argc <= i + 1) {
usage(stderr, argv[0]);
return 1;
}
cmdBridgeName = argv[++i];
continue;
}
if (argv[i] == std::string_view("--shm")) {
if (argc <= i + 1) {
usage(stderr, argv[0]);
return 1;
}
shmName = argv[++i];
continue;
}
if (argv[i] == std::string_view("--presenter")) {
if (argc <= i + 1) {
usage(stderr, argv[0]);
return 1;
}
auto presenterText = std::string_view(argv[++i]);
if (presenterText == "window") {
// presenter = PresenterMode::Window;
} else {
usage(stderr, argv[0]);
return 1;
}
continue;
}
if (argv[i] == std::string_view("--gpu")) {
if (argc <= i + 1) {
usage(stderr, argv[0]);
return 1;
}
char *endPtr = nullptr;
gpuIndex = std::strtoul(argv[++i], &endPtr, 10);
if (endPtr == nullptr || *endPtr != '\0') {
usage(stderr, argv[0]);
return 1;
}
continue;
}
if (argv[i] == std::string_view("--validate")) {
enableValidation = true;
continue;
}
usage(stderr, argv[0]);
return 1;
}
if (!rx::mem::reserve((void *)0x40000, 0x60000000000 - 0x40000)) {
std::fprintf(stderr, "failed to reserve virtual memory\n");
return 1;
}
auto bridge = amdgpu::bridge::openShmCommandBuffer(cmdBridgeName);
if (bridge == nullptr) {
bridge = amdgpu::bridge::createShmCommandBuffer(cmdBridgeName);
}
if (bridge->pullerPid > 0 && ::kill(bridge->pullerPid, 0) == 0) {
// another instance of rpcsx-gpu on the same bridge, kill self after that
std::fprintf(stderr, "Another instance already exists\n");
return 1;
}
bridge->pullerPid = ::getpid();
int dmemFd[3];
for (std::size_t i = 0; i < std::size(dmemFd); ++i) {
auto path = "/dev/shm/rpcsx-dmem-" + std::to_string(i);
if (!std::filesystem::exists(path)) {
std::printf("Waiting for dmem %zu\n", i);
while (!std::filesystem::exists(path)) {
std::this_thread::sleep_for(std::chrono::milliseconds(300));
}
}
dmemFd[i] = ::shm_open(("/rpcsx-dmem-" + std::to_string(i)).c_str(), O_RDWR,
S_IRUSR | S_IWUSR);
if (dmemFd[i] < 0) {
std::printf("failed to open dmem shared memory %zu\n", i);
return 1;
}
}
glfwInit();
glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API);
auto window = glfwCreateWindow(1920, 1080, "RPCSX", nullptr, nullptr);
rx::atScopeExit _{[window] { glfwDestroyWindow(window); }};
const char **glfwExtensions;
uint32_t glfwExtensionCount = 0;
glfwExtensions = glfwGetRequiredInstanceExtensions(&glfwExtensionCount);
std::vector<const char *> requiredExtensions(
glfwExtensions, glfwExtensions + glfwExtensionCount);
std::vector<const char *> optionalLayers;
if (enableValidation) {
optionalLayers.push_back("VK_LAYER_KHRONOS_validation");
requiredExtensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
auto vkContext =
vk::Context::create({}, optionalLayers, requiredExtensions, {});
vk::context = &vkContext;
VkDebugUtilsMessengerEXT debugMessenger = VK_NULL_HANDLE;
if (enableValidation) {
VkDebugUtilsMessengerCreateInfoEXT debugUtilsMessengerCreateInfo{
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
.messageType =
VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_TYPE_DEVICE_ADDRESS_BINDING_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT,
.pfnUserCallback = debugUtilsMessageCallback,
};
VK_VERIFY(vk::CreateDebugUtilsMessengerEXT(
vkContext.instance, &debugUtilsMessengerCreateInfo,
vk::context->allocator, &debugMessenger));
}
rx::atScopeExit _debugMessenger{[=] {
if (debugMessenger != VK_NULL_HANDLE) {
vk::DestroyDebugUtilsMessengerEXT(vk::context->instance, debugMessenger,
vk::context->allocator);
}
}};
VkSurfaceKHR vkSurface;
glfwCreateWindowSurface(vkContext.instance, window, nullptr, &vkSurface);
vkContext.createDevice(vkSurface, gpuIndex,
{
// VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME,
// VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME,
// VK_EXT_INLINE_UNIFORM_BLOCK_EXTENSION_NAME,
// VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME,
// VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,
// VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
VK_EXT_SEPARATE_STENCIL_USAGE_EXTENSION_NAME,
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
VK_EXT_SHADER_OBJECT_EXTENSION_NAME,
VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,
VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME,
},
{
VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME,
VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME,
});
auto getTotalMemorySize = [&](int memoryType) -> VkDeviceSize {
auto deviceLocalMemoryType =
vkContext.findPhysicalMemoryTypeIndex(~0, memoryType);
if (deviceLocalMemoryType < 0) {
return 0;
}
auto heapIndex =
vkContext.physicalMemoryProperties.memoryTypes[deviceLocalMemoryType]
.heapIndex;
return vkContext.physicalMemoryProperties.memoryHeaps[heapIndex].size;
};
auto localMemoryTotalSize =
getTotalMemorySize(VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
auto hostVisibleMemoryTotalSize =
getTotalMemorySize(VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
vk::getHostVisibleMemory().initHostVisible(
std::min(hostVisibleMemoryTotalSize / 2, 1ul * 1024 * 1024 * 1024));
vk::getDeviceLocalMemory().initDeviceLocal(
std::min(localMemoryTotalSize / 4, 4ul * 1024 * 1024 * 1024));
auto commandPool =
vk::CommandPool::Create(vkContext.presentQueueFamily,
VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT);
vkContext.createSwapchain();
amdgpu::bridge::BridgePuller bridgePuller{bridge};
amdgpu::bridge::Command commandsBuffer[1];
amdgpu::Device device;
device.bridge = bridge;
for (int i = 0; i < std::size(device.dmemFd); ++i) {
device.dmemFd[i] = dmemFd[i];
}
uint32_t imageIndex = 0;
bool isImageAcquired = false;
uint32_t gpIndex = -1;
GLFWgamepadstate gpState;
rx::atScopeExit __{[] {
vk::getHostVisibleMemory().free();
vk::getDeviceLocalMemory().free();
}};
while (!glfwWindowShouldClose(window)) {
glfwPollEvents();
while (true) {
bool allProcessed = false;
for (int i = 0; i < 1000; ++i) {
if (device.processPipes()) {
allProcessed = true;
break;
}
}
if (allProcessed) {
break;
}
glfwPollEvents();
if (glfwWindowShouldClose(window)) {
break;
}
}
std::size_t pulledCount =
bridgePuller.pullCommands(commandsBuffer, std::size(commandsBuffer));
if (gpIndex > GLFW_JOYSTICK_LAST) {
for (int i = 0; i <= GLFW_JOYSTICK_LAST; ++i) {
if (glfwJoystickIsGamepad(i) == GLFW_TRUE) {
std::print("Gamepad \"{}\" activated", glfwGetGamepadName(i));
gpIndex = i;
break;
}
}
} else if (gpIndex <= GLFW_JOYSTICK_LAST) {
if (!glfwJoystickIsGamepad(gpIndex)) {
gpIndex = -1;
}
}
if (gpIndex <= GLFW_JOYSTICK_LAST) {
if (glfwGetGamepadState(gpIndex, &gpState) == GLFW_TRUE) {
bridge->kbPadState.leftStickX =
gpState.axes[GLFW_GAMEPAD_AXIS_LEFT_X] * 127.5f + 127.5f;
bridge->kbPadState.leftStickY =
gpState.axes[GLFW_GAMEPAD_AXIS_LEFT_Y] * 127.5f + 127.5f;
bridge->kbPadState.rightStickX =
gpState.axes[GLFW_GAMEPAD_AXIS_RIGHT_X] * 127.5f + 127.5f;
bridge->kbPadState.rightStickY =
gpState.axes[GLFW_GAMEPAD_AXIS_RIGHT_Y] * 127.5f + 127.5f;
bridge->kbPadState.l2 =
(gpState.axes[GLFW_GAMEPAD_AXIS_LEFT_TRIGGER] + 1.0f) * 127.5f;
bridge->kbPadState.r2 =
(gpState.axes[GLFW_GAMEPAD_AXIS_RIGHT_TRIGGER] + 1.0f) * 127.5f;
bridge->kbPadState.buttons = 0;
if (bridge->kbPadState.l2 == 0xFF) {
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnL2;
}
if (bridge->kbPadState.r2 == 0xFF) {
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnR2;
}
static const uint32_t gpmap[GLFW_GAMEPAD_BUTTON_LAST + 1] = {
[GLFW_GAMEPAD_BUTTON_A] = amdgpu::bridge::kPadBtnCross,
[GLFW_GAMEPAD_BUTTON_B] = amdgpu::bridge::kPadBtnCircle,
[GLFW_GAMEPAD_BUTTON_X] = amdgpu::bridge::kPadBtnSquare,
[GLFW_GAMEPAD_BUTTON_Y] = amdgpu::bridge::kPadBtnTriangle,
[GLFW_GAMEPAD_BUTTON_LEFT_BUMPER] = amdgpu::bridge::kPadBtnL1,
[GLFW_GAMEPAD_BUTTON_RIGHT_BUMPER] = amdgpu::bridge::kPadBtnR1,
[GLFW_GAMEPAD_BUTTON_BACK] = 0,
[GLFW_GAMEPAD_BUTTON_START] = amdgpu::bridge::kPadBtnOptions,
[GLFW_GAMEPAD_BUTTON_GUIDE] = 0,
[GLFW_GAMEPAD_BUTTON_LEFT_THUMB] = amdgpu::bridge::kPadBtnL3,
[GLFW_GAMEPAD_BUTTON_RIGHT_THUMB] = amdgpu::bridge::kPadBtnR3,
[GLFW_GAMEPAD_BUTTON_DPAD_UP] = amdgpu::bridge::kPadBtnUp,
[GLFW_GAMEPAD_BUTTON_DPAD_RIGHT] = amdgpu::bridge::kPadBtnRight,
[GLFW_GAMEPAD_BUTTON_DPAD_DOWN] = amdgpu::bridge::kPadBtnDown,
[GLFW_GAMEPAD_BUTTON_DPAD_LEFT] = amdgpu::bridge::kPadBtnLeft};
for (int i = 0; i <= GLFW_GAMEPAD_BUTTON_LAST; ++i) {
if (gpState.buttons[i] == GLFW_PRESS) {
bridge->kbPadState.buttons |= gpmap[i];
}
}
}
} else {
bridge->kbPadState.leftStickX = 0x80;
bridge->kbPadState.leftStickY = 0x80;
bridge->kbPadState.rightStickX = 0x80;
bridge->kbPadState.rightStickY = 0x80;
bridge->kbPadState.buttons = 0;
if (glfwGetKey(window, GLFW_KEY_A) == GLFW_PRESS) {
bridge->kbPadState.leftStickX = 0;
} else if (glfwGetKey(window, GLFW_KEY_D) == GLFW_PRESS) {
bridge->kbPadState.leftStickX = 0xff;
}
if (glfwGetKey(window, GLFW_KEY_W) == GLFW_PRESS) {
bridge->kbPadState.leftStickY = 0;
} else if (glfwGetKey(window, GLFW_KEY_S) == GLFW_PRESS) {
bridge->kbPadState.leftStickY = 0xff;
}
if (glfwGetKey(window, GLFW_KEY_O) == GLFW_PRESS) {
bridge->kbPadState.rightStickX = 0;
} else if (glfwGetKey(window, GLFW_KEY_L) == GLFW_PRESS) {
bridge->kbPadState.rightStickX = 0xff;
}
if (glfwGetKey(window, GLFW_KEY_K) == GLFW_PRESS) {
bridge->kbPadState.rightStickY = 0;
} else if (glfwGetKey(window, GLFW_KEY_SEMICOLON) == GLFW_PRESS) {
bridge->kbPadState.rightStickY = 0xff;
}
if (glfwGetKey(window, GLFW_KEY_UP) == GLFW_PRESS) {
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnUp;
}
if (glfwGetKey(window, GLFW_KEY_DOWN) == GLFW_PRESS) {
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnDown;
}
if (glfwGetKey(window, GLFW_KEY_LEFT) == GLFW_PRESS) {
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnLeft;
}
if (glfwGetKey(window, GLFW_KEY_RIGHT) == GLFW_PRESS) {
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnRight;
}
if (glfwGetKey(window, GLFW_KEY_Z) == GLFW_PRESS) {
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnSquare;
}
if (glfwGetKey(window, GLFW_KEY_X) == GLFW_PRESS) {
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnCross;
}
if (glfwGetKey(window, GLFW_KEY_C) == GLFW_PRESS) {
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnCircle;
}
if (glfwGetKey(window, GLFW_KEY_V) == GLFW_PRESS) {
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnTriangle;
}
if (glfwGetKey(window, GLFW_KEY_Q) == GLFW_PRESS) {
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnL1;
}
if (glfwGetKey(window, GLFW_KEY_E) == GLFW_PRESS) {
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnL2;
bridge->kbPadState.l2 = 0xff;
}
if (glfwGetKey(window, GLFW_KEY_F) == GLFW_PRESS) {
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnL3;
}
if (glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_PRESS) {
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnPs;
}
if (glfwGetKey(window, GLFW_KEY_I) == GLFW_PRESS) {
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnR1;
}
if (glfwGetKey(window, GLFW_KEY_P) == GLFW_PRESS) {
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnR2;
bridge->kbPadState.r2 = 0xff;
}
if (glfwGetKey(window, GLFW_KEY_APOSTROPHE) == GLFW_PRESS) {
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnR3;
}
if (glfwGetKey(window, GLFW_KEY_ENTER) == GLFW_PRESS) {
bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnOptions;
}
}
bridge->kbPadState.timestamp =
std::chrono::high_resolution_clock::now().time_since_epoch().count();
if (pulledCount == 0) {
std::this_thread::sleep_for(std::chrono::microseconds(1));
continue;
}
for (auto cmd : std::span(commandsBuffer, pulledCount)) {
switch (cmd.id) {
case amdgpu::bridge::CommandId::ProtectMemory: {
device.protectMemory(cmd.memoryProt.pid, cmd.memoryProt.address,
cmd.memoryProt.size, cmd.memoryProt.prot);
break;
}
case amdgpu::bridge::CommandId::CommandBuffer: {
device.onCommandBuffer(cmd.commandBuffer.pid, cmd.commandBuffer.queue,
cmd.commandBuffer.address,
cmd.commandBuffer.size);
break;
}
case amdgpu::bridge::CommandId::Flip: {
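// Acquire a swapchain image on demand; VK_ERROR_OUT_OF_DATE_KHR means the
// swapchain must be recreated (e.g. after a resize) before retrying.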
if (!isImageAcquired) {
while (true) {
auto acquireNextImageResult = vkAcquireNextImageKHR(
vkContext.device, vkContext.swapchain, UINT64_MAX,
vkContext.presentCompleteSemaphore, VK_NULL_HANDLE,
&imageIndex);
if (acquireNextImageResult == VK_ERROR_OUT_OF_DATE_KHR) {
vkContext.recreateSwapchain();
continue;
}
if (acquireNextImageResult != VK_SUBOPTIMAL_KHR) {
VK_VERIFY(acquireNextImageResult);
}
break;
}
}
if (!device.flip(cmd.flip.pid, cmd.flip.bufferIndex, cmd.flip.arg,
vkContext.swapchainImages[imageIndex],
vkContext.swapchainImageViews[imageIndex])) {
isImageAcquired = true;
break;
}
VkPresentInfoKHR presentInfo{
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &vkContext.renderCompleteSemaphore,
.swapchainCount = 1,
.pSwapchains = &vkContext.swapchain,
.pImageIndices = &imageIndex,
};
auto vkQueuePresentResult =
vkQueuePresentKHR(vkContext.presentQueue, &presentInfo);
isImageAcquired = false;
if (vkQueuePresentResult == VK_ERROR_OUT_OF_DATE_KHR ||
vkQueuePresentResult == VK_SUBOPTIMAL_KHR) {
vkContext.recreateSwapchain();
} else {
VK_VERIFY(vkQueuePresentResult);
}
break;
}
case amdgpu::bridge::CommandId::MapProcess:
device.mapProcess(cmd.mapProcess.pid, cmd.mapProcess.vmId, shmName);
break;
case amdgpu::bridge::CommandId::UnmapProcess:
device.unmapProcess(cmd.mapProcess.pid);
break;
case amdgpu::bridge::CommandId::MapMemory:
device.mapMemory(cmd.mapMemory.pid, cmd.mapMemory.address,
cmd.mapMemory.size, cmd.mapMemory.memoryType,
cmd.mapMemory.dmemIndex, cmd.mapMemory.prot,
cmd.mapMemory.offset);
break;
case amdgpu::bridge::CommandId::RegisterBuffer:
device.registerBuffer(cmd.buffer.pid, cmd.buffer);
break;
case amdgpu::bridge::CommandId::RegisterBufferAttribute:
device.registerBufferAttribute(cmd.bufferAttribute.pid,
cmd.bufferAttribute);
break;
default:
rx::die("Unexpected command id %u\n", (unsigned)cmd.id);
}
}
}
vkDeviceWaitIdle(vk::context->device);
}

3
rpcsx-os/bridge.cpp

@@ -1,3 +0,0 @@
-#include "bridge.hpp"
-
-amdgpu::bridge::BridgePusher rx::bridge;

7
rpcsx-os/bridge.hpp

@@ -1,7 +0,0 @@
-#pragma once
-
-#include <amdgpu/bridge/bridge.hpp>
-
-namespace rx {
-extern amdgpu::bridge::BridgePusher bridge;
-}

File diff suppressed because it is too large

@@ -1,7 +1,10 @@
 #include "AudioOut.hpp"
+#include "rx/mem.hpp"
+#include "rx/watchdog.hpp"
 #include <atomic>
 #include <chrono>
 #include <fcntl.h>
+#include <format>
 #include <mutex>
 #include <orbis/evf.hpp>
 #include <orbis/utils/Logs.hpp>
@@ -28,21 +31,23 @@ AudioOut::~AudioOut() {
 void AudioOut::start() {
   std::lock_guard lock(thrMtx);
-  threads.push_back(std::thread(
-      [this, channelInfo = channelInfo] { channelEntry(channelInfo); }));
+  threads.emplace_back(
+      [this, channelInfo = channelInfo] { channelEntry(channelInfo); });
 }

 void AudioOut::channelEntry(AudioOutChannelInfo info) {
-  char control_shm_name[32];
-  char audio_shm_name[32];
+  char control_shm_name[128];
+  char audio_shm_name[128];

-  std::snprintf(control_shm_name, sizeof(control_shm_name), "/rpcsx-shm_%d_C",
-                info.idControl);
-  std::snprintf(audio_shm_name, sizeof(audio_shm_name), "/rpcsx-shm_%d_%d_A",
-                info.channel, info.port);
+  std::format_to(
+      control_shm_name, "{}",
+      rx::getShmGuestPath(std::format("shm_{}_C", info.idControl)).string());
+  std::format_to(
+      audio_shm_name, "{}",
+      rx::getShmGuestPath(std::format("shm_{}_{}_A", info.channel, info.port))
+          .string());

-  int controlFd =
-      ::shm_open(control_shm_name, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
+  int controlFd = ::open(control_shm_name, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
   if (controlFd == -1) {
     perror("shm_open");
     std::abort();
@@ -50,20 +55,19 @@ void AudioOut::channelEntry(AudioOutChannelInfo info) {
   struct stat controlStat;
   if (::fstat(controlFd, &controlStat)) {
-    perror("shm_open");
+    perror("fstat");
     std::abort();
   }

   auto controlPtr = reinterpret_cast<std::uint8_t *>(
-      ::mmap(NULL, controlStat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED,
-             controlFd, 0));
+      rx::mem::map(nullptr, controlStat.st_size, PROT_READ | PROT_WRITE,
+                   MAP_SHARED, controlFd));
   if (controlPtr == MAP_FAILED) {
     perror("mmap");
     std::abort();
   }

-  int bufferFd =
-      ::shm_open(audio_shm_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+  int bufferFd = ::open(audio_shm_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
   if (bufferFd == -1) {
     perror("open");
     std::abort();
@@ -71,7 +75,7 @@ void AudioOut::channelEntry(AudioOutChannelInfo info) {
   struct stat bufferStat;
   if (::fstat(bufferFd, &bufferStat)) {
-    perror("shm_open");
+    perror("fstat");
     std::abort();
   }
@@ -145,7 +149,7 @@ void AudioOut::channelEntry(AudioOutChannelInfo info) {
   // output
   std::unique_lock lock(soxMtx);
   sox_format_t *output =
-      sox_open_write("default", &out_si, NULL, "alsa", NULL, NULL);
+      sox_open_write("default", &out_si, NULL, "alsa", nullptr, nullptr);
   soxMtx.unlock();

   if (!output) {

@@ -6,7 +6,7 @@ add_library(standalone-config INTERFACE)
 target_include_directories(standalone-config INTERFACE orbis-kernel-config)
 add_library(orbis::kernel::config ALIAS standalone-config)

-add_executable(rpcsx-os
+add_executable(rpcsx
   audio/AudioDevice.cpp
   audio/AlsaDevice.cpp
@@ -60,19 +60,34 @@ add_executable(rpcsx-os
   main.cpp
   AudioOut.cpp
   backtrace.cpp
-  bridge.cpp
   vm.cpp
   ops.cpp
   linker.cpp
   io-device.cpp
   thread.cpp
   vfs.cpp
+  ipmi.cpp
 )

-target_include_directories(rpcsx-os PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
-target_link_libraries(rpcsx-os PUBLIC orbis::kernel amdgpu::bridge rx libcrypto libunwind::unwind-x86_64 xbyak::xbyak sox::sox ALSA::ALSA)
-target_base_address(rpcsx-os 0x0000010000000000)
-target_compile_options(rpcsx-os PRIVATE "-mfsgsbase")
+add_subdirectory(gpu)
+add_subdirectory(core)

-set_target_properties(rpcsx-os PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
-install(TARGETS rpcsx-os RUNTIME DESTINATION bin)
+target_include_directories(rpcsx PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+target_link_libraries(rpcsx
+PUBLIC
+  rpcsx-gpu
+  orbis::kernel
+  rx
+  libcrypto
+  libunwind::unwind-x86_64
+  xbyak::xbyak
+  sox::sox
+  ALSA::ALSA
+  rpcsx-core
+)
+target_base_address(rpcsx 0x0000070000000000)
+target_compile_options(rpcsx PRIVATE "-mfsgsbase")
+set_target_properties(rpcsx PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
+install(TARGETS rpcsx RUNTIME DESTINATION bin)

13
rpcsx/core/CMakeLists.txt Normal file

@@ -0,0 +1,13 @@
add_library(rpcsx-core
STATIC
src/Config.cpp
src/watchdog.cpp
)
target_include_directories(rpcsx-core
PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}/include
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/..
)
target_link_libraries(rpcsx-core PUBLIC orbis::kernel rx rpcsx-gpu)

11
rpcsx/core/include/rx/Config.hpp Normal file

@@ -0,0 +1,11 @@
#pragma once
namespace rx {
// FIXME: serialization
struct Config {
int gpuIndex = 0;
bool validateGpu = false;
};
extern Config g_config;
} // namespace rx

12
rpcsx/core/include/rx/watchdog.hpp Normal file

@@ -0,0 +1,12 @@
#pragma once
#include <filesystem>
#include <string_view>
namespace rx {
const char *getShmPath();
std::filesystem::path getShmGuestPath(std::string_view path);
void createGpuDevice();
void shutdown();
int startWatchdog();
} // namespace rx

3
rpcsx/core/src/Config.cpp Normal file

@@ -0,0 +1,3 @@
#include "rx/Config.hpp"
rx::Config rx::g_config;

194
rpcsx/core/src/watchdog.cpp Normal file

@@ -0,0 +1,194 @@
#include "rx/watchdog.hpp"
#include "gpu/Device.hpp"
#include "orbis/KernelContext.hpp"
#include <chrono>
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <fcntl.h>
#include <filesystem>
#include <format>
#include <print>
#include <string_view>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <unistd.h>
#include <utility>
static std::atomic<bool> g_exitRequested;
static std::atomic<bool> g_runGpuRequested;
static pid_t g_watchdogPid;
static pid_t g_gpuPid;
static char g_shmPath[256];
enum class MessageId {
RunGPU,
};
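// Run the GPU device in a forked child process; the parent only records the
// child pid so the watchdog can supervise it.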
static void runGPU() {
if (g_gpuPid != 0 || orbis::g_context.gpuDevice != nullptr) {
return;
}
auto childPid = ::fork();
if (childPid != 0) {
g_gpuPid = childPid;
return;
}
amdgpu::Device *gpu;
{
pthread_setname_np(pthread_self(), "rpcsx-gpu");
std::lock_guard lock(orbis::g_context.gpuDeviceMtx);
if (orbis::g_context.gpuDevice != nullptr) {
std::exit(0);
}
int logFd =
::open("log-gpu.txt", O_CREAT | O_RDWR | O_TRUNC, S_IRUSR | S_IWUSR);
dup2(logFd, 1);
dup2(logFd, 2);
::close(logFd);
gpu = orbis::knew<amdgpu::Device>();
orbis::g_context.gpuDevice = gpu;
}
gpu->start();
std::exit(0);
}
static void handleManagementSignal(siginfo_t *info) {
switch (static_cast<MessageId>(info->si_value.sival_int)) {
case MessageId::RunGPU:
g_runGpuRequested = true;
break;
}
}
static void handle_watchdog_signal(int sig, siginfo_t *info, void *) {
if (sig == SIGUSR1) {
handleManagementSignal(info);
}
if (sig == SIGINT || sig == SIGQUIT) {
g_exitRequested = true;
}
}
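// Management requests reach the watchdog as SIGUSR1 with the message id
// packed into the sigqueue() payload.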
static void sendMessage(MessageId id) {
sigqueue(g_watchdogPid, SIGUSR1,
{
.sival_int = static_cast<int>(id),
});
}
const char *rx::getShmPath() { return g_shmPath; }
std::filesystem::path rx::getShmGuestPath(std::string_view path) {
return std::format("{}/guest/{}", getShmPath(), path);
}
void rx::createGpuDevice() { sendMessage(MessageId::RunGPU); }
void rx::shutdown() { kill(g_watchdogPid, SIGQUIT); }
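// Ask each child process to terminate, escalating from SIGQUIT to SIGKILL
// after ~20 rounds (about 200 ms); pids that already exited drop out of the
// list.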
static void killProcesses(std::vector<int> list) {
int iteration = 0;
while (!list.empty()) {
auto signal = iteration++ > 20 ? SIGKILL : SIGQUIT;
for (std::size_t i = 0; i < list.size();) {
if (list[i] == 0 || ::kill(list[i], signal) != 0) {
if (i + 1 < list.size()) {
std::swap(list[i], list.back());
}
list.pop_back();
continue;
}
++i;
}
if (signal == SIGKILL) {
break;
}
std::this_thread::sleep_for(std::chrono::milliseconds(10));
}
}
int rx::startWatchdog() {
auto watchdogPid = ::getpid();
g_watchdogPid = watchdogPid;
std::format_to(g_shmPath, "/dev/shm/rpcsx/{}", watchdogPid);
if (!std::filesystem::create_directories(g_shmPath)) {
perror("failed to create shared memory directory");
std::exit(-1);
}
if (!std::filesystem::create_directory(std::format("{}/guest", g_shmPath))) {
perror("failed to create guest shared memory directory");
std::exit(-1);
}
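// Fork: the child returns to the caller and becomes the guest init process,
// while the original process stays behind as the watchdog.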
pid_t initProcessPid = fork();
if (initProcessPid == 0) {
return watchdogPid;
}
pthread_setname_np(pthread_self(), "rpcsx-watchdog");
struct sigaction act{};
act.sa_sigaction = handle_watchdog_signal;
act.sa_flags = SA_SIGINFO;
if (sigaction(SIGUSR1, &act, nullptr)) {
perror("Error sigaction:");
std::exit(-1);
}
if (sigaction(SIGINT, &act, nullptr)) {
perror("Error sigaction:");
std::exit(-1);
}
if (sigaction(SIGQUIT, &act, nullptr)) {
perror("Error sigaction:");
std::exit(-1);
}
int stat = 0;
while (true) {
auto childPid = wait(&stat);
if (g_exitRequested == true) {
break;
}
if (childPid == initProcessPid) {
initProcessPid = 0;
break;
}
if (childPid == g_gpuPid) {
g_gpuPid = 0;
// FIXME: Restart GPU?
break;
}
if (g_runGpuRequested) {
std::println("watchdog: gpu start requested");
g_runGpuRequested = false;
runGPU();
}
}
std::filesystem::remove_all(g_shmPath);
killProcesses({initProcessPid, g_gpuPid});
::wait(nullptr);
std::_Exit(stat);
}


@@ -8,9 +8,9 @@ add_precompiled_vulkan_spirv(rpcsx-gpu-shaders
   shaders/rect_list.geom.glsl
 )

-add_executable(rpcsx-gpu
+add_library(rpcsx-gpu
+STATIC
   Cache.cpp
-  main.cpp
   Device.cpp
   FlipPipeline.cpp
   Pipe.cpp
@@ -21,7 +21,6 @@ add_executable(rpcsx-gpu
 target_link_libraries(rpcsx-gpu
 PUBLIC
   rpcsx-gpu-shaders
-  amdgpu::bridge
   rx
   gcn-shader
   glfw
@@ -30,9 +29,8 @@ PUBLIC
   rdna-semantic-spirv
   gnm::vulkan
   gnm
+  orbis::kernel
+  rpcsx-core
 )

-install(TARGETS rpcsx-gpu RUNTIME DESTINATION bin)
-set_target_properties(rpcsx-gpu PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
-target_base_address(rpcsx-gpu 0x0000060000000000)
 add_subdirectory(lib)


@@ -1,8 +1,8 @@
 #include "Cache.hpp"
 #include "Device.hpp"
-#include "amdgpu/bridge/bridge.hpp"
 #include "amdgpu/tiler.hpp"
 #include "gnm/vulkan.hpp"
+#include "rx/mem.hpp"
 #include "shader/Evaluator.hpp"
 #include "shader/GcnConverter.hpp"
 #include "shader/dialect.hpp"
@@ -22,16 +22,15 @@
 using namespace amdgpu;
 using namespace shader;

-static void notifyPageChanges(bridge::BridgeHeader *bridge, int vmId,
-                              std::uint32_t firstPage,
+static void notifyPageChanges(Device *device, int vmId, std::uint32_t firstPage,
                               std::uint32_t pageCount) {
   std::uint64_t command =
       (static_cast<std::uint64_t>(pageCount - 1) << 32) | firstPage;

   while (true) {
-    for (std::size_t i = 0; i < std::size(bridge->cacheCommands); ++i) {
+    for (std::size_t i = 0; i < std::size(device->cacheCommands); ++i) {
       std::uint64_t expCommand = 0;
-      if (bridge->cacheCommands[vmId][i].compare_exchange_strong(
+      if (device->cacheCommands[vmId][i].compare_exchange_strong(
               expCommand, command, std::memory_order::acquire,
               std::memory_order::relaxed)) {
         return;
@@ -40,67 +39,16 @@
   }
 }

-static void modifyWatchFlags(bridge::BridgeHeader *bridge, int vmId,
-                             std::uint64_t address, std::uint64_t size,
-                             std::uint8_t addFlags, std::uint8_t removeFlags) {
-  auto firstPage = address / bridge::kHostPageSize;
-  auto lastPage =
-      (address + size + bridge::kHostPageSize - 1) / bridge::kHostPageSize;
-  bool hasChanges = false;
-  for (auto page = firstPage; page < lastPage; ++page) {
-    auto prevValue =
-        bridge->cachePages[vmId][page].load(std::memory_order::relaxed);
-    auto newValue = (prevValue & ~removeFlags) | addFlags;
-
-    if (newValue == prevValue) {
-      continue;
-    }
-
-    while (!bridge->cachePages[vmId][page].compare_exchange_weak(
-        prevValue, newValue, std::memory_order::relaxed)) {
-      newValue = (prevValue & ~removeFlags) | addFlags;
-    }
-
-    if (newValue != prevValue) {
-      hasChanges = true;
-    }
-  }
-
-  if (hasChanges) {
-    notifyPageChanges(bridge, vmId, firstPage, lastPage - firstPage);
-  }
-}
-
-static void watchWrites(bridge::BridgeHeader *bridge, int vmId,
-                        std::uint64_t address, std::uint64_t size) {
-  modifyWatchFlags(bridge, vmId, address, size, bridge::kPageWriteWatch,
-                   bridge::kPageInvalidated);
-}
-static void lockReadWrite(bridge::BridgeHeader *bridge, int vmId,
-                          std::uint64_t address, std::uint64_t size,
-                          bool isLazy) {
-  modifyWatchFlags(bridge, vmId, address, size,
-                   bridge::kPageReadWriteLock |
-                       (isLazy ? bridge::kPageLazyLock : 0),
-                   bridge::kPageInvalidated);
-}
-static void unlockReadWrite(bridge::BridgeHeader *bridge, int vmId,
-                            std::uint64_t address, std::uint64_t size) {
-  modifyWatchFlags(bridge, vmId, address, size, bridge::kPageWriteWatch,
-                   bridge::kPageReadWriteLock | bridge::kPageLazyLock);
-}
-
-static bool testHostInvalidations(bridge::BridgeHeader *bridge, int vmId,
+static bool testHostInvalidations(Device *device, int vmId,
                                   std::uint64_t address, std::uint64_t size) {
-  auto firstPage = address / bridge::kHostPageSize;
-  auto lastPage =
-      (address + size + bridge::kHostPageSize - 1) / bridge::kHostPageSize;
+  auto firstPage = address / rx::mem::pageSize;
+  auto lastPage = (address + size + rx::mem::pageSize - 1) / rx::mem::pageSize;

   for (auto page = firstPage; page < lastPage; ++page) {
     auto prevValue =
-        bridge->cachePages[vmId][page].load(std::memory_order::relaxed);
+        device->cachePages[vmId][page].load(std::memory_order::relaxed);

-    if (~prevValue & bridge::kPageInvalidated) {
+    if (~prevValue & kPageInvalidated) {
       continue;
     }
@@ -110,25 +58,23 @@ static bool testHostInvalidations(bridge::BridgeHeader *bridge, int vmId,
   return false;
 }

-static bool handleHostInvalidations(bridge::BridgeHeader *bridge, int vmId,
+static bool handleHostInvalidations(Device *device, int vmId,
                                     std::uint64_t address, std::uint64_t size) {
-  auto firstPage = address / bridge::kHostPageSize;
-  auto lastPage =
-      (address + size + bridge::kHostPageSize - 1) / bridge::kHostPageSize;
+  auto firstPage = address / rx::mem::pageSize;
+  auto lastPage = (address + size + rx::mem::pageSize - 1) / rx::mem::pageSize;

   bool hasInvalidations = false;

   for (auto page = firstPage; page < lastPage; ++page) {
     auto prevValue =
-        bridge->cachePages[vmId][page].load(std::memory_order::relaxed);
+        device->cachePages[vmId][page].load(std::memory_order::relaxed);

-    if (~prevValue & bridge::kPageInvalidated) {
+    if (~prevValue & kPageInvalidated) {
       continue;
     }

-    while (!bridge->cachePages[vmId][page].compare_exchange_weak(
-        prevValue, prevValue & ~bridge::kPageInvalidated,
-        std::memory_order::relaxed)) {
+    while (!device->cachePages[vmId][page].compare_exchange_weak(
+        prevValue, prevValue & ~kPageInvalidated, std::memory_order::relaxed)) {
     }

     hasInvalidations = true;
@@ -137,18 +83,16 @@ static bool handleHostInvalidations(bridge::BridgeHeader *bridge, int vmId,
   return hasInvalidations;
 }

-static void markHostInvalidated(bridge::BridgeHeader *bridge, int vmId,
-                                std::uint64_t address, std::uint64_t size) {
-  auto firstPage = address / bridge::kHostPageSize;
-  auto lastPage =
-      (address + size + bridge::kHostPageSize - 1) / bridge::kHostPageSize;
+static void markHostInvalidated(Device *device, int vmId, std::uint64_t address,
+                                std::uint64_t size) {
+  auto firstPage = address / rx::mem::pageSize;
+  auto lastPage = (address + size + rx::mem::pageSize - 1) / rx::mem::pageSize;

   for (auto page = firstPage; page < lastPage; ++page) {
     std::uint8_t prevValue = 0;
-    while (!bridge->cachePages[vmId][page].compare_exchange_weak(
-        prevValue, prevValue | bridge::kPageInvalidated,
-        std::memory_order::relaxed)) {
+    while (!device->cachePages[vmId][page].compare_exchange_weak(
+        prevValue, prevValue | kPageInvalidated, std::memory_order::relaxed)) {
     }
   }
 }
@@ -676,7 +620,7 @@ struct CachedBuffer : Cache::Entry {
 struct CachedHostVisibleBuffer : CachedBuffer {
   using CachedBuffer::update;

-  bool expensive() { return addressRange.size() >= bridge::kHostPageSize; }
+  bool expensive() { return addressRange.size() >= rx::mem::pageSize; }

   void flush(void *target, rx::AddressRange range) {
     if (!hasDelayedFlush) {
@@ -1258,7 +1202,7 @@ Cache::Buffer Cache::Tag::getBuffer(rx::AddressRange range, Access access) {
   if ((access & Access::Read) != Access::None) {
     if (!cached->expensive() ||
-        handleHostInvalidations(getDevice()->bridge, mParent->mVmId,
+        handleHostInvalidations(getDevice(), mParent->mVmId,
                                 addressRange.beginAddress(),
                                 addressRange.size()) ||
         !mParent->isInSync(addressRange, cached->tagId)) {
@@ -1631,7 +1575,7 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
   if ((access & Access::Read) != Access::None) {
     if (!cached->expensive() ||
-        testHostInvalidations(getDevice()->bridge, mParent->mVmId,
+        testHostInvalidations(getDevice(), mParent->mVmId,
                               updateRange.beginAddress(), updateRange.size()) ||
         !mParent->isInSync(cached->addressRange, cached->tagId)) {
@@ -2316,8 +2260,7 @@ VkImage Cache::getFrameBuffer(Scheduler &scheduler, int index) { return {}; }
 void Cache::invalidate(Tag &tag, rx::AddressRange range) {
   flush(tag, range);
-  markHostInvalidated(mDevice->bridge, mVmId, range.beginAddress(),
-                      range.size());
+  markHostInvalidated(mDevice, mVmId, range.beginAddress(), range.size());
 }

 void Cache::flush(Tag &tag, rx::AddressRange range) {
   flushImages(tag, range);
@@ -2340,7 +2283,7 @@ void Cache::trackUpdate(EntryType type, rx::AddressRange range,
   table.map(range.beginAddress(), range.endAddress(), std::move(entry));

   if (watchChanges) {
-    watchWrites(mDevice->bridge, mVmId, range.beginAddress(), range.size());
+    mDevice->watchWrites(mVmId, range.beginAddress(), range.size());
   }
 }
@@ -2355,38 +2298,7 @@ void Cache::trackWrite(rx::AddressRange range, TagId tagId, bool lockMemory) {
     return;
   }

-  lockReadWrite(mDevice->bridge, mVmId, range.beginAddress(), range.size(),
-                true);
-
-  static auto updateThread = std::thread{[this] {
-    auto &sched = mDevice->graphicsPipes[0].scheduler;
-    auto vmId = mVmId;
-
-    while (true) {
-      auto page = mDevice->bridge->gpuCacheCommand[vmId].load(
-          std::memory_order::relaxed);
-      if (page == 0) {
-        continue;
-      }
-
-      mDevice->bridge->gpuCacheCommand[vmId].store(0,
-                                                   std::memory_order::relaxed);
-
-      auto address = static_cast<std::uint64_t>(page) * bridge::kHostPageSize;
-      auto range =
-          rx::AddressRange::fromBeginSize(address, bridge::kHostPageSize);
-
-      auto tag = mDevice->getCacheTag(vmId, sched);
-      flushImages(tag, range);
-      sched.submit();
-      sched.wait();
-
-      auto flushedRange = flushBuffers(range);
-      assert(flushedRange.isValid() && flushedRange.size() > 0);
-      unlockReadWrite(mDevice->bridge, vmId, flushedRange.beginAddress(),
-                      flushedRange.size());
-    }
-  }};
+  mDevice->lockReadWrite(mVmId, range.beginAddress(), range.size(), true);
 }

 rx::AddressRange Cache::flushImages(Tag &tag, rx::AddressRange range) {

1070
rpcsx/gpu/Device.cpp Normal file

File diff suppressed because it is too large

224
rpcsx/gpu/Device.hpp Normal file

@@ -0,0 +1,224 @@
#pragma once
#include "Cache.hpp"
#include "FlipPipeline.hpp"
#include "Pipe.hpp"
#include "amdgpu/tiler_vulkan.hpp"
#include "orbis/KernelAllocator.hpp"
#include "orbis/utils/Rc.hpp"
#include "orbis/utils/SharedMutex.hpp"
#include "rx/MemoryTable.hpp"
#include "shader/SemanticInfo.hpp"
#include "shader/SpvConverter.hpp"
#include "shader/gcn.hpp"
#include <GLFW/glfw3.h>
#include <array>
#include <thread>
#include <unordered_map>
#include <vulkan/vulkan_core.h>
namespace amdgpu {
enum : std::uint8_t {
IT_FLIP = 0xF0,
IT_MAP_MEMORY,
IT_UNMAP_MEMORY,
IT_PROTECT_MEMORY,
IT_UNMAP_PROCESS,
};
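// Builds a PM4 type-3 packet header: bits [31:30] hold the packet type (3),
// bits [29:16] the data dword count minus one, bits [15:8] the opcode.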
template <typename... T>
requires(sizeof...(T) > 0)
std::array<std::uint32_t, sizeof...(T) + 1> createPm4Packet(std::uint32_t op,
T... data) {
return {static_cast<std::uint32_t>((3 << 30) | (op << 8) |
((sizeof...(T) - 1) << 16)),
static_cast<std::uint32_t>(data)...};
}
struct VmMapSlot {
int memoryType;
int prot;
std::int64_t offset;
std::uint64_t baseAddress;
auto operator<=>(const VmMapSlot &) const = default;
};
struct BufferAttribute {
std::uint8_t attrId;
std::uint8_t submit;
std::uint64_t canary;
std::uint32_t pixelFormat;
std::uint32_t tilingMode;
std::uint32_t pitch;
std::uint32_t width;
std::uint32_t height;
};
struct Buffer {
std::uint64_t canary;
std::uint32_t index;
std::uint32_t attrId;
std::uint64_t address;
std::uint64_t address2;
};
struct ProcessInfo {
int vmId = -1;
int vmFd = -1;
BufferAttribute bufferAttributes[10];
Buffer buffers[10];
rx::MemoryTableWithPayload<VmMapSlot> vmTable;
};
enum {
kPageWriteWatch = 1 << 0,
kPageReadWriteLock = 1 << 1,
kPageInvalidated = 1 << 2,
kPageLazyLock = 1 << 3
};
struct PadState {
std::uint64_t timestamp;
std::uint32_t unk;
std::uint32_t buttons;
std::uint8_t leftStickX;
std::uint8_t leftStickY;
std::uint8_t rightStickX;
std::uint8_t rightStickY;
std::uint8_t l2;
std::uint8_t r2;
};
enum {
kPadBtnL3 = 1 << 1,
kPadBtnR3 = 1 << 2,
kPadBtnOptions = 1 << 3,
kPadBtnUp = 1 << 4,
kPadBtnRight = 1 << 5,
kPadBtnDown = 1 << 6,
kPadBtnLeft = 1 << 7,
kPadBtnL2 = 1 << 8,
kPadBtnR2 = 1 << 9,
kPadBtnL1 = 1 << 10,
kPadBtnR1 = 1 << 11,
kPadBtnTriangle = 1 << 12,
kPadBtnCircle = 1 << 13,
kPadBtnCross = 1 << 14,
kPadBtnSquare = 1 << 15,
kPadBtnPs = 1 << 16,
kPadBtnTouchPad = 1 << 20,
kPadBtnIntercepted = 1 << 31,
};
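// Guest memory of a mapped process is visible in the host address space at
// offset (vmId << 40), so guest pointers translate with a shift and an OR.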
struct RemoteMemory {
int vmId;
template <typename T = void> T *getPointer(std::uint64_t address) const {
return address ? reinterpret_cast<T *>(
static_cast<std::uint64_t>(vmId) << 40 | address)
: nullptr;
}
};
struct Device : orbis::RcBase {
static constexpr auto kComputePipeCount = 8;
static constexpr auto kGfxPipeCount = 2;
static constexpr auto kMaxProcessCount = 6;
shader::SemanticInfo gcnSemantic;
shader::spv::Context shaderSemanticContext;
shader::gcn::SemanticModuleInfo gcnSemanticModuleInfo;
Registers::Config config;
GLFWwindow *window = nullptr;
VkSurfaceKHR surface = VK_NULL_HANDLE;
VkDebugUtilsMessengerEXT debugMessenger = VK_NULL_HANDLE;
vk::Context vkContext;
GpuTiler tiler;
GraphicsPipe graphicsPipes[kGfxPipeCount]{0, 1};
ComputePipe computePipes[kComputePipeCount]{0, 1, 2, 3, 4, 5, 6, 7};
FlipPipeline flipPipeline;
orbis::shared_mutex writeCommandMtx;
uint32_t imageIndex = 0;
bool isImageAcquired = false;
std::jthread cacheUpdateThread;
int dmemFd[3] = {-1, -1, -1};
orbis::kmap<std::int32_t, ProcessInfo> processInfo;
Cache caches[kMaxProcessCount]{
{this, 0}, {this, 1}, {this, 2}, {this, 3}, {this, 4}, {this, 5},
};
PadState kbPadState;
std::atomic<std::uint64_t> cacheCommands[kMaxProcessCount][4];
std::atomic<std::uint32_t> gpuCacheCommand[kMaxProcessCount];
std::atomic<std::uint8_t> *cachePages[kMaxProcessCount];
volatile std::uint32_t flipBuffer[kMaxProcessCount];
volatile std::uint64_t flipArg[kMaxProcessCount];
volatile std::uint64_t flipCount[kMaxProcessCount];
volatile std::uint64_t bufferInUseAddress[kMaxProcessCount];
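// One 16 KiB main command ring per graphics pipe.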
std::uint32_t mainGfxRings[kGfxPipeCount][0x4000 / sizeof(std::uint32_t)];
Device();
~Device();
void start();
Cache::Tag getCacheTag(int vmId, Scheduler &scheduler) {
return caches[vmId].createTag(scheduler);
}
Cache::GraphicsTag getGraphicsTag(int vmId, Scheduler &scheduler) {
return caches[vmId].createGraphicsTag(scheduler);
}
Cache::ComputeTag getComputeTag(int vmId, Scheduler &scheduler) {
return caches[vmId].createComputeTag(scheduler);
}
void submitCommand(Queue &ring, std::span<const std::uint32_t> command);
void submitGfxCommand(int gfxPipe, std::span<const std::uint32_t> command);
void submitGfxCommand(int gfxPipe, int vmId,
std::span<const std::uint32_t> command);
void submitSwitchBuffer(int gfxPipe);
void submitFlip(int gfxPipe, std::uint32_t pid, int bufferIndex,
std::uint64_t flipArg);
void submitMapMemory(int gfxPipe, std::uint32_t pid, std::uint64_t address,
std::uint64_t size, int memoryType, int dmemIndex,
int prot, std::int64_t offset);
void submitUnmapMemory(int gfxPipe, std::uint32_t pid, std::uint64_t address,
std::uint64_t size);
void submitMapProcess(int gfxPipe, std::uint32_t pid, int vmId);
void submitUnmapProcess(int gfxPipe, std::uint32_t pid);
void submitProtectMemory(int gfxPipe, std::uint32_t pid,
std::uint64_t address, std::uint64_t size, int prot);
void mapProcess(std::uint32_t pid, int vmId);
void unmapProcess(std::uint32_t pid);
void protectMemory(std::uint32_t pid, std::uint64_t address,
std::uint64_t size, int prot);
void onCommandBuffer(std::uint32_t pid, int cmdHeader, std::uint64_t address,
std::uint64_t size);
bool processPipes();
bool flip(std::uint32_t pid, int bufferIndex, std::uint64_t arg,
VkImage swapchainImage, VkImageView swapchainImageView);
void flip(std::uint32_t pid, int bufferIndex, std::uint64_t arg);
void waitForIdle();
void mapMemory(std::uint32_t pid, std::uint64_t address, std::uint64_t size,
int memoryType, int dmemIndex, int prot, std::int64_t offset);
void unmapMemory(std::uint32_t pid, std::uint64_t address,
std::uint64_t size);
void registerBuffer(std::uint32_t pid, Buffer buffer);
void registerBufferAttribute(std::uint32_t pid, BufferAttribute attr);
void watchWrites(int vmId, std::uint64_t address, std::uint64_t size);
void lockReadWrite(int vmId, std::uint64_t address, std::uint64_t size,
bool isLazy);
void unlockReadWrite(int vmId, std::uint64_t address, std::uint64_t size);
};
} // namespace amdgpu


@@ -4,15 +4,29 @@
 #include "Renderer.hpp"
 #include "gnm/mmio.hpp"
 #include "gnm/pm4.hpp"
+#include "orbis/KernelContext.hpp"
 #include "vk.hpp"
 #include <bit>
 #include <cstdio>
+#include <print>
 #include <rx/bits.hpp>
 #include <rx/die.hpp>
 #include <vulkan/vulkan_core.h>

 using namespace amdgpu;

+enum GraphicsCoreEvent {
+  kGcEventCompute0RelMem = 0x00,
+  kGcEventCompute1RelMem = 0x01,
+  kGcEventCompute2RelMem = 0x02,
+  kGcEventCompute3RelMem = 0x03,
+  kGcEventCompute4RelMem = 0x04,
+  kGcEventCompute5RelMem = 0x05,
+  kGcEventCompute6RelMem = 0x06,
+  kGcEventGfxEop = 0x40,
+  kGcEventClockSet = 0x84,
+};
+
 static Scheduler createGfxScheduler(int index) {
   auto queue = vk::context->presentQueue;
   auto family = vk::context->presentQueueFamily;
@@ -31,6 +45,12 @@ static Scheduler createGfxScheduler(int index) {
 static Scheduler createComputeScheduler(int index) {
   auto &compQueues = vk::context->computeQueues;

+  if (compQueues.empty()) {
+    // Workaround for LLVM device
+    return createGfxScheduler(index);
+  }
+
   auto [queue, family] = compQueues[index % compQueues.size()];

   return Scheduler{queue, family};
@@ -142,8 +162,9 @@ GraphicsPipe::GraphicsPipe(int index) : scheduler(createGfxScheduler(index)) {
     processorHandlers[gnm::IT_NOP] = &GraphicsPipe::handleNop;
   }

-  auto &dataHandlers = commandHandlers[2];
-  auto &deHandlers = commandHandlers[1];
+  auto &dataHandlers = commandHandlers[3];
+  auto &deHandlers = commandHandlers[2];
+  auto &mainHandlers = commandHandlers[1];
   auto &ceHandlers = commandHandlers[0];

   deHandlers[gnm::IT_SET_BASE] = &GraphicsPipe::setBase;
@@ -175,7 +196,8 @@ GraphicsPipe::GraphicsPipe(int index) : scheduler(createGfxScheduler(index)) {
   deHandlers[gnm::IT_NUM_INSTANCES] = &GraphicsPipe::numInstances;
   deHandlers[gnm::IT_DRAW_INDEX_MULTI_AUTO] = &GraphicsPipe::drawIndexMultiAuto;

-  // IT_INDIRECT_BUFFER_CNST
+  mainHandlers[gnm::IT_INDIRECT_BUFFER_CNST] =
+      &GraphicsPipe::indirectBufferConst;
   // IT_STRMOUT_BUFFER_UPDATE

   deHandlers[gnm::IT_DRAW_INDEX_OFFSET_2] = &GraphicsPipe::drawIndexOffset2;
@@ -186,6 +208,7 @@ GraphicsPipe::GraphicsPipe(int index) : scheduler(createGfxScheduler(index)) {
   // IT_COPY_DW
   deHandlers[gnm::IT_WAIT_REG_MEM] = &GraphicsPipe::waitRegMem;
   deHandlers[gnm::IT_INDIRECT_BUFFER] = &GraphicsPipe::indirectBuffer;
+  mainHandlers[gnm::IT_INDIRECT_BUFFER] = &GraphicsPipe::indirectBuffer;
   // IT_COPY_DATA
   deHandlers[gnm::IT_PFP_SYNC_ME] = &GraphicsPipe::pfpSyncMe;
   // IT_SURFACE_SYNC
@@ -216,11 +239,15 @@ GraphicsPipe::GraphicsPipe(int index) : scheduler(createGfxScheduler(index)) {
   deHandlers[gnm::IT_WAIT_ON_CE_COUNTER] = &GraphicsPipe::waitOnCeCounter;
   deHandlers[gnm::IT_SET_CE_DE_COUNTERS] = &GraphicsPipe::setCeDeCounters;
   // IT_WAIT_ON_AVAIL_BUFFER
-  // IT_SWITCH_BUFFER
+  mainHandlers[gnm::IT_SWITCH_BUFFER] = &GraphicsPipe::switchBuffer;
   // IT_SET_RESOURCES
-  // IT_MAP_PROCESS
-  // IT_MAP_QUEUES
-  // IT_UNMAP_QUEUES
+  mainHandlers[gnm::IT_MAP_PROCESS] = &GraphicsPipe::mapProcess;
+  mainHandlers[gnm::IT_MAP_QUEUES] = &GraphicsPipe::mapQueues;
+  mainHandlers[gnm::IT_UNMAP_QUEUES] = &GraphicsPipe::unmapQueues;
+  mainHandlers[IT_MAP_MEMORY] = &GraphicsPipe::mapMemory;
+  mainHandlers[IT_UNMAP_MEMORY] = &GraphicsPipe::unmapMemory;
+  mainHandlers[IT_PROTECT_MEMORY] = &GraphicsPipe::protectMemory;
+  mainHandlers[IT_UNMAP_PROCESS] = &GraphicsPipe::unmapProcess;
   // IT_QUERY_STATUS
   // IT_RUN_LIST
   // IT_DISPATCH_DRAW_PREAMBLE
@@ -232,6 +259,8 @@ GraphicsPipe::GraphicsPipe(int index) : scheduler(createGfxScheduler(index)) {
   ceHandlers[gnm::IT_LOAD_CONST_RAM] = &GraphicsPipe::loadConstRam;
   ceHandlers[gnm::IT_WRITE_CONST_RAM] = &GraphicsPipe::writeConstRam;
   ceHandlers[gnm::IT_DUMP_CONST_RAM] = &GraphicsPipe::dumpConstRam;
+
+  mainHandlers[IT_FLIP] = &GraphicsPipe::flip;
 }

 void GraphicsPipe::setCeQueue(Queue queue) {
@@ -289,9 +318,7 @@ bool GraphicsPipe::processAllRings() {
     }
   }

-  for (int i = 0; i < 3; ++i) {
-    auto &queue = deQueues[i];
-
+  for (auto &queue : deQueues) {
     if (queue.rptr == queue.wptr) {
       continue;
     }
@@ -308,16 +335,17 @@
 }

 void GraphicsPipe::processRing(Queue &queue) {
-  auto cp = 1;
+  int cp;
   if (queue.indirectLevel < 0) {
     cp = 0;
-  } else if (queue.indirectLevel == 2) {
-    cp = 2;
+  } else {
+    cp = queue.indirectLevel + 1;
   }

   while (queue.rptr != queue.wptr) {
     if (queue.rptr >= queue.base + queue.size) {
       queue.rptr = queue.base;
+      continue;
     }

     auto header = *queue.rptr;
@@ -327,8 +355,11 @@
     auto op = rx::getBits(header, 15, 8);
     auto len = rx::getBits(header, 29, 16) + 2;

-    // std::fprintf(stderr, "queue %d: %s\n", queue.indirectLevel,
-    //              gnm::pm4OpcodeToString(op));
+    // if (auto str = gnm::pm4OpcodeToString(op)) {
+    //   std::println(stderr, "queue {}: {}", queue.indirectLevel, str);
+    // } else {
+    //   std::println(stderr, "queue {}: {:x}", queue.indirectLevel, op);
+    // }

     if (op == gnm::IT_COND_EXEC) {
       rx::die("unimplemented COND_EXEC");
@@ -353,7 +384,10 @@
       continue;
     }

-    rx::die("unexpected pm4 packet type %u", type);
+    rx::die("unexpected pm4 packet type %u, ring %u, header %u, rptr %p, wptr "
+            "%p, base %p",
+            type, queue.indirectLevel, header, queue.rptr, queue.wptr,
+            queue.base);
   }
 }
@@ -707,17 +741,38 @@ bool GraphicsPipe::waitRegMem(Queue &queue) {
   return compare(function, pollData, mask, reference);
 }

+bool GraphicsPipe::indirectBufferConst(Queue &queue) {
+  rx::dieIf(queue.indirectLevel < 0, "unexpected indirect buffer from CP");
+
+  auto addressLo = queue.rptr[1] & ~3;
+  auto addressHi = queue.rptr[2] & ((1 << 8) - 1);
+  int vmId = queue.rptr[3] >> 24;
+  auto ibSize = queue.rptr[3] & ((1 << 20) - 1);
+  auto address = addressLo | (static_cast<std::uint64_t>(addressHi) << 32);
+
+  if (queue.indirectLevel != 0) {
+    vmId = queue.vmId;
+  }
+
+  auto rptr = RemoteMemory{vmId}.getPointer<std::uint32_t>(address);
+  setCeQueue(Queue::createFromRange(vmId, rptr, ibSize));
+  return true;
+}
+
 bool GraphicsPipe::indirectBuffer(Queue &queue) {
   rx::dieIf(queue.indirectLevel < 0, "unexpected indirect buffer from CP");

   auto addressLo = queue.rptr[1] & ~3;
-  auto addressHi = queue.rptr[2] & ((1 << 16) - 1);
-  auto vmId = queue.rptr[3] >> 24;
-  auto ibSize = queue.rptr[4] & ((1 << 20) - 1);
+  auto addressHi = queue.rptr[2] & ((1 << 8) - 1);
+  int vmId = queue.rptr[3] >> 24;
+  auto ibSize = queue.rptr[3] & ((1 << 20) - 1);
   auto address = addressLo | (static_cast<std::uint64_t>(addressHi) << 32);

-  auto rptr = RemoteMemory{queue.vmId}.getPointer<std::uint32_t>(address);
-  setDeQueue(Queue::createFromRange(queue.vmId, rptr, ibSize),
+  if (queue.indirectLevel != 0) {
+    vmId = queue.vmId;
+  }
+
+  auto rptr = RemoteMemory{vmId}.getPointer<std::uint32_t>(address);
+  setDeQueue(Queue::createFromRange(vmId, rptr, ibSize),
              queue.indirectLevel + 1);
   return true;
 }
@@ -834,6 +889,11 @@ bool GraphicsPipe::eventWriteEop(Queue &queue) {
     rx::die("unimplemented event write eop data %#x", dataSel);
   }

+  if (intSel) {
+    orbis::g_context.deviceEventEmitter->emit(orbis::kEvFiltGraphicsCore, 0,
+                                              kGcEventGfxEop);
+  }
+
   return true;
 }
@@ -1056,15 +1116,15 @@ bool GraphicsPipe::setUConfigReg(Queue &queue) {
   auto data = queue.rptr + 2;

   if (index != 0) {
-    std::fprintf(
+    std::println(
         stderr,
-        "set UConfig regs with index, offset: %x, count %u, index %u, %s\n",
+        "set UConfig regs with index, offset: {:x}, count {}, index {}, {}",
         offset, len, index,
         gnm::mmio::registerName(decltype(uConfig)::kMmioOffset + offset));

     for (std::size_t i = 0; i < len; ++i) {
-      std::fprintf(
-          stderr, "writing to %s value %x\n",
+      std::println(
+          stderr, "writing to {} value {:x}",
           gnm::mmio::registerName(decltype(uConfig)::kMmioOffset + offset + i),
           data[i]);
     }
@@ -1092,15 +1152,15 @@ bool GraphicsPipe::setContextReg(Queue &queue) {
   auto data = queue.rptr + 2;

   if (index != 0) {
-    std::fprintf(
+    std::println(
         stderr,
-        "set Context regs with index, offset: %x, count %u, index %u, %s\n",
+        "set Context regs with index, offset: {:x}, count {}, index {}, {}",
         offset, len, index,
         gnm::mmio::registerName(decltype(context)::kMmioOffset + offset));

     for (std::size_t i = 0; i < len; ++i) {
-      std::fprintf(
-          stderr, "writing to %s value %x\n",
+      std::println(
+          stderr, "writing to {} value {:x}",
           gnm::mmio::registerName(decltype(context)::kMmioOffset + offset + i),
           data[i]);
     }
@@ -1195,3 +1255,87 @@
   rx::die("unimplemented gfx pm4 packet: %s, queue %u\n",
           gnm::pm4OpcodeToString(op), queue.indirectLevel);
 }
+
+bool GraphicsPipe::switchBuffer(Queue &queue) {
+  // FIXME: implement
+  return true;
+}
+
+bool GraphicsPipe::mapProcess(Queue &queue) {
+  auto pid = queue.rptr[1];
+  int vmId = queue.rptr[2];
+
+  device->mapProcess(pid, vmId);
+  return true;
+}
+
+bool GraphicsPipe::mapQueues(Queue &queue) {
+  // FIXME: implement
+  return true;
+}
+
+bool GraphicsPipe::unmapQueues(Queue &queue) {
+  // FIXME: implement
+  return true;
+}
+
+bool GraphicsPipe::mapMemory(Queue &queue) {
+  auto pid = queue.rptr[1];
+  auto addressLo = queue.rptr[2];
+  auto addressHi = queue.rptr[3];
+  auto sizeLo = queue.rptr[4];
+  auto sizeHi = queue.rptr[5];
+  auto memoryType = queue.rptr[6];
+  auto dmemIndex = queue.rptr[7];
+  auto prot = queue.rptr[8];
+  auto offsetLo = queue.rptr[9];
+  auto offsetHi = queue.rptr[10];
+
+  auto address = addressLo | (static_cast<std::uint64_t>(addressHi) << 32);
+  auto size = sizeLo | (static_cast<std::uint64_t>(sizeHi) << 32);
+  auto offset = offsetLo | (static_cast<std::uint64_t>(offsetHi) << 32);
+
+  device->mapMemory(pid, address, size, memoryType, dmemIndex, prot, offset);
+  return true;
+}
+
+bool GraphicsPipe::unmapMemory(Queue &queue) {
+  auto pid = queue.rptr[1];
+  auto addressLo = queue.rptr[2];
+  auto addressHi = queue.rptr[3];
+  auto sizeLo = queue.rptr[4];
+  auto sizeHi = queue.rptr[5];
+
+  auto address = addressLo | (static_cast<std::uint64_t>(addressHi) << 32);
+  auto size = sizeLo | (static_cast<std::uint64_t>(sizeHi) << 32);
+
+  device->unmapMemory(pid, address, size);
+  return true;
+}
+
+bool GraphicsPipe::protectMemory(Queue &queue) {
+  auto pid = queue.rptr[1];
+  auto addressLo = queue.rptr[2];
+  auto addressHi = queue.rptr[3];
+  auto sizeLo = queue.rptr[4];
+  auto sizeHi = queue.rptr[5];
+  auto prot = queue.rptr[6];
+
+  auto address = addressLo | (static_cast<std::uint64_t>(addressHi) << 32);
+  auto size = sizeLo | (static_cast<std::uint64_t>(sizeHi) << 32);
+
+  device->protectMemory(pid, address, size, prot);
+  return true;
+}
+
+bool GraphicsPipe::unmapProcess(Queue &queue) {
+  auto pid = queue.rptr[1];
+
+  device->unmapProcess(pid);
+  return true;
+}
+
+bool GraphicsPipe::flip(Queue &queue) {
+  auto buffer = queue.rptr[1];
+  auto dataLo = queue.rptr[2];
+  auto dataHi = queue.rptr[3];
+  auto pid = queue.rptr[4];
+
+  auto data = dataLo | (static_cast<std::uint64_t>(dataHi) << 32);
+
+  device->flip(pid, buffer, data);
+  return true;
+}


@@ -75,7 +75,7 @@ struct GraphicsPipe {
   Queue ceQueue;

   using CommandHandler = bool (GraphicsPipe::*)(Queue &);
-  CommandHandler commandHandlers[3][255];
+  CommandHandler commandHandlers[4][255];

   GraphicsPipe(int index);
@@ -96,6 +96,7 @@ struct GraphicsPipe {
   bool writeData(Queue &queue);
   bool memSemaphore(Queue &queue);
   bool waitRegMem(Queue &queue);
+  bool indirectBufferConst(Queue &queue);
   bool indirectBuffer(Queue &queue);
   bool condWrite(Queue &queue);
   bool eventWrite(Queue &queue);
@@ -130,6 +131,16 @@ struct GraphicsPipe {
   bool unknownPacket(Queue &queue);

+  bool switchBuffer(Queue &queue);
+  bool mapProcess(Queue &queue);
+  bool mapQueues(Queue &queue);
+  bool unmapQueues(Queue &queue);
+  bool mapMemory(Queue &queue);
+  bool unmapMemory(Queue &queue);
+  bool protectMemory(Queue &queue);
+  bool unmapProcess(Queue &queue);
+  bool flip(Queue &queue);
+
   std::uint32_t *getMmRegister(std::uint32_t dwAddress);
 };
 } // namespace amdgpu

Some files were not shown because too many files have changed in this diff