// rpcsx/rpcsx/gpu/Device.cpp

#include "Device.hpp"
#include "FlipPipeline.hpp"
#include "Renderer.hpp"
#include "amdgpu/tiler.hpp"
#include "gnm/constants.hpp"
#include "gnm/pm4.hpp"
#include "orbis-config.hpp"
#include "orbis/KernelContext.hpp"
#include "orbis/note.hpp"
#include "orbis/pmem.hpp"
#include "orbis/vmem.hpp"
#include "rx/AddressRange.hpp"
#include "rx/Config.hpp"
#include "rx/bits.hpp"
#include "rx/die.hpp"
#include "rx/format.hpp"
#include "rx/mem.hpp"
#include "rx/print.hpp"
#include "rx/watchdog.hpp"
#include "shader/spv.hpp"
#include "shaders/rdna-semantic-spirv.hpp"
#include "vk.hpp"
#include <chrono>
#include <cstdio>
#include <fcntl.h>
#include <stop_token>
#include <sys/mman.h>
#include <thread>
#define GLFW_INCLUDE_NONE
#include <GLFW/glfw3.h>
using namespace amdgpu;
static VKAPI_ATTR VkBool32 VKAPI_CALL debugUtilsMessageCallback(
VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
VkDebugUtilsMessageTypeFlagsEXT messageType,
const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData,
void *pUserData) {
if (pCallbackData->pMessage) {
rx::println("{}", pCallbackData->pMessage);
}
if (messageSeverity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
// std::abort();
}
return VK_FALSE;
}
enum class DisplayEvent : std::uint16_t {
Flip = 6,
VBlank = 7,
PreVBlankStart = 0x59,
};
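// Packs a display event into the 64-bit kqueue ident used by the display
// event filter: the DisplayEvent id occupies bits 48-63, unk0 bits 32-47,
// and unk1 the low 32 bits. The emitters below only match on the id in the
// top 16 bits; the meaning of the remaining fields is not fully known,
// hence the unk names.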
static constexpr std::uint64_t
makeDisplayEvent(DisplayEvent id, std::uint16_t unk0 = 0,
std::uint32_t unk1 = 0x1000'0000) {
std::uint64_t result = 0;
result |= static_cast<std::uint64_t>(id) << 48;
result |= static_cast<std::uint64_t>(unk0) << 32;
result |= static_cast<std::uint64_t>(unk1);
return result;
}
static vk::Context createVkContext(Device *device) {
std::vector<const char *> optionalLayers;
bool enableValidation = rx::g_config.validateGpu;
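// Reserve the guest virtual address window of every process slot up front,
// before any host allocation can land there. Each slot spans
// [kMinAddress, kMaxAddress) offset by kMaxAddress * slot; the six slots
// presumably correspond to kMaxProcessCount.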
for (std::size_t process = 0; process < 6; ++process) {
if (auto errc = rx::mem::reserve(rx::AddressRange::fromBeginSize(
orbis::kMinAddress + orbis::kMaxAddress * process,
orbis::kMaxAddress - orbis::kMinAddress));
errc != std::errc{}) {
rx::die("failed to reserve userspace memory: {}", (int)errc);
}
}
auto createWindow = [=] {
glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API);
device->window = glfwCreateWindow(1920, 1080, "RPCSX", nullptr, nullptr);
};
#ifdef GLFW_PLATFORM_WAYLAND
if (glfwPlatformSupported(GLFW_PLATFORM_WAYLAND)) {
glfwInitHint(GLFW_PLATFORM, GLFW_PLATFORM_WAYLAND);
}
glfwInit();
createWindow();
if (device->window == nullptr) {
glfwTerminate();
glfwInitHint(GLFW_PLATFORM, GLFW_ANY_PLATFORM);
glfwInit();
createWindow();
}
#else
glfwInit();
createWindow();
#endif
glfwHideWindow(device->window);
uint32_t glfwExtensionCount = 0;
const char **glfwExtensions =
glfwGetRequiredInstanceExtensions(&glfwExtensionCount);
std::vector<const char *> requiredExtensions{
glfwExtensions, glfwExtensions + glfwExtensionCount};
if (enableValidation) {
optionalLayers.push_back("VK_LAYER_KHRONOS_validation");
requiredExtensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
vk::Context result =
vk::Context::create({}, optionalLayers, requiredExtensions, {});
vk::context = &result;
if (enableValidation) {
VkDebugUtilsMessengerCreateInfoEXT debugUtilsMessengerCreateInfo{
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
.messageType =
VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_TYPE_DEVICE_ADDRESS_BINDING_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT,
.pfnUserCallback = debugUtilsMessageCallback,
};
VK_VERIFY(vk::CreateDebugUtilsMessengerEXT(
result.instance, &debugUtilsMessengerCreateInfo, vk::context->allocator,
&device->debugMessenger));
}
VK_VERIFY(glfwCreateWindowSurface(vk::context->instance, device->window,
nullptr, &device->surface));
result.createDevice(device->surface, rx::g_config.gpuIndex,
{
// VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME,
// VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME,
// VK_EXT_INLINE_UNIFORM_BLOCK_EXTENSION_NAME,
// VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME,
// VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,
// VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
VK_EXT_SEPARATE_STENCIL_USAGE_EXTENSION_NAME,
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
VK_EXT_SHADER_OBJECT_EXTENSION_NAME,
VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,
VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME,
},
{
VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME,
VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME,
});
auto getTotalMemorySize = [&](int memoryType) -> VkDeviceSize {
auto deviceLocalMemoryType =
result.findPhysicalMemoryTypeIndex(~0, memoryType);
if (deviceLocalMemoryType < 0) {
return 0;
}
auto heapIndex =
result.physicalMemoryProperties.memoryTypes[deviceLocalMemoryType]
.heapIndex;
return result.physicalMemoryProperties.memoryHeaps[heapIndex].size;
};
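// Pick conservative budgets from the reported heap sizes: up to half of
// host-visible memory (capped at 1 GiB) and up to a quarter of
// device-local memory (capped at 4 GiB). The caps appear to be heuristics
// rather than hard requirements.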
auto localMemoryTotalSize =
getTotalMemorySize(VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
auto hostVisibleMemoryTotalSize =
getTotalMemorySize(VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
vk::getHostVisibleMemory().initHostVisible(
std::min(hostVisibleMemoryTotalSize / 2, 1ul * 1024 * 1024 * 1024));
vk::getDeviceLocalMemory().initDeviceLocal(
std::min(localMemoryTotalSize / 4, 4ul * 1024 * 1024 * 1024));
vk::context = &device->vkContext;
return result;
}
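// Byte size of one per-vmId cache page table: one std::atomic<std::uint8_t>
// of watch/lock flags per page across a 0x100'0000'0000-byte (1 TiB) guest
// window. Despite the name, this is the size of the table, not of a page.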
const auto kCachePageSize = 0x100'0000'0000 / rx::mem::pageSize;
Device::Device() : vkContext(createVkContext(this)) {
if (!shader::spv::validate(g_rdna_semantic_spirv)) {
shader::spv::dump(g_rdna_semantic_spirv, true);
rx::die("builtin semantic validation failed");
}
if (auto sem = shader::spv::deserialize(
shaderSemanticContext, g_rdna_semantic_spirv,
shaderSemanticContext.getUnknownLocation())) {
auto shaderSemantic = *sem;
shader::gcn::canonicalizeSemantic(shaderSemanticContext, shaderSemantic);
shader::gcn::collectSemanticModuleInfo(gcnSemanticModuleInfo,
shaderSemantic);
gcnSemantic = shader::gcn::collectSemanticInfo(gcnSemanticModuleInfo);
} else {
rx::die("failed to deserialize builtin semantics\n");
}
for (auto &pipe : graphicsPipes) {
pipe.device = this;
}
for (auto &cachePage : cachePages) {
cachePage = static_cast<std::atomic<std::uint8_t> *>(
orbis::kalloc(kCachePageSize, 1));
std::memset(cachePage, 0, kCachePageSize);
}
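// Background worker reacting to GPU-side cache invalidation requests. Each
// vmId publishes at most one pending page in gpuCacheCommand[vmId]; the
// worker flushes cached images and buffers overlapping that page back to
// guest memory, then lifts the read/write lock so the CPU can access the
// range again.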
cacheUpdateThread = std::jthread([this](const std::stop_token &stopToken) {
auto &sched = graphicsPipes[0].scheduler;
std::uint32_t prevIdleValue = 0;
while (!stopToken.stop_requested()) {
if (gpuCacheCommandIdle.wait(prevIdleValue) != std::errc{}) {
continue;
}
prevIdleValue = gpuCacheCommandIdle.load(std::memory_order::acquire);
for (int vmId = 0; vmId < kMaxProcessCount; ++vmId) {
auto page = gpuCacheCommand[vmId].load(std::memory_order::relaxed);
if (page == 0) {
continue;
}
gpuCacheCommand[vmId].store(0, std::memory_order::relaxed);
auto address = static_cast<std::uint64_t>(page) * rx::mem::pageSize;
auto range =
rx::AddressRange::fromBeginSize(address, rx::mem::pageSize);
auto tag = getCacheTag(vmId, sched);
auto flushedRange = tag.getCache()->flushImages(tag, range);
flushedRange =
flushedRange.merge(tag.getCache()->flushImageBuffers(tag, range));
if (flushedRange) {
sched.submit();
sched.wait();
}
flushedRange = tag.getCache()->flushBuffers(flushedRange);
if (flushedRange) {
unlockReadWrite(vmId, flushedRange.beginAddress(),
flushedRange.size());
} else {
unlockReadWrite(vmId, range.beginAddress(), range.size());
}
}
}
});
commandPipe.device = this;
commandPipe.ring = {
.base = std::data(cmdRing),
.size = std::size(cmdRing),
.rptr = std::data(cmdRing),
.wptr = std::data(cmdRing),
};
for (auto &pipe : computePipes) {
pipe.device = this;
}
for (int i = 0; i < kGfxPipeCount; ++i) {
graphicsPipes[i].setDeQueue(
Ring{
.base = mainGfxRings[i],
.size = std::size(mainGfxRings[i]),
.rptr = mainGfxRings[i],
.wptr = mainGfxRings[i],
},
0);
}
}
Device::~Device() {
vkDeviceWaitIdle(vk::context->device);
if (debugMessenger != VK_NULL_HANDLE) {
vk::DestroyDebugUtilsMessengerEXT(vk::context->instance, debugMessenger,
vk::context->allocator);
}
for (auto &cachePage : cachePages) {
orbis::kfree(cachePage, kCachePageSize);
}
}
void Device::start() {
{
int width;
int height;
glfwGetWindowSize(window, &width, &height);
vk::context->createSwapchain({
.width = static_cast<uint32_t>(width),
.height = static_cast<uint32_t>(height),
});
}
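// Emit display events at a fixed 59.94 Hz cadence: one PreVBlankStart
// notification up front, then one VBlank event per period. Listeners are
// matched by the event id stored in the top 16 bits of the kqueue ident.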
std::jthread vblankThread([](const std::stop_token &stopToken) {
orbis::g_context->deviceEventEmitter->emit(
orbis::kEvFiltDisplay,
[=](orbis::KNote *note) -> std::optional<orbis::intptr_t> {
if (DisplayEvent(note->event.ident >> 48) ==
DisplayEvent::PreVBlankStart) {
return 0;
}
return {};
});
auto prevVBlank = std::chrono::steady_clock::now();
auto period = std::chrono::seconds(1) / 59.94;
while (!stopToken.stop_requested()) {
prevVBlank +=
std::chrono::duration_cast<std::chrono::nanoseconds>(period);
std::this_thread::sleep_until(prevVBlank);
orbis::g_context->deviceEventEmitter->emit(
orbis::kEvFiltDisplay,
[=](orbis::KNote *note) -> std::optional<orbis::intptr_t> {
if (DisplayEvent(note->event.ident >> 48) == DisplayEvent::VBlank) {
return 0;
}
return {};
});
}
});
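// gpIndex is unsigned, so the -1 "no gamepad" sentinel wraps to a value
// above GLFW_JOYSTICK_LAST; the comparisons below rely on this to switch
// between gamepad input and the keyboard fallback mapping.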
uint32_t gpIndex = -1;
GLFWgamepadstate gpState;
glfwShowWindow(window);
while (true) {
glfwPollEvents();
if (gpIndex > GLFW_JOYSTICK_LAST) {
for (int i = 0; i <= GLFW_JOYSTICK_LAST; ++i) {
if (glfwJoystickIsGamepad(i) == GLFW_TRUE) {
rx::println("Gamepad \"{}\" activated", glfwGetGamepadName(i));
gpIndex = i;
break;
}
}
} else {
if (!glfwJoystickIsGamepad(gpIndex)) {
gpIndex = -1;
}
}
if (gpIndex <= GLFW_JOYSTICK_LAST) {
if (glfwGetGamepadState(gpIndex, &gpState) == GLFW_TRUE) {
kbPadState.leftStickX =
gpState.axes[GLFW_GAMEPAD_AXIS_LEFT_X] * 127.5f + 127.5f;
kbPadState.leftStickY =
gpState.axes[GLFW_GAMEPAD_AXIS_LEFT_Y] * 127.5f + 127.5f;
kbPadState.rightStickX =
gpState.axes[GLFW_GAMEPAD_AXIS_RIGHT_X] * 127.5f + 127.5f;
kbPadState.rightStickY =
gpState.axes[GLFW_GAMEPAD_AXIS_RIGHT_Y] * 127.5f + 127.5f;
kbPadState.l2 =
(gpState.axes[GLFW_GAMEPAD_AXIS_LEFT_TRIGGER] + 1.0f) * 127.5f;
kbPadState.r2 =
(gpState.axes[GLFW_GAMEPAD_AXIS_RIGHT_TRIGGER] + 1.0f) * 127.5f;
kbPadState.buttons = 0;
if (kbPadState.l2 == 0xFF) {
kbPadState.buttons |= kPadBtnL2;
}
if (kbPadState.r2 == 0xFF) {
kbPadState.buttons |= kPadBtnR2;
}
static const uint32_t gpmap[GLFW_GAMEPAD_BUTTON_LAST + 1] = {
[GLFW_GAMEPAD_BUTTON_A] = kPadBtnCross,
[GLFW_GAMEPAD_BUTTON_B] = kPadBtnCircle,
[GLFW_GAMEPAD_BUTTON_X] = kPadBtnSquare,
[GLFW_GAMEPAD_BUTTON_Y] = kPadBtnTriangle,
[GLFW_GAMEPAD_BUTTON_LEFT_BUMPER] = kPadBtnL1,
[GLFW_GAMEPAD_BUTTON_RIGHT_BUMPER] = kPadBtnR1,
[GLFW_GAMEPAD_BUTTON_BACK] = 0,
[GLFW_GAMEPAD_BUTTON_START] = kPadBtnOptions,
[GLFW_GAMEPAD_BUTTON_GUIDE] = 0,
[GLFW_GAMEPAD_BUTTON_LEFT_THUMB] = kPadBtnL3,
[GLFW_GAMEPAD_BUTTON_RIGHT_THUMB] = kPadBtnR3,
[GLFW_GAMEPAD_BUTTON_DPAD_UP] = kPadBtnUp,
[GLFW_GAMEPAD_BUTTON_DPAD_RIGHT] = kPadBtnRight,
[GLFW_GAMEPAD_BUTTON_DPAD_DOWN] = kPadBtnDown,
[GLFW_GAMEPAD_BUTTON_DPAD_LEFT] = kPadBtnLeft};
for (int i = 0; i <= GLFW_GAMEPAD_BUTTON_LAST; ++i) {
if (gpState.buttons[i] == GLFW_PRESS) {
kbPadState.buttons |= gpmap[i];
}
}
}
} else {
kbPadState.leftStickX = 0x80;
kbPadState.leftStickY = 0x80;
kbPadState.rightStickX = 0x80;
kbPadState.rightStickY = 0x80;
kbPadState.buttons = 0;
if (glfwGetKey(window, GLFW_KEY_A) == GLFW_PRESS) {
kbPadState.leftStickX = 0;
} else if (glfwGetKey(window, GLFW_KEY_D) == GLFW_PRESS) {
kbPadState.leftStickX = 0xff;
}
if (glfwGetKey(window, GLFW_KEY_W) == GLFW_PRESS) {
kbPadState.leftStickY = 0;
} else if (glfwGetKey(window, GLFW_KEY_S) == GLFW_PRESS) {
kbPadState.leftStickY = 0xff;
}
if (glfwGetKey(window, GLFW_KEY_O) == GLFW_PRESS) {
kbPadState.rightStickY = 0;
} else if (glfwGetKey(window, GLFW_KEY_L) == GLFW_PRESS) {
kbPadState.rightStickY = 0xff;
}
if (glfwGetKey(window, GLFW_KEY_K) == GLFW_PRESS) {
kbPadState.rightStickX = 0;
} else if (glfwGetKey(window, GLFW_KEY_SEMICOLON) == GLFW_PRESS) {
kbPadState.rightStickX = 0xff;
}
if (glfwGetKey(window, GLFW_KEY_UP) == GLFW_PRESS) {
kbPadState.buttons |= kPadBtnUp;
}
if (glfwGetKey(window, GLFW_KEY_DOWN) == GLFW_PRESS) {
kbPadState.buttons |= kPadBtnDown;
}
if (glfwGetKey(window, GLFW_KEY_LEFT) == GLFW_PRESS) {
kbPadState.buttons |= kPadBtnLeft;
}
if (glfwGetKey(window, GLFW_KEY_RIGHT) == GLFW_PRESS) {
kbPadState.buttons |= kPadBtnRight;
}
if (glfwGetKey(window, GLFW_KEY_Z) == GLFW_PRESS) {
kbPadState.buttons |= kPadBtnSquare;
}
if (glfwGetKey(window, GLFW_KEY_X) == GLFW_PRESS) {
kbPadState.buttons |= kPadBtnCross;
}
if (glfwGetKey(window, GLFW_KEY_C) == GLFW_PRESS) {
kbPadState.buttons |= kPadBtnCircle;
}
if (glfwGetKey(window, GLFW_KEY_V) == GLFW_PRESS) {
kbPadState.buttons |= kPadBtnTriangle;
}
if (glfwGetKey(window, GLFW_KEY_Q) == GLFW_PRESS) {
kbPadState.buttons |= kPadBtnL1;
}
if (glfwGetKey(window, GLFW_KEY_E) == GLFW_PRESS) {
kbPadState.buttons |= kPadBtnL2;
kbPadState.l2 = 0xff;
}
if (glfwGetKey(window, GLFW_KEY_F) == GLFW_PRESS) {
kbPadState.buttons |= kPadBtnL3;
}
if (glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_PRESS) {
kbPadState.buttons |= kPadBtnPs;
}
if (glfwGetKey(window, GLFW_KEY_I) == GLFW_PRESS) {
kbPadState.buttons |= kPadBtnR1;
}
if (glfwGetKey(window, GLFW_KEY_P) == GLFW_PRESS) {
kbPadState.buttons |= kPadBtnR2;
kbPadState.r2 = 0xff;
}
if (glfwGetKey(window, GLFW_KEY_APOSTROPHE) == GLFW_PRESS) {
kbPadState.buttons |= kPadBtnR3;
}
if (glfwGetKey(window, GLFW_KEY_ENTER) == GLFW_PRESS) {
kbPadState.buttons |= kPadBtnOptions;
}
}
kbPadState.timestamp =
std::chrono::high_resolution_clock::now().time_since_epoch().count();
if (glfwWindowShouldClose(window)) {
rx::shutdown();
break;
}
processPipes();
}
}
void Device::submitCommand(Ring &ring, std::span<const std::uint32_t> command) {
if (ring.size < command.size()) {
std::println(stderr, "command too large for ring: ring size {}, command size {}",
ring.size, command.size());
std::abort();
}
std::scoped_lock lock(writeCommandMtx);
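// If the command does not fit before the end of the ring, spin until the
// consumer has drained everything, pad the tail with single-dword PM4
// type-2 packets (header 2 << 30) that readers skip as filler, then wrap
// the write pointer back to the base.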
if (ring.wptr + command.size() > ring.base + ring.size) {
while (ring.wptr != ring.rptr) {
}
for (auto it = ring.wptr; it < ring.base + ring.size; ++it) {
*it = 2 << 30;
}
ring.wptr = ring.base;
}
std::memcpy(const_cast<std::uint32_t *>(ring.wptr), command.data(),
command.size_bytes());
ring.wptr += command.size();
}
void Device::submitGfxCommand(int gfxPipe,
std::span<const std::uint32_t> command) {
auto &ring = graphicsPipes[gfxPipe].deQueues[2];
submitCommand(ring, command);
}
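// Attach a guest process to a GPU VM slot: every GPU-visible mapping in the
// process's VM table is re-mapped into the per-vmId address window.
// devOffset rebases the slot's physical (dmem) offset onto the queried
// sub-range, since a table entry may cover only part of the original
// mapping.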
void Device::mapProcess(std::uint32_t pid, int vmId) {
auto &process = processInfo[pid];
process.vmId = vmId;
auto memory = amdgpu::RemoteMemory{vmId};
for (auto slot : process.vmTable) {
auto gpuProt = orbis::vmem::toGpuProtection(slot->prot);
if (!gpuProt) {
continue;
}
auto devOffset = slot->offset + slot.beginAddress() - slot->baseAddress;
auto errc = orbis::pmem::map(
memory.getVirtualAddress(slot.beginAddress()),
rx::AddressRange::fromBeginSize(devOffset, slot.size()), gpuProt);
rx::dieIf(
errc != orbis::ErrorCode{},
"failed to map process {} memory, address {}-{}, type {}, vmId {}",
(int)pid, memory.getPointer(slot.beginAddress()),
memory.getPointer(slot.endAddress()), slot->memoryType, vmId);
std::println(stderr, "map process {} memory, address {}-{}, vmId {}",
(int)pid, memory.getVirtualAddress(slot.beginAddress()),
memory.getVirtualAddress(slot.beginAddress()) + slot.size(),
vmId);
}
}
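// Release the process's per-vmId window. Each slot occupies a 1 TiB
// (1 << 40) region selected by the vmId in the upper address bits, with the
// first kMinAddress bytes skipped; this mirrors the reservation done in
// createVkContext, assuming kMaxAddress equals the 1 TiB slot stride.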
void Device::unmapProcess(std::uint32_t pid) {
auto &process = processInfo[pid];
auto startAddress = static_cast<std::uint64_t>(process.vmId) << 40;
auto size = static_cast<std::uint64_t>(1) << 40;
startAddress += orbis::kMinAddress;
size -= orbis::kMinAddress;
if (auto errc = rx::mem::release(
rx::AddressRange::fromBeginSize(startAddress, size), 1 << 14);
errc != std::errc{}) {
rx::die("failed to release userspace memory: {}", (int)errc);
}
process.vmId = -1;
}
void Device::protectMemory(std::uint32_t pid, std::uint64_t address,
std::uint64_t size,
rx::EnumBitSet<orbis::vmem::Protection> prot) {
auto &process = processInfo[pid];
auto vmSlotIt = process.vmTable.queryArea(address);
if (vmSlotIt == process.vmTable.end()) {
return;
}
auto vmSlot = vmSlotIt.get();
process.vmTable.map(rx::AddressRange::fromBeginSize(address, size),
VmMapSlot{
.memoryType = vmSlot.memoryType,
.prot = prot,
.offset = vmSlot.offset,
.baseAddress = vmSlot.baseAddress,
});
if (process.vmId >= 0) {
auto memory = amdgpu::RemoteMemory{process.vmId};
rx::mem::protect(rx::AddressRange::fromBeginSize(
memory.getVirtualAddress(address), size),
orbis::vmem::toGpuProtection(prot));
std::println(stderr, "protect process {} memory, address {}-{}, prot {}",
(int)pid, memory.getPointer(address),
memory.getPointer(address + size), prot);
}
}
void Device::onCommandBuffer(std::uint32_t pid, int cmdHeader,
std::uint64_t address, std::uint64_t size) {
auto &process = processInfo[pid];
if (process.vmId < 0) {
return;
}
auto memory = RemoteMemory{process.vmId};
auto op = rx::getBits(cmdHeader, 15, 8);
if (op == gnm::IT_INDIRECT_BUFFER_CNST) {
graphicsPipes[0].setCeQueue(Ring::createFromRange(
process.vmId, memory.getPointer<std::uint32_t>(address),
size / sizeof(std::uint32_t)));
} else if (op == gnm::IT_INDIRECT_BUFFER) {
graphicsPipes[0].setDeQueue(
Ring::createFromRange(process.vmId,
memory.getPointer<std::uint32_t>(address),
size / sizeof(std::uint32_t)),
1);
} else {
rx::die("unimplemented command buffer {:x}", cmdHeader);
}
}
bool Device::processPipes() {
bool allProcessed = true;
commandPipe.processAllRings();
for (auto &pipe : computePipes) {
if (!pipe.processAllRings()) {
allProcessed = false;
}
}
for (auto &pipe : graphicsPipes) {
if (!pipe.processAllRings()) {
allProcessed = false;
}
}
return allProcessed;
}
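// Record a full image layout transition; source/destination pipeline stages
// and access masks are derived from the old and new layouts by the lambda
// below, which covers only the layouts this file actually uses.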
static void
transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,
VkImageLayout oldLayout, VkImageLayout newLayout,
const VkImageSubresourceRange &subresourceRange) {
VkImageMemoryBarrier barrier{};
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.oldLayout = oldLayout;
barrier.newLayout = newLayout;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = image;
barrier.subresourceRange = subresourceRange;
auto layoutToStageAccess =
[](VkImageLayout layout,
bool isSrc) -> std::pair<VkPipelineStageFlags, VkAccessFlags> {
switch (layout) {
case VK_IMAGE_LAYOUT_UNDEFINED:
case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
case VK_IMAGE_LAYOUT_GENERAL:
return {isSrc ? VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT
: VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
0};
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT};
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT};
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
return {VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT};
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
return {VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT};
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
return {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT};
default:
std::abort();
}
};
auto [sourceStage, sourceAccess] = layoutToStageAccess(oldLayout, true);
auto [destinationStage, destinationAccess] =
layoutToStageAccess(newLayout, false);
barrier.srcAccessMask = sourceAccess;
barrier.dstAccessMask = destinationAccess;
vkCmdPipelineBarrier(commandBuffer, sourceStage, destinationStage, 0, 0,
nullptr, 0, nullptr, 1, &barrier);
}
bool Device::flip(std::uint32_t pid, int bufferIndex, std::uint64_t arg,
VkImage swapchainImage, VkImageView swapchainImageView) {
auto &pipe = graphicsPipes[0];
auto &scheduler = pipe.scheduler;
auto &process = processInfo[pid];
if (process.vmId < 0) {
return false;
}
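// A negative buffer index appears to request a blank flip: flip state is
// advanced so waiters observe progress, but nothing is rendered or
// presented.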
if (bufferIndex < 0) {
flipBuffer[process.vmId] = bufferIndex;
flipArg[process.vmId] = arg;
flipCount[process.vmId] = flipCount[process.vmId] + 1;
return false;
}
auto &buffer = process.buffers[bufferIndex];
auto &bufferAttr = process.bufferAttributes[buffer.attrId];
gnm::DataFormat dfmt;
gnm::NumericFormat nfmt;
auto flipType = FlipType::Alt;
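// Translate the video-out pixel format code into a gnm data/numeric format
// pair. The magic values below look like SCE VideoOut pixel format
// encodings; only formats seen in practice are handled, anything else is
// fatal.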
switch (bufferAttr.pixelFormat) {
case 0x80000000:
dfmt = gnm::kDataFormat8_8_8_8;
nfmt = gnm::kNumericFormatSrgb;
break;
case 0x80002200:
dfmt = gnm::kDataFormat8_8_8_8;
nfmt = gnm::kNumericFormatSrgb;
flipType = FlipType::Std;
break;
case 0x88740000:
case 0x88060000:
dfmt = gnm::kDataFormat2_10_10_10;
nfmt = gnm::kNumericFormatSNorm;
break;
case 0x88000000:
dfmt = gnm::kDataFormat2_10_10_10;
nfmt = gnm::kNumericFormatSrgb;
break;
case 0xc1060000:
dfmt = gnm::kDataFormat16_16_16_16;
nfmt = gnm::kNumericFormatFloat;
break;
default:
rx::die("unimplemented color buffer format {:x}", bufferAttr.pixelFormat);
}
// std::printf("displaying buffer %lx\n", buffer.address);
auto cacheTag = getCacheTag(process.vmId, scheduler);
auto &sched = cacheTag.getScheduler();
transitionImageLayout(sched.getCommandBuffer(), swapchainImage,
VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.levelCount = 1,
.layerCount = 1,
});
amdgpu::flip(
cacheTag, vk::context->swapchainExtent, buffer.address,
swapchainImageView, {bufferAttr.width, bufferAttr.height}, flipType,
getDefaultTileModes()[bufferAttr.tilingMode != 0 ? 10 : 8], dfmt, nfmt);
transitionImageLayout(sched.getCommandBuffer(), swapchainImage,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.levelCount = 1,
.layerCount = 1,
});
sched.submit();
auto submitCompleteTask = scheduler.createExternalSubmit();
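// Chain presentation to the GPU work recorded above: wait until the
// swapchain image is available (presentCompleteSemaphore) and the
// scheduler's timeline reaches the previous submit, then signal both
// renderCompleteSemaphore (consumed by vkQueuePresentKHR) and the next
// timeline value so dependent work can proceed.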
{
VkSemaphoreSubmitInfo waitSemSubmitInfos[] = {
{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = vk::context->presentCompleteSemaphore,
.value = 1,
.stageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
},
{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = scheduler.getSemaphoreHandle(),
.value = submitCompleteTask - 1,
.stageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
},
};
VkSemaphoreSubmitInfo signalSemSubmitInfos[] = {
{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = vk::context->renderCompleteSemaphore,
.value = 1,
.stageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
},
{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = scheduler.getSemaphoreHandle(),
.value = submitCompleteTask,
.stageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
},
};
VkSubmitInfo2 submitInfo{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
.waitSemaphoreInfoCount = 2,
.pWaitSemaphoreInfos = waitSemSubmitInfos,
.signalSemaphoreInfoCount = 2,
.pSignalSemaphoreInfos = signalSemSubmitInfos,
};
vkQueueSubmit2(vk::context->presentQueue, 1, &submitInfo, VK_NULL_HANDLE);
}
scheduler.then([=, this, vmId = process.vmId,
cacheTag = std::move(cacheTag)] {
flipBuffer[vmId] = bufferIndex;
flipArg[vmId] = arg;
flipCount[vmId] = flipCount[vmId] + 1;
auto mem = RemoteMemory{vmId};
auto bufferInUse = mem.getPointer<std::uint64_t>(bufferInUseAddress[vmId]);
if (bufferInUse != nullptr) {
bufferInUse[bufferIndex] = 0;
}
});
return true;
}
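// Swapchain-facing flip entry point: acquire the next swapchain image
// (recreating the swapchain when it is out of date), forward to the
// detailed flip() above, emit the Flip display event, and present once the
// flip has actually consumed the acquired image.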
void Device::flip(std::uint32_t pid, int bufferIndex, std::uint64_t arg) {
auto recreateSwapchain = [this] {
int width;
int height;
glfwGetWindowSize(window, &width, &height);
vk::context->recreateSwapchain({
.width = static_cast<uint32_t>(width),
.height = static_cast<uint32_t>(height),
});
};
if (!isImageAcquired) {
while (true) {
auto acquireNextImageResult = vkAcquireNextImageKHR(
vk::context->device, vk::context->swapchain, UINT64_MAX,
vk::context->presentCompleteSemaphore, VK_NULL_HANDLE, &imageIndex);
if (acquireNextImageResult == VK_ERROR_OUT_OF_DATE_KHR) {
recreateSwapchain();
continue;
}
if (acquireNextImageResult != VK_SUBOPTIMAL_KHR) {
VK_VERIFY(acquireNextImageResult);
}
break;
}
}
bool flipComplete =
flip(pid, bufferIndex, arg, vk::context->swapchainImages[imageIndex],
vk::context->swapchainImageViews[imageIndex]);
orbis::g_context->deviceEventEmitter->emit(
orbis::kEvFiltDisplay,
[=](orbis::KNote *note) -> std::optional<orbis::intptr_t> {
if (DisplayEvent(note->event.ident >> 48) == DisplayEvent::Flip) {
return arg;
}
return {};
});
if (!flipComplete) {
isImageAcquired = true;
return;
}
isImageAcquired = false;
VkPresentInfoKHR presentInfo{
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &vk::context->renderCompleteSemaphore,
.swapchainCount = 1,
.pSwapchains = &vk::context->swapchain,
.pImageIndices = &imageIndex,
};
auto vkQueuePresentResult =
vkQueuePresentKHR(vk::context->presentQueue, &presentInfo);
if (vkQueuePresentResult == VK_ERROR_OUT_OF_DATE_KHR ||
vkQueuePresentResult == VK_SUBOPTIMAL_KHR) {
recreateSwapchain();
} else {
VK_VERIFY(vkQueuePresentResult);
}
}
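// Busy-wait until all queues of the first graphics pipe (draw-engine rings
// plus the constant-engine ring) have been consumed; other pipes are not
// checked, and the loop polls without yielding.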
void Device::waitForIdle() {
while (true) {
bool allProcessed = true;
for (auto &queue : graphicsPipes[0].deQueues) {
if (queue.wptr != queue.rptr) {
allProcessed = false;
}
}
{
auto &queue = graphicsPipes[0].ceQueue;
if (queue.wptr != queue.rptr) {
allProcessed = false;
}
}
if (allProcessed) {
break;
}
}
}
void Device::mapMemory(std::uint32_t pid, rx::AddressRange virtualRange,
orbis::MemoryType memoryType,
rx::EnumBitSet<orbis::vmem::Protection> prot,
std::uint64_t physicalOffset) {
auto &process = processInfo[pid];
process.vmTable.map(virtualRange,
VmMapSlot{
.memoryType = memoryType,
.prot = prot,
.offset = physicalOffset,
.baseAddress = virtualRange.beginAddress(),
});
if (process.vmId < 0) {
return;
}
auto memory = amdgpu::RemoteMemory{process.vmId};
auto vmemAddress = memory.getVirtualAddress(virtualRange.beginAddress());
// Map the backing dmem range (same byte length as the virtual range) into
// this vmId's GPU view, as mapProcess does for existing slots.
auto errc = orbis::pmem::map(vmemAddress,
rx::AddressRange::fromBeginSize(
physicalOffset, virtualRange.size()),
orbis::vmem::toGpuProtection(prot));
if (errc != orbis::ErrorCode{}) {
rx::die("failed to map process {} memory, address {:x}-{:x}, type {}, "
"offset {:x}, prot {}, error {}",
pid, vmemAddress, vmemAddress + virtualRange.size(), memoryType,
physicalOffset, prot, errc);
}
std::println(
stderr,
"map memory of process {}, address {:x}-{:x}, prot {}, phy memory {:x}",
(int)pid, vmemAddress, vmemAddress + virtualRange.size(), prot,
physicalOffset);
}
void Device::unmapMemory(std::uint32_t pid, std::uint64_t address,
std::uint64_t size) {
// TODO
protectMemory(pid, address, size, {});
}
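// Hand a page-range notification to the CPU-side cache worker of vmId. The
// command packs (pageCount - 1) into the high 32 bits and the first page
// into the low 32 bits, claims a free slot with a CAS, wakes the worker,
// then spins until the worker clears the slot, so the call returns only
// after the change has been observed.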
static void notifyPageChanges(Device *device, int vmId, std::uint32_t firstPage,
std::uint32_t pageCount) {
std::uint64_t command =
(static_cast<std::uint64_t>(pageCount - 1) << 32) | firstPage;
while (true) {
for (std::size_t i = 0; i < std::size(device->cpuCacheCommands); ++i) {
std::uint64_t expCommand = 0;
if (device->cpuCacheCommands[vmId][i].compare_exchange_strong(
expCommand, command, std::memory_order::release,
std::memory_order::relaxed)) {
device->cpuCacheCommandsIdle[vmId].fetch_add(
1, std::memory_order::release);
device->cpuCacheCommandsIdle[vmId].notify_one();
while (device->cpuCacheCommands[vmId][i].load(
std::memory_order::acquire) != 0) {
}
return;
}
}
}
}
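// Apply/remove watch flags on every cache page overlapping the range. Each
// page's flag byte is updated with a CAS loop so concurrent updates are not
// lost; the CPU-side worker is notified only when at least one page
// actually changed.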
static void modifyWatchFlags(Device *device, int vmId, std::uint64_t address,
std::uint64_t size, std::uint8_t addFlags,
std::uint8_t removeFlags) {
auto firstPage = address / rx::mem::pageSize;
auto lastPage = (address + size + rx::mem::pageSize - 1) / rx::mem::pageSize;
bool hasChanges = false;
for (auto page = firstPage; page < lastPage; ++page) {
auto prevValue =
device->cachePages[vmId][page].load(std::memory_order::relaxed);
auto newValue = (prevValue & ~removeFlags) | addFlags;
if (newValue == prevValue) {
continue;
}
while (!device->cachePages[vmId][page].compare_exchange_weak(
prevValue, newValue, std::memory_order::relaxed)) {
newValue = (prevValue & ~removeFlags) | addFlags;
}
if (newValue != prevValue) {
hasChanges = true;
}
}
if (hasChanges) {
notifyPageChanges(device, vmId, firstPage, lastPage - firstPage);
}
}
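// Public watch/lock API: watchWrites arms write tracking, lockReadWrite
// blocks CPU access until the GPU flushes the range (optionally lazily),
// and unlockReadWrite downgrades a lock back to plain write tracking.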
void Device::watchWrites(int vmId, std::uint64_t address, std::uint64_t size) {
modifyWatchFlags(this, vmId, address, size, kPageWriteWatch,
kPageInvalidated);
}
void Device::lockReadWrite(int vmId, std::uint64_t address, std::uint64_t size,
bool isLazy) {
modifyWatchFlags(this, vmId, address, size,
kPageReadWriteLock | (isLazy ? kPageLazyLock : 0),
kPageInvalidated);
}
void Device::unlockReadWrite(int vmId, std::uint64_t address,
std::uint64_t size) {
modifyWatchFlags(this, vmId, address, size, kPageWriteWatch,
kPageReadWriteLock | kPageLazyLock);
}