[amdgpu] Implement cpu->gpu change notifications

DH 2023-07-27 03:03:02 +03:00
parent a2173d1336
commit 450fd30889
11 changed files with 2212 additions and 886 deletions

View file

@@ -11,6 +11,7 @@ set(SRC
add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC})
target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH})
target_link_libraries(${PROJECT_NAME} PUBLIC orbis::utils::ipc)
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "")
add_library(amdgpu::bridge ALIAS ${PROJECT_NAME})
set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON)

View file

@@ -1,8 +1,11 @@
#pragma once
#include "orbis/utils/SharedMutex.hpp"
#include <atomic>
#include <cstdint>
#include <cstring>
#include <initializer_list>
#include <orbis/utils/SharedCV.hpp>
namespace amdgpu::bridge {
enum class CommandId : std::uint32_t {
@@ -38,6 +41,14 @@ struct CmdFlip {
std::uint64_t arg;
};
enum {
kPageWriteWatch = 1 << 0,
kPageReadWriteLock = 1 << 1,
kPageInvalidated = 1 << 2,
};
static constexpr auto kHostPageSize = 0x1000;
struct BridgeHeader {
std::uint64_t size;
std::uint64_t info;
@@ -57,6 +68,10 @@ struct BridgeHeader {
CmdMemoryProt memoryAreas[128];
CmdCommandBuffer commandBuffers[32];
CmdBuffer buffers[8];
// orbis::shared_mutex cacheCommandMtx;
// orbis::shared_cv cacheCommandCv;
std::atomic<std::uint64_t> cacheCommands[4];
std::atomic<std::uint8_t> cachePages[0x100'0000'0000 / kHostPageSize];
volatile std::uint64_t pull;
volatile std::uint64_t push;
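
The Bridge thread added in the last file of this diff drains cacheCommands by treating each non-zero slot as a packed page range: the low 32 bits select the first host page, the high 32 bits hold the page count minus one. A minimal sketch of the matching producer side, assuming the pusher claims a free slot with a CAS (tryPushCacheCommand is a hypothetical helper, not part of this commit):

// Sketch: publish a reprotect request for [firstPage, firstPage + pageCount).
// The packing mirrors the consumer loop in the Bridge thread.
inline bool tryPushCacheCommand(amdgpu::bridge::BridgeHeader *bridge,
                                std::uint32_t firstPage,
                                std::uint32_t pageCount) {
  std::uint64_t command =
      firstPage | (static_cast<std::uint64_t>(pageCount - 1) << 32);
  for (auto &slot : bridge->cacheCommands) {
    std::uint64_t expected = 0;
    if (slot.compare_exchange_strong(expected, command,
                                     std::memory_order::relaxed)) {
      return true; // slot claimed; the consumer zeroes it after processing
    }
  }
  return false; // all slots busy; caller retries
}

The GPU side would pair this with setting kPageWriteWatch in cachePages before publishing, so the Bridge thread drops PROT_WRITE on those pages and the next CPU write faults into the new SIGSEGV handler.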

View file

@@ -1263,7 +1263,6 @@ static_assert(sizeof(GnmTBuffer) == sizeof(std::uint64_t) * 4);
constexpr auto kPageSize = 0x4000;
struct DrawContext {
VkPipelineCache pipelineCache;
VkQueue queue;
VkCommandPool commandPool;
};
@@ -1274,7 +1273,6 @@ void setVkDevice(VkDevice device,
struct AmdgpuDevice {
amdgpu::device::DrawContext dc;
amdgpu::bridge::BridgeHeader *bridge;
void handleProtectMemory(std::uint64_t address, std::uint64_t size,
std::uint32_t prot);
@@ -1286,7 +1284,8 @@ struct AmdgpuDevice {
std::vector<VkImage> &usedImages);
AmdgpuDevice(amdgpu::device::DrawContext dc,
amdgpu::bridge::BridgeHeader *bridge)
: dc(dc), bridge(bridge) {}
amdgpu::bridge::BridgeHeader *bridge);
~AmdgpuDevice();
};
} // namespace amdgpu::device

View file

@@ -86,11 +86,15 @@ public:
T *operator->() const { return m_ref; }
explicit operator bool() const { return m_ref != nullptr; }
bool operator==(std::nullptr_t) const { return m_ref == nullptr; }
bool operator!=(std::nullptr_t) const { return m_ref != nullptr; }
bool operator==(const Ref &other) const = default;
bool operator==(const T *other) const { return m_ref == other; }
auto operator<=>(const T *other) const { return m_ref <=> other; }
auto operator<=>(const Ref &other) const = default;
};
template <typename T> Ref(T *) -> Ref<T>;
template <typename T> Ref(Ref<T>) -> Ref<T>;
enum class TaskState { InProgress, Complete, Canceled };
struct AsyncTaskCtl {
@@ -194,6 +198,12 @@ struct AsyncTask<T> : AsyncTaskCtl {
return *this;
}
~AsyncTask() {
if (isInProgress()) {
std::bit_cast<T *>(&taskStorage)->~T();
}
}
void invoke() override {
auto &lambda = *std::bit_cast<T *>(&taskStorage);
auto &base = *static_cast<const AsyncTaskCtl *>(this);
@@ -273,7 +283,7 @@ private:
while (!exit.load(std::memory_order::relaxed)) {
Ref<AsyncTaskCtl> task;
if (task == nullptr) {
{
std::unique_lock lock(taskMtx);
if (tasks.empty()) {
@@ -288,9 +298,7 @@
tasks.pop_back();
}
if (task != nullptr) {
task->invoke();
}
task->invoke();
}
}
};
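
The new comparison operators and deduction guides let Ref participate in ordered containers and compare directly against raw pointers without unwrapping. A small sketch, assuming the surrounding Ref/AsyncTaskCtl definitions (makeTaskCtl is an illustrative factory, not part of this header):

// Sketch: the defaulted ==/<=> make Ref<T> usable as an ordered key.
Ref<AsyncTaskCtl> a{makeTaskCtl()}; // hypothetical factory returning AsyncTaskCtl*
Ref<AsyncTaskCtl> b{makeTaskCtl()};
std::set<Ref<AsyncTaskCtl>> pending; // ordered via operator<=>(const Ref &)
pending.insert(a);
pending.insert(b);
AsyncTaskCtl *raw = nullptr;
bool unset = (a == raw);             // operator==(const T *other)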

View file

@@ -83,8 +83,7 @@ inline uint32_t getThinElementIndex(uint32_t x, uint32_t y) {
return elem;
}
inline uint32_t getDisplayElementIndex(uint32_t x, uint32_t y, uint32_t z,
uint32_t bpp) {
inline uint32_t getDisplayElementIndex(uint32_t x, uint32_t y, uint32_t bpp) {
uint32_t elem = 0;
switch (bpp) {
case 8:
@@ -456,7 +455,7 @@ inline uint64_t computeTiledElementByteOffset(
util::unreachable();
case kTileModeDisplay_2dThin:
return compute2dThinTileElementOffset(bpp, macroTileMode,
getDisplayElementIndex(x, y, z, bpp),
getDisplayElementIndex(x, y, bpp),
tileSwizzleMask, fragmentIndex,
arraySlice, x, y, z, height, pitch) /
8;

View file

@@ -9,12 +9,18 @@
#include <cstring>
#include <mutex>
#include <span>
#include <string_view>
#include <utility>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace amdgpu::device::vk {
extern VkDevice g_vkDevice;
extern VkAllocationCallbacks *g_vkAllocator;
extern std::vector<std::pair<VkQueue, unsigned>> g_computeQueues;
extern std::vector<std::pair<VkQueue, unsigned>> g_transferQueues;
extern std::vector<std::pair<VkQueue, unsigned>> g_graphicsQueues;
std::uint32_t findPhysicalMemoryTypeIndex(std::uint32_t typeBits,
VkMemoryPropertyFlags properties);
@@ -229,6 +235,11 @@ public:
continue;
}
if (debugName == std::string_view{"local"}) {
std::printf("memory: allocation %s memory %lx-%lx\n", debugName, offset,
offset + requirements.size);
}
table.unmap(offset, offset + requirements.size);
return {mMemory.getHandle(),
offset,
@@ -248,6 +259,8 @@ public:
void deallocate(DeviceMemoryRef memory) {
std::lock_guard lock(mMtx);
table.map(memory.offset, memory.offset + memory.size);
std::printf("memory: free %s memory %lx-%lx\n", debugName, memory.offset,
memory.offset + memory.size);
}
void dump() {
@@ -601,6 +614,9 @@ public:
return result;
}
unsigned getWidth() const { return mWidth; }
unsigned getHeight() const { return mHeight; }
unsigned getDepth() const { return mDepth; }
VkImage getHandle() const { return mImage; }
VkMemoryRequirements getMemoryRequirements() const {
@@ -609,6 +625,19 @@ public:
return requirements;
}
VkSubresourceLayout getSubresourceLayout(VkImageAspectFlags aspectMask,
uint32_t mipLevel = 0,
uint32_t arrayLayer = 0) const {
VkImageSubresource subResource{.aspectMask = aspectMask,
.mipLevel = mipLevel,
.arrayLayer = arrayLayer};
VkSubresourceLayout subResourceLayout;
vkGetImageSubresourceLayout(g_vkDevice, mImage, &subResource,
&subResourceLayout);
return subResourceLayout;
}
void readFromBuffer(VkCommandBuffer cmdBuffer, VkBuffer buffer,
VkImageAspectFlags destAspect,
VkDeviceSize bufferOffset = 0) {
@@ -858,6 +887,7 @@ public:
mMemory = memory;
}
const DeviceMemoryRef &getMemory() const { return mMemory; }
friend ImageRef;
};
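
A sketch of how the new getSubresourceLayout accessor pairs with the width/height getters when reading a linear image back through mapped memory (image and mapped are assumptions: a VK_IMAGE_TILING_LINEAR image and its mapped host memory; only the accessors above come from this diff):

// Sketch: walk the rows of a mapped linear image using its row pitch.
VkSubresourceLayout layout =
    image.getSubresourceLayout(VK_IMAGE_ASPECT_COLOR_BIT);
auto *base = static_cast<const std::byte *>(mapped) + layout.offset;
for (unsigned y = 0; y < image.getHeight(); ++y) {
  const std::byte *row = base + y * layout.rowPitch;
  // consume image.getWidth() texels from `row`
}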

File diff suppressed because it is too large

View file

@@ -130,9 +130,32 @@ public:
void unmap(std::uint64_t beginAddress, std::uint64_t endAddress) {
auto beginIt = mAreas.lower_bound(beginAddress);
if (beginIt == mAreas.end() || beginIt->first >= endAddress) {
if (beginIt == mAreas.end()) {
return;
}
if (beginIt->first >= endAddress) {
if (beginIt->second != Kind::X) {
return;
}
auto prevEnd = beginIt->first;
--beginIt;
if (beginIt->first >= endAddress) {
return;
}
if (beginIt->first < beginAddress) {
this->handleInvalidation(beginIt->first);
mAreas.emplace(beginAddress, Kind::X);
}
if (prevEnd > endAddress) {
mAreas.emplace(endAddress, Kind::O);
return;
}
}
if (beginIt->first > beginAddress && beginIt->second == Kind::X) {
// we have found end after unmap begin, need to insert new end
this->handleInvalidation(std::prev(beginIt)->first);
@@ -179,4 +202,166 @@ public:
}
};
template <typename PayloadT> class MemoryTableWithPayload {
enum class Kind { O, X, XO };
std::map<std::uint64_t, std::pair<Kind, PayloadT>> mAreas;
public:
struct AreaInfo {
std::uint64_t beginAddress;
std::uint64_t endAddress;
PayloadT payload;
};
class iterator {
using map_iterator =
typename std::map<std::uint64_t, std::pair<Kind, PayloadT>>::iterator;
map_iterator it;
public:
iterator() = default;
iterator(map_iterator it) : it(it) {}
AreaInfo operator*() const {
return {it->first, std::next(it)->first, it->second.second};
}
iterator &operator++() {
++it;
if (it->second.first != Kind::XO) {
++it;
}
return *this;
}
bool operator==(iterator other) const { return it == other.it; }
bool operator!=(iterator other) const { return it != other.it; }
};
iterator begin() { return iterator(mAreas.begin()); }
iterator end() { return iterator(mAreas.end()); }
void clear() { mAreas.clear(); }
iterator queryArea(std::uint64_t address) {
auto it = mAreas.lower_bound(address);
if (it == mAreas.end()) {
return it;
}
std::uint64_t endAddress = 0;
if (it->first == address) {
if (it->second.first == Kind::X) {
return mAreas.end();
}
endAddress = std::next(it)->first;
} else {
if (it->second.first == Kind::O) {
return mAreas.end();
}
endAddress = it->first;
--it;
}
return endAddress < address ? mAreas.end() : it;
}
void map(std::uint64_t beginAddress, std::uint64_t endAddress,
PayloadT payload, bool merge = true) {
assert(beginAddress < endAddress);
auto [beginIt, beginInserted] =
mAreas.emplace(beginAddress, std::pair{Kind::O, payload});
auto [endIt, endInserted] =
mAreas.emplace(endAddress, std::pair{Kind::X, PayloadT{}});
bool seenOpen = false;
bool endCollision = false;
bool lastRemovedIsOpen = false;
PayloadT lastRemovedOpenPayload;
if (!beginInserted || !endInserted) {
if (!beginInserted) {
if (beginIt->second.first == Kind::X) {
beginIt->second.first = Kind::XO;
} else {
seenOpen = true;
lastRemovedIsOpen = true;
lastRemovedOpenPayload = std::move(beginIt->second.second);
}
beginIt->second.second = std::move(payload);
}
if (!endInserted) {
if (endIt->second.first == Kind::O) {
endIt->second.first = Kind::XO;
} else {
endCollision = true;
}
lastRemovedIsOpen = false;
}
} else if (beginIt != mAreas.begin()) {
auto prev = std::prev(beginIt);
if (prev->second.first != Kind::X) {
beginIt->second.first = Kind::XO;
seenOpen = true;
lastRemovedIsOpen = true;
lastRemovedOpenPayload = prev->second.second;
}
}
auto origBegin = beginIt;
++beginIt;
while (beginIt != endIt) {
if (beginIt->second.first == Kind::X) {
lastRemovedIsOpen = false;
if (!seenOpen) {
origBegin->second.first = Kind::XO;
}
} else {
if (!seenOpen && beginIt->second.first == Kind::XO) {
origBegin->second.first = Kind::XO;
}
seenOpen = true;
lastRemovedIsOpen = true;
lastRemovedOpenPayload = std::move(beginIt->second.second);
}
beginIt = mAreas.erase(beginIt);
}
if (endCollision && !seenOpen) {
origBegin->second.first = Kind::XO;
} else if (lastRemovedIsOpen && !endCollision) {
endIt->second.first = Kind::XO;
endIt->second.second = std::move(lastRemovedOpenPayload);
}
if (!merge) {
return;
}
if (origBegin->second.first == Kind::XO) {
auto prevBegin = std::prev(origBegin);
if (prevBegin->second.second == origBegin->second.second) {
mAreas.erase(origBegin);
}
}
if (endIt->second.first == Kind::XO) {
if (endIt->second.second == origBegin->second.second) {
mAreas.erase(endIt);
}
}
}
};
} // namespace util
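
A usage sketch for the new MemoryTableWithPayload: overlapping map calls clip earlier areas, and queryArea resolves an address to the area covering it (values illustrative):

util::MemoryTableWithPayload<int> table;
table.map(0x1000, 0x3000, 1); // [0x1000, 0x3000) -> 1
table.map(0x2000, 0x4000, 2); // clips the first area to [0x1000, 0x2000)
if (auto it = table.queryArea(0x2800); it != table.end()) {
  auto area = *it; // {beginAddress = 0x2000, endAddress = 0x4000, payload = 2}
}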

View file

@@ -1,5 +1,9 @@
set(CMAKE_POSITION_INDEPENDENT_CODE on)
add_library(obj.orbis-utils-ipc OBJECT
src/utils/SharedMutex.cpp
src/utils/SharedCV.cpp
)
add_library(obj.orbis-kernel OBJECT
src/module.cpp
src/sysvec.cpp
@@ -62,11 +66,9 @@ add_library(obj.orbis-kernel OBJECT
src/sys/sys_vm_unix.cpp
src/utils/Logs.cpp
src/utils/SharedMutex.cpp
src/utils/SharedCV.cpp
)
target_link_libraries(obj.orbis-kernel PUBLIC orbis::kernel::config)
target_link_libraries(obj.orbis-kernel PUBLIC orbis::kernel::config obj.orbis-utils-ipc)
target_include_directories(obj.orbis-kernel
PUBLIC
@@ -76,10 +78,21 @@ target_include_directories(obj.orbis-kernel
${CMAKE_CURRENT_SOURCE_DIR}/include/orbis
)
target_include_directories(obj.orbis-utils-ipc
PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}/include
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/include/orbis
)
add_library(orbis-utils-ipc STATIC)
add_library(orbis-kernel STATIC)
add_library(orbis-kernel-shared SHARED)
add_library(orbis::utils::ipc ALIAS orbis-utils-ipc)
add_library(orbis::kernel ALIAS orbis-kernel)
add_library(orbis::kernel-shared ALIAS orbis-kernel-shared)
target_link_libraries(orbis-utils-ipc PUBLIC obj.orbis-utils-ipc)
target_link_libraries(orbis-kernel PUBLIC obj.orbis-kernel)
target_link_libraries(orbis-kernel-shared PUBLIC obj.orbis-kernel)

View file

@@ -1,4 +1,5 @@
#include "amdgpu/RemoteMemory.hpp"
#include "amdgpu/device/vk.hpp"
#include <algorithm>
#include <amdgpu/bridge/bridge.hpp>
#include <amdgpu/device/device.hpp>
@@ -643,6 +644,10 @@ int main(int argc, const char *argv[]) {
Verify() << (graphicsQueues.size() > 0);
Verify() << (presentQueue != VK_NULL_HANDLE);
amdgpu::device::vk::g_computeQueues = computeQueues;
amdgpu::device::vk::g_transferQueues = transferQueues;
amdgpu::device::vk::g_graphicsQueues = graphicsQueues;
VkCommandPoolCreateInfo commandPoolCreateInfo = {
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
@@ -653,16 +658,8 @@ int main(int argc, const char *argv[]) {
Verify() << vkCreateCommandPool(vkDevice, &commandPoolCreateInfo, nullptr,
&commandPool);
VkPipelineCacheCreateInfo pipelineCacheCreateInfo = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
};
VkPipelineCache pipelineCache;
Verify() << vkCreatePipelineCache(vkDevice, &pipelineCacheCreateInfo, nullptr,
&pipelineCache);
amdgpu::device::DrawContext dc{
// TODO
.pipelineCache = pipelineCache,
.queue = graphicsQueues.front().first,
.commandPool = commandPool,
};
@@ -707,7 +704,7 @@ int main(int argc, const char *argv[]) {
bridge->pullerPid = ::getpid();
amdgpu::bridge::BridgePuller bridgePuller{bridge};
amdgpu::bridge::Command commandsBuffer[32];
amdgpu::bridge::Command commandsBuffer[1];
if (!std::filesystem::exists(std::string("/dev/shm") + shmName)) {
std::printf("Waiting for OS\n");
@@ -735,164 +732,170 @@
memoryFd, 0);
g_hostMemory = memory;
amdgpu::device::AmdgpuDevice device(dc, bridgePuller.header);
std::vector<VkCommandBuffer> presentCmdBuffers(swapchainImages.size());
{
VkCommandBufferAllocateInfo allocInfo{};
allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
allocInfo.commandPool = dc.commandPool;
allocInfo.commandBufferCount = presentCmdBuffers.size();
vkAllocateCommandBuffers(vkDevice, &allocInfo, presentCmdBuffers.data());
}
amdgpu::device::AmdgpuDevice device(dc, bridgePuller.header);
std::printf("Initialization complete\n");
uint32_t imageIndex = 0;
bool isImageAcquired = false;
std::vector<std::vector<VkBuffer>> swapchainBufferHandles;
swapchainBufferHandles.resize(swapchainImages.size());
std::vector<std::vector<VkImage>> swapchainImageHandles;
swapchainImageHandles.resize(swapchainImages.size());
VkPipelineStageFlags submitPipelineStages =
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
while (!glfwWindowShouldClose(window)) {
glfwPollEvents();
std::size_t pulledCount =
bridgePuller.pullCommands(commandsBuffer, std::size(commandsBuffer));
if (pulledCount == 0) {
// std::this_thread::sleep_for(
// std::chrono::milliseconds(1)); // Just for testing, should be
// removed
continue;
for (std::uint32_t end = bridge->memoryAreaCount, i = 0; i < end; ++i) {
auto area = bridge->memoryAreas[i];
device.handleProtectMemory(area.address, area.size, area.prot);
}
for (auto cmd : std::span(commandsBuffer, pulledCount)) {
switch (cmd.id) {
case amdgpu::bridge::CommandId::ProtectMemory:
device.handleProtectMemory(cmd.memoryProt.address, cmd.memoryProt.size,
cmd.memoryProt.prot);
break;
case amdgpu::bridge::CommandId::CommandBuffer:
device.handleCommandBuffer(cmd.commandBuffer.queue,
cmd.commandBuffer.address,
cmd.commandBuffer.size);
break;
case amdgpu::bridge::CommandId::Flip: {
if (!isImageAcquired) {
Verify() << vkAcquireNextImageKHR(vkDevice, swapchain, UINT64_MAX,
presentCompleteSemaphore, nullptr,
&imageIndex);
std::vector<VkCommandBuffer> presentCmdBuffers(swapchainImages.size());
vkWaitForFences(vkDevice, 1, &inFlightFences[imageIndex], VK_TRUE,
UINT64_MAX);
vkResetFences(vkDevice, 1, &inFlightFences[imageIndex]);
}
{
VkCommandBufferAllocateInfo allocInfo{};
allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
allocInfo.commandPool = dc.commandPool;
allocInfo.commandBufferCount = presentCmdBuffers.size();
vkAllocateCommandBuffers(vkDevice, &allocInfo, presentCmdBuffers.data());
}
isImageAcquired = false;
std::printf("Initialization complete\n");
vkResetCommandBuffer(presentCmdBuffers[imageIndex], 0);
VkCommandBufferBeginInfo beginInfo{};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
uint32_t imageIndex = 0;
bool isImageAcquired = false;
std::vector<std::vector<VkBuffer>> swapchainBufferHandles;
swapchainBufferHandles.resize(swapchainImages.size());
std::vector<std::vector<VkImage>> swapchainImageHandles;
swapchainImageHandles.resize(swapchainImages.size());
vkBeginCommandBuffer(presentCmdBuffers[imageIndex], &beginInfo);
VkPipelineStageFlags submitPipelineStages =
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
for (auto handle : swapchainBufferHandles[imageIndex]) {
vkDestroyBuffer(vkDevice, handle, nullptr);
}
while (!glfwWindowShouldClose(window)) {
glfwPollEvents();
for (auto handle : swapchainImageHandles[imageIndex]) {
vkDestroyImage(vkDevice, handle, nullptr);
}
std::size_t pulledCount =
bridgePuller.pullCommands(commandsBuffer, std::size(commandsBuffer));
swapchainBufferHandles[imageIndex].clear();
swapchainImageHandles[imageIndex].clear();
if (pulledCount == 0) {
// std::this_thread::sleep_for(
// std::chrono::milliseconds(1)); // Just for testing, should be
// removed
continue;
}
if (device.handleFlip(cmd.flip.bufferIndex, cmd.flip.arg,
presentCmdBuffers[imageIndex],
swapchainImages[imageIndex], swapchainExtent,
swapchainBufferHandles[imageIndex],
swapchainImageHandles[imageIndex])) {
vkEndCommandBuffer(presentCmdBuffers[imageIndex]);
for (auto cmd : std::span(commandsBuffer, pulledCount)) {
switch (cmd.id) {
case amdgpu::bridge::CommandId::ProtectMemory:
device.handleProtectMemory(cmd.memoryProt.address,
cmd.memoryProt.size, cmd.memoryProt.prot);
break;
case amdgpu::bridge::CommandId::CommandBuffer:
device.handleCommandBuffer(cmd.commandBuffer.queue,
cmd.commandBuffer.address,
cmd.commandBuffer.size);
break;
case amdgpu::bridge::CommandId::Flip: {
if (!isImageAcquired) {
Verify() << vkAcquireNextImageKHR(vkDevice, swapchain, UINT64_MAX,
presentCompleteSemaphore, nullptr,
&imageIndex);
VkSubmitInfo submitInfo{};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &presentCmdBuffers[imageIndex];
submitInfo.waitSemaphoreCount = 1;
submitInfo.signalSemaphoreCount = 1;
submitInfo.pSignalSemaphores = &renderCompleteSemaphore;
submitInfo.pWaitSemaphores = &presentCompleteSemaphore;
submitInfo.pWaitDstStageMask = &submitPipelineStages;
Verify() << vkQueueSubmit(dc.queue, 1, &submitInfo,
inFlightFences[imageIndex]);
VkPresentInfoKHR presentInfo{};
presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
presentInfo.waitSemaphoreCount = 1;
presentInfo.pWaitSemaphores = &renderCompleteSemaphore;
presentInfo.swapchainCount = 1;
presentInfo.pSwapchains = &swapchain;
presentInfo.pImageIndices = &imageIndex;
if (vkQueuePresentKHR(presentQueue, &presentInfo) != VK_SUCCESS) {
std::printf("swapchain was invalidated\n");
createSwapchain();
vkWaitForFences(vkDevice, 1, &inFlightFences[imageIndex], VK_TRUE,
UINT64_MAX);
vkResetFences(vkDevice, 1, &inFlightFences[imageIndex]);
}
// std::this_thread::sleep_for(std::chrono::seconds(3));
} else {
isImageAcquired = true;
isImageAcquired = false;
vkResetCommandBuffer(presentCmdBuffers[imageIndex], 0);
VkCommandBufferBeginInfo beginInfo{};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
vkBeginCommandBuffer(presentCmdBuffers[imageIndex], &beginInfo);
for (auto handle : swapchainBufferHandles[imageIndex]) {
vkDestroyBuffer(vkDevice, handle, nullptr);
}
for (auto handle : swapchainImageHandles[imageIndex]) {
vkDestroyImage(vkDevice, handle, nullptr);
}
swapchainBufferHandles[imageIndex].clear();
swapchainImageHandles[imageIndex].clear();
if (device.handleFlip(cmd.flip.bufferIndex, cmd.flip.arg,
presentCmdBuffers[imageIndex],
swapchainImages[imageIndex], swapchainExtent,
swapchainBufferHandles[imageIndex],
swapchainImageHandles[imageIndex])) {
vkEndCommandBuffer(presentCmdBuffers[imageIndex]);
VkSubmitInfo submitInfo{};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &presentCmdBuffers[imageIndex];
submitInfo.waitSemaphoreCount = 1;
submitInfo.signalSemaphoreCount = 1;
submitInfo.pSignalSemaphores = &renderCompleteSemaphore;
submitInfo.pWaitSemaphores = &presentCompleteSemaphore;
submitInfo.pWaitDstStageMask = &submitPipelineStages;
Verify() << vkQueueSubmit(dc.queue, 1, &submitInfo,
inFlightFences[imageIndex]);
VkPresentInfoKHR presentInfo{};
presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
presentInfo.waitSemaphoreCount = 1;
presentInfo.pWaitSemaphores = &renderCompleteSemaphore;
presentInfo.swapchainCount = 1;
presentInfo.pSwapchains = &swapchain;
presentInfo.pImageIndices = &imageIndex;
if (vkQueuePresentKHR(presentQueue, &presentInfo) != VK_SUCCESS) {
std::printf("swapchain was invalidated\n");
createSwapchain();
}
// std::this_thread::sleep_for(std::chrono::seconds(3));
} else {
isImageAcquired = true;
}
break;
}
break;
}
default:
util::unreachable("Unexpected command id %u\n", (unsigned)cmd.id);
default:
util::unreachable("Unexpected command id %u\n", (unsigned)cmd.id);
}
}
}
}
if (bridge->pusherPid > 0) {
kill(bridge->pusherPid, SIGINT);
}
for (auto fence : inFlightFences) {
vkDestroyFence(vkDevice, fence, nullptr);
}
vkDestroySemaphore(vkDevice, presentCompleteSemaphore, nullptr);
vkDestroySemaphore(vkDevice, renderCompleteSemaphore, nullptr);
vkDestroyCommandPool(vkDevice, commandPool, nullptr);
for (auto &handles : swapchainImageHandles) {
for (auto handle : handles) {
vkDestroyImage(vkDevice, handle, nullptr);
if (bridge->pusherPid > 0) {
kill(bridge->pusherPid, SIGINT);
}
}
for (auto &handles : swapchainBufferHandles) {
for (auto handle : handles) {
vkDestroyBuffer(vkDevice, handle, nullptr);
for (auto fence : inFlightFences) {
vkDestroyFence(vkDevice, fence, nullptr);
}
vkDestroySemaphore(vkDevice, presentCompleteSemaphore, nullptr);
vkDestroySemaphore(vkDevice, renderCompleteSemaphore, nullptr);
vkDestroyCommandPool(vkDevice, commandPool, nullptr);
for (auto &handles : swapchainImageHandles) {
for (auto handle : handles) {
vkDestroyImage(vkDevice, handle, nullptr);
}
}
for (auto &handles : swapchainBufferHandles) {
for (auto handle : handles) {
vkDestroyBuffer(vkDevice, handle, nullptr);
}
}
}
vkDestroySwapchainKHR(vkDevice, swapchain, nullptr);
for (auto handle : swapchainImages) {
vkDestroyImage(vkDevice, handle, nullptr);
}
vkDestroyDevice(vkDevice, nullptr);
vkDestroySurfaceKHR(vkInstance, vkSurface, nullptr);
vkDestroyInstance(vkInstance, nullptr);
glfwDestroyWindow(window);
amdgpu::bridge::destroyShmCommandBuffer(bridge);
amdgpu::bridge::unlinkShm(cmdBridgeName);
return 0;

View file

@@ -10,6 +10,7 @@
#include "vfs.hpp"
#include "vm.hpp"
#include <atomic>
#include <elf.h>
#include <filesystem>
#include <orbis/KernelContext.hpp>
@@ -23,6 +24,7 @@
#include <fcntl.h>
#include <pthread.h>
#include <sys/mman.h>
#include <ucontext.h>
#include <csignal>
@@ -37,6 +39,67 @@ handle_signal(int sig, siginfo_t *info, void *ucontext) {
_writefsbase_u64(hostFs);
}
auto signalAddress = reinterpret_cast<std::uintptr_t>(info->si_addr);
if (rx::thread::g_current != nullptr && sig == SIGSEGV &&
signalAddress >= 0x40000 && signalAddress < 0x100'0000'0000) {
auto ctx = reinterpret_cast<ucontext_t *>(ucontext);
bool isWrite = (ctx->uc_mcontext.gregs[REG_ERR] & 0x2) != 0;
auto origVmProt = rx::vm::getPageProtection(signalAddress);
int prot = 0;
auto page = signalAddress / amdgpu::bridge::kHostPageSize;
if (origVmProt & rx::vm::kMapProtCpuRead) {
prot |= PROT_READ;
}
if (origVmProt & rx::vm::kMapProtCpuWrite) {
prot |= PROT_WRITE;
}
if (origVmProt & rx::vm::kMapProtCpuExec) {
prot |= PROT_EXEC;
}
if (prot & (isWrite ? PROT_WRITE : PROT_READ)) {
auto bridge = rx::bridge.header;
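// Page-flag protocol (flags declared in bridge.hpp):
//   kPageReadWriteLock - the GPU side holds the page; spin until it is
//                        released.
//   kPageWriteWatch    - the GPU wants to observe the next CPU write; the
//                        page was left without PROT_WRITE so this fault
//                        fires once. A read below re-enables access without
//                        PROT_WRITE, keeping the watch armed.
//   kPageInvalidated   - recorded on the first write so the GPU knows to
//                        refetch its cached copy of the page.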
while (true) {
auto flags = bridge->cachePages[page].load(std::memory_order::relaxed);
if ((flags & amdgpu::bridge::kPageReadWriteLock) != 0) {
continue;
}
if ((flags & amdgpu::bridge::kPageWriteWatch) == 0) {
break;
}
if (!isWrite) {
prot &= ~PROT_WRITE;
break;
}
if (bridge->cachePages[page].compare_exchange_weak(
flags, amdgpu::bridge::kPageInvalidated,
std::memory_order::relaxed)) {
break;
}
}
if (::mprotect((void *)(page * amdgpu::bridge::kHostPageSize),
amdgpu::bridge::kHostPageSize, prot)) {
std::perror("cache reprotection error");
std::abort();
}
_writefsbase_u64(rx::thread::g_current->fsBase);
return;
}
std::fprintf(stderr, "SIGSEGV, address %lx, access %s, prot %s\n",
signalAddress, isWrite ? "write" : "read",
rx::vm::mapProtToString(origVmProt).c_str());
}
if (g_gpuPid > 0) {
// stop gpu thread
::kill(g_gpuPid, SIGINT);
@@ -514,6 +577,68 @@ int main(int argc, const char *argv[]) {
initProcess->processParam = executableModule->processParam;
initProcess->processParamSize = executableModule->processParamSize;
std::thread{[] {
pthread_setname_np(pthread_self(), "Bridge");
auto bridge = rx::bridge.header;
std::vector<std::uint64_t> fetchedCommands;
fetchedCommands.reserve(std::size(bridge->cacheCommands));
while (true) {
for (auto &command : bridge->cacheCommands) {
std::uint64_t value = command.load(std::memory_order::relaxed);
if (value != 0) {
fetchedCommands.push_back(value);
command.store(0, std::memory_order::relaxed);
}
}
if (fetchedCommands.empty()) {
continue;
}
for (auto command : fetchedCommands) {
auto page = static_cast<std::uint32_t>(command);
auto count = static_cast<std::uint32_t>(command >> 32) + 1;
auto pageFlags =
bridge->cachePages[page].load(std::memory_order::relaxed);
auto address =
static_cast<std::uint64_t>(page) * amdgpu::bridge::kHostPageSize;
auto origVmProt = rx::vm::getPageProtection(address);
int prot = 0;
if (origVmProt & rx::vm::kMapProtCpuRead) {
prot |= PROT_READ;
}
if (origVmProt & rx::vm::kMapProtCpuWrite) {
prot |= PROT_WRITE;
}
if (origVmProt & rx::vm::kMapProtCpuExec) {
prot |= PROT_EXEC;
}
if (pageFlags & amdgpu::bridge::kPageReadWriteLock) {
prot &= ~(PROT_READ | PROT_WRITE);
} else if (pageFlags & amdgpu::bridge::kPageWriteWatch) {
prot &= ~PROT_WRITE;
}
// std::fprintf(stderr, "protection %lx-%lx\n", address,
// address + amdgpu::bridge::kHostPageSize * count);
if (::mprotect(reinterpret_cast<void *>(address),
amdgpu::bridge::kHostPageSize * count, prot)) {
perror("protection failed");
std::abort();
}
}
fetchedCommands.clear();
}
}}.detach();
int status = 0;
if (executableModule->type == rx::linker::kElfTypeSceDynExec ||