mirror of
https://github.com/RPCSX/rpcsx.git
synced 2026-01-25 18:10:48 +01:00
[amdgpu] Implement cpu->gpu change notifications
This commit is contained in:
parent
a2173d1336
commit
450fd30889
|
|
@ -11,6 +11,7 @@ set(SRC
|
|||
|
||||
add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC})
|
||||
target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH})
|
||||
target_link_libraries(${PROJECT_NAME} PUBLIC orbis::utils::ipc)
|
||||
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "")
|
||||
add_library(amdgpu::bridge ALIAS ${PROJECT_NAME})
|
||||
set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
|
|
|
|||
|
|
@ -1,8 +1,11 @@
|
|||
#pragma once
|
||||
|
||||
#include "orbis/utils/SharedMutex.hpp"
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <initializer_list>
|
||||
#include <orbis/utils/SharedCV.hpp>
|
||||
|
||||
namespace amdgpu::bridge {
|
||||
enum class CommandId : std::uint32_t {
|
||||
|
|
@ -38,6 +41,14 @@ struct CmdFlip {
|
|||
std::uint64_t arg;
|
||||
};
|
||||
|
||||
// Bit flags stored per host page in BridgeHeader::cachePages.
// Meanings below are taken from how the SIGSEGV handler and the "Bridge"
// thread in this change consume them.
enum {
  // Page is write-watched: CPU write access is removed so that the first
  // write faults and can be reported.
  kPageWriteWatch = 0x1,
  // Page is locked: CPU read and write access are both removed; the fault
  // handler spins until the bit is cleared.
  kPageReadWriteLock = 0x2,
  // Stored by the fault handler in place of the previous flags after a CPU
  // write hit a write-watched page.
  kPageInvalidated = 0x4,
};
|
||||
|
||||
static constexpr auto kHostPageSize = 0x1000;
|
||||
|
||||
struct BridgeHeader {
|
||||
std::uint64_t size;
|
||||
std::uint64_t info;
|
||||
|
|
@ -57,6 +68,10 @@ struct BridgeHeader {
|
|||
CmdMemoryProt memoryAreas[128];
|
||||
CmdCommandBuffer commandBuffers[32];
|
||||
CmdBuffer buffers[8];
|
||||
// orbis::shared_mutex cacheCommandMtx;
|
||||
// orbis::shared_cv cacheCommandCv;
|
||||
std::atomic<std::uint64_t> cacheCommands[4];
|
||||
std::atomic<std::uint8_t> cachePages[0x100'0000'0000 / kHostPageSize];
|
||||
|
||||
volatile std::uint64_t pull;
|
||||
volatile std::uint64_t push;
|
||||
|
|
|
|||
|
|
@ -1263,7 +1263,6 @@ static_assert(sizeof(GnmTBuffer) == sizeof(std::uint64_t) * 4);
|
|||
constexpr auto kPageSize = 0x4000;
|
||||
|
||||
struct DrawContext {
|
||||
VkPipelineCache pipelineCache;
|
||||
VkQueue queue;
|
||||
VkCommandPool commandPool;
|
||||
};
|
||||
|
|
@ -1274,7 +1273,6 @@ void setVkDevice(VkDevice device,
|
|||
|
||||
struct AmdgpuDevice {
|
||||
amdgpu::device::DrawContext dc;
|
||||
amdgpu::bridge::BridgeHeader *bridge;
|
||||
|
||||
void handleProtectMemory(std::uint64_t address, std::uint64_t size,
|
||||
std::uint32_t prot);
|
||||
|
|
@ -1286,7 +1284,8 @@ struct AmdgpuDevice {
|
|||
std::vector<VkImage> &usedImages);
|
||||
|
||||
AmdgpuDevice(amdgpu::device::DrawContext dc,
|
||||
amdgpu::bridge::BridgeHeader *bridge)
|
||||
: dc(dc), bridge(bridge) {}
|
||||
amdgpu::bridge::BridgeHeader *bridge);
|
||||
|
||||
~AmdgpuDevice();
|
||||
};
|
||||
} // namespace amdgpu::device
|
||||
|
|
|
|||
|
|
@ -86,11 +86,15 @@ public:
|
|||
T *operator->() const { return m_ref; }
|
||||
explicit operator bool() const { return m_ref != nullptr; }
|
||||
bool operator==(std::nullptr_t) const { return m_ref == nullptr; }
|
||||
bool operator!=(std::nullptr_t) const { return m_ref != nullptr; }
|
||||
bool operator==(const Ref &other) const = default;
|
||||
bool operator==(const T *other) const { return m_ref == other; }
|
||||
auto operator<=>(const T *other) const { return m_ref <=> other; }
|
||||
auto operator<=>(const Ref &other) const = default;
|
||||
};
|
||||
|
||||
template <typename T> Ref(T *) -> Ref<T>;
|
||||
template <typename T> Ref(Ref<T>) -> Ref<T>;
|
||||
|
||||
enum class TaskState { InProgress, Complete, Canceled };
|
||||
|
||||
struct AsyncTaskCtl {
|
||||
|
|
@ -194,6 +198,12 @@ struct AsyncTask<T> : AsyncTaskCtl {
|
|||
return *this;
|
||||
}
|
||||
|
||||
// Destroys the callable held in taskStorage, but only while the task is
// still reported as in progress.
// NOTE(review): presumably the callable is destroyed elsewhere once the task
// completes or is cancelled - confirm against invoke()/cancel paths.
~AsyncTask() {
  if (!isInProgress()) {
    return;
  }

  auto *storedCallable = std::bit_cast<T *>(&taskStorage);
  storedCallable->~T();
}
|
||||
|
||||
void invoke() override {
|
||||
auto &lambda = *std::bit_cast<T *>(&taskStorage);
|
||||
auto &base = *static_cast<const AsyncTaskCtl *>(this);
|
||||
|
|
@ -273,7 +283,7 @@ private:
|
|||
while (!exit.load(std::memory_order::relaxed)) {
|
||||
Ref<AsyncTaskCtl> task;
|
||||
|
||||
if (task == nullptr) {
|
||||
{
|
||||
std::unique_lock lock(taskMtx);
|
||||
|
||||
if (tasks.empty()) {
|
||||
|
|
@ -288,9 +298,7 @@ private:
|
|||
tasks.pop_back();
|
||||
}
|
||||
|
||||
if (task != nullptr) {
|
||||
task->invoke();
|
||||
}
|
||||
task->invoke();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -83,8 +83,7 @@ inline uint32_t getThinElementIndex(uint32_t x, uint32_t y) {
|
|||
return elem;
|
||||
}
|
||||
|
||||
inline uint32_t getDisplayElementIndex(uint32_t x, uint32_t y, uint32_t z,
|
||||
uint32_t bpp) {
|
||||
inline uint32_t getDisplayElementIndex(uint32_t x, uint32_t y, uint32_t bpp) {
|
||||
uint32_t elem = 0;
|
||||
switch (bpp) {
|
||||
case 8:
|
||||
|
|
@ -456,7 +455,7 @@ inline uint64_t computeTiledElementByteOffset(
|
|||
util::unreachable();
|
||||
case kTileModeDisplay_2dThin:
|
||||
return compute2dThinTileElementOffset(bpp, macroTileMode,
|
||||
getDisplayElementIndex(x, y, z, bpp),
|
||||
getDisplayElementIndex(x, y, bpp),
|
||||
tileSwizzleMask, fragmentIndex,
|
||||
arraySlice, x, y, z, height, pitch) /
|
||||
8;
|
||||
|
|
|
|||
|
|
@ -9,12 +9,18 @@
|
|||
#include <cstring>
|
||||
#include <mutex>
|
||||
#include <span>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
namespace amdgpu::device::vk {
|
||||
extern VkDevice g_vkDevice;
|
||||
extern VkAllocationCallbacks *g_vkAllocator;
|
||||
extern std::vector<std::pair<VkQueue, unsigned>> g_computeQueues;
|
||||
extern std::vector<std::pair<VkQueue, unsigned>> g_transferQueues;
|
||||
extern std::vector<std::pair<VkQueue, unsigned>> g_graphicsQueues;
|
||||
|
||||
std::uint32_t findPhysicalMemoryTypeIndex(std::uint32_t typeBits,
|
||||
VkMemoryPropertyFlags properties);
|
||||
|
||||
|
|
@ -229,6 +235,11 @@ public:
|
|||
continue;
|
||||
}
|
||||
|
||||
if (debugName == std::string_view{"local"}) {
|
||||
std::printf("memory: allocation %s memory %lx-%lx\n", debugName, offset,
|
||||
offset + requirements.size);
|
||||
}
|
||||
|
||||
table.unmap(offset, offset + requirements.size);
|
||||
return {mMemory.getHandle(),
|
||||
offset,
|
||||
|
|
@ -248,6 +259,8 @@ public:
|
|||
// Returns the range described by `memory` to the free table and logs it.
// The whole operation runs under mMtx.
void deallocate(DeviceMemoryRef memory) {
  std::lock_guard guard(mMtx);

  const auto rangeBegin = memory.offset;
  const auto rangeEnd = memory.offset + memory.size;

  // Mapped entries in `table` are the free ranges (allocation unmaps them).
  table.map(rangeBegin, rangeEnd);
  std::printf("memory: free %s memory %lx-%lx\n", debugName, rangeBegin,
              rangeEnd);
}
|
||||
|
||||
void dump() {
|
||||
|
|
@ -601,6 +614,9 @@ public:
|
|||
return result;
|
||||
}
|
||||
|
||||
unsigned getWidth() const { return mWidth; }
|
||||
unsigned getHeight() const { return mHeight; }
|
||||
unsigned getDepth() const { return mDepth; }
|
||||
VkImage getHandle() const { return mImage; }
|
||||
|
||||
VkMemoryRequirements getMemoryRequirements() const {
|
||||
|
|
@ -609,6 +625,19 @@ public:
|
|||
return requirements;
|
||||
}
|
||||
|
||||
// Asks Vulkan for the memory layout of one subresource of this image.
//
// aspectMask - image aspect(s) selecting the subresource
// mipLevel   - mip level to query (defaults to 0)
// arrayLayer - array layer to query (defaults to 0)
//
// Returns the VkSubresourceLayout filled in by vkGetImageSubresourceLayout.
VkSubresourceLayout getSubresourceLayout(VkImageAspectFlags aspectMask,
                                         uint32_t mipLevel = 0,
                                         uint32_t arrayLayer = 0) const {
  VkImageSubresource target{};
  target.aspectMask = aspectMask;
  target.mipLevel = mipLevel;
  target.arrayLayer = arrayLayer;

  // Written entirely by the query below.
  VkSubresourceLayout layout;
  vkGetImageSubresourceLayout(g_vkDevice, mImage, &target, &layout);
  return layout;
}
|
||||
|
||||
void readFromBuffer(VkCommandBuffer cmdBuffer, VkBuffer buffer,
|
||||
VkImageAspectFlags destAspect,
|
||||
VkDeviceSize bufferOffset = 0) {
|
||||
|
|
@ -858,6 +887,7 @@ public:
|
|||
mMemory = memory;
|
||||
}
|
||||
|
||||
const DeviceMemoryRef &getMemory() const { return mMemory; }
|
||||
friend ImageRef;
|
||||
};
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -130,9 +130,32 @@ public:
|
|||
void unmap(std::uint64_t beginAddress, std::uint64_t endAddress) {
|
||||
auto beginIt = mAreas.lower_bound(beginAddress);
|
||||
|
||||
if (beginIt == mAreas.end() || beginIt->first >= endAddress) {
|
||||
if (beginIt == mAreas.end()) {
|
||||
return;
|
||||
}
|
||||
if (beginIt->first >= endAddress) {
|
||||
if (beginIt->second != Kind::X) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto prevEnd = beginIt->first;
|
||||
|
||||
--beginIt;
|
||||
if (beginIt->first >= endAddress) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (beginIt->first < beginAddress) {
|
||||
this->handleInvalidation(beginIt->first);
|
||||
mAreas.emplace(beginAddress, Kind::X);
|
||||
}
|
||||
|
||||
if (prevEnd > endAddress) {
|
||||
mAreas.emplace(endAddress, Kind::O);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (beginIt->first > beginAddress && beginIt->second == Kind::X) {
|
||||
// we have found end after unmap begin, need to insert new end
|
||||
this->handleInvalidation(std::prev(beginIt)->first);
|
||||
|
|
@ -179,4 +202,166 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
// Ordered table of non-overlapping address ranges, each carrying a payload.
//
// Ranges are stored as boundary markers in a std::map keyed by address:
//   Kind::O  - opens a range; the marker's payload is the range payload
//   Kind::X  - closes a range; the marker's payload is unused
//   Kind::XO - closes one range and opens the next one at the same address
//
// PayloadT must be default-constructible and copyable; merging additionally
// requires operator==.
template <typename PayloadT> class MemoryTableWithPayload {
  enum class Kind { O, X, XO };
  std::map<std::uint64_t, std::pair<Kind, PayloadT>> mAreas;

public:
  // Value produced when dereferencing an iterator: [beginAddress, endAddress)
  // and a copy of the payload attached to that range.
  struct AreaInfo {
    std::uint64_t beginAddress;
    std::uint64_t endAddress;
    PayloadT payload;
  };

  // Forward iterator over ranges. It must point at an opening marker
  // (O or XO) or be equal to end().
  class iterator {
    using map_iterator =
        typename std::map<std::uint64_t, std::pair<Kind, PayloadT>>::iterator;
    map_iterator it;

  public:
    iterator() = default;
    iterator(map_iterator it) : it(it) {}

    // The range ends at the next marker in the map.
    AreaInfo operator*() const {
      return {it->first, std::next(it)->first, it->second.second};
    }

    // Steps to the marker closing the current range. If that marker does not
    // also open a new range (plain X), steps once more to the next opening
    // marker or end().
    iterator &operator++() {
      ++it;

      if (it->second.first != Kind::XO) {
        ++it;
      }

      return *this;
    }

    bool operator==(iterator other) const { return it == other.it; }
    bool operator!=(iterator other) const { return it != other.it; }
  };

  iterator begin() { return iterator(mAreas.begin()); }
  iterator end() { return iterator(mAreas.end()); }

  // Removes every range.
  void clear() { mAreas.clear(); }

  // Returns an iterator to the range containing `address`, or end() when the
  // address is not inside any range. A range's end address is exclusive.
  iterator queryArea(std::uint64_t address) {
    auto it = mAreas.lower_bound(address);

    if (it == mAreas.end()) {
      return it;
    }

    std::uint64_t endAddress = 0;

    if (it->first == address) {
      // Exactly on a marker: a pure close marker means nothing starts here.
      if (it->second.first == Kind::X) {
        return mAreas.end();
      }

      endAddress = std::next(it)->first;
    } else {
      // First marker after the address: if it opens a range, the address
      // lies in a gap before that range.
      if (it->second.first == Kind::O) {
        return mAreas.end();
      }

      endAddress = it->first;
      --it;
    }

    return endAddress < address ? mAreas.end() : it;
  }

  // Maps [beginAddress, endAddress) to `payload`, replacing whatever parts of
  // existing ranges it overlaps. Parts of older ranges outside the new range
  // keep their own payload.
  //
  // When `merge` is true, the new range is coalesced with a directly adjacent
  // range on either side if that range carries an equal payload.
  //
  // Precondition (asserted): beginAddress < endAddress.
  void map(std::uint64_t beginAddress, std::uint64_t endAddress,
           PayloadT payload, bool merge = true) {
    assert(beginAddress < endAddress);
    auto [beginIt, beginInserted] =
        mAreas.emplace(beginAddress, std::pair{Kind::O, payload});
    auto [endIt, endInserted] =
        mAreas.emplace(endAddress, std::pair{Kind::X, PayloadT{}});

    // seenOpen: some older range is known to be open at/after beginAddress.
    // lastRemovedIsOpen / lastRemovedOpenPayload: the most recently swallowed
    // marker opened a range that extends past endAddress, so its payload has
    // to be re-attached at the end marker.
    bool seenOpen = false;
    bool endCollision = false;
    bool lastRemovedIsOpen = false;
    PayloadT lastRemovedOpenPayload;

    if (!beginInserted || !endInserted) {
      if (!beginInserted) {
        if (beginIt->second.first == Kind::X) {
          // An older range ends exactly where the new one starts.
          beginIt->second.first = Kind::XO;
        } else {
          seenOpen = true;
          lastRemovedIsOpen = true;
          lastRemovedOpenPayload = std::move(beginIt->second.second);
        }

        beginIt->second.second = std::move(payload);
      }

      if (!endInserted) {
        if (endIt->second.first == Kind::O) {
          // An older range starts exactly where the new one ends.
          endIt->second.first = Kind::XO;
        } else {
          endCollision = true;
        }

        lastRemovedIsOpen = false;
      }
    } else if (beginIt != mAreas.begin()) {
      auto prev = std::prev(beginIt);

      if (prev->second.first != Kind::X) {
        // The new range starts inside an older range: split it here.
        beginIt->second.first = Kind::XO;
        seenOpen = true;
        lastRemovedIsOpen = true;
        lastRemovedOpenPayload = prev->second.second;
      }
    }

    // Drop every marker strictly between the new begin and end markers.
    auto origBegin = beginIt;
    ++beginIt;
    while (beginIt != endIt) {
      if (beginIt->second.first == Kind::X) {
        lastRemovedIsOpen = false;
        if (!seenOpen) {
          // A range closed here without having been opened inside the new
          // range, so the new begin marker sits inside that older range.
          origBegin->second.first = Kind::XO;
        }
      } else {
        if (!seenOpen && beginIt->second.first == Kind::XO) {
          origBegin->second.first = Kind::XO;
        }

        seenOpen = true;
        lastRemovedIsOpen = true;
        lastRemovedOpenPayload = std::move(beginIt->second.second);
      }
      beginIt = mAreas.erase(beginIt);
    }

    if (endCollision && !seenOpen) {
      origBegin->second.first = Kind::XO;
    } else if (lastRemovedIsOpen && !endCollision) {
      // The tail of the last swallowed range survives past endAddress.
      endIt->second.first = Kind::XO;
      endIt->second.second = std::move(lastRemovedOpenPayload);
    }

    if (!merge) {
      return;
    }

    // Marker that currently opens the newly mapped range. It moves to the
    // predecessor when the new range is merged into it; erasing origBegin
    // invalidates that iterator, so it must not be touched afterwards.
    auto rangeOpen = origBegin;

    if (origBegin->second.first == Kind::XO) {
      auto prevBegin = std::prev(origBegin);

      if (prevBegin->second.second == origBegin->second.second) {
        mAreas.erase(origBegin);
        rangeOpen = prevBegin; // equal payload, still valid
      }
    }

    if (endIt->second.first == Kind::XO) {
      if (endIt->second.second == rangeOpen->second.second) {
        mAreas.erase(endIt);
      }
    }
  }
};
|
||||
} // namespace util
|
||||
|
|
|
|||
|
|
@ -1,5 +1,9 @@
|
|||
set(CMAKE_POSITION_INDEPENDENT_CODE on)
|
||||
|
||||
add_library(obj.orbis-utils-ipc OBJECT
|
||||
src/utils/SharedMutex.cpp
|
||||
src/utils/SharedCV.cpp
|
||||
)
|
||||
add_library(obj.orbis-kernel OBJECT
|
||||
src/module.cpp
|
||||
src/sysvec.cpp
|
||||
|
|
@ -62,11 +66,9 @@ add_library(obj.orbis-kernel OBJECT
|
|||
src/sys/sys_vm_unix.cpp
|
||||
|
||||
src/utils/Logs.cpp
|
||||
src/utils/SharedMutex.cpp
|
||||
src/utils/SharedCV.cpp
|
||||
)
|
||||
|
||||
target_link_libraries(obj.orbis-kernel PUBLIC orbis::kernel::config)
|
||||
target_link_libraries(obj.orbis-kernel PUBLIC orbis::kernel::config obj.orbis-utils-ipc)
|
||||
|
||||
target_include_directories(obj.orbis-kernel
|
||||
PUBLIC
|
||||
|
|
@ -76,10 +78,21 @@ target_include_directories(obj.orbis-kernel
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/include/orbis
|
||||
)
|
||||
|
||||
target_include_directories(obj.orbis-utils-ipc
|
||||
PUBLIC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include
|
||||
|
||||
PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/orbis
|
||||
)
|
||||
|
||||
add_library(orbis-utils-ipc STATIC)
|
||||
add_library(orbis-kernel STATIC)
|
||||
add_library(orbis-kernel-shared SHARED)
|
||||
add_library(orbis::utils::ipc ALIAS orbis-utils-ipc)
|
||||
add_library(orbis::kernel ALIAS orbis-kernel)
|
||||
add_library(orbis::kernel-shared ALIAS orbis-kernel-shared)
|
||||
|
||||
target_link_libraries(orbis-utils-ipc PUBLIC obj.orbis-utils-ipc)
|
||||
target_link_libraries(orbis-kernel PUBLIC obj.orbis-kernel)
|
||||
target_link_libraries(orbis-kernel-shared PUBLIC obj.orbis-kernel)
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include "amdgpu/RemoteMemory.hpp"
|
||||
#include "amdgpu/device/vk.hpp"
|
||||
#include <algorithm>
|
||||
#include <amdgpu/bridge/bridge.hpp>
|
||||
#include <amdgpu/device/device.hpp>
|
||||
|
|
@ -643,6 +644,10 @@ int main(int argc, const char *argv[]) {
|
|||
Verify() << (graphicsQueues.size() > 0);
|
||||
Verify() << (presentQueue != VK_NULL_HANDLE);
|
||||
|
||||
amdgpu::device::vk::g_computeQueues = computeQueues;
|
||||
amdgpu::device::vk::g_transferQueues = transferQueues;
|
||||
amdgpu::device::vk::g_graphicsQueues = graphicsQueues;
|
||||
|
||||
VkCommandPoolCreateInfo commandPoolCreateInfo = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
|
||||
.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
|
||||
|
|
@ -653,16 +658,8 @@ int main(int argc, const char *argv[]) {
|
|||
Verify() << vkCreateCommandPool(vkDevice, &commandPoolCreateInfo, nullptr,
|
||||
&commandPool);
|
||||
|
||||
VkPipelineCacheCreateInfo pipelineCacheCreateInfo = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
|
||||
};
|
||||
|
||||
VkPipelineCache pipelineCache;
|
||||
Verify() << vkCreatePipelineCache(vkDevice, &pipelineCacheCreateInfo, nullptr,
|
||||
&pipelineCache);
|
||||
amdgpu::device::DrawContext dc{
|
||||
// TODO
|
||||
.pipelineCache = pipelineCache,
|
||||
.queue = graphicsQueues.front().first,
|
||||
.commandPool = commandPool,
|
||||
};
|
||||
|
|
@ -707,7 +704,7 @@ int main(int argc, const char *argv[]) {
|
|||
bridge->pullerPid = ::getpid();
|
||||
|
||||
amdgpu::bridge::BridgePuller bridgePuller{bridge};
|
||||
amdgpu::bridge::Command commandsBuffer[32];
|
||||
amdgpu::bridge::Command commandsBuffer[1];
|
||||
|
||||
if (!std::filesystem::exists(std::string("/dev/shm") + shmName)) {
|
||||
std::printf("Waiting for OS\n");
|
||||
|
|
@ -735,164 +732,170 @@ int main(int argc, const char *argv[]) {
|
|||
memoryFd, 0);
|
||||
|
||||
g_hostMemory = memory;
|
||||
amdgpu::device::AmdgpuDevice device(dc, bridgePuller.header);
|
||||
|
||||
std::vector<VkCommandBuffer> presentCmdBuffers(swapchainImages.size());
|
||||
|
||||
{
|
||||
VkCommandBufferAllocateInfo allocInfo{};
|
||||
allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
|
||||
allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
|
||||
allocInfo.commandPool = dc.commandPool;
|
||||
allocInfo.commandBufferCount = presentCmdBuffers.size();
|
||||
vkAllocateCommandBuffers(vkDevice, &allocInfo, presentCmdBuffers.data());
|
||||
}
|
||||
amdgpu::device::AmdgpuDevice device(dc, bridgePuller.header);
|
||||
|
||||
std::printf("Initialization complete\n");
|
||||
|
||||
uint32_t imageIndex = 0;
|
||||
bool isImageAcquired = false;
|
||||
std::vector<std::vector<VkBuffer>> swapchainBufferHandles;
|
||||
swapchainBufferHandles.resize(swapchainImages.size());
|
||||
std::vector<std::vector<VkImage>> swapchainImageHandles;
|
||||
swapchainImageHandles.resize(swapchainImages.size());
|
||||
|
||||
VkPipelineStageFlags submitPipelineStages =
|
||||
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
|
||||
|
||||
while (!glfwWindowShouldClose(window)) {
|
||||
glfwPollEvents();
|
||||
|
||||
std::size_t pulledCount =
|
||||
bridgePuller.pullCommands(commandsBuffer, std::size(commandsBuffer));
|
||||
|
||||
if (pulledCount == 0) {
|
||||
// std::this_thread::sleep_for(
|
||||
// std::chrono::milliseconds(1)); // Just for testing, should be
|
||||
// removed
|
||||
continue;
|
||||
for (std::uint32_t end = bridge->memoryAreaCount, i = 0; i < end; ++i) {
|
||||
auto area = bridge->memoryAreas[i];
|
||||
device.handleProtectMemory(area.address, area.size, area.prot);
|
||||
}
|
||||
|
||||
for (auto cmd : std::span(commandsBuffer, pulledCount)) {
|
||||
switch (cmd.id) {
|
||||
case amdgpu::bridge::CommandId::ProtectMemory:
|
||||
device.handleProtectMemory(cmd.memoryProt.address, cmd.memoryProt.size,
|
||||
cmd.memoryProt.prot);
|
||||
break;
|
||||
case amdgpu::bridge::CommandId::CommandBuffer:
|
||||
device.handleCommandBuffer(cmd.commandBuffer.queue,
|
||||
cmd.commandBuffer.address,
|
||||
cmd.commandBuffer.size);
|
||||
break;
|
||||
case amdgpu::bridge::CommandId::Flip: {
|
||||
if (!isImageAcquired) {
|
||||
Verify() << vkAcquireNextImageKHR(vkDevice, swapchain, UINT64_MAX,
|
||||
presentCompleteSemaphore, nullptr,
|
||||
&imageIndex);
|
||||
std::vector<VkCommandBuffer> presentCmdBuffers(swapchainImages.size());
|
||||
|
||||
vkWaitForFences(vkDevice, 1, &inFlightFences[imageIndex], VK_TRUE,
|
||||
UINT64_MAX);
|
||||
vkResetFences(vkDevice, 1, &inFlightFences[imageIndex]);
|
||||
}
|
||||
{
|
||||
VkCommandBufferAllocateInfo allocInfo{};
|
||||
allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
|
||||
allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
|
||||
allocInfo.commandPool = dc.commandPool;
|
||||
allocInfo.commandBufferCount = presentCmdBuffers.size();
|
||||
vkAllocateCommandBuffers(vkDevice, &allocInfo, presentCmdBuffers.data());
|
||||
}
|
||||
|
||||
isImageAcquired = false;
|
||||
std::printf("Initialization complete\n");
|
||||
|
||||
vkResetCommandBuffer(presentCmdBuffers[imageIndex], 0);
|
||||
VkCommandBufferBeginInfo beginInfo{};
|
||||
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
||||
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
|
||||
uint32_t imageIndex = 0;
|
||||
bool isImageAcquired = false;
|
||||
std::vector<std::vector<VkBuffer>> swapchainBufferHandles;
|
||||
swapchainBufferHandles.resize(swapchainImages.size());
|
||||
std::vector<std::vector<VkImage>> swapchainImageHandles;
|
||||
swapchainImageHandles.resize(swapchainImages.size());
|
||||
|
||||
vkBeginCommandBuffer(presentCmdBuffers[imageIndex], &beginInfo);
|
||||
VkPipelineStageFlags submitPipelineStages =
|
||||
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
|
||||
|
||||
for (auto handle : swapchainBufferHandles[imageIndex]) {
|
||||
vkDestroyBuffer(vkDevice, handle, nullptr);
|
||||
}
|
||||
while (!glfwWindowShouldClose(window)) {
|
||||
glfwPollEvents();
|
||||
|
||||
for (auto handle : swapchainImageHandles[imageIndex]) {
|
||||
vkDestroyImage(vkDevice, handle, nullptr);
|
||||
}
|
||||
std::size_t pulledCount =
|
||||
bridgePuller.pullCommands(commandsBuffer, std::size(commandsBuffer));
|
||||
|
||||
swapchainBufferHandles[imageIndex].clear();
|
||||
swapchainImageHandles[imageIndex].clear();
|
||||
if (pulledCount == 0) {
|
||||
// std::this_thread::sleep_for(
|
||||
// std::chrono::milliseconds(1)); // Just for testing, should be
|
||||
// removed
|
||||
continue;
|
||||
}
|
||||
|
||||
if (device.handleFlip(cmd.flip.bufferIndex, cmd.flip.arg,
|
||||
presentCmdBuffers[imageIndex],
|
||||
swapchainImages[imageIndex], swapchainExtent,
|
||||
swapchainBufferHandles[imageIndex],
|
||||
swapchainImageHandles[imageIndex])) {
|
||||
vkEndCommandBuffer(presentCmdBuffers[imageIndex]);
|
||||
for (auto cmd : std::span(commandsBuffer, pulledCount)) {
|
||||
switch (cmd.id) {
|
||||
case amdgpu::bridge::CommandId::ProtectMemory:
|
||||
device.handleProtectMemory(cmd.memoryProt.address,
|
||||
cmd.memoryProt.size, cmd.memoryProt.prot);
|
||||
break;
|
||||
case amdgpu::bridge::CommandId::CommandBuffer:
|
||||
device.handleCommandBuffer(cmd.commandBuffer.queue,
|
||||
cmd.commandBuffer.address,
|
||||
cmd.commandBuffer.size);
|
||||
break;
|
||||
case amdgpu::bridge::CommandId::Flip: {
|
||||
if (!isImageAcquired) {
|
||||
Verify() << vkAcquireNextImageKHR(vkDevice, swapchain, UINT64_MAX,
|
||||
presentCompleteSemaphore, nullptr,
|
||||
&imageIndex);
|
||||
|
||||
VkSubmitInfo submitInfo{};
|
||||
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
|
||||
submitInfo.commandBufferCount = 1;
|
||||
submitInfo.pCommandBuffers = &presentCmdBuffers[imageIndex];
|
||||
submitInfo.waitSemaphoreCount = 1;
|
||||
submitInfo.signalSemaphoreCount = 1;
|
||||
submitInfo.pSignalSemaphores = &renderCompleteSemaphore;
|
||||
submitInfo.pWaitSemaphores = &presentCompleteSemaphore;
|
||||
submitInfo.pWaitDstStageMask = &submitPipelineStages;
|
||||
|
||||
Verify() << vkQueueSubmit(dc.queue, 1, &submitInfo,
|
||||
inFlightFences[imageIndex]);
|
||||
|
||||
VkPresentInfoKHR presentInfo{};
|
||||
presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
|
||||
presentInfo.waitSemaphoreCount = 1;
|
||||
presentInfo.pWaitSemaphores = &renderCompleteSemaphore;
|
||||
presentInfo.swapchainCount = 1;
|
||||
presentInfo.pSwapchains = &swapchain;
|
||||
presentInfo.pImageIndices = &imageIndex;
|
||||
|
||||
if (vkQueuePresentKHR(presentQueue, &presentInfo) != VK_SUCCESS) {
|
||||
std::printf("swapchain was invalidated\n");
|
||||
createSwapchain();
|
||||
vkWaitForFences(vkDevice, 1, &inFlightFences[imageIndex], VK_TRUE,
|
||||
UINT64_MAX);
|
||||
vkResetFences(vkDevice, 1, &inFlightFences[imageIndex]);
|
||||
}
|
||||
// std::this_thread::sleep_for(std::chrono::seconds(3));
|
||||
} else {
|
||||
isImageAcquired = true;
|
||||
|
||||
isImageAcquired = false;
|
||||
|
||||
vkResetCommandBuffer(presentCmdBuffers[imageIndex], 0);
|
||||
VkCommandBufferBeginInfo beginInfo{};
|
||||
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
||||
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
|
||||
|
||||
vkBeginCommandBuffer(presentCmdBuffers[imageIndex], &beginInfo);
|
||||
|
||||
for (auto handle : swapchainBufferHandles[imageIndex]) {
|
||||
vkDestroyBuffer(vkDevice, handle, nullptr);
|
||||
}
|
||||
|
||||
for (auto handle : swapchainImageHandles[imageIndex]) {
|
||||
vkDestroyImage(vkDevice, handle, nullptr);
|
||||
}
|
||||
|
||||
swapchainBufferHandles[imageIndex].clear();
|
||||
swapchainImageHandles[imageIndex].clear();
|
||||
|
||||
if (device.handleFlip(cmd.flip.bufferIndex, cmd.flip.arg,
|
||||
presentCmdBuffers[imageIndex],
|
||||
swapchainImages[imageIndex], swapchainExtent,
|
||||
swapchainBufferHandles[imageIndex],
|
||||
swapchainImageHandles[imageIndex])) {
|
||||
vkEndCommandBuffer(presentCmdBuffers[imageIndex]);
|
||||
|
||||
VkSubmitInfo submitInfo{};
|
||||
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
|
||||
submitInfo.commandBufferCount = 1;
|
||||
submitInfo.pCommandBuffers = &presentCmdBuffers[imageIndex];
|
||||
submitInfo.waitSemaphoreCount = 1;
|
||||
submitInfo.signalSemaphoreCount = 1;
|
||||
submitInfo.pSignalSemaphores = &renderCompleteSemaphore;
|
||||
submitInfo.pWaitSemaphores = &presentCompleteSemaphore;
|
||||
submitInfo.pWaitDstStageMask = &submitPipelineStages;
|
||||
|
||||
Verify() << vkQueueSubmit(dc.queue, 1, &submitInfo,
|
||||
inFlightFences[imageIndex]);
|
||||
|
||||
VkPresentInfoKHR presentInfo{};
|
||||
presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
|
||||
presentInfo.waitSemaphoreCount = 1;
|
||||
presentInfo.pWaitSemaphores = &renderCompleteSemaphore;
|
||||
presentInfo.swapchainCount = 1;
|
||||
presentInfo.pSwapchains = &swapchain;
|
||||
presentInfo.pImageIndices = &imageIndex;
|
||||
|
||||
if (vkQueuePresentKHR(presentQueue, &presentInfo) != VK_SUCCESS) {
|
||||
std::printf("swapchain was invalidated\n");
|
||||
createSwapchain();
|
||||
}
|
||||
// std::this_thread::sleep_for(std::chrono::seconds(3));
|
||||
} else {
|
||||
isImageAcquired = true;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
util::unreachable("Unexpected command id %u\n", (unsigned)cmd.id);
|
||||
default:
|
||||
util::unreachable("Unexpected command id %u\n", (unsigned)cmd.id);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (bridge->pusherPid > 0) {
|
||||
kill(bridge->pusherPid, SIGINT);
|
||||
}
|
||||
|
||||
for (auto fence : inFlightFences) {
|
||||
vkDestroyFence(vkDevice, fence, nullptr);
|
||||
}
|
||||
|
||||
vkDestroySemaphore(vkDevice, presentCompleteSemaphore, nullptr);
|
||||
vkDestroySemaphore(vkDevice, renderCompleteSemaphore, nullptr);
|
||||
vkDestroyCommandPool(vkDevice, commandPool, nullptr);
|
||||
|
||||
for (auto &handles : swapchainImageHandles) {
|
||||
for (auto handle : handles) {
|
||||
vkDestroyImage(vkDevice, handle, nullptr);
|
||||
if (bridge->pusherPid > 0) {
|
||||
kill(bridge->pusherPid, SIGINT);
|
||||
}
|
||||
}
|
||||
for (auto &handles : swapchainBufferHandles) {
|
||||
for (auto handle : handles) {
|
||||
vkDestroyBuffer(vkDevice, handle, nullptr);
|
||||
|
||||
for (auto fence : inFlightFences) {
|
||||
vkDestroyFence(vkDevice, fence, nullptr);
|
||||
}
|
||||
|
||||
vkDestroySemaphore(vkDevice, presentCompleteSemaphore, nullptr);
|
||||
vkDestroySemaphore(vkDevice, renderCompleteSemaphore, nullptr);
|
||||
vkDestroyCommandPool(vkDevice, commandPool, nullptr);
|
||||
|
||||
for (auto &handles : swapchainImageHandles) {
|
||||
for (auto handle : handles) {
|
||||
vkDestroyImage(vkDevice, handle, nullptr);
|
||||
}
|
||||
}
|
||||
for (auto &handles : swapchainBufferHandles) {
|
||||
for (auto handle : handles) {
|
||||
vkDestroyBuffer(vkDevice, handle, nullptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vkDestroySwapchainKHR(vkDevice, swapchain, nullptr);
|
||||
|
||||
for (auto handle : swapchainImages) {
|
||||
vkDestroyImage(vkDevice, handle, nullptr);
|
||||
}
|
||||
|
||||
vkDestroyDevice(vkDevice, nullptr);
|
||||
vkDestroySurfaceKHR(vkInstance, vkSurface, nullptr);
|
||||
vkDestroyInstance(vkInstance, nullptr);
|
||||
|
||||
glfwDestroyWindow(window);
|
||||
|
||||
amdgpu::bridge::destroyShmCommandBuffer(bridge);
|
||||
amdgpu::bridge::unlinkShm(cmdBridgeName);
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@
|
|||
#include "vfs.hpp"
|
||||
#include "vm.hpp"
|
||||
|
||||
#include <atomic>
|
||||
#include <elf.h>
|
||||
#include <filesystem>
|
||||
#include <orbis/KernelContext.hpp>
|
||||
|
|
@ -23,6 +24,7 @@
|
|||
|
||||
#include <fcntl.h>
|
||||
#include <pthread.h>
|
||||
#include <sys/mman.h>
|
||||
#include <ucontext.h>
|
||||
|
||||
#include <csignal>
|
||||
|
|
@ -37,6 +39,67 @@ handle_signal(int sig, siginfo_t *info, void *ucontext) {
|
|||
_writefsbase_u64(hostFs);
|
||||
}
|
||||
|
||||
auto signalAddress = reinterpret_cast<std::uintptr_t>(info->si_addr);
|
||||
|
||||
if (rx::thread::g_current != nullptr && sig == SIGSEGV &&
|
||||
signalAddress >= 0x40000 && signalAddress < 0x100'0000'0000) {
|
||||
auto ctx = reinterpret_cast<ucontext_t *>(ucontext);
|
||||
bool isWrite = (ctx->uc_mcontext.gregs[REG_ERR] & 0x2) != 0;
|
||||
auto origVmProt = rx::vm::getPageProtection(signalAddress);
|
||||
int prot = 0;
|
||||
auto page = signalAddress / amdgpu::bridge::kHostPageSize;
|
||||
|
||||
if (origVmProt & rx::vm::kMapProtCpuRead) {
|
||||
prot |= PROT_READ;
|
||||
}
|
||||
if (origVmProt & rx::vm::kMapProtCpuWrite) {
|
||||
prot |= PROT_WRITE;
|
||||
}
|
||||
if (origVmProt & rx::vm::kMapProtCpuExec) {
|
||||
prot |= PROT_EXEC;
|
||||
}
|
||||
|
||||
if (prot & (isWrite ? PROT_WRITE : PROT_READ)) {
|
||||
auto bridge = rx::bridge.header;
|
||||
|
||||
while (true) {
|
||||
auto flags = bridge->cachePages[page].load(std::memory_order::relaxed);
|
||||
|
||||
if ((flags & amdgpu::bridge::kPageReadWriteLock) != 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((flags & amdgpu::bridge::kPageWriteWatch) == 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (!isWrite) {
|
||||
prot &= ~PROT_WRITE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (bridge->cachePages[page].compare_exchange_weak(
|
||||
flags, amdgpu::bridge::kPageInvalidated,
|
||||
std::memory_order::relaxed)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (::mprotect((void *)(page * amdgpu::bridge::kHostPageSize),
|
||||
amdgpu::bridge::kHostPageSize, prot)) {
|
||||
std::perror("cache reprotection error");
|
||||
std::abort();
|
||||
}
|
||||
|
||||
_writefsbase_u64(rx::thread::g_current->fsBase);
|
||||
return;
|
||||
}
|
||||
|
||||
std::fprintf(stderr, "SIGSEGV, address %lx, access %s, prot %s\n",
|
||||
signalAddress, isWrite ? "write" : "read",
|
||||
rx::vm::mapProtToString(origVmProt).c_str());
|
||||
}
|
||||
|
||||
if (g_gpuPid > 0) {
|
||||
// stop gpu thread
|
||||
::kill(g_gpuPid, SIGINT);
|
||||
|
|
@ -514,6 +577,68 @@ int main(int argc, const char *argv[]) {
|
|||
initProcess->processParam = executableModule->processParam;
|
||||
initProcess->processParamSize = executableModule->processParamSize;
|
||||
|
||||
// Detached "Bridge" worker: polls bridge->cacheCommands and applies the
// requested page protection to host memory with mprotect().
//
// Command encoding, as decoded below: low 32 bits = index of the first host
// page, high 32 bits = number of pages minus one. A value of 0 means "slot
// empty"; the slot is reset to 0 once its command has been fetched.
std::thread{[] {
  pthread_setname_np(pthread_self(), "Bridge");
  auto bridge = rx::bridge.header;

  std::vector<std::uint64_t> fetchedCommands;
  fetchedCommands.reserve(std::size(bridge->cacheCommands));

  while (true) {
    // Drain every non-empty command slot first, then process the batch.
    for (auto &command : bridge->cacheCommands) {
      std::uint64_t value = command.load(std::memory_order::relaxed);

      if (value != 0) {
        fetchedCommands.push_back(value);
        command.store(0, std::memory_order::relaxed);
      }
    }

    if (fetchedCommands.empty()) {
      continue;
    }

    for (auto command : fetchedCommands) {
      auto page = static_cast<std::uint32_t>(command);
      // Page count and byte length are computed in 64 bits. The previous
      // `int * uint32_t` product was evaluated in 32 bits and truncated for
      // runs of 4 GiB or more.
      const std::uint64_t count = (command >> 32) + 1;

      // Only the flags of the first page are consulted; they are applied to
      // the whole run.
      auto pageFlags =
          bridge->cachePages[page].load(std::memory_order::relaxed);

      auto address =
          static_cast<std::uint64_t>(page) * amdgpu::bridge::kHostPageSize;
      const std::uint64_t length =
          static_cast<std::uint64_t>(amdgpu::bridge::kHostPageSize) * count;

      // Start from the protection the guest VM assigned to the page ...
      auto origVmProt = rx::vm::getPageProtection(address);
      int prot = 0;

      if (origVmProt & rx::vm::kMapProtCpuRead) {
        prot |= PROT_READ;
      }
      if (origVmProt & rx::vm::kMapProtCpuWrite) {
        prot |= PROT_WRITE;
      }
      if (origVmProt & rx::vm::kMapProtCpuExec) {
        prot |= PROT_EXEC;
      }

      // ... then restrict it according to the cache flags: a lock removes
      // read and write access, a write watch removes write access only.
      if (pageFlags & amdgpu::bridge::kPageReadWriteLock) {
        prot &= ~(PROT_READ | PROT_WRITE);
      } else if (pageFlags & amdgpu::bridge::kPageWriteWatch) {
        prot &= ~PROT_WRITE;
      }

      // std::fprintf(stderr, "protection %lx-%lx\n", address,
      //              address + length);
      if (::mprotect(reinterpret_cast<void *>(address), length, prot)) {
        perror("protection failed");
        std::abort();
      }
    }

    fetchedCommands.clear();
  }
}}.detach();
|
||||
|
||||
int status = 0;
|
||||
|
||||
if (executableModule->type == rx::linker::kElfTypeSceDynExec ||
|
||||
|
|
|
|||
Loading…
Reference in a new issue