Mirror of https://github.com/RPCSX/rpcsx.git (synced 2026-01-07 17:20:14 +01:00)

[amdgpu] device: implement multi queue support

Implement IT_INDIRECT_BUFFER, IT_INDEX_BASE and IT_DRAW_INDEX_OFFSET_2; add a new GPU task scheduler.

This commit is contained in:
parent ade074721e
commit 9e109918fd
hw/amdgpu/device/include/amdgpu/device/device.hpp

@@ -2,6 +2,7 @@
 #include "amdgpu/bridge/bridge.hpp"
 #include "amdgpu/shader/Instruction.hpp"
+#include "gpu-scheduler.hpp"
 #include "util/area.hpp"

 #include <string>

@@ -760,7 +761,7 @@ enum Opcodes {
   kOpcodeDISPATCH_DIRECT = 0x15,
   kOpcodeDISPATCH_INDIRECT = 0x16,
   kOpcodeINDIRECT_BUFFER_END = 0x17,
-  MODE_CONTROL = 0x18,
+  kOpcodeMODE_CONTROL = 0x18,
   kOpcodeATOMIC_GDS = 0x1D,
   kOpcodeATOMIC_MEM = 0x1E,
   kOpcodeOCCLUSION_QUERY = 0x1F,
@@ -773,7 +774,7 @@ enum Opcodes {
   kOpcodeINDEX_BASE = 0x26,
   kOpcodeDRAW_INDEX_2 = 0x27,
   kOpcodeCONTEXT_CONTROL = 0x28,
-  DRAW_INDEX_OFFSET = 0x29,
+  kOpcodeDRAW_INDEX_OFFSET = 0x29,
   kOpcodeINDEX_TYPE = 0x2A,
   kOpcodeDRAW_INDEX = 0x2B,
   kOpcodeDRAW_INDIRECT_MULTI = 0x2C,
@@ -805,11 +806,11 @@ enum Opcodes {
   kOpcodeEVENT_WRITE_EOS = 0x48,
   kOpcodeRELEASE_MEM = 0x49,
   kOpcodePREAMBLE_CNTL = 0x4A,
-  RB_OFFSET = 0x4B,
-  ALU_PS_CONST_BUFFER_COPY = 0x4C,
-  ALU_VS_CONST_BUFFER_COPY = 0x4D,
-  ALU_PS_CONST_UPDATE = 0x4E,
-  ALU_VS_CONST_UPDATE = 0x4F,
+  kOpcodeRB_OFFSET = 0x4B,
+  kOpcodeALU_PS_CONST_BUFFER_COPY = 0x4C,
+  kOpcodeALU_VS_CONST_BUFFER_COPY = 0x4D,
+  kOpcodeALU_PS_CONST_UPDATE = 0x4E,
+  kOpcodeALU_VS_CONST_UPDATE = 0x4F,
   kOpcodeDMA_DATA = 0x50,
   kOpcodeONE_REG_WRITE = 0x57,
   kOpcodeAQUIRE_MEM = 0x58,
@@ -826,12 +827,12 @@ enum Opcodes {
   kOpcodeSET_RESOURCE = 0x6D,
   kOpcodeSET_SAMPLER = 0x6E,
   kOpcodeSET_CTL_CONST = 0x6F,
-  SET_RESOURCE_OFFSET = 0x70,
-  SET_ALU_CONST_VS = 0x71,
-  SET_ALU_CONST_DI = 0x72,
+  kOpcodeSET_RESOURCE_OFFSET = 0x70,
+  kOpcodeSET_ALU_CONST_VS = 0x71,
+  kOpcodeSET_ALU_CONST_DI = 0x72,
   kOpcodeSET_CONTEXT_REG_INDIRECT = 0x73,
-  SET_RESOURCE_INDIRECT = 0x74,
-  SET_APPEND_CNT = 0x75,
+  kOpcodeSET_RESOURCE_INDIRECT = 0x74,
+  kOpcodeSET_APPEND_CNT = 0x75,
   kOpcodeSET_SH_REG = 0x76,
   kOpcodeSET_SH_REG_OFFSET = 0x77,
   kOpcodeSET_QUEUE_REG = 0x78,
@@ -1018,8 +1019,6 @@ inline const std::string opcodeToString(int op) {
 }

 inline void dumpShader(const std::uint32_t *data) {
-  int hackExit = 0;
-
   flockfile(stdout);
   while (true) {
     auto instHex = *data;
@@ -1262,29 +1261,22 @@ static_assert(sizeof(GnmTBuffer) == sizeof(std::uint64_t) * 4);

 constexpr auto kPageSize = 0x4000;

-struct DrawContext {
-  VkQueue queue;
-  VkCommandPool commandPool;
-};
-
 void setVkDevice(VkDevice device,
                  VkPhysicalDeviceMemoryProperties memProperties,
                  VkPhysicalDeviceProperties devProperties);

 struct AmdgpuDevice {
-  amdgpu::device::DrawContext dc;
-
   void handleProtectMemory(std::uint64_t address, std::uint64_t size,
                            std::uint32_t prot);
   void handleCommandBuffer(std::uint64_t queueId, std::uint64_t address,
                            std::uint64_t size);
-  bool handleFlip(std::uint32_t bufferIndex, std::uint64_t arg,
-                  VkCommandBuffer cmd, VkImage targetImage,
-                  VkExtent2D targetExtent, std::vector<VkBuffer> &usedBuffers,
-                  std::vector<VkImage> &usedImages);
+  bool handleFlip(VkQueue queue, VkCommandBuffer cmdBuffer,
+                  TaskChain &initTaskChain, std::uint32_t bufferIndex,
+                  std::uint64_t arg, VkImage targetImage,
+                  VkExtent2D targetExtent, VkSemaphore waitSemaphore,
+                  VkSemaphore signalSemaphore, VkFence fence);

-  AmdgpuDevice(amdgpu::device::DrawContext dc,
-               amdgpu::bridge::BridgeHeader *bridge);
+  AmdgpuDevice(amdgpu::bridge::BridgeHeader *bridge);

   ~AmdgpuDevice();
 };
hw/amdgpu/device/include/amdgpu/device/gpu-scheduler.hpp — new file (321 lines)
@@ -0,0 +1,321 @@
#pragma once

#include "scheduler.hpp"
#include "vk.hpp"
#include <atomic>
#include <concepts>
#include <cstdint>
#include <list>
#include <source_location>
#include <thread>
#include <utility>
#include <vulkan/vulkan_core.h>

namespace amdgpu::device {
enum class ProcessQueue {
  Graphics = 1 << 1,
  Compute = 1 << 2,
  Transfer = 1 << 3,
  Any = Graphics | Compute | Transfer
};

inline ProcessQueue operator|(ProcessQueue lhs, ProcessQueue rhs) {
  return static_cast<ProcessQueue>(std::to_underlying(lhs) |
                                   std::to_underlying(rhs));
}

inline ProcessQueue operator&(ProcessQueue lhs, ProcessQueue rhs) {
  return static_cast<ProcessQueue>(std::to_underlying(lhs) &
                                   std::to_underlying(rhs));
}

struct TaskChain;
class GpuScheduler;

Scheduler &getCpuScheduler();
GpuScheduler &getGpuScheduler(ProcessQueue queue);

struct GpuTaskLayout {
  static constexpr auto kInvalidId = 0; //~static_cast<std::uint64_t>(0);

  Ref<TaskChain> chain;
  std::uint64_t id;
  std::uint64_t waitId = kInvalidId;
  VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;

  std::function<void(VkCommandBuffer)> invoke;
  std::function<void(VkQueue, VkCommandBuffer)> submit;
};

struct TaskChain {
  vk::Semaphore semaphore;
  std::uint64_t nextTaskId = 1;
  std::atomic<unsigned> refs{0};
  std::vector<std::source_location> taskLocations;

  void incRef() { refs.fetch_add(1, std::memory_order::relaxed); }
  void decRef() {
    if (refs.fetch_sub(1, std::memory_order::relaxed) == 1) {
      delete this;
    }
  }

  static Ref<TaskChain> Create() {
    auto result = new TaskChain();
    result->semaphore = vk::Semaphore::Create();
    return result;
  }

  std::uint64_t add(ProcessQueue queue, std::uint64_t waitId,
                    std::function<void(VkCommandBuffer)> invoke);

  std::uint64_t add(ProcessQueue queue,
                    std::function<void(VkCommandBuffer)> invoke) {
    return add(queue, GpuTaskLayout::kInvalidId, std::move(invoke));
  }

  template <typename T>
    requires requires(T &&t) {
      { t() } -> std::same_as<void>;
    }
  std::uint64_t add(std::uint64_t waitId, T &&task) {
    auto prevTaskId = getLastTaskId();
    auto id = nextTaskId++;
    auto cpuTask =
        createCpuTask([=, task = std::forward<T>(task),
                       self = Ref(this)](const AsyncTaskCtl &) mutable {
          if (waitId != GpuTaskLayout::kInvalidId) {
            if (self->semaphore.getCounterValue() < waitId) {
              return TaskResult::Reschedule;
            }
          }

          task();

          if (prevTaskId != GpuTaskLayout::kInvalidId && waitId != prevTaskId) {
            self->wait(prevTaskId);
          }

          self->semaphore.signal(id);
          return TaskResult::Complete;
        });
    getCpuScheduler().enqueue(std::move(cpuTask));
    return id;
  }

  template <typename T>
    requires requires(T &&t) {
      { t() } -> std::same_as<void>;
    }
  std::uint64_t add(T &&task) {
    return add(GpuTaskLayout::kInvalidId, std::forward<T>(task));
  }

  std::uint64_t getLastTaskId() const { return nextTaskId - 1; }

  std::uint64_t createExternalTask() { return nextTaskId++; }
  void notifyExternalTaskComplete(std::uint64_t id) { semaphore.signal(id); }

  bool isComplete() const { return isComplete(getLastTaskId()); }

  bool isComplete(std::uint64_t task) const {
    return semaphore.getCounterValue() >= task;
  }

  bool empty() const { return getLastTaskId() == GpuTaskLayout::kInvalidId; }

  void wait(std::uint64_t task = GpuTaskLayout::kInvalidId) const {
    if (empty()) {
      return;
    }

    if (task == GpuTaskLayout::kInvalidId) {
      task = getLastTaskId();
    }

    Verify() << semaphore.wait(task, UINT64_MAX);
  }
};

class GpuScheduler {
  std::list<std::thread> workThreads;
  std::vector<GpuTaskLayout> tasks;
  std::vector<GpuTaskLayout> delayedTasks;
  std::mutex taskMtx;
  std::condition_variable taskCv;
  std::atomic<bool> exit{false};
  std::string debugName;

public:
  explicit GpuScheduler(std::span<std::pair<VkQueue, std::uint32_t>> queues,
                        std::string debugName)
      : debugName(debugName) {
    for (std::size_t index = 0; auto [queue, queueFamilyIndex] : queues) {
      workThreads.push_back(std::thread{[=, this] {
        setThreadName(
            ("GPU " + std::to_string(index) + " " + debugName).c_str());
        entry(queue, queueFamilyIndex);
      }});

      ++index;
    }
  }

  ~GpuScheduler() {
    exit = true;
    taskCv.notify_all();

    for (auto &thread : workThreads) {
      thread.join();
    }
  }

  void enqueue(GpuTaskLayout &&task) {
    std::lock_guard lock(taskMtx);
    tasks.push_back(std::move(task));
    taskCv.notify_one();
  }

private:
  void submitTask(VkCommandPool pool, VkQueue queue, GpuTaskLayout &task) {
    VkCommandBuffer cmdBuffer;
    {
      VkCommandBufferAllocateInfo allocateInfo{
          .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
          .commandPool = pool,
          .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
          .commandBufferCount = 1,
      };

      Verify() << vkAllocateCommandBuffers(vk::g_vkDevice, &allocateInfo,
                                           &cmdBuffer);

      VkCommandBufferBeginInfo beginInfo{
          .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
          .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
      };

      vkBeginCommandBuffer(cmdBuffer, &beginInfo);
    }

    task.invoke(cmdBuffer);

    vkEndCommandBuffer(cmdBuffer);

    if (task.submit) {
      task.submit(queue, cmdBuffer);
      return;
    }

    VkSemaphoreSubmitInfo signalSemSubmitInfo = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
        .semaphore = task.chain->semaphore.getHandle(),
        .value = task.id,
        .stageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
    };

    VkSemaphoreSubmitInfo waitSemSubmitInfo = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
        .semaphore = task.chain->semaphore.getHandle(),
        .value = task.waitId,
        .stageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
    };

    VkCommandBufferSubmitInfo cmdBufferSubmitInfo{
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
        .commandBuffer = cmdBuffer,
    };

    VkSubmitInfo2 submitInfo{
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
        .waitSemaphoreInfoCount =
            static_cast<std::uint32_t>(task.waitId ? 1 : 0),
        .pWaitSemaphoreInfos = &waitSemSubmitInfo,
        .commandBufferInfoCount = 1,
        .pCommandBufferInfos = &cmdBufferSubmitInfo,
        .signalSemaphoreInfoCount = 1,
        .pSignalSemaphoreInfos = &signalSemSubmitInfo,
    };

    Verify() << vkQueueSubmit2(queue, 1, &submitInfo, VK_NULL_HANDLE);

    // if (task.signalChain->semaphore.wait(
    //         task.id, std::chrono::duration_cast<std::chrono::nanoseconds>(
    //                      std::chrono::seconds(10))
    //                      .count())) {
    //   util::unreachable("gpu operation takes too long time. wait id = %lu\n",
    //                     task.waitId);
    // }
  }

  void entry(VkQueue queue, std::uint32_t queueFamilyIndex) {
    VkCommandPool pool;
    {
      VkCommandPoolCreateInfo poolCreateInfo{
          .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
          .queueFamilyIndex = queueFamilyIndex};

      Verify() << vkCreateCommandPool(vk::g_vkDevice, &poolCreateInfo,
                                      vk::g_vkAllocator, &pool);
    }

    while (!exit.load(std::memory_order::relaxed)) {
      GpuTaskLayout task;

      {
        std::unique_lock lock(taskMtx);

        while (tasks.empty()) {
          if (tasks.empty() && delayedTasks.empty()) {
            taskCv.wait(lock);
          }

          if (tasks.empty()) {
            std::swap(delayedTasks, tasks);
          }
        }

        task = std::move(tasks.back());
        tasks.pop_back();
      }

      if (task.waitId != GpuTaskLayout::kInvalidId &&
          !task.chain->isComplete(task.waitId)) {
        std::unique_lock lock(taskMtx);
        delayedTasks.push_back(std::move(task));
        taskCv.notify_one();
        continue;
      }

      submitTask(pool, queue, task);
    }

    vkDestroyCommandPool(vk::g_vkDevice, pool, vk::g_vkAllocator);
  }
};

inline std::uint64_t
TaskChain::add(ProcessQueue queue, std::uint64_t waitId,
               std::function<void(VkCommandBuffer)> invoke) {
  VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
  if (waitId == GpuTaskLayout::kInvalidId) {
    waitId = getLastTaskId();
    waitStage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
  }
  auto id = nextTaskId++;

  getGpuScheduler(queue).enqueue({
      .chain = Ref(this),
      .id = id,
      .waitId = waitId,
      .waitStage = waitStage,
      .invoke = std::move(invoke),
  });

  return id;
}

GpuScheduler &getTransferQueueScheduler();
GpuScheduler &getComputeQueueScheduler();
GpuScheduler &getGraphicsQueueScheduler();
} // namespace amdgpu::device
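To make the intended flow of the new file concrete, here is a minimal usage sketch (illustrative only, not part of the commit; it assumes the schedulers returned by getCpuScheduler()/getGpuScheduler() have already been created elsewhere, and the lambda bodies are placeholders):

// Sketch: chain a CPU preparation step with a GPU transfer that waits on it.
using namespace amdgpu::device;

Ref<TaskChain> chain = TaskChain::Create();

// CPU task: runs on the CPU scheduler and signals the chain's timeline
// semaphore with its task id when it finishes.
auto cpuId = chain->add([] { /* fill staging memory */ });

// GPU task: polls the timeline until it reaches cpuId, then records into a
// command buffer that the GPU scheduler allocates and submits.
auto gpuId = chain->add(ProcessQueue::Transfer, cpuId,
                        [](VkCommandBuffer cmd) { /* vkCmdCopyBuffer(...) */ });

chain->wait(); // blocks until the last task id is signaled

An external producer can also participate via createExternalTask()/notifyExternalTaskComplete(), which reserve and signal a timeline value without going through either scheduler.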
hw/amdgpu/device/include/amdgpu/device/scheduler.hpp

@@ -1,14 +1,23 @@
 #pragma once

+#include "util/unreachable.hpp"
 #include <atomic>
+#include <bit>
+#include <cassert>
+#include <concepts>
 #include <condition_variable>
 #include <functional>
 #include <mutex>
+#include <pthread.h>
 #include <thread>
+#include <utility>
 #include <vector>

 namespace amdgpu::device {
+inline void setThreadName(const char *name) {
+  pthread_setname_np(pthread_self(), name);
+}
+
 template <typename T> class Ref {
   T *m_ref = nullptr;
@@ -95,11 +104,13 @@ public:
 template <typename T> Ref(T *) -> Ref<T>;
 template <typename T> Ref(Ref<T>) -> Ref<T>;

-enum class TaskState { InProgress, Complete, Canceled };
+enum class TaskState { Created, InProgress, Complete, Canceled };
+enum class TaskResult { Complete, Canceled, Reschedule };

 struct AsyncTaskCtl {
   std::atomic<unsigned> refs{0};
-  std::atomic<TaskState> stateStorage{TaskState::InProgress};
+  std::atomic<TaskState> stateStorage{TaskState::Created};
+  std::atomic<bool> cancelRequested{false};

   virtual ~AsyncTaskCtl() = default;
@@ -110,48 +121,29 @@ struct AsyncTaskCtl {
     }
   }

-  bool isCanceled() const {
-    return stateStorage.load(std::memory_order::relaxed) == TaskState::Canceled;
+  bool isCancelRequested() const {
+    return cancelRequested.load(std::memory_order::relaxed) == true;
   }
-  bool isComplete() const {
-    return stateStorage.load(std::memory_order::relaxed) == TaskState::Complete;
-  }
-  bool isInProgress() const {
-    return stateStorage.load(std::memory_order::relaxed) ==
-           TaskState::InProgress;
+
+  bool isCanceled() const { return getState() == TaskState::Canceled; }
+  bool isComplete() const { return getState() == TaskState::Complete; }
+  bool isInProgress() const { return getState() == TaskState::InProgress; }
+
+  TaskState getState() const {
+    return stateStorage.load(std::memory_order::relaxed);
   }

-  void cancel() {
-    auto state = TaskState::InProgress;
-
-    while (state == TaskState::InProgress) {
-      if (stateStorage.compare_exchange_weak(state, TaskState::Canceled,
-                                             std::memory_order::relaxed)) {
-        break;
-      }
-    }
-
-    stateStorage.notify_all();
-  }
-
-  void complete() {
-    auto state = TaskState::InProgress;
-
-    while (state != TaskState::Complete) {
-      if (stateStorage.compare_exchange_weak(state, TaskState::Complete,
-                                             std::memory_order::relaxed)) {
-        break;
-      }
-    }
-
-    stateStorage.notify_all();
-  }
+  void cancel() { cancelRequested.store(true, std::memory_order::relaxed); }

   void wait() {
+    if (stateStorage.load(std::memory_order::relaxed) == TaskState::Created) {
+      util::unreachable("attempt to wait task that wasn't scheduled\n");
+    }
     stateStorage.wait(TaskState::InProgress, std::memory_order::relaxed);
   }
+};

-  virtual void invoke() = 0;
+struct CpuTaskCtl : AsyncTaskCtl {
+  virtual TaskResult invoke() = 0;
 };

 namespace detail {
@@ -159,69 +151,100 @@ template <typename T>
 concept LambdaWithoutClosure = requires(T t) { +t; };
 }

-template <typename T> struct AsyncTask;
+template <typename T> struct AsyncCpuTask;

 template <typename T>
-  requires(std::is_invocable_r_v<bool, T, const AsyncTaskCtl &> &&
-           detail::LambdaWithoutClosure<T>)
-struct AsyncTask<T> : AsyncTaskCtl {
-  static constexpr bool (*fn)(const AsyncTaskCtl &) = +std::declval<T>();
+  requires requires(T t, const AsyncTaskCtl &ctl) {
+    { t(ctl) } -> std::same_as<TaskResult>;
+    requires detail::LambdaWithoutClosure<T>;
+  }
+struct AsyncCpuTask<T> : CpuTaskCtl {
+  static constexpr TaskResult (*fn)(const AsyncTaskCtl &) = +std::declval<T>();

-  AsyncTask() = default;
-  AsyncTask(T &&) {}
+  AsyncCpuTask() = default;
+  AsyncCpuTask(T &&) {}

-  void invoke() override {
+  TaskResult invoke() override {
     auto &base = *static_cast<const AsyncTaskCtl *>(this);
-
-    if (fn(base)) {
-      complete();
-    }
+    return fn(base);
   }
 };

 template <typename T>
-  requires std::is_invocable_r_v<bool, T, const AsyncTaskCtl &>
-Ref<AsyncTaskCtl> createTask(T &&task) {
-  return Ref<AsyncTaskCtl>(new AsyncTask<T>(std::forward<T>(task)));
-}
-
-template <typename T>
-  requires(std::is_invocable_r_v<bool, T, const AsyncTaskCtl &> &&
-           !detail::LambdaWithoutClosure<T>)
-struct AsyncTask<T> : AsyncTaskCtl {
+  requires requires(T t, const AsyncTaskCtl &ctl) {
+    { t(ctl) } -> std::same_as<TaskResult>;
+    requires !detail::LambdaWithoutClosure<T>;
+  }
+struct AsyncCpuTask<T> : CpuTaskCtl {
   alignas(T) std::byte taskStorage[sizeof(T)];

-  AsyncTask() = default;
-  AsyncTask(T &&t) { new (taskStorage) T(std::forward<T>(t)); }
-  AsyncTask &operator=(T &&t) {
-    new (taskStorage) T(std::forward<T>(t));
-    return *this;
-  }
-
-  ~AsyncTask() {
-    if (isInProgress()) {
-      std::bit_cast<T *>(&taskStorage)->~T();
-    }
-  }
+  AsyncCpuTask(T &&t) { new (taskStorage) T(std::forward<T>(t)); }
+  ~AsyncCpuTask() { std::bit_cast<T *>(&taskStorage)->~T(); }

-  void invoke() override {
+  TaskResult invoke() override {
     auto &lambda = *std::bit_cast<T *>(&taskStorage);
     auto &base = *static_cast<const AsyncTaskCtl *>(this);
-
-    if (lambda(base)) {
-      complete();
-    }
-
-    std::bit_cast<T *>(&taskStorage)->~T();
+    return lambda(base);
   }
 };

+template <typename T>
+  requires requires(T t, const AsyncTaskCtl &ctl) {
+    { t(ctl) } -> std::same_as<TaskResult>;
+  }
+Ref<CpuTaskCtl> createCpuTask(T &&task) {
+  return Ref<CpuTaskCtl>(new AsyncCpuTask<T>(std::forward<T>(task)));
+}
+
+template <typename T>
+  requires requires(T t) {
+    { t() } -> std::same_as<TaskResult>;
+  }
+Ref<CpuTaskCtl> createCpuTask(T &&task) {
+  return createCpuTask(
+      [task = std::forward<T>(task)](
+          const AsyncTaskCtl &) mutable -> TaskResult { return task(); });
+}
+
+template <typename T>
+  requires requires(T t) {
+    { t() } -> std::same_as<void>;
+  }
+Ref<CpuTaskCtl> createCpuTask(T &&task) {
+  return createCpuTask([task = std::forward<T>(task)](
+                           const AsyncTaskCtl &ctl) mutable -> TaskResult {
+    if (ctl.isCancelRequested()) {
+      return TaskResult::Canceled;
+    }
+
+    task();
+    return TaskResult::Complete;
+  });
+}
+
+template <typename T>
+  requires requires(T t, const AsyncTaskCtl &ctl) {
+    { t(ctl) } -> std::same_as<void>;
+  }
+Ref<CpuTaskCtl> createCpuTask(T &&task) {
+  return createCpuTask([task = std::forward<T>(task)](const AsyncTaskCtl &ctl) {
+    if (ctl.isCancelRequested()) {
+      return TaskResult::Canceled;
+    }
+
+    task(ctl);
+    return TaskResult::Complete;
+  });
+}
+
 class Scheduler;
-class TaskSet {
-  std::vector<Ref<AsyncTaskCtl>> tasks;
+
+class CpuTaskSet {
+  std::vector<Ref<CpuTaskCtl>> tasks;

 public:
-  void append(Ref<AsyncTaskCtl> task) { tasks.push_back(std::move(task)); }
+  void append(Ref<CpuTaskCtl> task) { tasks.push_back(std::move(task)); }

   void wait() {
     for (auto task : tasks) {
@@ -234,9 +257,91 @@ public:
   void enqueue(Scheduler &scheduler);
 };

+class TaskSet {
+  struct TaskEntry {
+    Ref<AsyncTaskCtl> ctl;
+    std::function<void()> schedule;
+  };
+
+  std::vector<TaskEntry> tasks;
+
+public:
+  template <typename Scheduler, typename Task>
+    requires requires(Scheduler &sched, Ref<Task> task) {
+      sched.enqueue(std::move(task));
+      task->wait();
+      static_cast<Ref<AsyncTaskCtl>>(task);
+    }
+  void append(Scheduler &sched, Ref<Task> task) {
+    Ref<AsyncTaskCtl> rawTask = task;
+    auto schedFn = [sched = &sched, task = std::move(task)] {
+      sched->enqueue(std::move(task));
+    };
+
+    tasks.push_back({
+        .ctl = std::move(rawTask),
+        .schedule = std::move(schedFn),
+    });
+  }
+
+  void schedule() {
+    for (auto &task : tasks) {
+      if (auto schedule = std::exchange(task.schedule, nullptr)) {
+        schedule();
+      }
+    }
+  }
+
+  bool isCanceled() const {
+    for (auto &task : tasks) {
+      if (task.ctl->isCanceled()) {
+        return true;
+      }
+    }
+
+    return false;
+  }
+
+  bool isComplete() const {
+    for (auto &task : tasks) {
+      if (!task.ctl->isComplete()) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  bool isInProgress() const {
+    for (auto &task : tasks) {
+      if (task.ctl->isInProgress()) {
+        return true;
+      }
+    }
+
+    return false;
+  }
+
+  void clear() { tasks.clear(); }
+
+  void wait() const {
+    for (auto &task : tasks) {
+      assert(task.schedule == nullptr);
+      task.ctl->wait();
+    }
+  }
+
+  void cancel() {
+    for (auto &task : tasks) {
+      task.ctl->cancel();
+    }
+  }
+};
+
 class Scheduler {
   std::vector<std::thread> workThreads;
-  std::vector<Ref<AsyncTaskCtl>> tasks;
+  std::vector<Ref<CpuTaskCtl>> tasks;
+  std::vector<Ref<CpuTaskCtl>> rescheduleTasks;
   std::mutex taskMtx;
   std::condition_variable taskCv;
   std::atomic<bool> exit{false};
@@ -244,7 +349,10 @@ class Scheduler {
 public:
   explicit Scheduler(std::size_t threadCount) {
     for (std::size_t i = 0; i < threadCount; ++i) {
-      workThreads.push_back(std::thread{[this] { entry(); }});
+      workThreads.push_back(std::thread{[this, i] {
+        setThreadName(("CPU " + std::to_string(i)).c_str());
+        entry();
+      }});
     }
   }
@@ -257,53 +365,88 @@ public:
     }
   }

-  template <typename T>
-    requires std::is_invocable_r_v<bool, T, const AsyncTaskCtl &>
-  Ref<AsyncTaskCtl> enqueue(T &&task) {
-    auto taskHandle = createTask(std::forward<T>(task));
-    enqueue(taskHandle);
-    return taskHandle;
-  }
-
-  void enqueue(Ref<AsyncTaskCtl> task) {
+  void enqueue(Ref<CpuTaskCtl> task) {
     std::lock_guard lock(taskMtx);
+    TaskState prevState = TaskState::Created;
+    if (!task->stateStorage.compare_exchange_strong(
+            prevState, TaskState::InProgress, std::memory_order::relaxed)) {
+      util::unreachable("attempt to schedule cpu task in wrong state %u",
+                        (unsigned)prevState);
+    }
     tasks.push_back(std::move(task));
     taskCv.notify_one();
   }

   template <typename T>
-    requires std::is_invocable_r_v<bool, T, const AsyncTaskCtl &>
-  void enqueue(TaskSet &set, T &&task) {
+    requires requires(T &&task) { createCpuTask(std::forward<T>(task)); }
+  Ref<AsyncTaskCtl> enqueue(T &&task) {
+    auto taskHandle = createCpuTask(std::forward<T>(task));
+    enqueue(taskHandle);
+    return taskHandle;
+  }
+
+  template <typename T>
+    requires requires(T &&task) { createCpuTask(std::forward<T>(task)); }
+  void enqueue(CpuTaskSet &set, T &&task) {
     auto taskCtl = enqueue(std::forward<T>(task));
     set.append(taskCtl);
   }

 private:
-  void entry() {
-    while (!exit.load(std::memory_order::relaxed)) {
-      Ref<AsyncTaskCtl> task;
-
-      {
-        std::unique_lock lock(taskMtx);
-
-        if (tasks.empty()) {
-          taskCv.wait(lock);
-        }
-
-        if (tasks.empty()) {
-          continue;
-        }
-
-        task = std::move(tasks.back());
-        tasks.pop_back();
-      }
-
-      task->invoke();
-    }
-  }
+  Ref<CpuTaskCtl> fetchTask() {
+    std::unique_lock lock(taskMtx);
+
+    while (tasks.empty()) {
+      if (rescheduleTasks.empty() && tasks.empty()) {
+        taskCv.wait(lock);
+      }
+
+      if (tasks.empty()) {
+        std::swap(rescheduleTasks, tasks);
+      }
+    }
+
+    auto result = std::move(tasks.back());
+    tasks.pop_back();
+    return result;
+  }
+
+  Ref<CpuTaskCtl> invokeTask(Ref<CpuTaskCtl> task) {
+    switch (task->invoke()) {
+    case TaskResult::Complete:
+      task->stateStorage.store(TaskState::Complete, std::memory_order::relaxed);
+      task->stateStorage.notify_all();
+      return {};
+
+    case TaskResult::Canceled:
+      task->stateStorage.store(TaskState::Canceled, std::memory_order::relaxed);
+      task->stateStorage.notify_all();
+      return {};
+
+    case TaskResult::Reschedule:
+      return task;
+    }
+
+    std::abort();
+  }
+
+  void entry() {
+    while (!exit.load(std::memory_order::relaxed)) {
+      Ref<CpuTaskCtl> task = fetchTask();
+
+      auto rescheduleTask = invokeTask(std::move(task));
+      if (rescheduleTask == nullptr) {
+        continue;
+      }
+
+      std::unique_lock lock(taskMtx);
+      rescheduleTasks.push_back(std::move(rescheduleTask));
+      taskCv.notify_one();
+    }
+  }
 };

-inline void TaskSet::enqueue(Scheduler &scheduler) {
+inline void CpuTaskSet::enqueue(Scheduler &scheduler) {
   for (auto task : tasks) {
     scheduler.enqueue(std::move(task));
   }
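For reference, a small sketch of how the reworked CPU-side API fits together (illustrative, not from the commit): a task returning TaskResult::Reschedule is parked on the reschedule list and retried later, which is exactly the mechanism TaskChain uses to poll its timeline semaphore.

// Sketch: a polling task that reschedules itself until a flag flips.
amdgpu::device::Scheduler sched(2); // two worker threads

std::atomic<bool> ready{false};

auto ctl = sched.enqueue(
    [&](const amdgpu::device::AsyncTaskCtl &) -> amdgpu::device::TaskResult {
      if (!ready.load(std::memory_order::relaxed)) {
        return amdgpu::device::TaskResult::Reschedule; // retry later
      }
      return amdgpu::device::TaskResult::Complete;
    });

ready = true;
ctl->wait(); // returns once a worker observes the flag and completes the task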
hw/amdgpu/device/include/amdgpu/device/vk.hpp

@@ -342,6 +342,89 @@ public:
   bool operator!=(std::nullptr_t) const { return mSemaphore != nullptr; }
 };

+struct BinSemaphore {
+  VkSemaphore mSemaphore = VK_NULL_HANDLE;
+
+public:
+  BinSemaphore(const BinSemaphore &) = delete;
+
+  BinSemaphore() = default;
+  BinSemaphore(BinSemaphore &&other) { *this = std::move(other); }
+
+  BinSemaphore &operator=(BinSemaphore &&other) {
+    std::swap(mSemaphore, other.mSemaphore);
+    return *this;
+  }
+
+  ~BinSemaphore() {
+    if (mSemaphore != VK_NULL_HANDLE) {
+      vkDestroySemaphore(g_vkDevice, mSemaphore, nullptr);
+    }
+  }
+
+  static BinSemaphore Create() {
+    VkSemaphoreTypeCreateInfo typeCreateInfo = {
+        VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr,
+        VK_SEMAPHORE_TYPE_BINARY, 0};
+
+    VkSemaphoreCreateInfo createInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+                                        &typeCreateInfo, 0};
+
+    BinSemaphore result;
+    Verify() << vkCreateSemaphore(g_vkDevice, &createInfo, nullptr,
+                                  &result.mSemaphore);
+    return result;
+  }
+
+  VkSemaphore getHandle() const { return mSemaphore; }
+
+  bool operator==(std::nullptr_t) const { return mSemaphore == nullptr; }
+};
+
+struct Fence {
+  VkFence mFence = VK_NULL_HANDLE;
+
+public:
+  Fence(const Fence &) = delete;
+
+  Fence() = default;
+  Fence(Fence &&other) { *this = std::move(other); }
+
+  Fence &operator=(Fence &&other) {
+    std::swap(mFence, other.mFence);
+    return *this;
+  }
+
+  ~Fence() {
+    if (mFence != VK_NULL_HANDLE) {
+      vkDestroyFence(g_vkDevice, mFence, nullptr);
+    }
+  }
+
+  static Fence Create() {
+    VkFenceCreateInfo fenceCreateInfo = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+                                         nullptr, 0};
+    Fence result;
+    Verify() << vkCreateFence(g_vkDevice, &fenceCreateInfo, nullptr,
+                              &result.mFence);
+    return result;
+  }
+
+  void wait() const {
+    Verify() << vkWaitForFences(g_vkDevice, 1, &mFence, 1, UINT64_MAX);
+  }
+
+  bool isComplete() const {
+    return vkGetFenceStatus(g_vkDevice, mFence) == VK_SUCCESS;
+  }
+
+  void reset() { vkResetFences(g_vkDevice, 1, &mFence); }
+
+  VkFence getHandle() const { return mFence; }
+
+  bool operator==(std::nullptr_t) const { return mFence == nullptr; }
+};
+
 struct CommandBuffer {
   VkCommandBuffer mCmdBuffer = VK_NULL_HANDLE;
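As context for the new RAII wrappers above, a minimal usage sketch (illustrative only; it assumes vk::g_vkDevice is initialized and `queue` is a valid VkQueue obtained elsewhere):

// Sketch: fence-guarded submit; the destructor releases the VkFence.
auto fence = vk::Fence::Create();

// An empty submit is legal Vulkan and still signals the fence.
VkSubmitInfo submitInfo{.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO};
Verify() << vkQueueSubmit(queue, 1, &submitInfo, fence.getHandle());

fence.wait();  // vkWaitForFences under the hood
fence.reset(); // vkResetFences, ready for the next submit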
@@ -641,7 +724,7 @@ public:
   void readFromBuffer(VkCommandBuffer cmdBuffer, VkBuffer buffer,
                       VkImageAspectFlags destAspect,
                       VkDeviceSize bufferOffset = 0) {
-    transitionLayout(cmdBuffer, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+    transitionLayout(cmdBuffer, VK_IMAGE_LAYOUT_GENERAL);

     VkBufferImageCopy region{};
     region.bufferOffset = bufferOffset;
@@ -654,13 +737,13 @@ public:
     region.imageOffset = {0, 0, 0};
     region.imageExtent = {mWidth, mHeight, 1};

-    vkCmdCopyBufferToImage(cmdBuffer, buffer, mImage,
-                           VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);
+    vkCmdCopyBufferToImage(cmdBuffer, buffer, mImage, VK_IMAGE_LAYOUT_GENERAL,
+                           1, &region);
   }

   void writeToBuffer(VkCommandBuffer cmdBuffer, VkBuffer buffer,
                      VkImageAspectFlags sourceAspect) {
-    transitionLayout(cmdBuffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+    transitionLayout(cmdBuffer, VK_IMAGE_LAYOUT_GENERAL);

     VkBufferImageCopy region{};
     region.bufferOffset = 0;
@@ -673,9 +756,8 @@ public:
     region.imageOffset = {0, 0, 0};
     region.imageExtent = {mWidth, mHeight, 1};

-    vkCmdCopyImageToBuffer(cmdBuffer, mImage,
-                           VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, 1,
-                           &region);
+    vkCmdCopyImageToBuffer(cmdBuffer, mImage, VK_IMAGE_LAYOUT_GENERAL, buffer,
+                           1, &region);
   }

   [[nodiscard]] Buffer writeToBuffer(VkCommandBuffer cmdBuffer,
@@ -738,6 +820,7 @@ public:
       -> std::pair<VkPipelineStageFlags, VkAccessFlags> {
     switch (layout) {
     case VK_IMAGE_LAYOUT_UNDEFINED:
+    case VK_IMAGE_LAYOUT_GENERAL:
     case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
       return {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0};

(File diff suppressed because it is too large. Load diff.)
@ -1,4 +1,5 @@
|
|||
#include "amdgpu/RemoteMemory.hpp"
|
||||
#include "amdgpu/device/gpu-scheduler.hpp"
|
||||
#include "amdgpu/device/vk.hpp"
|
||||
#include <algorithm>
|
||||
#include <amdgpu/bridge/bridge.hpp>
|
||||
|
|
@@ -45,6 +46,33 @@ static void usage(std::FILE *out, const char *argv0) {

 enum class PresenterMode { Window };

+static VKAPI_ATTR VkBool32 VKAPI_CALL
+debugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
+              VkDebugUtilsMessageTypeFlagsEXT messageType,
+              const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData,
+              void *pUserData) {
+
+  std::fprintf(stderr, "validation layer: %s\n", pCallbackData->pMessage);
+
+  if (messageSeverity >= VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
+    std::abort();
+  }
+  return VK_FALSE;
+}
+
+static VkResult _vkCreateDebugUtilsMessengerEXT(
+    VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT *pCreateInfo,
+    const VkAllocationCallbacks *pAllocator,
+    VkDebugUtilsMessengerEXT *pDebugMessenger) {
+  static auto func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(
+      instance, "vkCreateDebugUtilsMessengerEXT");
+  if (func != nullptr) {
+    return func(instance, pCreateInfo, pAllocator, pDebugMessenger);
+  } else {
+    return VK_ERROR_EXTENSION_NOT_PRESENT;
+  }
+}
+
 int main(int argc, const char *argv[]) {
   if (argc == 2 && (argv[1] == std::string_view("-h") ||
                     argv[1] == std::string_view("--help"))) {
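Note that only the create wrapper is loaded above, so the messenger created later is never explicitly destroyed. A symmetric loader would follow the same vkGetInstanceProcAddr pattern (a sketch, not part of this commit):

// Sketch of the matching destroy wrapper; PFN_vkDestroyDebugUtilsMessengerEXT
// is the standard Vulkan function-pointer type for this extension entry point.
static void _vkDestroyDebugUtilsMessengerEXT(
    VkInstance instance, VkDebugUtilsMessengerEXT debugMessenger,
    const VkAllocationCallbacks *pAllocator) {
  static auto func =
      (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr(
          instance, "vkDestroyDebugUtilsMessengerEXT");
  if (func != nullptr) {
    func(instance, debugMessenger, pAllocator);
  }
}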
@@ -172,19 +200,39 @@ int main(int argc, const char *argv[]) {
       .apiVersion = VK_API_VERSION_1_3,
   };

+  VkDebugUtilsMessengerCreateInfoEXT debugCreateInfo{};
+  debugCreateInfo.sType =
+      VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT;
+  debugCreateInfo.messageSeverity =
+      VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
+      VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
+      VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
+      VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT;
+  debugCreateInfo.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
+                                VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
+                                0
+      // VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT
+      ;
+  debugCreateInfo.pfnUserCallback = debugCallback;
+
   VkInstanceCreateInfo instanceCreateInfo = {};
   instanceCreateInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
-  instanceCreateInfo.pNext = NULL;
+  instanceCreateInfo.pNext = &debugCreateInfo;
   instanceCreateInfo.pApplicationInfo = &appInfo;
   instanceCreateInfo.enabledExtensionCount = requiredInstanceExtensions.size();
   instanceCreateInfo.ppEnabledExtensionNames =
       requiredInstanceExtensions.data();

+  std::vector<const char *> enabledLayers;
+  // enabledLayers.push_back("VK_LAYER_KHRONOS_shader_object");
+
   if (enableValidation) {
-    instanceCreateInfo.ppEnabledLayerNames = &validationLayerName;
-    instanceCreateInfo.enabledLayerCount = 1;
+    enabledLayers.push_back(validationLayerName);
   }

+  instanceCreateInfo.ppEnabledLayerNames = enabledLayers.data();
+  instanceCreateInfo.enabledLayerCount = enabledLayers.size();
+
   VkInstance vkInstance;
   Verify() << vkCreateInstance(&instanceCreateInfo, nullptr, &vkInstance);
   auto getVkPhyDevice = [&](unsigned index) {
@@ -195,6 +243,10 @@ int main(int argc, const char *argv[]) {
     return devices[index];
   };

+  VkDebugUtilsMessengerEXT debugMessenger;
+  _vkCreateDebugUtilsMessengerEXT(vkInstance, &debugCreateInfo, nullptr,
+                                  &debugMessenger);
+
   auto vkPhysicalDevice = getVkPhyDevice(gpuIndex);

   VkPhysicalDeviceProperties vkPhyDeviceProperties;
@@ -342,7 +394,7 @@ int main(int argc, const char *argv[]) {
   std::vector<VkDeviceQueueCreateInfo> requestedQueues;

   std::vector<float> defaultQueuePriorities;
-  defaultQueuePriorities.resize(8);
+  defaultQueuePriorities.resize(32);

   for (uint32_t queueFamily = 0; queueFamily < queueFamiliesCount;
        ++queueFamily) {
@@ -350,7 +402,10 @@ int main(int argc, const char *argv[]) {
       requestedQueues.push_back(
           {.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
            .queueFamilyIndex = queueFamily,
-           .queueCount = 1,
+           .queueCount =
+               std::min<uint32_t>(queueFamilyProperties[queueFamily]
+                                      .queueFamilyProperties.queueCount,
+                                  defaultQueuePriorities.size()),
            .pQueuePriorities = defaultQueuePriorities.data()});
     } else if (queueFamiliesWithComputeSupport.contains(queueFamily) ||
                queueFamiliesWithTransferSupport.contains(queueFamily)) {
@@ -365,56 +420,6 @@ int main(int argc, const char *argv[]) {
     }
   }

-  // try to find queue that not graphics queue
-  bool requestedPresentQueue = false;
-  for (auto queueFamily : queueFamiliesWithPresentSupport) {
-    if (queueFamiliesWithGraphicsSupport.contains(queueFamily)) {
-      continue;
-    }
-
-    bool alreadyRequested = false;
-
-    for (auto &requested : requestedQueues) {
-      if (requested.queueFamilyIndex == queueFamily) {
-        alreadyRequested = true;
-        break;
-      }
-    }
-
-    if (!alreadyRequested) {
-      requestedQueues.push_back(
-          {.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
-           .queueFamilyIndex = queueFamily,
-           .queueCount = 1,
-           .pQueuePriorities = defaultQueuePriorities.data()});
-    }
-
-    requestedPresentQueue = true;
-  }
-
-  if (!requestedPresentQueue) {
-    for (auto queueFamily : queueFamiliesWithPresentSupport) {
-      bool alreadyRequested = false;
-
-      for (auto &requested : requestedQueues) {
-        if (requested.queueFamilyIndex == queueFamily) {
-          alreadyRequested = true;
-          break;
-        }
-      }
-
-      if (!alreadyRequested) {
-        requestedQueues.push_back(
-            {.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
-             .queueFamilyIndex = queueFamily,
-             .queueCount = 1,
-             .pQueuePriorities = defaultQueuePriorities.data()});
-      }
-
-      requestedPresentQueue = true;
-    }
-  }
-
   VkPhysicalDeviceShaderObjectFeaturesEXT shaderObjectFeatures{
       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT,
       .shaderObject = VK_TRUE};
@@ -422,6 +427,7 @@ int main(int argc, const char *argv[]) {
   VkPhysicalDeviceVulkan13Features phyDevFeatures13{
       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
       .pNext = &shaderObjectFeatures,
+      .synchronization2 = VK_TRUE,
       .dynamicRendering = VK_TRUE,
       .maintenance4 = VK_TRUE,
   };
@@ -601,26 +607,42 @@ int main(int argc, const char *argv[]) {
   std::vector<std::pair<VkQueue, unsigned>> transferQueues;
   std::vector<std::pair<VkQueue, unsigned>> graphicsQueues;
   VkQueue presentQueue = VK_NULL_HANDLE;
+  unsigned presentQueueFamily;

   for (auto &queueInfo : requestedQueues) {
-    if (queueFamiliesWithComputeSupport.contains(queueInfo.queueFamilyIndex)) {
-      for (uint32_t queueIndex = 0; queueIndex < queueInfo.queueCount;
-           ++queueIndex) {
-        auto &[queue, index] = computeQueues.emplace_back();
-        index = queueInfo.queueFamilyIndex;
-        vkGetDeviceQueue(vkDevice, queueInfo.queueFamilyIndex, queueIndex,
-                         &queue);
-      }
-    }
-
     if (queueFamiliesWithGraphicsSupport.contains(queueInfo.queueFamilyIndex)) {
       for (uint32_t queueIndex = 0; queueIndex < queueInfo.queueCount;
            ++queueIndex) {
+
+        if (presentQueue == VK_NULL_HANDLE &&
+            queueFamiliesWithPresentSupport.contains(
+                queueInfo.queueFamilyIndex)) {
+          presentQueueFamily = queueInfo.queueFamilyIndex;
+          vkGetDeviceQueue(vkDevice, queueInfo.queueFamilyIndex, 0,
+                           &presentQueue);
+
+          continue;
+        }
+
         auto &[queue, index] = graphicsQueues.emplace_back();
         index = queueInfo.queueFamilyIndex;
         vkGetDeviceQueue(vkDevice, queueInfo.queueFamilyIndex, queueIndex,
                          &queue);
       }
+
+      continue;
     }

+    if (queueFamiliesWithComputeSupport.contains(queueInfo.queueFamilyIndex)) {
+      uint32_t queueIndex = 0;
+      for (; queueIndex < queueInfo.queueCount; ++queueIndex) {
+        auto &[queue, index] = computeQueues.emplace_back();
+        index = queueInfo.queueFamilyIndex;
+        vkGetDeviceQueue(vkDevice, queueInfo.queueFamilyIndex, queueIndex,
+                         &queue);
+      }
+
+      continue;
+    }
+
     if (queueFamiliesWithTransferSupport.contains(queueInfo.queueFamilyIndex)) {
@@ -631,14 +653,15 @@ int main(int argc, const char *argv[]) {
       vkGetDeviceQueue(vkDevice, queueInfo.queueFamilyIndex, queueIndex,
                        &queue);
     }
   }
-
-  if (presentQueue == VK_NULL_HANDLE &&
-      queueFamiliesWithPresentSupport.contains(queueInfo.queueFamilyIndex)) {
-    vkGetDeviceQueue(vkDevice, queueInfo.queueFamilyIndex, 0, &presentQueue);
-    continue;
-  }

+  if (graphicsQueues.empty() && presentQueue != VK_NULL_HANDLE) {
+    graphicsQueues.push_back({presentQueue, presentQueueFamily});
+  }
+
   Verify() << (computeQueues.size() > 1);
   Verify() << (transferQueues.size() > 0);
   Verify() << (graphicsQueues.size() > 0);
@@ -651,19 +674,12 @@ int main(int argc, const char *argv[]) {
   VkCommandPoolCreateInfo commandPoolCreateInfo = {
       .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
      .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
-      .queueFamilyIndex = graphicsQueues.front().second,
+      .queueFamilyIndex = presentQueueFamily,
   };

   VkCommandPool commandPool;
   Verify() << vkCreateCommandPool(vkDevice, &commandPoolCreateInfo, nullptr,
                                   &commandPool);

-  amdgpu::device::DrawContext dc{
-      // TODO
-      .queue = graphicsQueues.front().first,
-      .commandPool = commandPool,
-  };
-
   std::vector<VkFence> inFlightFences(swapchainImages.size());

   for (auto &fence : inFlightFences) {
@@ -734,7 +750,7 @@ int main(int argc, const char *argv[]) {
   g_hostMemory = memory;

   {
-    amdgpu::device::AmdgpuDevice device(dc, bridgePuller.header);
+    amdgpu::device::AmdgpuDevice device(bridgePuller.header);

     for (std::uint32_t end = bridge->memoryAreaCount, i = 0; i < end; ++i) {
       auto area = bridge->memoryAreas[i];
@@ -747,22 +763,21 @@ int main(int argc, const char *argv[]) {
     VkCommandBufferAllocateInfo allocInfo{};
     allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
     allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
-    allocInfo.commandPool = dc.commandPool;
+    allocInfo.commandPool = commandPool;
     allocInfo.commandBufferCount = presentCmdBuffers.size();
     vkAllocateCommandBuffers(vkDevice, &allocInfo, presentCmdBuffers.data());
   }

+  std::vector<amdgpu::device::Ref<amdgpu::device::TaskChain>> flipTaskChain(
+      swapchainImages.size());
+
+  for (auto &chain : flipTaskChain) {
+    chain = amdgpu::device::TaskChain::Create();
+  }
   std::printf("Initialization complete\n");

   uint32_t imageIndex = 0;
   bool isImageAcquired = false;
-  std::vector<std::vector<VkBuffer>> swapchainBufferHandles;
-  swapchainBufferHandles.resize(swapchainImages.size());
-  std::vector<std::vector<VkImage>> swapchainImageHandles;
-  swapchainImageHandles.resize(swapchainImages.size());
-
-  VkPipelineStageFlags submitPipelineStages =
-      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;

   while (!glfwWindowShouldClose(window)) {
     glfwPollEvents();
@@ -808,54 +823,27 @@ int main(int argc, const char *argv[]) {

       vkBeginCommandBuffer(presentCmdBuffers[imageIndex], &beginInfo);

-      for (auto handle : swapchainBufferHandles[imageIndex]) {
-        vkDestroyBuffer(vkDevice, handle, nullptr);
-      }
-
-      for (auto handle : swapchainImageHandles[imageIndex]) {
-        vkDestroyImage(vkDevice, handle, nullptr);
-      }
-
-      swapchainBufferHandles[imageIndex].clear();
-      swapchainImageHandles[imageIndex].clear();
-
-      if (device.handleFlip(cmd.flip.bufferIndex, cmd.flip.arg,
-                            presentCmdBuffers[imageIndex],
-                            swapchainImages[imageIndex], swapchainExtent,
-                            swapchainBufferHandles[imageIndex],
-                            swapchainImageHandles[imageIndex])) {
-        vkEndCommandBuffer(presentCmdBuffers[imageIndex]);
-
-        VkSubmitInfo submitInfo{};
-        submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
-        submitInfo.commandBufferCount = 1;
-        submitInfo.pCommandBuffers = &presentCmdBuffers[imageIndex];
-        submitInfo.waitSemaphoreCount = 1;
-        submitInfo.signalSemaphoreCount = 1;
-        submitInfo.pSignalSemaphores = &renderCompleteSemaphore;
-        submitInfo.pWaitSemaphores = &presentCompleteSemaphore;
-        submitInfo.pWaitDstStageMask = &submitPipelineStages;
-
-        Verify() << vkQueueSubmit(dc.queue, 1, &submitInfo,
-                                  inFlightFences[imageIndex]);
-
-        VkPresentInfoKHR presentInfo{};
-        presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
-        presentInfo.waitSemaphoreCount = 1;
-        presentInfo.pWaitSemaphores = &renderCompleteSemaphore;
-        presentInfo.swapchainCount = 1;
-        presentInfo.pSwapchains = &swapchain;
-        presentInfo.pImageIndices = &imageIndex;
-
+      if (device.handleFlip(
+              presentQueue, presentCmdBuffers[imageIndex],
+              *flipTaskChain[imageIndex].get(), cmd.flip.bufferIndex,
+              cmd.flip.arg, swapchainImages[imageIndex], swapchainExtent,
+              presentCompleteSemaphore, renderCompleteSemaphore,
+              inFlightFences[imageIndex])) {
+        VkPresentInfoKHR presentInfo{
+            .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
+            .waitSemaphoreCount = 1,
+            .pWaitSemaphores = &renderCompleteSemaphore,
+            .swapchainCount = 1,
+            .pSwapchains = &swapchain,
+            .pImageIndices = &imageIndex,
+        };
         if (vkQueuePresentKHR(presentQueue, &presentInfo) != VK_SUCCESS) {
           std::printf("swapchain was invalidated\n");
           createSwapchain();
         }
+        // std::this_thread::sleep_for(std::chrono::seconds(3));
       } else {
         isImageAcquired = true;
       }

       break;
     }
@@ -876,17 +864,6 @@ int main(int argc, const char *argv[]) {
     vkDestroySemaphore(vkDevice, presentCompleteSemaphore, nullptr);
     vkDestroySemaphore(vkDevice, renderCompleteSemaphore, nullptr);
     vkDestroyCommandPool(vkDevice, commandPool, nullptr);
-
-    for (auto &handles : swapchainImageHandles) {
-      for (auto handle : handles) {
-        vkDestroyImage(vkDevice, handle, nullptr);
-      }
-    }
-    for (auto &handles : swapchainBufferHandles) {
-      for (auto handle : handles) {
-        vkDestroyBuffer(vkDevice, handle, nullptr);
-      }
-    }
   }

   vkDestroySwapchainKHR(vkDevice, swapchain, nullptr);