[amdgpu] device: implement multi queue support

implement IT_INDIRECT_BUFFER, IT_INDEX_BASE, and IT_DRAW_INDEX_OFFSET_2
New GPU task scheduler
DH 2023-08-06 17:35:54 +03:00
parent ade074721e
commit 9e109918fd
6 changed files with 2382 additions and 1557 deletions
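
For orientation, a minimal usage sketch of the new task-chain API introduced by this commit (see the gpu-scheduler.hpp header below). The lambdas, queue choices, and ordering are illustrative placeholders, not code from the commit:

// Hypothetical usage of amdgpu::device::TaskChain; only the API itself comes
// from gpu-scheduler.hpp below, the task bodies are placeholders.
using namespace amdgpu::device;

Ref<TaskChain> chain = TaskChain::Create();

// CPU step: runs on the CPU scheduler and signals the chain's timeline
// semaphore with its task id when it completes.
std::uint64_t decodeId = chain->add([] {
  // decode a command buffer, fill staging memory, etc. (placeholder)
});

// GPU step on a transfer queue; it is recorded into a command buffer and
// submitted only after the CPU step's id has been signaled.
std::uint64_t uploadId =
    chain->add(ProcessQueue::Transfer, decodeId, [](VkCommandBuffer cmd) {
      // vkCmdCopyBuffer(...), layout transitions, etc. (placeholder)
    });

// Graphics step ordered after the upload via the same timeline semaphore.
chain->add(ProcessQueue::Graphics, uploadId, [](VkCommandBuffer cmd) {
  // draw commands (placeholder)
});

chain->wait(); // blocks until the last task id is signaled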


@ -2,6 +2,7 @@
#include "amdgpu/bridge/bridge.hpp"
#include "amdgpu/shader/Instruction.hpp"
#include "gpu-scheduler.hpp"
#include "util/area.hpp"
#include <string>
@ -760,7 +761,7 @@ enum Opcodes {
kOpcodeDISPATCH_DIRECT = 0x15,
kOpcodeDISPATCH_INDIRECT = 0x16,
kOpcodeINDIRECT_BUFFER_END = 0x17,
MODE_CONTROL = 0x18,
kOpcodeMODE_CONTROL = 0x18,
kOpcodeATOMIC_GDS = 0x1D,
kOpcodeATOMIC_MEM = 0x1E,
kOpcodeOCCLUSION_QUERY = 0x1F,
@ -773,7 +774,7 @@ enum Opcodes {
kOpcodeINDEX_BASE = 0x26,
kOpcodeDRAW_INDEX_2 = 0x27,
kOpcodeCONTEXT_CONTROL = 0x28,
DRAW_INDEX_OFFSET = 0x29,
kOpcodeDRAW_INDEX_OFFSET = 0x29,
kOpcodeINDEX_TYPE = 0x2A,
kOpcodeDRAW_INDEX = 0x2B,
kOpcodeDRAW_INDIRECT_MULTI = 0x2C,
@ -805,11 +806,11 @@ enum Opcodes {
kOpcodeEVENT_WRITE_EOS = 0x48,
kOpcodeRELEASE_MEM = 0x49,
kOpcodePREAMBLE_CNTL = 0x4A,
RB_OFFSET = 0x4B,
ALU_PS_CONST_BUFFER_COPY = 0x4C,
ALU_VS_CONST_BUFFER_COPY = 0x4D,
ALU_PS_CONST_UPDATE = 0x4E,
ALU_VS_CONST_UPDATE = 0x4F,
kOpcodeRB_OFFSET = 0x4B,
kOpcodeALU_PS_CONST_BUFFER_COPY = 0x4C,
kOpcodeALU_VS_CONST_BUFFER_COPY = 0x4D,
kOpcodeALU_PS_CONST_UPDATE = 0x4E,
kOpcodeALU_VS_CONST_UPDATE = 0x4F,
kOpcodeDMA_DATA = 0x50,
kOpcodeONE_REG_WRITE = 0x57,
kOpcodeAQUIRE_MEM = 0x58,
@ -826,12 +827,12 @@ enum Opcodes {
kOpcodeSET_RESOURCE = 0x6D,
kOpcodeSET_SAMPLER = 0x6E,
kOpcodeSET_CTL_CONST = 0x6F,
SET_RESOURCE_OFFSET = 0x70,
SET_ALU_CONST_VS = 0x71,
SET_ALU_CONST_DI = 0x72,
kOpcodeSET_RESOURCE_OFFSET = 0x70,
kOpcodeSET_ALU_CONST_VS = 0x71,
kOpcodeSET_ALU_CONST_DI = 0x72,
kOpcodeSET_CONTEXT_REG_INDIRECT = 0x73,
SET_RESOURCE_INDIRECT = 0x74,
SET_APPEND_CNT = 0x75,
kOpcodeSET_RESOURCE_INDIRECT = 0x74,
kOpcodeSET_APPEND_CNT = 0x75,
kOpcodeSET_SH_REG = 0x76,
kOpcodeSET_SH_REG_OFFSET = 0x77,
kOpcodeSET_QUEUE_REG = 0x78,
@ -1018,8 +1019,6 @@ inline const std::string opcodeToString(int op) {
}
inline void dumpShader(const std::uint32_t *data) {
int hackExit = 0;
flockfile(stdout);
while (true) {
auto instHex = *data;
@ -1262,29 +1261,22 @@ static_assert(sizeof(GnmTBuffer) == sizeof(std::uint64_t) * 4);
constexpr auto kPageSize = 0x4000;
struct DrawContext {
VkQueue queue;
VkCommandPool commandPool;
};
void setVkDevice(VkDevice device,
VkPhysicalDeviceMemoryProperties memProperties,
VkPhysicalDeviceProperties devProperties);
struct AmdgpuDevice {
amdgpu::device::DrawContext dc;
void handleProtectMemory(std::uint64_t address, std::uint64_t size,
std::uint32_t prot);
void handleCommandBuffer(std::uint64_t queueId, std::uint64_t address,
std::uint64_t size);
bool handleFlip(std::uint32_t bufferIndex, std::uint64_t arg,
VkCommandBuffer cmd, VkImage targetImage,
VkExtent2D targetExtent, std::vector<VkBuffer> &usedBuffers,
std::vector<VkImage> &usedImages);
bool handleFlip(VkQueue queue, VkCommandBuffer cmdBuffer,
TaskChain &initTaskChain, std::uint32_t bufferIndex,
std::uint64_t arg, VkImage targetImage,
VkExtent2D targetExtent, VkSemaphore waitSemaphore,
VkSemaphore signalSemaphore, VkFence fence);
AmdgpuDevice(amdgpu::device::DrawContext dc,
amdgpu::bridge::BridgeHeader *bridge);
AmdgpuDevice(amdgpu::bridge::BridgeHeader *bridge);
~AmdgpuDevice();
};


@ -0,0 +1,321 @@
#pragma once
#include "scheduler.hpp"
#include "vk.hpp"
#include <atomic>
#include <concepts>
#include <cstdint>
#include <list>
#include <source_location>
#include <thread>
#include <utility>
#include <vulkan/vulkan_core.h>
namespace amdgpu::device {
enum class ProcessQueue {
Graphics = 1 << 1,
Compute = 1 << 2,
Transfer = 1 << 3,
Any = Graphics | Compute | Transfer
};
inline ProcessQueue operator|(ProcessQueue lhs, ProcessQueue rhs) {
return static_cast<ProcessQueue>(std::to_underlying(lhs) |
std::to_underlying(rhs));
}
inline ProcessQueue operator&(ProcessQueue lhs, ProcessQueue rhs) {
return static_cast<ProcessQueue>(std::to_underlying(lhs) &
std::to_underlying(rhs));
}
struct TaskChain;
class GpuScheduler;
Scheduler &getCpuScheduler();
GpuScheduler &getGpuScheduler(ProcessQueue queue);
struct GpuTaskLayout {
static constexpr auto kInvalidId = 0; //~static_cast<std::uint64_t>(0);
Ref<TaskChain> chain;
std::uint64_t id;
std::uint64_t waitId = kInvalidId;
VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
std::function<void(VkCommandBuffer)> invoke;
std::function<void(VkQueue, VkCommandBuffer)> submit;
};
struct TaskChain {
vk::Semaphore semaphore;
std::uint64_t nextTaskId = 1;
std::atomic<unsigned> refs{0};
std::vector<std::source_location> taskLocations;
void incRef() { refs.fetch_add(1, std::memory_order::relaxed); }
void decRef() {
if (refs.fetch_sub(1, std::memory_order::relaxed) == 1) {
delete this;
}
}
static Ref<TaskChain> Create() {
auto result = new TaskChain();
result->semaphore = vk::Semaphore::Create();
return result;
}
std::uint64_t add(ProcessQueue queue, std::uint64_t waitId,
std::function<void(VkCommandBuffer)> invoke);
std::uint64_t add(ProcessQueue queue,
std::function<void(VkCommandBuffer)> invoke) {
return add(queue, GpuTaskLayout::kInvalidId, std::move(invoke));
}
template <typename T>
requires requires(T &&t) {
{ t() } -> std::same_as<void>;
}
std::uint64_t add(std::uint64_t waitId, T &&task) {
auto prevTaskId = getLastTaskId();
auto id = nextTaskId++;
auto cpuTask =
createCpuTask([=, task = std::forward<T>(task),
self = Ref(this)](const AsyncTaskCtl &) mutable {
if (waitId != GpuTaskLayout::kInvalidId) {
if (self->semaphore.getCounterValue() < waitId) {
return TaskResult::Reschedule;
}
}
task();
if (prevTaskId != GpuTaskLayout::kInvalidId && waitId != prevTaskId) {
self->wait(prevTaskId);
}
self->semaphore.signal(id);
return TaskResult::Complete;
});
getCpuScheduler().enqueue(std::move(cpuTask));
return id;
}
template <typename T>
requires requires(T &&t) {
{ t() } -> std::same_as<void>;
}
std::uint64_t add(T &&task) {
return add(GpuTaskLayout::kInvalidId, std::forward<T>(task));
}
std::uint64_t getLastTaskId() const { return nextTaskId - 1; }
std::uint64_t createExternalTask() { return nextTaskId++; }
void notifyExternalTaskComplete(std::uint64_t id) { semaphore.signal(id); }
bool isComplete() const { return isComplete(getLastTaskId()); }
bool isComplete(std::uint64_t task) const {
return semaphore.getCounterValue() >= task;
}
bool empty() const { return getLastTaskId() == GpuTaskLayout::kInvalidId; }
void wait(std::uint64_t task = GpuTaskLayout::kInvalidId) const {
if (empty()) {
return;
}
if (task == GpuTaskLayout::kInvalidId) {
task = getLastTaskId();
}
Verify() << semaphore.wait(task, UINT64_MAX);
}
};
class GpuScheduler {
std::list<std::thread> workThreads;
std::vector<GpuTaskLayout> tasks;
std::vector<GpuTaskLayout> delayedTasks;
std::mutex taskMtx;
std::condition_variable taskCv;
std::atomic<bool> exit{false};
std::string debugName;
public:
explicit GpuScheduler(std::span<std::pair<VkQueue, std::uint32_t>> queues,
std::string debugName)
: debugName(debugName) {
for (std::size_t index = 0; auto [queue, queueFamilyIndex] : queues) {
workThreads.push_back(std::thread{[=, this] {
setThreadName(
("GPU " + std::to_string(index) + " " + debugName).c_str());
entry(queue, queueFamilyIndex);
}});
++index;
}
}
~GpuScheduler() {
exit = true;
taskCv.notify_all();
for (auto &thread : workThreads) {
thread.join();
}
}
void enqueue(GpuTaskLayout &&task) {
std::lock_guard lock(taskMtx);
tasks.push_back(std::move(task));
taskCv.notify_one();
}
private:
void submitTask(VkCommandPool pool, VkQueue queue, GpuTaskLayout &task) {
VkCommandBuffer cmdBuffer;
{
VkCommandBufferAllocateInfo allocateInfo{
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
.commandPool = pool,
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
.commandBufferCount = 1,
};
Verify() << vkAllocateCommandBuffers(vk::g_vkDevice, &allocateInfo,
&cmdBuffer);
VkCommandBufferBeginInfo beginInfo{
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
};
vkBeginCommandBuffer(cmdBuffer, &beginInfo);
}
task.invoke(cmdBuffer);
vkEndCommandBuffer(cmdBuffer);
if (task.submit) {
task.submit(queue, cmdBuffer);
return;
}
VkSemaphoreSubmitInfo signalSemSubmitInfo = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = task.chain->semaphore.getHandle(),
.value = task.id,
.stageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
};
VkSemaphoreSubmitInfo waitSemSubmitInfo = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = task.chain->semaphore.getHandle(),
.value = task.waitId,
.stageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
};
VkCommandBufferSubmitInfo cmdBufferSubmitInfo{
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
.commandBuffer = cmdBuffer,
};
VkSubmitInfo2 submitInfo{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
.waitSemaphoreInfoCount =
static_cast<std::uint32_t>(task.waitId ? 1 : 0),
.pWaitSemaphoreInfos = &waitSemSubmitInfo,
.commandBufferInfoCount = 1,
.pCommandBufferInfos = &cmdBufferSubmitInfo,
.signalSemaphoreInfoCount = 1,
.pSignalSemaphoreInfos = &signalSemSubmitInfo,
};
Verify() << vkQueueSubmit2(queue, 1, &submitInfo, VK_NULL_HANDLE);
// if (task.signalChain->semaphore.wait(
// task.id, std::chrono::duration_cast<std::chrono::nanoseconds>(
// std::chrono::seconds(10))
// .count())) {
// util::unreachable("gpu operation takes too long time. wait id = %lu\n",
// task.waitId);
// }
}
void entry(VkQueue queue, std::uint32_t queueFamilyIndex) {
VkCommandPool pool;
{
VkCommandPoolCreateInfo poolCreateInfo{
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.queueFamilyIndex = queueFamilyIndex};
Verify() << vkCreateCommandPool(vk::g_vkDevice, &poolCreateInfo,
vk::g_vkAllocator, &pool);
}
while (!exit.load(std::memory_order::relaxed)) {
GpuTaskLayout task;
{
std::unique_lock lock(taskMtx);
while (tasks.empty()) {
if (tasks.empty() && delayedTasks.empty()) {
taskCv.wait(lock);
}
if (tasks.empty()) {
std::swap(delayedTasks, tasks);
}
}
task = std::move(tasks.back());
tasks.pop_back();
}
if (task.waitId != GpuTaskLayout::kInvalidId &&
!task.chain->isComplete(task.waitId)) {
std::unique_lock lock(taskMtx);
delayedTasks.push_back(std::move(task));
taskCv.notify_one();
continue;
}
submitTask(pool, queue, task);
}
vkDestroyCommandPool(vk::g_vkDevice, pool, vk::g_vkAllocator);
}
};
inline std::uint64_t
TaskChain::add(ProcessQueue queue, std::uint64_t waitId,
std::function<void(VkCommandBuffer)> invoke) {
VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
if (waitId == GpuTaskLayout::kInvalidId) {
waitId = getLastTaskId();
waitStage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
}
auto id = nextTaskId++;
getGpuScheduler(queue).enqueue({
.chain = Ref(this),
.id = id,
.waitId = waitId,
.waitStage = waitStage,
.invoke = std::move(invoke),
});
return id;
}
GpuScheduler &getTransferQueueScheduler();
GpuScheduler &getComputeQueueScheduler();
GpuScheduler &getGraphicsQueueScheduler();
} // namespace amdgpu::device
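
The createExternalTask/notifyExternalTaskComplete pair lets work submitted outside GpuScheduler (for example a present done by the caller) participate in the same timeline. A hedged sketch, with the surrounding submission code assumed rather than taken from this commit:

// Illustrative only: reserving a timeline value for externally submitted work.
auto chain = amdgpu::device::TaskChain::Create();
std::uint64_t externalId = chain->createExternalTask();

// Tasks added with waitId = externalId are not submitted until that value is
// signaled, either by a VkSubmitInfo2 that signals chain->semaphore with
// .value = externalId, or from the CPU once the work is known to be done:
chain->notifyExternalTaskComplete(externalId);

chain->wait(externalId); // waits on the chain's timeline semaphore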


@ -1,14 +1,23 @@
#pragma once
#include "util/unreachable.hpp"
#include <atomic>
#include <bit>
#include <cassert>
#include <concepts>
#include <condition_variable>
#include <functional>
#include <mutex>
#include <pthread.h>
#include <thread>
#include <utility>
#include <vector>
namespace amdgpu::device {
inline void setThreadName(const char *name) {
pthread_setname_np(pthread_self(), name);
}
template <typename T> class Ref {
T *m_ref = nullptr;
@ -95,11 +104,13 @@ public:
template <typename T> Ref(T *) -> Ref<T>;
template <typename T> Ref(Ref<T>) -> Ref<T>;
enum class TaskState { InProgress, Complete, Canceled };
enum class TaskState { Created, InProgress, Complete, Canceled };
enum class TaskResult { Complete, Canceled, Reschedule };
struct AsyncTaskCtl {
std::atomic<unsigned> refs{0};
std::atomic<TaskState> stateStorage{TaskState::InProgress};
std::atomic<TaskState> stateStorage{TaskState::Created};
std::atomic<bool> cancelRequested{false};
virtual ~AsyncTaskCtl() = default;
@ -110,48 +121,29 @@ struct AsyncTaskCtl {
}
}
bool isCanceled() const {
return stateStorage.load(std::memory_order::relaxed) == TaskState::Canceled;
bool isCancelRequested() const {
return cancelRequested.load(std::memory_order::relaxed) == true;
}
bool isComplete() const {
return stateStorage.load(std::memory_order::relaxed) == TaskState::Complete;
}
bool isInProgress() const {
return stateStorage.load(std::memory_order::relaxed) ==
TaskState::InProgress;
bool isCanceled() const { return getState() == TaskState::Canceled; }
bool isComplete() const { return getState() == TaskState::Complete; }
bool isInProgress() const { return getState() == TaskState::InProgress; }
TaskState getState() const {
return stateStorage.load(std::memory_order::relaxed);
}
void cancel() {
auto state = TaskState::InProgress;
while (state == TaskState::InProgress) {
if (stateStorage.compare_exchange_weak(state, TaskState::Canceled,
std::memory_order::relaxed)) {
break;
}
}
stateStorage.notify_all();
}
void complete() {
auto state = TaskState::InProgress;
while (state != TaskState::Complete) {
if (stateStorage.compare_exchange_weak(state, TaskState::Complete,
std::memory_order::relaxed)) {
break;
}
}
stateStorage.notify_all();
}
void cancel() { cancelRequested.store(true, std::memory_order::relaxed); }
void wait() {
if (stateStorage.load(std::memory_order::relaxed) == TaskState::Created) {
util::unreachable("attempt to wait task that wasn't scheduled\n");
}
stateStorage.wait(TaskState::InProgress, std::memory_order::relaxed);
}
};
virtual void invoke() = 0;
struct CpuTaskCtl : AsyncTaskCtl {
virtual TaskResult invoke() = 0;
};
namespace detail {
@ -159,69 +151,100 @@ template <typename T>
concept LambdaWithoutClosure = requires(T t) { +t; };
}
template <typename T> struct AsyncTask;
template <typename T> struct AsyncCpuTask;
template <typename T>
requires(std::is_invocable_r_v<bool, T, const AsyncTaskCtl &> &&
detail::LambdaWithoutClosure<T>)
struct AsyncTask<T> : AsyncTaskCtl {
static constexpr bool (*fn)(const AsyncTaskCtl &) = +std::declval<T>();
requires requires(T t, const AsyncTaskCtl &ctl) {
{ t(ctl) } -> std::same_as<TaskResult>;
requires detail::LambdaWithoutClosure<T>;
}
struct AsyncCpuTask<T> : CpuTaskCtl {
static constexpr TaskResult (*fn)(const AsyncTaskCtl &) = +std::declval<T>();
AsyncTask() = default;
AsyncTask(T &&) {}
AsyncCpuTask() = default;
AsyncCpuTask(T &&) {}
void invoke() override {
TaskResult invoke() override {
auto &base = *static_cast<const AsyncTaskCtl *>(this);
if (fn(base)) {
complete();
}
return fn(base);
}
};
template <typename T>
requires std::is_invocable_r_v<bool, T, const AsyncTaskCtl &>
Ref<AsyncTaskCtl> createTask(T &&task) {
return Ref<AsyncTaskCtl>(new AsyncTask<T>(std::forward<T>(task)));
requires requires(T t, const AsyncTaskCtl &ctl) {
{ t(ctl) } -> std::same_as<TaskResult>;
requires !detail::LambdaWithoutClosure<T>;
}
struct AsyncCpuTask<T> : CpuTaskCtl {
alignas(T) std::byte taskStorage[sizeof(T)];
AsyncCpuTask(T &&t) { new (taskStorage) T(std::forward<T>(t)); }
~AsyncCpuTask() { std::bit_cast<T *>(&taskStorage)->~T(); }
TaskResult invoke() override {
auto &lambda = *std::bit_cast<T *>(&taskStorage);
auto &base = *static_cast<const AsyncTaskCtl *>(this);
return lambda(base);
}
};
template <typename T>
requires requires(T t, const AsyncTaskCtl &ctl) {
{ t(ctl) } -> std::same_as<TaskResult>;
}
Ref<CpuTaskCtl> createCpuTask(T &&task) {
return Ref<CpuTaskCtl>(new AsyncCpuTask<T>(std::forward<T>(task)));
}
template <typename T>
requires(std::is_invocable_r_v<bool, T, const AsyncTaskCtl &> &&
!detail::LambdaWithoutClosure<T>)
struct AsyncTask<T> : AsyncTaskCtl {
alignas(T) std::byte taskStorage[sizeof(T)];
AsyncTask() = default;
AsyncTask(T &&t) { new (taskStorage) T(std::forward<T>(t)); }
AsyncTask &operator=(T &&t) {
new (taskStorage) T(std::forward<T>(t));
return *this;
requires requires(T t) {
{ t() } -> std::same_as<TaskResult>;
}
Ref<CpuTaskCtl> createCpuTask(T &&task) {
return createCpuTask(
[task = std::forward<T>(task)](
const AsyncTaskCtl &) mutable -> TaskResult { return task(); });
}
~AsyncTask() {
if (isInProgress()) {
std::bit_cast<T *>(&taskStorage)->~T();
}
template <typename T>
requires requires(T t) {
{ t() } -> std::same_as<void>;
}
void invoke() override {
auto &lambda = *std::bit_cast<T *>(&taskStorage);
auto &base = *static_cast<const AsyncTaskCtl *>(this);
if (lambda(base)) {
complete();
Ref<CpuTaskCtl> createCpuTask(T &&task) {
return createCpuTask([task = std::forward<T>(task)](
const AsyncTaskCtl &ctl) mutable -> TaskResult {
if (ctl.isCancelRequested()) {
return TaskResult::Canceled;
}
std::bit_cast<T *>(&taskStorage)->~T();
task();
return TaskResult::Complete;
});
}
template <typename T>
requires requires(T t, const AsyncTaskCtl &ctl) {
{ t(ctl) } -> std::same_as<void>;
}
};
Ref<CpuTaskCtl> createCpuTask(T &&task) {
return createCpuTask([task = std::forward<T>(task)](const AsyncTaskCtl &ctl) {
if (ctl.isCancelRequested()) {
return TaskResult::Canceled;
}
task(ctl);
return TaskResult::Complete;
});
}
class Scheduler;
class TaskSet {
std::vector<Ref<AsyncTaskCtl>> tasks;
class CpuTaskSet {
std::vector<Ref<CpuTaskCtl>> tasks;
public:
void append(Ref<AsyncTaskCtl> task) { tasks.push_back(std::move(task)); }
void append(Ref<CpuTaskCtl> task) { tasks.push_back(std::move(task)); }
void wait() {
for (auto task : tasks) {
@ -234,9 +257,91 @@ public:
void enqueue(Scheduler &scheduler);
};
class TaskSet {
struct TaskEntry {
Ref<AsyncTaskCtl> ctl;
std::function<void()> schedule;
};
std::vector<TaskEntry> tasks;
public:
template <typename Scheduler, typename Task>
requires requires(Scheduler &sched, Ref<Task> task) {
sched.enqueue(std::move(task));
task->wait();
static_cast<Ref<AsyncTaskCtl>>(task);
}
void append(Scheduler &sched, Ref<Task> task) {
Ref<AsyncTaskCtl> rawTask = task;
auto schedFn = [sched = &sched, task = std::move(task)] {
sched->enqueue(std::move(task));
};
tasks.push_back({
.ctl = std::move(rawTask),
.schedule = std::move(schedFn),
});
}
void schedule() {
for (auto &task : tasks) {
if (auto schedule = std::exchange(task.schedule, nullptr)) {
schedule();
}
}
}
bool isCanceled() const {
for (auto &task : tasks) {
if (task.ctl->isCanceled()) {
return true;
}
}
return false;
}
bool isComplete() const {
for (auto &task : tasks) {
if (!task.ctl->isComplete()) {
return false;
}
}
return true;
}
bool isInProgress() const {
for (auto &task : tasks) {
if (task.ctl->isInProgress()) {
return true;
}
}
return false;
}
void clear() { tasks.clear(); }
void wait() const {
for (auto &task : tasks) {
assert(task.schedule == nullptr);
task.ctl->wait();
}
}
void cancel() {
for (auto &task : tasks) {
task.ctl->cancel();
}
}
};
class Scheduler {
std::vector<std::thread> workThreads;
std::vector<Ref<AsyncTaskCtl>> tasks;
std::vector<Ref<CpuTaskCtl>> tasks;
std::vector<Ref<CpuTaskCtl>> rescheduleTasks;
std::mutex taskMtx;
std::condition_variable taskCv;
std::atomic<bool> exit{false};
@ -244,7 +349,10 @@ class Scheduler {
public:
explicit Scheduler(std::size_t threadCount) {
for (std::size_t i = 0; i < threadCount; ++i) {
workThreads.push_back(std::thread{[this] { entry(); }});
workThreads.push_back(std::thread{[this, i] {
setThreadName(("CPU " + std::to_string(i)).c_str());
entry();
}});
}
}
@ -257,53 +365,88 @@ public:
}
}
template <typename T>
requires std::is_invocable_r_v<bool, T, const AsyncTaskCtl &>
Ref<AsyncTaskCtl> enqueue(T &&task) {
auto taskHandle = createTask(std::forward<T>(task));
enqueue(taskHandle);
return taskHandle;
}
void enqueue(Ref<AsyncTaskCtl> task) {
void enqueue(Ref<CpuTaskCtl> task) {
std::lock_guard lock(taskMtx);
TaskState prevState = TaskState::Created;
if (!task->stateStorage.compare_exchange_strong(
prevState, TaskState::InProgress, std::memory_order::relaxed)) {
util::unreachable("attempt to schedule cpu task in wrong state %u",
(unsigned)prevState);
}
tasks.push_back(std::move(task));
taskCv.notify_one();
}
template <typename T>
requires std::is_invocable_r_v<bool, T, const AsyncTaskCtl &>
void enqueue(TaskSet &set, T &&task) {
requires requires(T &&task) { createCpuTask(std::forward<T>(task)); }
Ref<AsyncTaskCtl> enqueue(T &&task) {
auto taskHandle = createCpuTask(std::forward<T>(task));
enqueue(taskHandle);
return taskHandle;
}
template <typename T>
requires requires(T &&task) { createCpuTask(std::forward<T>(task)); }
void enqueue(CpuTaskSet &set, T &&task) {
auto taskCtl = enqueue(std::forward<T>(task));
set.append(taskCtl);
}
private:
void entry() {
while (!exit.load(std::memory_order::relaxed)) {
Ref<AsyncTaskCtl> task;
Ref<CpuTaskCtl> fetchTask() {
std::unique_lock lock(taskMtx);
{
std::unique_lock lock(taskMtx);
if (tasks.empty()) {
taskCv.wait(lock);
}
if (tasks.empty()) {
continue;
}
task = std::move(tasks.back());
tasks.pop_back();
while (tasks.empty()) {
if (rescheduleTasks.empty() && tasks.empty()) {
taskCv.wait(lock);
}
task->invoke();
if (tasks.empty()) {
std::swap(rescheduleTasks, tasks);
}
}
auto result = std::move(tasks.back());
tasks.pop_back();
return result;
}
Ref<CpuTaskCtl> invokeTask(Ref<CpuTaskCtl> task) {
switch (task->invoke()) {
case TaskResult::Complete:
task->stateStorage.store(TaskState::Complete, std::memory_order::relaxed);
task->stateStorage.notify_all();
return {};
case TaskResult::Canceled:
task->stateStorage.store(TaskState::Canceled, std::memory_order::relaxed);
task->stateStorage.notify_all();
return {};
case TaskResult::Reschedule:
return task;
}
std::abort();
}
void entry() {
while (!exit.load(std::memory_order::relaxed)) {
Ref<CpuTaskCtl> task = fetchTask();
auto rescheduleTask = invokeTask(std::move(task));
if (rescheduleTask == nullptr) {
continue;
}
std::unique_lock lock(taskMtx);
rescheduleTasks.push_back(std::move(rescheduleTask));
taskCv.notify_one();
}
}
};
inline void TaskSet::enqueue(Scheduler &scheduler) {
inline void CpuTaskSet::enqueue(Scheduler &scheduler) {
for (auto task : tasks) {
scheduler.enqueue(std::move(task));
}
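
A hedged sketch of the reworked CPU task API in scheduler.hpp: tasks now report a TaskResult, and returning Reschedule hands the worker thread back instead of blocking it. The readiness flag and thread count below are illustrative:

using namespace amdgpu::device;

Scheduler sched(2); // two worker threads (illustrative)
std::atomic<bool> ready{false};

auto task = sched.enqueue([&](const AsyncTaskCtl &ctl) -> TaskResult {
  if (ctl.isCancelRequested()) {
    return TaskResult::Canceled;
  }
  if (!ready.load(std::memory_order::relaxed)) {
    // Dependency not satisfied yet: put the task back on the queue.
    return TaskResult::Reschedule;
  }
  // do the actual work (placeholder)
  return TaskResult::Complete;
});

ready = true;
task->wait(); // returns once the task reaches Complete or Canceled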


@ -342,6 +342,89 @@ public:
bool operator!=(std::nullptr_t) const { return mSemaphore != nullptr; }
};
struct BinSemaphore {
VkSemaphore mSemaphore = VK_NULL_HANDLE;
public:
BinSemaphore(const BinSemaphore &) = delete;
BinSemaphore() = default;
BinSemaphore(BinSemaphore &&other) { *this = std::move(other); }
BinSemaphore &operator=(BinSemaphore &&other) {
std::swap(mSemaphore, other.mSemaphore);
return *this;
}
~BinSemaphore() {
if (mSemaphore != VK_NULL_HANDLE) {
vkDestroySemaphore(g_vkDevice, mSemaphore, nullptr);
}
}
static BinSemaphore Create() {
VkSemaphoreTypeCreateInfo typeCreateInfo = {
VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr,
VK_SEMAPHORE_TYPE_BINARY, 0};
VkSemaphoreCreateInfo createInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
&typeCreateInfo, 0};
BinSemaphore result;
Verify() << vkCreateSemaphore(g_vkDevice, &createInfo, nullptr,
&result.mSemaphore);
return result;
}
VkSemaphore getHandle() const { return mSemaphore; }
bool operator==(std::nullptr_t) const { return mSemaphore == nullptr; }
};
struct Fence {
VkFence mFence = VK_NULL_HANDLE;
public:
Fence(const Fence &) = delete;
Fence() = default;
Fence(Fence &&other) { *this = std::move(other); }
Fence &operator=(Fence &&other) {
std::swap(mFence, other.mFence);
return *this;
}
~Fence() {
if (mFence != VK_NULL_HANDLE) {
vkDestroyFence(g_vkDevice, mFence, nullptr);
}
}
static Fence Create() {
VkFenceCreateInfo fenceCreateInfo = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
nullptr, 0};
Fence result;
Verify() << vkCreateFence(g_vkDevice, &fenceCreateInfo, nullptr,
&result.mFence);
return result;
}
void wait() const {
Verify() << vkWaitForFences(g_vkDevice, 1, &mFence, 1, UINT64_MAX);
}
bool isComplete() const {
return vkGetFenceStatus(g_vkDevice, mFence) == VK_SUCCESS;
}
void reset() { vkResetFences(g_vkDevice, 1, &mFence); }
VkFence getHandle() const { return mFence; }
bool operator==(std::nullptr_t) const { return mFence == nullptr; }
};
struct CommandBuffer {
VkCommandBuffer mCmdBuffer = VK_NULL_HANDLE;
@ -641,7 +724,7 @@ public:
void readFromBuffer(VkCommandBuffer cmdBuffer, VkBuffer buffer,
VkImageAspectFlags destAspect,
VkDeviceSize bufferOffset = 0) {
transitionLayout(cmdBuffer, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
transitionLayout(cmdBuffer, VK_IMAGE_LAYOUT_GENERAL);
VkBufferImageCopy region{};
region.bufferOffset = bufferOffset;
@ -654,13 +737,13 @@ public:
region.imageOffset = {0, 0, 0};
region.imageExtent = {mWidth, mHeight, 1};
vkCmdCopyBufferToImage(cmdBuffer, buffer, mImage,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);
vkCmdCopyBufferToImage(cmdBuffer, buffer, mImage, VK_IMAGE_LAYOUT_GENERAL,
1, &region);
}
void writeToBuffer(VkCommandBuffer cmdBuffer, VkBuffer buffer,
VkImageAspectFlags sourceAspect) {
transitionLayout(cmdBuffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
transitionLayout(cmdBuffer, VK_IMAGE_LAYOUT_GENERAL);
VkBufferImageCopy region{};
region.bufferOffset = 0;
@ -673,9 +756,8 @@ public:
region.imageOffset = {0, 0, 0};
region.imageExtent = {mWidth, mHeight, 1};
vkCmdCopyImageToBuffer(cmdBuffer, mImage,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, 1,
&region);
vkCmdCopyImageToBuffer(cmdBuffer, mImage, VK_IMAGE_LAYOUT_GENERAL, buffer,
1, &region);
}
[[nodiscard]] Buffer writeToBuffer(VkCommandBuffer cmdBuffer,
@ -738,6 +820,7 @@ public:
-> std::pair<VkPipelineStageFlags, VkAccessFlags> {
switch (layout) {
case VK_IMAGE_LAYOUT_UNDEFINED:
case VK_IMAGE_LAYOUT_GENERAL:
case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
return {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0};
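
The new vk::Fence and vk::BinSemaphore wrappers follow the same RAII pattern as the existing vk::Semaphore. A small usage sketch for the fence, assuming a queue and a recorded command buffer already exist:

auto fence = vk::Fence::Create();

VkSubmitInfo submitInfo{
    .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
    .commandBufferCount = 1,
    .pCommandBuffers = &cmdBuffer, // assumed to be recorded elsewhere
};
Verify() << vkQueueSubmit(queue, 1, &submitInfo, fence.getHandle());

fence.wait();  // vkWaitForFences under the hood
fence.reset(); // ready for reuse; the VkFence is destroyed on scope exit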

File diff suppressed because it is too large


@ -1,4 +1,5 @@
#include "amdgpu/RemoteMemory.hpp"
#include "amdgpu/device/gpu-scheduler.hpp"
#include "amdgpu/device/vk.hpp"
#include <algorithm>
#include <amdgpu/bridge/bridge.hpp>
@ -45,6 +46,33 @@ static void usage(std::FILE *out, const char *argv0) {
enum class PresenterMode { Window };
static VKAPI_ATTR VkBool32 VKAPI_CALL
debugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
VkDebugUtilsMessageTypeFlagsEXT messageType,
const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData,
void *pUserData) {
std::fprintf(stderr, "validation layer: %s\n", pCallbackData->pMessage);
if (messageSeverity >= VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
std::abort();
}
return VK_FALSE;
}
static VkResult _vkCreateDebugUtilsMessengerEXT(
VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkDebugUtilsMessengerEXT *pDebugMessenger) {
static auto func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(
instance, "vkCreateDebugUtilsMessengerEXT");
if (func != nullptr) {
return func(instance, pCreateInfo, pAllocator, pDebugMessenger);
} else {
return VK_ERROR_EXTENSION_NOT_PRESENT;
}
}
int main(int argc, const char *argv[]) {
if (argc == 2 && (argv[1] == std::string_view("-h") ||
argv[1] == std::string_view("--help"))) {
@ -172,19 +200,39 @@ int main(int argc, const char *argv[]) {
.apiVersion = VK_API_VERSION_1_3,
};
VkDebugUtilsMessengerCreateInfoEXT debugCreateInfo{};
debugCreateInfo.sType =
VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT;
debugCreateInfo.messageSeverity =
VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT;
debugCreateInfo.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
0
// VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT
;
debugCreateInfo.pfnUserCallback = debugCallback;
VkInstanceCreateInfo instanceCreateInfo = {};
instanceCreateInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
instanceCreateInfo.pNext = NULL;
instanceCreateInfo.pNext = &debugCreateInfo;
instanceCreateInfo.pApplicationInfo = &appInfo;
instanceCreateInfo.enabledExtensionCount = requiredInstanceExtensions.size();
instanceCreateInfo.ppEnabledExtensionNames =
requiredInstanceExtensions.data();
std::vector<const char *> enabledLayers;
// enabledLayers.push_back("VK_LAYER_KHRONOS_shader_object");
if (enableValidation) {
instanceCreateInfo.ppEnabledLayerNames = &validationLayerName;
instanceCreateInfo.enabledLayerCount = 1;
enabledLayers.push_back(validationLayerName);
}
instanceCreateInfo.ppEnabledLayerNames = enabledLayers.data();
instanceCreateInfo.enabledLayerCount = enabledLayers.size();
VkInstance vkInstance;
Verify() << vkCreateInstance(&instanceCreateInfo, nullptr, &vkInstance);
auto getVkPhyDevice = [&](unsigned index) {
@ -195,6 +243,10 @@ int main(int argc, const char *argv[]) {
return devices[index];
};
VkDebugUtilsMessengerEXT debugMessenger;
_vkCreateDebugUtilsMessengerEXT(vkInstance, &debugCreateInfo, nullptr,
&debugMessenger);
auto vkPhysicalDevice = getVkPhyDevice(gpuIndex);
VkPhysicalDeviceProperties vkPhyDeviceProperties;
@ -342,7 +394,7 @@ int main(int argc, const char *argv[]) {
std::vector<VkDeviceQueueCreateInfo> requestedQueues;
std::vector<float> defaultQueuePriorities;
defaultQueuePriorities.resize(8);
defaultQueuePriorities.resize(32);
for (uint32_t queueFamily = 0; queueFamily < queueFamiliesCount;
++queueFamily) {
@ -350,7 +402,10 @@ int main(int argc, const char *argv[]) {
requestedQueues.push_back(
{.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
.queueFamilyIndex = queueFamily,
.queueCount = 1,
.queueCount =
std::min<uint32_t>(queueFamilyProperties[queueFamily]
.queueFamilyProperties.queueCount,
defaultQueuePriorities.size()),
.pQueuePriorities = defaultQueuePriorities.data()});
} else if (queueFamiliesWithComputeSupport.contains(queueFamily) ||
queueFamiliesWithTransferSupport.contains(queueFamily)) {
@ -365,56 +420,6 @@ int main(int argc, const char *argv[]) {
}
}
// try to find queue that not graphics queue
bool requestedPresentQueue = false;
for (auto queueFamily : queueFamiliesWithPresentSupport) {
if (queueFamiliesWithGraphicsSupport.contains(queueFamily)) {
continue;
}
bool alreadyRequested = false;
for (auto &requested : requestedQueues) {
if (requested.queueFamilyIndex == queueFamily) {
alreadyRequested = true;
break;
}
}
if (!alreadyRequested) {
requestedQueues.push_back(
{.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
.queueFamilyIndex = queueFamily,
.queueCount = 1,
.pQueuePriorities = defaultQueuePriorities.data()});
}
requestedPresentQueue = true;
}
if (!requestedPresentQueue) {
for (auto queueFamily : queueFamiliesWithPresentSupport) {
bool alreadyRequested = false;
for (auto &requested : requestedQueues) {
if (requested.queueFamilyIndex == queueFamily) {
alreadyRequested = true;
break;
}
}
if (!alreadyRequested) {
requestedQueues.push_back(
{.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
.queueFamilyIndex = queueFamily,
.queueCount = 1,
.pQueuePriorities = defaultQueuePriorities.data()});
}
requestedPresentQueue = true;
}
}
VkPhysicalDeviceShaderObjectFeaturesEXT shaderObjectFeatures{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT,
.shaderObject = VK_TRUE};
@ -422,6 +427,7 @@ int main(int argc, const char *argv[]) {
VkPhysicalDeviceVulkan13Features phyDevFeatures13{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
.pNext = &shaderObjectFeatures,
.synchronization2 = VK_TRUE,
.dynamicRendering = VK_TRUE,
.maintenance4 = VK_TRUE,
};
@ -601,26 +607,42 @@ int main(int argc, const char *argv[]) {
std::vector<std::pair<VkQueue, unsigned>> transferQueues;
std::vector<std::pair<VkQueue, unsigned>> graphicsQueues;
VkQueue presentQueue = VK_NULL_HANDLE;
unsigned presentQueueFamily;
for (auto &queueInfo : requestedQueues) {
if (queueFamiliesWithComputeSupport.contains(queueInfo.queueFamilyIndex)) {
for (uint32_t queueIndex = 0; queueIndex < queueInfo.queueCount;
++queueIndex) {
auto &[queue, index] = computeQueues.emplace_back();
index = queueInfo.queueFamilyIndex;
vkGetDeviceQueue(vkDevice, queueInfo.queueFamilyIndex, queueIndex,
&queue);
}
}
if (queueFamiliesWithGraphicsSupport.contains(queueInfo.queueFamilyIndex)) {
for (uint32_t queueIndex = 0; queueIndex < queueInfo.queueCount;
++queueIndex) {
if (presentQueue == VK_NULL_HANDLE &&
queueFamiliesWithPresentSupport.contains(
queueInfo.queueFamilyIndex)) {
presentQueueFamily = queueInfo.queueFamilyIndex;
vkGetDeviceQueue(vkDevice, queueInfo.queueFamilyIndex, 0,
&presentQueue);
continue;
}
auto &[queue, index] = graphicsQueues.emplace_back();
index = queueInfo.queueFamilyIndex;
vkGetDeviceQueue(vkDevice, queueInfo.queueFamilyIndex, queueIndex,
&queue);
}
continue;
}
if (queueFamiliesWithComputeSupport.contains(queueInfo.queueFamilyIndex)) {
uint32_t queueIndex = 0;
for (; queueIndex < queueInfo.queueCount; ++queueIndex) {
auto &[queue, index] = computeQueues.emplace_back();
index = queueInfo.queueFamilyIndex;
vkGetDeviceQueue(vkDevice, queueInfo.queueFamilyIndex, queueIndex,
&queue);
}
continue;
}
if (queueFamiliesWithTransferSupport.contains(queueInfo.queueFamilyIndex)) {
@ -631,14 +653,15 @@ int main(int argc, const char *argv[]) {
vkGetDeviceQueue(vkDevice, queueInfo.queueFamilyIndex, queueIndex,
&queue);
}
}
if (presentQueue == VK_NULL_HANDLE &&
queueFamiliesWithPresentSupport.contains(queueInfo.queueFamilyIndex)) {
vkGetDeviceQueue(vkDevice, queueInfo.queueFamilyIndex, 0, &presentQueue);
continue;
}
}
if (graphicsQueues.empty() && presentQueue != VK_NULL_HANDLE) {
graphicsQueues.push_back({presentQueue, presentQueueFamily});
}
Verify() << (computeQueues.size() > 1);
Verify() << (transferQueues.size() > 0);
Verify() << (graphicsQueues.size() > 0);
@ -651,19 +674,12 @@ int main(int argc, const char *argv[]) {
VkCommandPoolCreateInfo commandPoolCreateInfo = {
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
.queueFamilyIndex = graphicsQueues.front().second,
.queueFamilyIndex = presentQueueFamily,
};
VkCommandPool commandPool;
Verify() << vkCreateCommandPool(vkDevice, &commandPoolCreateInfo, nullptr,
&commandPool);
amdgpu::device::DrawContext dc{
// TODO
.queue = graphicsQueues.front().first,
.commandPool = commandPool,
};
std::vector<VkFence> inFlightFences(swapchainImages.size());
for (auto &fence : inFlightFences) {
@ -734,7 +750,7 @@ int main(int argc, const char *argv[]) {
g_hostMemory = memory;
{
amdgpu::device::AmdgpuDevice device(dc, bridgePuller.header);
amdgpu::device::AmdgpuDevice device(bridgePuller.header);
for (std::uint32_t end = bridge->memoryAreaCount, i = 0; i < end; ++i) {
auto area = bridge->memoryAreas[i];
@ -747,22 +763,21 @@ int main(int argc, const char *argv[]) {
VkCommandBufferAllocateInfo allocInfo{};
allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
allocInfo.commandPool = dc.commandPool;
allocInfo.commandPool = commandPool;
allocInfo.commandBufferCount = presentCmdBuffers.size();
vkAllocateCommandBuffers(vkDevice, &allocInfo, presentCmdBuffers.data());
}
std::vector<amdgpu::device::Ref<amdgpu::device::TaskChain>> flipTaskChain(
swapchainImages.size());
for (auto &chain : flipTaskChain) {
chain = amdgpu::device::TaskChain::Create();
}
std::printf("Initialization complete\n");
uint32_t imageIndex = 0;
bool isImageAcquired = false;
std::vector<std::vector<VkBuffer>> swapchainBufferHandles;
swapchainBufferHandles.resize(swapchainImages.size());
std::vector<std::vector<VkImage>> swapchainImageHandles;
swapchainImageHandles.resize(swapchainImages.size());
VkPipelineStageFlags submitPipelineStages =
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
while (!glfwWindowShouldClose(window)) {
glfwPollEvents();
@ -808,54 +823,27 @@ int main(int argc, const char *argv[]) {
vkBeginCommandBuffer(presentCmdBuffers[imageIndex], &beginInfo);
for (auto handle : swapchainBufferHandles[imageIndex]) {
vkDestroyBuffer(vkDevice, handle, nullptr);
}
for (auto handle : swapchainImageHandles[imageIndex]) {
vkDestroyImage(vkDevice, handle, nullptr);
}
swapchainBufferHandles[imageIndex].clear();
swapchainImageHandles[imageIndex].clear();
if (device.handleFlip(cmd.flip.bufferIndex, cmd.flip.arg,
presentCmdBuffers[imageIndex],
swapchainImages[imageIndex], swapchainExtent,
swapchainBufferHandles[imageIndex],
swapchainImageHandles[imageIndex])) {
vkEndCommandBuffer(presentCmdBuffers[imageIndex]);
VkSubmitInfo submitInfo{};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &presentCmdBuffers[imageIndex];
submitInfo.waitSemaphoreCount = 1;
submitInfo.signalSemaphoreCount = 1;
submitInfo.pSignalSemaphores = &renderCompleteSemaphore;
submitInfo.pWaitSemaphores = &presentCompleteSemaphore;
submitInfo.pWaitDstStageMask = &submitPipelineStages;
Verify() << vkQueueSubmit(dc.queue, 1, &submitInfo,
inFlightFences[imageIndex]);
VkPresentInfoKHR presentInfo{};
presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
presentInfo.waitSemaphoreCount = 1;
presentInfo.pWaitSemaphores = &renderCompleteSemaphore;
presentInfo.swapchainCount = 1;
presentInfo.pSwapchains = &swapchain;
presentInfo.pImageIndices = &imageIndex;
if (device.handleFlip(
presentQueue, presentCmdBuffers[imageIndex],
*flipTaskChain[imageIndex].get(), cmd.flip.bufferIndex,
cmd.flip.arg, swapchainImages[imageIndex], swapchainExtent,
presentCompleteSemaphore, renderCompleteSemaphore,
inFlightFences[imageIndex])) {
VkPresentInfoKHR presentInfo{
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &renderCompleteSemaphore,
.swapchainCount = 1,
.pSwapchains = &swapchain,
.pImageIndices = &imageIndex,
};
if (vkQueuePresentKHR(presentQueue, &presentInfo) != VK_SUCCESS) {
std::printf("swapchain was invalidated\n");
createSwapchain();
}
// std::this_thread::sleep_for(std::chrono::seconds(3));
} else {
isImageAcquired = true;
}
break;
}
@ -876,17 +864,6 @@ int main(int argc, const char *argv[]) {
vkDestroySemaphore(vkDevice, presentCompleteSemaphore, nullptr);
vkDestroySemaphore(vkDevice, renderCompleteSemaphore, nullptr);
vkDestroyCommandPool(vkDevice, commandPool, nullptr);
for (auto &handles : swapchainImageHandles) {
for (auto handle : handles) {
vkDestroyImage(vkDevice, handle, nullptr);
}
}
for (auto &handles : swapchainBufferHandles) {
for (auto handle : handles) {
vkDestroyBuffer(vkDevice, handle, nullptr);
}
}
}
vkDestroySwapchainKHR(vkDevice, swapchain, nullptr);