From 6441b7b1f947fa8979ccfb4bc4b24b74c4791fed Mon Sep 17 00:00:00 2001 From: DH Date: Sun, 6 Aug 2023 21:33:01 +0300 Subject: [PATCH] [amdgpu] cache: fix small buffers write back --- .../include/amdgpu/device/gpu-scheduler.hpp | 40 ++++++++++ hw/amdgpu/device/src/device.cpp | 78 ++++++++++++------- 2 files changed, 88 insertions(+), 30 deletions(-) diff --git a/hw/amdgpu/device/include/amdgpu/device/gpu-scheduler.hpp b/hw/amdgpu/device/include/amdgpu/device/gpu-scheduler.hpp index fa928bae6..6233beec0 100644 --- a/hw/amdgpu/device/include/amdgpu/device/gpu-scheduler.hpp +++ b/hw/amdgpu/device/include/amdgpu/device/gpu-scheduler.hpp @@ -74,6 +74,38 @@ struct TaskChain { return add(queue, GpuTaskLayout::kInvalidId, std::move(invoke)); } + template + requires requires(T &&t) { + { t() } -> std::same_as; + } + std::uint64_t add(std::uint64_t waitId, T &&task) { + auto prevTaskId = getLastTaskId(); + auto id = nextTaskId++; + auto cpuTask = + createCpuTask([=, task = std::forward(task), + self = Ref(this)](const AsyncTaskCtl &) mutable { + if (waitId != GpuTaskLayout::kInvalidId) { + if (self->semaphore.getCounterValue() < waitId) { + return TaskResult::Reschedule; + } + } + + auto result = task(); + if (result != TaskResult::Complete) { + return result; + } + + if (prevTaskId != GpuTaskLayout::kInvalidId && waitId != prevTaskId) { + self->wait(prevTaskId); + } + + self->semaphore.signal(id); + return TaskResult::Complete; + }); + getCpuScheduler().enqueue(std::move(cpuTask)); + return id; + } + template requires requires(T &&t) { { t() } -> std::same_as; @@ -111,6 +143,14 @@ struct TaskChain { return add(GpuTaskLayout::kInvalidId, std::forward(task)); } + template + requires requires(T &&t) { + { t() } -> std::same_as; + } + std::uint64_t add(T &&task) { + return add(GpuTaskLayout::kInvalidId, std::forward(task)); + } + std::uint64_t getLastTaskId() const { return nextTaskId - 1; } std::uint64_t createExternalTask() { return nextTaskId++; } diff --git a/hw/amdgpu/device/src/device.cpp b/hw/amdgpu/device/src/device.cpp index 4d64f5bba..3bb16416f 100644 --- a/hw/amdgpu/device/src/device.cpp +++ b/hw/amdgpu/device/src/device.cpp @@ -2726,46 +2726,64 @@ struct CacheLine { entry->unlockMutableTask = [=, this] { if (entry->cacheMode != CacheMode::None) { lockReadWrite(address, size, entry->cacheMode == CacheMode::LazyWrite); + entry->syncState.map(address, address + size, tag); + + std::lock_guard lock(hostSyncMtx); + hostSyncTable.map(address, address + size, + {.tag = tag, .overlay = entry}); + } else { + std::lock_guard lock(hostSyncMtx); + hostSyncTable.map(address, address + size, + {.tag = tag, .overlay = memoryOverlay}); } - entry->syncState.map(address, address + size, tag); - std::lock_guard lock(hostSyncMtx); - hostSyncTable.map(address, address + size, - {.tag = tag, .overlay = entry}); }; if (entry->cacheMode != CacheMode::LazyWrite) { - auto writeBackTask = - createCpuTask([=, this](const AsyncTaskCtl &ctl) mutable { - if (ctl.isCancelRequested()) { - return TaskResult::Canceled; - } + auto writeBackTask = createCpuTask([=, this]( + const AsyncTaskCtl &ctl) mutable { + if (ctl.isCancelRequested()) { + return TaskResult::Canceled; + } - auto tag = writeBackTag.fetch_add(1, std::memory_order::relaxed); + auto taskChain = TaskChain::Create(); + Ref uploadBuffer; + auto tag = writeBackTag.fetch_add(1, std::memory_order::relaxed); - auto updateTaskChain = TaskChain::Create(); - auto uploadBuffer = getBuffer(tag, *updateTaskChain.get(), address, - size, 1, 1, shader::AccessOp::Load); - updateTaskChain->wait(); + if (entry->cacheMode == CacheMode::None) { + uploadBuffer = static_cast(entry.get()); + if (!uploadBuffer->tryLock(tag, shader::AccessOp::None).isLocked) { + taskChain->add([&] { + return uploadBuffer->tryLock(tag, shader::AccessOp::None).isLocked + ? TaskResult::Complete + : TaskResult::Reschedule; + }); + } + } else { + uploadBuffer = getBuffer(tag, *taskChain.get(), address, size, 1, 1, + shader::AccessOp::Load); + } + taskChain->wait(); - if (ctl.isCancelRequested()) { - uploadBuffer->unlock(tag); - return TaskResult::Canceled; - } + if (ctl.isCancelRequested()) { + uploadBuffer->unlock(tag); + return TaskResult::Canceled; + } - memoryOverlay->writeBuffer(*updateTaskChain.get(), uploadBuffer, - address, size); - uploadBuffer->unlock(tag); + memoryOverlay->writeBuffer(*taskChain.get(), uploadBuffer, address, + size); + uploadBuffer->unlock(tag); - if (ctl.isCancelRequested()) { - return TaskResult::Canceled; - } + if (ctl.isCancelRequested()) { + return TaskResult::Canceled; + } - updateTaskChain->wait(); - if (entry->cacheMode != CacheMode::None) { - unlockReadWrite(address, size); - } - return TaskResult::Complete; - }); + taskChain->wait(); + + if (entry->cacheMode != CacheMode::None) { + unlockReadWrite(address, size); + } + return TaskResult::Complete; + }); { std::lock_guard lock(entry->mtx);