[amdgpu] cache: fix small buffers write back

This commit is contained in:
DH 2023-08-06 21:33:01 +03:00
parent 292fd4762f
commit 6441b7b1f9
2 changed files with 88 additions and 30 deletions

View file

@ -74,6 +74,38 @@ struct TaskChain {
return add(queue, GpuTaskLayout::kInvalidId, std::move(invoke));
}
template <typename T>
requires requires(T &&t) {
{ t() } -> std::same_as<TaskResult>;
}
std::uint64_t add(std::uint64_t waitId, T &&task) {
auto prevTaskId = getLastTaskId();
auto id = nextTaskId++;
auto cpuTask =
createCpuTask([=, task = std::forward<T>(task),
self = Ref(this)](const AsyncTaskCtl &) mutable {
if (waitId != GpuTaskLayout::kInvalidId) {
if (self->semaphore.getCounterValue() < waitId) {
return TaskResult::Reschedule;
}
}
auto result = task();
if (result != TaskResult::Complete) {
return result;
}
if (prevTaskId != GpuTaskLayout::kInvalidId && waitId != prevTaskId) {
self->wait(prevTaskId);
}
self->semaphore.signal(id);
return TaskResult::Complete;
});
getCpuScheduler().enqueue(std::move(cpuTask));
return id;
}
template <typename T>
requires requires(T &&t) {
{ t() } -> std::same_as<void>;
@ -111,6 +143,14 @@ struct TaskChain {
return add(GpuTaskLayout::kInvalidId, std::forward<T>(task));
}
/// Appends a TaskResult-returning CPU task with no explicit dependency.
///
/// Forwards to add(waitId, task) with GpuTaskLayout::kInvalidId as the wait
/// id.
///
/// @param task callable returning TaskResult.
/// @return the id returned by the two-argument overload.
template <typename T>
  requires requires(T &&t) {
    { t() } -> std::same_as<TaskResult>;
  }
std::uint64_t add(T &&task) {
  const auto noDependency = GpuTaskLayout::kInvalidId;
  return add(noDependency, std::forward<T>(task));
}
/// Returns the id immediately preceding the next id to be issued
/// (nextTaskId - 1).
/// NOTE(review): the value returned before any id has been issued depends on
/// nextTaskId's initial value, which is not visible here — confirm.
std::uint64_t getLastTaskId() const { return nextTaskId - 1; }
/// Reserves a task id by post-incrementing nextTaskId and returns the
/// reserved value; nothing is enqueued or signalled here.
/// NOTE(review): presumably the caller is expected to signal this id on the
/// chain itself — confirm against call sites.
std::uint64_t createExternalTask() { return nextTaskId++; }

View file

@ -2726,46 +2726,64 @@ struct CacheLine {
entry->unlockMutableTask = [=, this] {
if (entry->cacheMode != CacheMode::None) {
lockReadWrite(address, size, entry->cacheMode == CacheMode::LazyWrite);
entry->syncState.map(address, address + size, tag);
std::lock_guard lock(hostSyncMtx);
hostSyncTable.map(address, address + size,
{.tag = tag, .overlay = entry});
} else {
std::lock_guard lock(hostSyncMtx);
hostSyncTable.map(address, address + size,
{.tag = tag, .overlay = memoryOverlay});
}
entry->syncState.map(address, address + size, tag);
std::lock_guard lock(hostSyncMtx);
hostSyncTable.map(address, address + size,
{.tag = tag, .overlay = entry});
};
if (entry->cacheMode != CacheMode::LazyWrite) {
auto writeBackTask =
createCpuTask([=, this](const AsyncTaskCtl &ctl) mutable {
if (ctl.isCancelRequested()) {
return TaskResult::Canceled;
}
auto writeBackTask = createCpuTask([=, this](
const AsyncTaskCtl &ctl) mutable {
if (ctl.isCancelRequested()) {
return TaskResult::Canceled;
}
auto tag = writeBackTag.fetch_add(1, std::memory_order::relaxed);
auto taskChain = TaskChain::Create();
Ref<CacheBufferOverlay> uploadBuffer;
auto tag = writeBackTag.fetch_add(1, std::memory_order::relaxed);
auto updateTaskChain = TaskChain::Create();
auto uploadBuffer = getBuffer(tag, *updateTaskChain.get(), address,
size, 1, 1, shader::AccessOp::Load);
updateTaskChain->wait();
if (entry->cacheMode == CacheMode::None) {
uploadBuffer = static_cast<CacheBufferOverlay *>(entry.get());
if (!uploadBuffer->tryLock(tag, shader::AccessOp::None).isLocked) {
taskChain->add([&] {
return uploadBuffer->tryLock(tag, shader::AccessOp::None).isLocked
? TaskResult::Complete
: TaskResult::Reschedule;
});
}
} else {
uploadBuffer = getBuffer(tag, *taskChain.get(), address, size, 1, 1,
shader::AccessOp::Load);
}
taskChain->wait();
if (ctl.isCancelRequested()) {
uploadBuffer->unlock(tag);
return TaskResult::Canceled;
}
if (ctl.isCancelRequested()) {
uploadBuffer->unlock(tag);
return TaskResult::Canceled;
}
memoryOverlay->writeBuffer(*updateTaskChain.get(), uploadBuffer,
address, size);
uploadBuffer->unlock(tag);
memoryOverlay->writeBuffer(*taskChain.get(), uploadBuffer, address,
size);
uploadBuffer->unlock(tag);
if (ctl.isCancelRequested()) {
return TaskResult::Canceled;
}
if (ctl.isCancelRequested()) {
return TaskResult::Canceled;
}
updateTaskChain->wait();
if (entry->cacheMode != CacheMode::None) {
unlockReadWrite(address, size);
}
return TaskResult::Complete;
});
taskChain->wait();
if (entry->cacheMode != CacheMode::None) {
unlockReadWrite(address, size);
}
return TaskResult::Complete;
});
{
std::lock_guard lock(entry->mtx);