Mirror of https://github.com/RPCSX/rpcsx.git (synced 2026-01-25 18:10:48 +01:00)

[amdgpu] WIP cache implementation

This commit is contained in:
parent 306fecf2ab
commit 760aea0e49
@@ -1,15 +1,11 @@
#pragma once

#include "amdgpu/RemoteMemory.hpp"
#include "amdgpu/bridge/bridge.hpp"
#include "amdgpu/shader/Converter.hpp"
#include "amdgpu/shader/Instruction.hpp"
#include "util/Verify.hpp"
#include "util/area.hpp"

#include <cassert>
#include <map>
#include <set>
#include <string>
#include <vector>
#include <vulkan/vulkan_core.h>

namespace amdgpu::device {
@@ -1264,199 +1260,12 @@ struct GnmTBuffer {

static_assert(sizeof(GnmTBuffer) == sizeof(std::uint64_t) * 4);

struct ShaderModule {
  VkPipeline pipeline;
  VkPipelineLayout pipelineLayout;
  VkDescriptorSetLayout descriptorSetLayout;
  VkDescriptorPool descriptorPool;

  void destroy() const;
};

constexpr auto kPageSize = 0x4000;

struct AreaInfo {
  std::uint64_t beginAddress;
  std::uint64_t endAddress;
};

struct NoInvalidationHandle {
  void handleInvalidation(std::uint64_t) {}
};

struct StdSetInvalidationHandle {
  std::set<std::uint64_t, std::greater<>> invalidated;

  void handleInvalidation(std::uint64_t address) {
    invalidated.insert(address);
  }
};

template <typename InvalidationHandleT = NoInvalidationHandle>
class MemoryAreaTable : public InvalidationHandleT {
  enum class Kind { O, X };
  std::map<std::uint64_t, Kind> mAreas;

public:
  class iterator {
    using map_iterator = typename std::map<std::uint64_t, Kind>::iterator;
    map_iterator it;

  public:
    iterator() = default;
    iterator(map_iterator it) : it(it) {}

    AreaInfo operator*() const { return {it->first, std::next(it)->first}; }

    iterator &operator++() const {
      ++it;
      ++it;
      return *this;
    }

    iterator &operator--() const {
      --it;
      --it;
      return *this;
    }

    bool operator==(iterator other) const { return it == other.it; }
    bool operator!=(iterator other) const { return it != other.it; }
  };

  iterator begin() { return iterator(mAreas.begin()); }
  iterator end() { return iterator(mAreas.end()); }

  void clear() { mAreas.clear(); }

  AreaInfo queryArea(std::uint64_t address) const {
    auto it = mAreas.lower_bound(address);
    assert(it != mAreas.end());
    std::uint64_t endAddress = 0;
    if (it->first != address) {
      assert(it->second == Kind::X);
      endAddress = it->first;
      --it;
    } else {
      assert(it->second == Kind::O);
      endAddress = std::next(it)->first;
    }

    auto startAddress = std::uint64_t(it->first);

    return {startAddress, endAddress};
  }

  void map(std::uint64_t beginAddress, std::uint64_t endAddress) {
    auto [beginIt, beginInserted] = mAreas.emplace(beginAddress, Kind::O);
    auto [endIt, endInserted] = mAreas.emplace(endAddress, Kind::X);

    if (!beginInserted) {
      if (beginIt->second == Kind::X) {
        // it was close, extend to open
        assert(beginIt != mAreas.begin());
        --beginIt;
      }
    } else if (beginIt != mAreas.begin()) {
      auto prevRangePointIt = std::prev(beginIt);

      if (prevRangePointIt->second == Kind::O) {
        // we found range start before inserted one, remove insertion and extend
        // begin
        this->handleInvalidation(beginIt->first);
        mAreas.erase(beginIt);
        beginIt = prevRangePointIt;
      }
    }

    if (!endInserted) {
      if (endIt->second == Kind::O) {
        // it was open, extend to close
        assert(endIt != mAreas.end());
        ++endIt;
      }
    } else {
      auto nextRangePointIt = std::next(endIt);

      if (nextRangePointIt != mAreas.end() &&
          nextRangePointIt->second == Kind::X) {
        // we found range end after inserted one, remove insertion and extend
        // end
        this->handleInvalidation(std::prev(endIt)->first);
        mAreas.erase(endIt);
        endIt = nextRangePointIt;
      }
    }

    // eat everything in middle of the range
    ++beginIt;
    while (beginIt != endIt) {
      this->handleInvalidation(std::prev(endIt)->first);
      beginIt = mAreas.erase(beginIt);
    }
  }

  void unmap(std::uint64_t beginAddress, std::uint64_t endAddress) {
    auto beginIt = mAreas.lower_bound(beginAddress);

    if (beginIt == mAreas.end() || beginIt->first >= endAddress) {
      return;
    }
    if (beginIt->first > beginAddress && beginIt->second == Kind::X) {
      // we have found end after unmap begin, need to insert new end
      this->handleInvalidation(std::prev(beginIt)->first);
      auto newBeginIt = mAreas.emplace_hint(beginIt, beginAddress, Kind::X);
      mAreas.erase(beginIt);

      if (newBeginIt == mAreas.end()) {
        return;
      }

      beginIt = std::next(newBeginIt);
    } else if (beginIt->second == Kind::X) {
      beginIt = ++beginIt;
    }

    Kind lastKind = Kind::X;
    while (beginIt != mAreas.end() && beginIt->first <= endAddress) {
      lastKind = beginIt->second;
      if (lastKind == Kind::O) {
        this->handleInvalidation(std::prev(beginIt)->first);
      }
      beginIt = mAreas.erase(beginIt);
    }

    if (lastKind != Kind::O) {
      return;
    }

    // Last removed was range open, need to insert new one at unmap end
    mAreas.emplace_hint(beginIt, endAddress, Kind::O);
  }

  std::size_t totalMemory() const {
    std::size_t result = 0;

    for (auto it = mAreas.begin(), end = mAreas.end(); it != end; ++it) {
      auto rangeBegin = it;
      auto rangeEnd = ++it;

      result += rangeEnd->first - rangeBegin->first;
    }

    return result;
  }
};

extern MemoryAreaTable<StdSetInvalidationHandle> memoryAreaTable;

struct DrawContext {
  VkPipelineCache pipelineCache;
  VkQueue queue;
  VkCommandPool commandPool;
  std::vector<VkShaderModule> loadedShaderModules;

  ~DrawContext();
};

void setVkDevice(VkDevice device,
hw/amdgpu/device/include/amdgpu/device/scheduler.hpp (new file, 303 lines)
@@ -0,0 +1,303 @@
#pragma once

#include <atomic>
#include <bit>
#include <condition_variable>
#include <mutex>
#include <thread>
#include <utility>
#include <vector>

namespace amdgpu::device {
template <typename T> class Ref {
  T *m_ref = nullptr;

public:
  Ref() = default;
  Ref(std::nullptr_t) {}

  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref(OT *ref) : m_ref(ref) {
    if (m_ref != nullptr) {
      ref->incRef();
    }
  }

  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref(const Ref<OT> &other) : m_ref(other.get()) {
    if (m_ref != nullptr) {
      m_ref->incRef();
    }
  }

  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref(Ref<OT> &&other) : m_ref(other.release()) {}

  Ref(const Ref &other) : m_ref(other.get()) {
    if (m_ref != nullptr) {
      m_ref->incRef();
    }
  }
  Ref(Ref &&other) : m_ref(other.release()) {}

  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref &operator=(Ref<OT> &&other) {
    other.swap(*this);
    return *this;
  }

  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref &operator=(OT *other) {
    *this = Ref(other);
    return *this;
  }

  template <typename OT>
    requires(std::is_base_of_v<T, OT>)
  Ref &operator=(const Ref<OT> &other) {
    *this = Ref(other);
    return *this;
  }

  Ref &operator=(const Ref &other) {
    *this = Ref(other);
    return *this;
  }

  Ref &operator=(Ref &&other) {
    other.swap(*this);
    return *this;
  }

  ~Ref() {
    if (m_ref != nullptr) {
      m_ref->decRef();
    }
  }

  void swap(Ref<T> &other) { std::swap(m_ref, other.m_ref); }
  T *get() const { return m_ref; }
  T *release() { return std::exchange(m_ref, nullptr); }
  T *operator->() const { return m_ref; }
  explicit operator bool() const { return m_ref != nullptr; }
  bool operator==(std::nullptr_t) const { return m_ref == nullptr; }
  bool operator!=(std::nullptr_t) const { return m_ref != nullptr; }
  auto operator<=>(const T *other) const { return m_ref <=> other; }
  auto operator<=>(const Ref &other) const = default;
};

enum class TaskState { InProgress, Complete, Canceled };

struct AsyncTaskCtl {
  std::atomic<unsigned> refs{0};
  std::atomic<TaskState> stateStorage{TaskState::InProgress};

  virtual ~AsyncTaskCtl() = default;

  void incRef() { refs.fetch_add(1, std::memory_order::relaxed); }
  void decRef() {
    if (refs.fetch_sub(1, std::memory_order::relaxed) == 1) {
      delete this;
    }
  }

  bool isCanceled() const {
    return stateStorage.load(std::memory_order::relaxed) == TaskState::Canceled;
  }
  bool isComplete() const {
    return stateStorage.load(std::memory_order::relaxed) == TaskState::Complete;
  }
  bool isInProgress() const {
    return stateStorage.load(std::memory_order::relaxed) ==
           TaskState::InProgress;
  }

  void cancel() {
    auto state = TaskState::InProgress;

    while (state == TaskState::InProgress) {
      if (stateStorage.compare_exchange_weak(state, TaskState::Canceled,
                                             std::memory_order::relaxed)) {
        break;
      }
    }

    stateStorage.notify_all();
  }

  void complete() {
    auto state = TaskState::InProgress;

    while (state != TaskState::Complete) {
      if (stateStorage.compare_exchange_weak(state, TaskState::Complete,
                                             std::memory_order::relaxed)) {
        break;
      }
    }

    stateStorage.notify_all();
  }

  void wait() {
    stateStorage.wait(TaskState::InProgress, std::memory_order::relaxed);
  }

  virtual void invoke() = 0;
};

namespace detail {
template <typename T>
concept LambdaWithoutClosure = requires(T t) { +t; };
}

template <typename T> struct AsyncTask;

template <typename T>
  requires(std::is_invocable_r_v<bool, T, const AsyncTaskCtl &> &&
           detail::LambdaWithoutClosure<T>)
struct AsyncTask<T> : AsyncTaskCtl {
  static constexpr bool (*fn)(const AsyncTaskCtl &) = +std::declval<T>();

  AsyncTask() = default;
  AsyncTask(T &&) {}

  void invoke() override {
    auto &base = *static_cast<const AsyncTaskCtl *>(this);

    if (fn(base)) {
      complete();
    }
  }
};

template <typename T>
  requires std::is_invocable_r_v<bool, T, const AsyncTaskCtl &>
Ref<AsyncTaskCtl> createTask(T &&task) {
  return Ref<AsyncTaskCtl>(new AsyncTask<T>(std::forward<T>(task)));
}

template <typename T>
  requires(std::is_invocable_r_v<bool, T, const AsyncTaskCtl &> &&
           !detail::LambdaWithoutClosure<T>)
struct AsyncTask<T> : AsyncTaskCtl {
  alignas(T) std::byte taskStorage[sizeof(T)];

  AsyncTask() = default;
  AsyncTask(T &&t) { new (taskStorage) T(std::forward<T>(t)); }
  AsyncTask &operator=(T &&t) {
    new (taskStorage) T(std::forward<T>(t));
    return *this;
  }

  void invoke() override {
    auto &lambda = *std::bit_cast<T *>(&taskStorage);
    auto &base = *static_cast<const AsyncTaskCtl *>(this);

    if (lambda(base)) {
      complete();
    }

    std::bit_cast<T *>(&taskStorage)->~T();
  }
};

class Scheduler;
class TaskSet {
  std::vector<Ref<AsyncTaskCtl>> tasks;

public:
  void append(Ref<AsyncTaskCtl> task) { tasks.push_back(std::move(task)); }

  void wait() {
    for (auto task : tasks) {
      task->wait();
    }

    tasks.clear();
  }

  void enqueue(Scheduler &scheduler);
};

class Scheduler {
  std::vector<std::thread> workThreads;
  std::vector<Ref<AsyncTaskCtl>> tasks;
  std::mutex taskMtx;
  std::condition_variable taskCv;
  std::atomic<bool> exit{false};

public:
  explicit Scheduler(std::size_t threadCount) {
    for (std::size_t i = 0; i < threadCount; ++i) {
      workThreads.push_back(std::thread{[this] { entry(); }});
    }
  }

  ~Scheduler() {
    exit = true;
    taskCv.notify_all();

    for (auto &thread : workThreads) {
      thread.join();
    }
  }

  template <typename T>
    requires std::is_invocable_r_v<bool, T, const AsyncTaskCtl &>
  Ref<AsyncTaskCtl> enqueue(T &&task) {
    auto taskHandle = createTask(std::forward<T>(task));
    enqueue(taskHandle);
    return taskHandle;
  }

  void enqueue(Ref<AsyncTaskCtl> task) {
    std::lock_guard lock(taskMtx);
    tasks.push_back(std::move(task));
    taskCv.notify_one();
  }

  template <typename T>
    requires std::is_invocable_r_v<bool, T, const AsyncTaskCtl &>
  void enqueue(TaskSet &set, T &&task) {
    auto taskCtl = enqueue(std::forward<T>(task));
    set.append(taskCtl);
  }

private:
  void entry() {
    while (!exit.load(std::memory_order::relaxed)) {
      Ref<AsyncTaskCtl> task;

      if (task == nullptr) {
        std::unique_lock lock(taskMtx);

        if (tasks.empty()) {
          taskCv.wait(lock);
        }

        if (tasks.empty()) {
          continue;
        }

        task = std::move(tasks.back());
        tasks.pop_back();
      }

      if (task != nullptr) {
        task->invoke();
      }
    }
  }
};

inline void TaskSet::enqueue(Scheduler &scheduler) {
  for (auto task : tasks) {
    scheduler.enqueue(std::move(task));
  }
}
} // namespace amdgpu::device
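Not part of the commit: a minimal usage sketch for the new scheduler header, assuming it is reachable as <amdgpu/device/scheduler.hpp>; the worker count and the lambda are illustrative. A task callback returns true once it has finished, which marks the handle Complete, and TaskSet::wait() blocks until every collected handle has left InProgress.

// Sketch only; assumes the scheduler.hpp shown above.
#include <amdgpu/device/scheduler.hpp>

#include <cstdio>

int main() {
  using namespace amdgpu::device;

  Scheduler scheduler(4); // spawns 4 worker threads running entry()
  TaskSet batch;

  int tile = 42;
  scheduler.enqueue(batch, [tile](const AsyncTaskCtl &ctl) {
    if (ctl.isCanceled()) {
      return true; // stop early; returning true still releases waiters
    }
    std::printf("processing tile %d\n", tile);
    return true; // true -> invoke() marks the task Complete
  });

  batch.wait(); // blocks until every task in the set leaves InProgress
}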
@@ -2,9 +2,12 @@
#include "tiler.hpp"
#include "util/VerifyVulkan.hpp"
#include "util/area.hpp"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <mutex>
#include <span>
#include <utility>
#include <vulkan/vulkan_core.h>

@@ -150,78 +153,58 @@ struct DeviceMemoryRef {
  VkDeviceSize offset = 0;
  VkDeviceSize size = 0;
  void *data = nullptr;
  void *allocator = nullptr;

  void (*release)(DeviceMemoryRef &memoryRef) = nullptr;
};

class MemoryResource {
  DeviceMemory mMemory;
  VkMemoryPropertyFlags mProperties = 0;
  std::size_t mSize = 0;
  std::size_t mAllocationOffset = 0;
  char *mData = nullptr;
  util::MemoryAreaTable<> table;
  const char *debugName = "<unknown>";

  std::mutex mMtx;

public:
  MemoryResource(const MemoryResource &) = delete;

  MemoryResource() = default;
  MemoryResource(MemoryResource &&other) = default;
  MemoryResource &operator=(MemoryResource &&other) = default;

  ~MemoryResource() {
    if (mMemory.getHandle() != nullptr && mData != nullptr) {
      vkUnmapMemory(g_vkDevice, mMemory.getHandle());
    }
  }

  void clear() { mAllocationOffset = 0; }

  static MemoryResource CreateFromFd(int fd, std::size_t size) {
  void initFromHost(void *data, std::size_t size) {
    assert(mMemory.getHandle() == nullptr);
    auto properties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
    MemoryResource result;
    result.mMemory = DeviceMemory::CreateExternalFd(
        fd, size, findPhysicalMemoryTypeIndex(~0, properties));
    result.mProperties = properties;
    result.mSize = size;

    return result;
    mMemory = DeviceMemory::CreateExternalHostMemory(data, size, properties);
    table.map(0, size);
    debugName = "direct";
  }

  static MemoryResource CreateFromHost(void *data, std::size_t size) {
  void initHostVisible(std::size_t size) {
    assert(mMemory.getHandle() == nullptr);
    auto properties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
    MemoryResource result;
    result.mMemory =
        DeviceMemory::CreateExternalHostMemory(data, size, properties);
    result.mProperties = properties;
    result.mSize = size;

    return result;
  }

  static MemoryResource CreateHostVisible(std::size_t size) {
    auto properties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
    MemoryResource result;
    result.mMemory = DeviceMemory::Allocate(size, ~0, properties);
    result.mProperties = properties;
    result.mSize = size;
    auto memory = DeviceMemory::Allocate(size, ~0, properties);

    void *data = nullptr;
    Verify() << vkMapMemory(g_vkDevice, result.mMemory.getHandle(), 0, size, 0,
                            &data);
    result.mData = reinterpret_cast<char *>(data);
    Verify() << vkMapMemory(g_vkDevice, memory.getHandle(), 0, size, 0, &data);

    return result;
    mMemory = std::move(memory);
    table.map(0, size);
    mData = reinterpret_cast<char *>(data);
    debugName = "host";
  }

  static MemoryResource CreateDeviceLocal(std::size_t size) {
  void initDeviceLocal(std::size_t size) {
    assert(mMemory.getHandle() == nullptr);
    auto properties = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;

    MemoryResource result;
    result.mMemory = DeviceMemory::Allocate(size, ~0, properties);
    result.mProperties = properties;
    result.mSize = size;
    return result;
    mMemory = DeviceMemory::Allocate(size, ~0, properties);
    table.map(0, size);
    debugName = "local";
  }

  DeviceMemoryRef allocate(VkMemoryRequirements requirements) {

@@ -230,22 +213,55 @@ public:
      util::unreachable();
    }

    auto offset = (mAllocationOffset + requirements.alignment - 1) &
                  ~(requirements.alignment - 1);
    mAllocationOffset = offset + requirements.size;
    if (mAllocationOffset > mSize) {
      util::unreachable("out of memory resource");
    std::lock_guard lock(mMtx);

    for (auto elem : table) {
      auto offset = (elem.beginAddress + requirements.alignment - 1) &
                    ~(requirements.alignment - 1);

      if (offset >= elem.endAddress) {
        continue;
      }

      auto blockSize = elem.endAddress - offset;

      if (blockSize < requirements.size) {
        continue;
      }

      table.unmap(offset, offset + requirements.size);
      return {mMemory.getHandle(),
              offset,
              requirements.size,
              mData,
              this,
              [](DeviceMemoryRef &memoryRef) {
                auto self =
                    reinterpret_cast<MemoryResource *>(memoryRef.allocator);
                self->deallocate(memoryRef);
              }};
    }

    return {mMemory.getHandle(), offset, requirements.size, mData};
    util::unreachable("out of memory resource");
  }

  void deallocate(DeviceMemoryRef memory) {
    std::lock_guard lock(mMtx);
    table.map(memory.offset, memory.offset + memory.size);
  }

  void dump() {
    std::lock_guard lock(mMtx);

    for (auto elem : table) {
      std::fprintf(stderr, "%zu - %zu\n", elem.beginAddress, elem.endAddress);
    }
  }

  DeviceMemoryRef getFromOffset(std::uint64_t offset, std::size_t size) {
    return {mMemory.getHandle(), offset, size, nullptr};
    return {mMemory.getHandle(), offset, size, nullptr, nullptr, nullptr};
  }

  std::size_t getSize() const { return mSize; }

  explicit operator bool() const { return mMemory.getHandle() != nullptr; }
};
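The reworked MemoryResource::allocate() above replaces the old bump allocator with a first-fit search over the free areas tracked by util::MemoryAreaTable, rounding each candidate offset up to the requested alignment and handing back a DeviceMemoryRef whose release callback returns the block to the table. A standalone sketch of that first-fit step (illustrative names, not the project's API):

// Standalone sketch of the first-fit search used by allocate(); illustrative only.
#include <cstdint>
#include <optional>
#include <vector>

struct FreeRange {
  std::uint64_t begin;
  std::uint64_t end;
};

std::optional<std::uint64_t> firstFit(const std::vector<FreeRange> &freeList,
                                      std::uint64_t size,
                                      std::uint64_t alignment) {
  for (auto range : freeList) {
    // round the range start up to the requested power-of-two alignment
    auto offset = (range.begin + alignment - 1) & ~(alignment - 1);
    if (offset >= range.end || range.end - offset < size) {
      continue; // the aligned block does not fit in this free range
    }
    return offset; // caller would then unmap [offset, offset + size)
  }
  return std::nullopt; // out of memory in this resource
}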
@@ -364,6 +380,10 @@ public:
  ~Buffer() {
    if (mBuffer != nullptr) {
      vkDestroyBuffer(g_vkDevice, mBuffer, g_vkAllocator);

      if (mMemory.release != nullptr) {
        mMemory.release(mMemory);
      }
    }
  }

@@ -589,12 +609,13 @@ public:
    return requirements;
  }

  void readFromBuffer(VkCommandBuffer cmdBuffer, const Buffer &buffer,
                      VkImageAspectFlags destAspect) {
  void readFromBuffer(VkCommandBuffer cmdBuffer, VkBuffer buffer,
                      VkImageAspectFlags destAspect,
                      VkDeviceSize bufferOffset = 0) {
    transitionLayout(cmdBuffer, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);

    VkBufferImageCopy region{};
    region.bufferOffset = 0;
    region.bufferOffset = bufferOffset;
    region.bufferRowLength = 0;
    region.bufferImageHeight = 0;
    region.imageSubresource.aspectMask = destAspect;

@@ -604,11 +625,11 @@ public:
    region.imageOffset = {0, 0, 0};
    region.imageExtent = {mWidth, mHeight, 1};

    vkCmdCopyBufferToImage(cmdBuffer, buffer.getHandle(), mImage,
    vkCmdCopyBufferToImage(cmdBuffer, buffer, mImage,
                           VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);
  }

  void writeToBuffer(VkCommandBuffer cmdBuffer, const Buffer &buffer,
  void writeToBuffer(VkCommandBuffer cmdBuffer, VkBuffer buffer,
                     VkImageAspectFlags sourceAspect) {
    transitionLayout(cmdBuffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

@@ -624,8 +645,8 @@ public:
    region.imageExtent = {mWidth, mHeight, 1};

    vkCmdCopyImageToBuffer(cmdBuffer, mImage,
                           VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                           buffer.getHandle(), 1, &region);
                           VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, 1,
                           &region);
  }

  [[nodiscard]] Buffer writeToBuffer(VkCommandBuffer cmdBuffer,

@@ -635,7 +656,7 @@ public:
        pool, getMemoryRequirements().size,
        VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);

    writeToBuffer(cmdBuffer, transferBuffer, sourceAspect);
    writeToBuffer(cmdBuffer, transferBuffer.getHandle(), sourceAspect);
    return transferBuffer;
  }

@@ -661,7 +682,7 @@ public:
    transferBuffer.readFromImage(address, bpp, tileMode, width, height, 1,
                                 pitch);

    readFromBuffer(cmdBuffer, transferBuffer, destAspect);
    readFromBuffer(cmdBuffer, transferBuffer.getHandle(), destAspect);

    return transferBuffer;
  }

@@ -736,6 +757,7 @@ class Image2D {
  VkImageLayout mLayout = {};
  unsigned mWidth = 0;
  unsigned mHeight = 0;
  DeviceMemoryRef mMemory;

public:
  Image2D(const Image2D &) = delete;

@@ -746,6 +768,10 @@ public:
  ~Image2D() {
    if (mImage != nullptr) {
      vkDestroyImage(g_vkDevice, mImage, g_vkAllocator);

      if (mMemory.release != nullptr) {
        mMemory.release(mMemory);
      }
    }
  }

@@ -829,6 +855,7 @@ public:
  void bindMemory(DeviceMemoryRef memory) {
    Verify() << vkBindImageMemory(g_vkDevice, mImage, memory.deviceMemory,
                                  memory.offset);
    mMemory = memory;
  }

  friend ImageRef;
(Diff for one file suppressed because it is too large.)

hw/amdgpu/include/util/area.hpp (new file, 182 lines)
@@ -0,0 +1,182 @@
#pragma once

#include <cassert>
#include <cstdint>
#include <map>
#include <set>

namespace util {
struct AreaInfo {
  std::uint64_t beginAddress;
  std::uint64_t endAddress;
};

struct NoInvalidationHandle {
  void handleInvalidation(std::uint64_t) {}
};

struct StdSetInvalidationHandle {
  std::set<std::uint64_t, std::greater<>> invalidated;

  void handleInvalidation(std::uint64_t address) {
    invalidated.insert(address);
  }
};

template <typename InvalidationHandleT = NoInvalidationHandle>
class MemoryAreaTable : public InvalidationHandleT {
  enum class Kind { O, X };
  std::map<std::uint64_t, Kind> mAreas;

public:
  class iterator {
    using map_iterator = typename std::map<std::uint64_t, Kind>::iterator;
    map_iterator it;

  public:
    iterator() = default;
    iterator(map_iterator it) : it(it) {}

    AreaInfo operator*() const { return {it->first, std::next(it)->first}; }

    iterator &operator++() {
      ++it;
      ++it;
      return *this;
    }

    iterator &operator--() {
      --it;
      --it;
      return *this;
    }

    bool operator==(iterator other) const { return it == other.it; }
    bool operator!=(iterator other) const { return it != other.it; }
  };

  iterator begin() { return iterator(mAreas.begin()); }
  iterator end() { return iterator(mAreas.end()); }

  void clear() { mAreas.clear(); }

  AreaInfo queryArea(std::uint64_t address) const {
    auto it = mAreas.lower_bound(address);
    assert(it != mAreas.end());
    std::uint64_t endAddress = 0;
    if (it->first != address) {
      assert(it->second == Kind::X);
      endAddress = it->first;
      --it;
    } else {
      assert(it->second == Kind::O);
      endAddress = std::next(it)->first;
    }

    auto startAddress = std::uint64_t(it->first);

    return {startAddress, endAddress};
  }

  void map(std::uint64_t beginAddress, std::uint64_t endAddress) {
    auto [beginIt, beginInserted] = mAreas.emplace(beginAddress, Kind::O);
    auto [endIt, endInserted] = mAreas.emplace(endAddress, Kind::X);

    if (!beginInserted) {
      if (beginIt->second == Kind::X) {
        // it was close, extend to open
        assert(beginIt != mAreas.begin());
        --beginIt;
      }
    } else if (beginIt != mAreas.begin()) {
      auto prevRangePointIt = std::prev(beginIt);

      if (prevRangePointIt->second == Kind::O) {
        // we found range start before inserted one, remove insertion and extend
        // begin
        this->handleInvalidation(beginIt->first);
        mAreas.erase(beginIt);
        beginIt = prevRangePointIt;
      }
    }

    if (!endInserted) {
      if (endIt->second == Kind::O) {
        // it was open, extend to close
        assert(endIt != mAreas.end());
        ++endIt;
      }
    } else {
      auto nextRangePointIt = std::next(endIt);

      if (nextRangePointIt != mAreas.end() &&
          nextRangePointIt->second == Kind::X) {
        // we found range end after inserted one, remove insertion and extend
        // end
        this->handleInvalidation(std::prev(endIt)->first);
        mAreas.erase(endIt);
        endIt = nextRangePointIt;
      }
    }

    // eat everything in middle of the range
    ++beginIt;
    while (beginIt != endIt) {
      this->handleInvalidation(std::prev(endIt)->first);
      beginIt = mAreas.erase(beginIt);
    }
  }

  void unmap(std::uint64_t beginAddress, std::uint64_t endAddress) {
    auto beginIt = mAreas.lower_bound(beginAddress);

    if (beginIt == mAreas.end() || beginIt->first >= endAddress) {
      return;
    }
    if (beginIt->first > beginAddress && beginIt->second == Kind::X) {
      // we have found end after unmap begin, need to insert new end
      this->handleInvalidation(std::prev(beginIt)->first);
      auto newBeginIt = mAreas.emplace_hint(beginIt, beginAddress, Kind::X);
      mAreas.erase(beginIt);

      if (newBeginIt == mAreas.end()) {
        return;
      }

      beginIt = std::next(newBeginIt);
    } else if (beginIt->second == Kind::X) {
      beginIt = ++beginIt;
    }

    Kind lastKind = Kind::X;
    while (beginIt != mAreas.end() && beginIt->first <= endAddress) {
      lastKind = beginIt->second;
      if (lastKind == Kind::O) {
        this->handleInvalidation(std::prev(beginIt)->first);
      }
      beginIt = mAreas.erase(beginIt);
    }

    if (lastKind != Kind::O) {
      return;
    }

    // Last removed was range open, need to insert new one at unmap end
    mAreas.emplace_hint(beginIt, endAddress, Kind::O);
  }

  std::size_t totalMemory() const {
    std::size_t result = 0;

    for (auto it = mAreas.begin(), end = mAreas.end(); it != end; ++it) {
      auto rangeBegin = it;
      auto rangeEnd = ++it;

      result += rangeEnd->first - rangeBegin->first;
    }

    return result;
  }
};

} // namespace util
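Not part of the commit: a small usage sketch for the interval table above. Kind::O / Kind::X entries mark where a mapped area opens and closes, so overlapping or adjacent map() calls coalesce into a single area; the addresses below are illustrative.

// Illustrative only; assumes util/area.hpp from this commit is on the include path.
#include <util/area.hpp>

#include <cstdio>

int main() {
  util::MemoryAreaTable<> table;

  table.map(0x1000, 0x2000);
  table.map(0x3000, 0x4000);
  table.map(0x1800, 0x3000); // bridges the gap: all three calls coalesce into [0x1000, 0x4000)

  for (auto area : table) {
    std::printf("[%llx, %llx)\n",
                static_cast<unsigned long long>(area.beginAddress),
                static_cast<unsigned long long>(area.endAddress));
  }

  auto area = table.queryArea(0x1800); // -> {0x1000, 0x4000}
  std::printf("area of 0x1800: [%llx, %llx)\n",
              static_cast<unsigned long long>(area.beginAddress),
              static_cast<unsigned long long>(area.endAddress));

  std::printf("total mapped: %zu bytes\n", table.totalMemory()); // 0x3000
}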
@@ -4,6 +4,7 @@
#include "Stage.hpp"

#include <amdgpu/RemoteMemory.hpp>
#include <util/area.hpp>

#include <cstdint>
#include <span>

@@ -25,7 +26,7 @@ struct Shader {
};

Shader convert(RemoteMemory memory, Stage stage, std::uint64_t entry,
               std::span<const std::uint32_t> userSpgrs, int bindingOffset,
               std::uint32_t dimX = 1, std::uint32_t dimY = 1,
               std::uint32_t dimZ = 1);
               std::span<const std::uint32_t> userSpgrs, std::uint32_t dimX,
               std::uint32_t dimY, std::uint32_t dimZ,
               util::MemoryAreaTable<> &dependencies);
} // namespace amdgpu::shader
@@ -5,11 +5,11 @@
#include "Stage.hpp"
#include "TypeId.hpp"
#include "Uniform.hpp"
#include "util/area.hpp"

#include <amdgpu/RemoteMemory.hpp>
#include <forward_list>
#include <spirv/spirv-builder.hpp>
#include <unordered_map>
#include <util/unreachable.hpp>

#include <bit>

@@ -96,8 +96,11 @@ class ConverterContext {
  spirv::Function mDiscardFn;

public:
  ConverterContext(RemoteMemory memory, Stage stage)
      : mMemory(memory), mStage(stage) {
  util::MemoryAreaTable<> *dependencies = nullptr;

  ConverterContext(RemoteMemory memory, Stage stage,
                   util::MemoryAreaTable<> *dependencies)
      : mStage(stage), mMemory(memory), dependencies(dependencies) {
    mGlslStd450 = mBuilder.createExtInstImport("GLSL.std.450");
  }
@@ -1,5 +1,5 @@
#pragma once

namespace amdgpu::shader {
enum class Stage { None, Vertex, Fragment, Geometry, Compute };
enum class Stage : unsigned char { None, Vertex, Fragment, Geometry, Compute };
}
hw/amdgpu/shader/include/amdgpu/shader/UniformBindings.hpp (new file, 62 lines)
@@ -0,0 +1,62 @@
#pragma once

#include "Stage.hpp"
#include "util/unreachable.hpp"

namespace amdgpu::shader {
struct UniformBindings {
  static constexpr auto kBufferSlots = 16;
  static constexpr auto kImageSlots = 16;
  static constexpr auto kSamplerSlots = 16;

  static constexpr auto kBufferOffset = 0;
  static constexpr auto kImageOffset = kBufferOffset + kBufferSlots;
  static constexpr auto kSamplerOffset = kImageOffset + kImageSlots;

  static constexpr auto kStageSize = kSamplerOffset + kSamplerSlots;

  static constexpr auto kVertexOffset = 0;
  static constexpr auto kFragmentOffset = kStageSize;

  static unsigned getBufferBinding(Stage stage, unsigned index) {
    if (index >= kBufferSlots) {
      util::unreachable();
    }

    return index + getStageOffset(stage) + kBufferOffset;
  }

  static unsigned getImageBinding(Stage stage, unsigned index) {
    if (index >= kImageSlots) {
      util::unreachable();
    }

    return index + getStageOffset(stage) + kImageOffset;
  }

  static unsigned getSamplerBinding(Stage stage, unsigned index) {
    if (index >= kSamplerSlots) {
      util::unreachable();
    }

    return index + getStageOffset(stage) + kSamplerOffset;
  }

private:
  static unsigned getStageOffset(Stage stage) {
    switch (stage) {
    case Stage::Fragment:
      return kFragmentOffset;

    case Stage::Vertex:
      return kVertexOffset;

    case Stage::Compute:
      return kVertexOffset;

    default:
      util::unreachable();
    }
  }
};
} // namespace amdgpu::shader
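Not part of the commit: a quick illustration of the binding layout this header produces, assuming it is included as "UniformBindings.hpp" from within the shader library. Each stage gets a block of 48 bindings (16 buffers, then 16 images, then 16 samplers); vertex and compute share the first block and fragment starts at kStageSize = 48.

// Illustrative check of the binding layout; not part of the commit.
#include "UniformBindings.hpp"

#include <cstdio>

int main() {
  using amdgpu::shader::Stage;
  using amdgpu::shader::UniformBindings;

  // Vertex/compute block: buffers 0-15, images 16-31, samplers 32-47.
  std::printf("%u\n", UniformBindings::getBufferBinding(Stage::Vertex, 0));   // 0
  std::printf("%u\n", UniformBindings::getImageBinding(Stage::Vertex, 3));    // 19
  std::printf("%u\n", UniformBindings::getSamplerBinding(Stage::Vertex, 0));  // 32

  // Fragment block starts at kStageSize = 48.
  std::printf("%u\n", UniformBindings::getBufferBinding(Stage::Fragment, 2)); // 50
}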
@@ -12,7 +12,7 @@ struct CfgBuilder {
  RemoteMemory memory;

  std::size_t analyzeBb(cf::BasicBlock *bb, std::uint64_t *successors,
                        std::size_t *successorsCount, auto pushWork) {
                        std::size_t *successorsCount) {
    auto address = bb->getAddress();
    auto instBegin = memory.getPointer<std::uint32_t>(address);
    auto instHex = instBegin;

@@ -130,18 +130,10 @@ struct CfgBuilder {
    std::uint64_t successors[2];
    std::size_t successorsCount = 0;
    std::size_t size = analyzeBb(bb, successors, &successorsCount,
                                 [&](std::uint64_t address) {
                                   if (processed.insert(address).second) {
                                     workList.push_back(address);
                                   }
                                 });
    std::size_t size = analyzeBb(bb, successors, &successorsCount);
    bb->setSize(size);

    if (successorsCount == 2) {
      auto succ0Address = successors[0];
      auto succ1Address = successors[1];

      branches.push_back(
          {address + size - 4, 2, {successors[0], successors[1]}});
@@ -2,21 +2,16 @@
#include "CfBuilder.hpp"
#include "ConverterContext.hpp"
#include "Fragment.hpp"
#include "FragmentTerminator.hpp"
#include "Instruction.hpp"
#include "RegisterId.hpp"
#include "RegisterState.hpp"
#include "UniformBindings.hpp"
#include "amdgpu/RemoteMemory.hpp"
#include "cf.hpp"
#include "scf.hpp"
#include "util/unreachable.hpp"
#include <compare>
#include <cstddef>
#include <forward_list>
#include <memory>
#include <spirv/spirv.hpp>
#include <unordered_map>
#include <utility>
#include <vector>

static void printInstructions(const scf::PrintOptions &options, unsigned depth,

@@ -365,9 +360,10 @@ private:
amdgpu::shader::Shader
amdgpu::shader::convert(RemoteMemory memory, Stage stage, std::uint64_t entry,
                        std::span<const std::uint32_t> userSpgrs,
                        int bindingOffset, std::uint32_t dimX,
                        std::uint32_t dimY, std::uint32_t dimZ) {
  ConverterContext ctxt(memory, stage);
                        std::uint32_t dimX, std::uint32_t dimY,
                        std::uint32_t dimZ,
                        util::MemoryAreaTable<> &dependencies) {
  ConverterContext ctxt(memory, stage, &dependencies);
  auto &builder = ctxt.getBuilder();
  builder.createCapability(spv::Capability::Shader);
  builder.createCapability(spv::Capability::ImageQuery);

@@ -412,9 +408,12 @@ amdgpu::shader::convert(RemoteMemory memory, Stage stage, std::uint64_t entry,
  std::fflush(stdout);
  mainFunction->exitFragment.outputs.clear();

  std::size_t samplerCount = 0;
  std::size_t imageCount = 0;
  std::size_t bufferCount = 0;

  for (auto &uniform : ctxt.getUniforms()) {
    auto &newUniform = result.uniforms.emplace_back();
    newUniform.binding = bindingOffset++;

    for (int i = 0; i < 8; ++i) {
      newUniform.buffer[i] = uniform.buffer[i];

@@ -422,23 +421,29 @@ amdgpu::shader::convert(RemoteMemory memory, Stage stage, std::uint64_t entry,
    std::uint32_t descriptorSet = 0;

    switch (uniform.typeId) {
    case TypeId::Sampler:
      newUniform.kind = Shader::UniformKind::Sampler;
      newUniform.binding =
          UniformBindings::getSamplerBinding(stage, samplerCount++);
      break;
    case TypeId::Image2D:
      newUniform.kind = Shader::UniformKind::Image;
      newUniform.binding =
          UniformBindings::getImageBinding(stage, imageCount++);
      break;
    default:
      newUniform.kind = Shader::UniformKind::Buffer;
      newUniform.binding =
          UniformBindings::getBufferBinding(stage, bufferCount++);
      break;
    }

    ctxt.getBuilder().createDecorate(
        uniform.variable, spv::Decoration::DescriptorSet, {{descriptorSet}});
    ctxt.getBuilder().createDecorate(uniform.variable, spv::Decoration::Binding,
                                     {{newUniform.binding}});

    switch (uniform.typeId) {
    case TypeId::Sampler:
      newUniform.kind = Shader::UniformKind::Sampler;
      break;
    case TypeId::Image2D:
      newUniform.kind = Shader::UniformKind::Image;
      break;
    default:
      newUniform.kind = Shader::UniformKind::Buffer;
      break;
    }

    newUniform.accessOp = uniform.accessOp;
  }
@@ -1568,6 +1568,10 @@ void convertSmrd(Fragment &fragment, Smrd inst) {
    auto address =
        *optLoAddress | (static_cast<std::uint64_t>(*optHiAddress) << 32);

    fragment.context->dependencies->map(address + (inst.offset << 2),
                                        address + (inst.offset << 2) +
                                            sizeof(std::uint32_t) * count);

    auto data =
        memory.getPointer<std::uint32_t>(address + (inst.offset << 2));
    for (std::uint32_t i = 0; i < count; ++i) {

@@ -5574,6 +5578,8 @@ void amdgpu::shader::Fragment::convert(std::uint64_t size) {
  auto ptr = context->getMemory().getPointer<std::uint32_t>(registers->pc);
  auto endptr = ptr + size / sizeof(std::uint32_t);

  context->dependencies->map(registers->pc, registers->pc + size);

  while (ptr < endptr) {
    Instruction inst(ptr);
    // auto startPoint = builder.bodyRegion.getCurrentPosition();

@@ -5615,6 +5621,8 @@ Value amdgpu::shader::Fragment::getRegister(RegisterId id) {
  case 247:
    return {context->getFloat32Type(), context->getFloat32(-4.0f)};
  case 255: {
    context->dependencies->map(registers->pc,
                               registers->pc + sizeof(std::uint32_t));
    auto ptr = context->getMemory().getPointer<std::uint32_t>(registers->pc);
    registers->pc += sizeof(std::uint32_t);
    return {context->getUInt32Type(), context->getUInt32(*ptr)};
@@ -1,3 +1,4 @@
#include "amdgpu/RemoteMemory.hpp"
#include <algorithm>
#include <amdgpu/bridge/bridge.hpp>
#include <amdgpu/device/device.hpp>

@@ -33,6 +34,7 @@ static void usage(std::FILE *out, const char *argv0) {
      " --gpu <index> - specify physical gpu index to use, default is 0\n");
  std::fprintf(out,
               " --presenter <presenter mode> - set flip engine target\n");
  std::fprintf(out, " --no-validation - disable validation layers\n");
  std::fprintf(out, " -h, --help - show this message\n");
  std::fprintf(out, "\n");
  std::fprintf(out, " presenter mode:\n");

@@ -52,6 +54,7 @@ int main(int argc, const char *argv[]) {
  const char *shmName = "/rpcsx-os-memory";
  unsigned long gpuIndex = 0;
  auto presenter = PresenterMode::Window;
  bool noValidation = false;

  for (int i = 1; i < argc; ++i) {
    if (argv[i] == std::string_view("--cmd-bridge")) {

@@ -106,6 +109,11 @@ int main(int argc, const char *argv[]) {
      continue;
    }

    if (argv[i] == std::string_view("--no-validation")) {
      noValidation = true;
      continue;
    }

    usage(stderr, argv[0]);
    return 1;
  }

@@ -122,7 +130,7 @@ int main(int argc, const char *argv[]) {
  auto requiredInstanceExtensions = std::vector<const char *>(
      glfwExtensions, glfwExtensions + glfwExtensionCount);

  bool enableValidation = true;
  bool enableValidation = !noValidation;

  if (enableValidation) {
    requiredInstanceExtensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);

@@ -248,6 +256,7 @@ int main(int argc, const char *argv[]) {
      // VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
      VK_EXT_SEPARATE_STENCIL_USAGE_EXTENSION_NAME,
      VK_KHR_SWAPCHAIN_EXTENSION_NAME,
      VK_EXT_SHADER_OBJECT_EXTENSION_NAME,
  };

  if (isDeviceExtensionSupported(VK_EXT_DEBUG_MARKER_EXTENSION_NAME)) {

@@ -404,9 +413,16 @@ int main(int argc, const char *argv[]) {
    }
  }

  VkPhysicalDeviceShaderObjectFeaturesEXT shaderObjectFeatures{
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT,
      .shaderObject = VK_TRUE};

  VkPhysicalDeviceVulkan13Features phyDevFeatures13{
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
      .maintenance4 = VK_TRUE};
      .pNext = &shaderObjectFeatures,
      .dynamicRendering = VK_TRUE,
      .maintenance4 = VK_TRUE,
  };

  VkPhysicalDeviceVulkan12Features phyDevFeatures12{
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,