Mirror of https://github.com/RPCSX/rpcsx.git (synced 2026-01-20 07:30:24 +01:00)
gpu2: move shader resource management to cache
- fixed descriptor set binding
- fixed 5_6_5 format swizzling
- fixed rect calculation
- fixed a possible crash in the scheduler
- implemented a lock-free bit pool utility
Commit: 0877d3f1cd (parent: 4e83c9e121)
One file's diff is suppressed because it is too large.
@@ -5,12 +5,17 @@
 #include "gnm/constants.hpp"
 #include "rx/die.hpp"
 #include "shader/Access.hpp"
 #include "shader/Evaluator.hpp"
 #include "shader/GcnConverter.hpp"
 #include <algorithm>
 #include <memory>
 #include <mutex>
 #include <ostream>
 #include <print>
+#include <rx/ConcurrentBitPool.hpp>
 #include <rx/MemoryTable.hpp>
 #include <shader/gcn.hpp>
 #include <utility>
 #include <vulkan/vulkan_core.h>

 namespace amdgpu {
@@ -22,11 +27,7 @@ struct ShaderKey {
   shader::gcn::Environment env;
 };

-enum class ImageKind {
-  Color,
-  Depth,
-  Stencil
-};
+enum class ImageKind { Color, Depth, Stencil };

 struct ImageKey {
   std::uint64_t readAddress;
@@ -48,15 +49,6 @@ struct ImageKey {
   static ImageKey createFrom(const gnm::TBuffer &tbuffer);
 };

-struct ImageViewKey : ImageKey {
-  gnm::Swizzle R = gnm::Swizzle::R;
-  gnm::Swizzle G = gnm::Swizzle::G;
-  gnm::Swizzle B = gnm::Swizzle::B;
-  gnm::Swizzle A = gnm::Swizzle::A;
-
-  static ImageViewKey createFrom(const gnm::TBuffer &tbuffer);
-};
-
 struct SamplerKey {
   VkFilter magFilter;
   VkFilter minFilter;
@@ -98,6 +90,10 @@ struct Cache {
   };

   static constexpr int getStageIndex(VkShaderStageFlagBits stage) {
+    if (stage == VK_SHADER_STAGE_COMPUTE_BIT) {
+      return 0;
+    }
+
     auto it = std::find(kGraphicsStages.begin(), kGraphicsStages.end(), stage);

     if (it == kGraphicsStages.end()) {
@@ -107,9 +103,10 @@ struct Cache {
     return it - kGraphicsStages.begin();
   }

-  static constexpr int getDescriptorBinding(VkDescriptorType type, int dim = 0) {
-    auto it =
-        std::find(kDescriptorBindings.begin(), kDescriptorBindings.end(), type + dim * 1000);
+  static constexpr int getDescriptorBinding(VkDescriptorType type,
+                                            int dim = 0) {
+    auto it = std::find(kDescriptorBindings.begin(), kDescriptorBindings.end(),
+                        type + dim * 1000);

     if (it == kDescriptorBindings.end()) {
       return -1;
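The type + dim * 1000 expression folds the image dimensionality into the lookup key, so one flat table can hold both dimension-less descriptors (buffers, samplers) and per-dimension sampled images. A minimal sketch of the idea; the kDescriptorBindingsExample table below is hypothetical (the real kDescriptorBindings is defined elsewhere in the header and is not part of this diff):

#include <algorithm>
#include <array>
#include <vulkan/vulkan_core.h>

// Hypothetical binding table using the same type + dim * 1000 key encoding
// as Cache::getDescriptorBinding; the real table may order entries differently.
static constexpr std::array kDescriptorBindingsExample = {
    static_cast<int>(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER),           // binding 0
    static_cast<int>(VK_DESCRIPTOR_TYPE_SAMPLER),                  // binding 1
    static_cast<int>(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE) + 1 * 1000, // binding 2: 1D
    static_cast<int>(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE) + 2 * 1000, // binding 3: 2D
    static_cast<int>(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE) + 3 * 1000, // binding 4: 3D
};

static constexpr int getBindingExample(VkDescriptorType type, int dim = 0) {
  auto it = std::find(kDescriptorBindingsExample.begin(),
                      kDescriptorBindingsExample.end(), type + dim * 1000);
  return it == kDescriptorBindingsExample.end()
             ? -1
             : static_cast<int>(it - kDescriptorBindingsExample.begin());
}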
@@ -124,17 +121,17 @@ struct Cache {
   int vmId = -1;

   struct Shader {
-    VkShaderEXT handle;
+    VkShaderEXT handle = VK_NULL_HANDLE;
     shader::gcn::ShaderInfo *info;
     VkShaderStageFlagBits stage;
   };

   struct Sampler {
-    VkSampler handle;
+    VkSampler handle = VK_NULL_HANDLE;
   };

   struct Buffer {
-    VkBuffer handle;
+    VkBuffer handle = VK_NULL_HANDLE;
     std::uint64_t offset;
     std::uint64_t deviceAddress;
     TagId tagId;
@@ -142,7 +139,7 @@ struct Cache {
   };

   struct IndexBuffer {
-    VkBuffer handle;
+    VkBuffer handle = VK_NULL_HANDLE;
     std::uint64_t offset;
     std::uint32_t indexCount;
     gnm::PrimitiveType primType;
@@ -150,73 +147,150 @@ struct Cache {
   };

   struct Image {
-    VkImage handle;
+    VkImage handle = VK_NULL_HANDLE;
     VkImageSubresourceRange subresource;
   };

   struct ImageView {
-    VkImageView handle;
+    VkImageView handle = VK_NULL_HANDLE;
     VkImage imageHandle;
     VkImageSubresourceRange subresource;
   };

-  class Tag {
-    Cache *mParent = nullptr;
-    Scheduler *mScheduler = nullptr;
-    TagId mTagId{};
+  class Tag;
+
+private:
+  struct MemoryTableSlot {
+    std::uint64_t address;
+    union {
+      struct {
+        std::uint64_t size : 40;
+        std::uint64_t flags : 4;
+      };
+      std::uint64_t sizeAndFlags;
+    };
+    std::uint64_t deviceAddress;
+  };
+
+  struct MemoryTable {
+    std::uint32_t count;
+    std::uint32_t pad;
+    MemoryTableSlot slots[];
+  };
+
+  struct ShaderResources : shader::eval::Evaluator {
+    std::map<std::uint32_t, std::uint32_t> slotResources;
+    std::span<const std::uint32_t> userSgprs;
+    Tag *cacheTag = nullptr;
+
+    std::uint32_t slotOffset = 0;
+    rx::MemoryTableWithPayload<Access> bufferMemoryTable;
+    std::vector<std::pair<std::uint32_t, std::uint64_t>> resourceSlotToAddress;
+    std::vector<Cache::Sampler> samplerResources;
+    std::vector<Cache::ImageView> imageResources[3];
+
+    using Evaluator::eval;
+
+    void clear() {
+      slotResources.clear();
+      userSgprs = {};
+      cacheTag = nullptr;
+      slotOffset = 0;
+      bufferMemoryTable.clear();
+      resourceSlotToAddress.clear();
+      samplerResources.clear();
+      for (auto &res : imageResources) {
+        res.clear();
+      }
+
+      Evaluator::invalidate();
+    }
+
+    void loadResources(shader::gcn::Resources &res,
+                       std::span<const std::uint32_t> userSgprs);
+    void buildMemoryTable(MemoryTable &memoryTable);
+    std::uint32_t getResourceSlot(std::uint32_t id);
+
+    template <typename T> T readPointer(std::uint64_t address) {
+      T result{};
+      cacheTag->readMemory(&result, address, sizeof(result));
+      return result;
+    }
+
+    shader::eval::Value
+    eval(shader::ir::InstructionId instId,
+         std::span<const shader::ir::Operand> operands) override;
+  };
+
+  struct TagStorage {
+    struct MemoryTableConfigSlot {
+      std::uint32_t bufferIndex;
+      std::uint32_t configIndex;
+      std::uint32_t resourceSlot;
+    };
+
     std::vector<std::shared_ptr<Entry>> mAcquiredResources;
-    std::vector<std::array<VkDescriptorSet, kGraphicsStages.size()>>
-        mGraphicsDescriptorSets;
-    std::vector<VkDescriptorSet> mComputeDescriptorSets;
+    std::vector<MemoryTableConfigSlot> memoryTableConfigSlots;
+    std::vector<std::uint32_t *> descriptorBuffers;
+    ShaderResources shaderResources;

-  public:
-    Tag() = default;
-    Tag(Cache *parent, Scheduler &scheduler, TagId id)
-        : mParent(parent), mScheduler(&scheduler), mTagId(id) {}
+    TagStorage() = default;
+    TagStorage(const TagStorage &) = delete;
+
+    void clear() {
+      mAcquiredResources.clear();
+      memoryTableConfigSlots.clear();
+      descriptorBuffers.clear();
+      shaderResources.clear();
+    }
+  };
+
+  struct TagData {
+    TagStorage *mStorage = nullptr;
+    Scheduler *mScheduler = nullptr;
+    Cache *mParent = nullptr;
+    TagId mTagId{};
+    std::uint32_t mAcquiredMemoryTable = -1;
+  };
+
+public:
+  struct Tag : protected TagData {
     Tag(const Tag &) = delete;
-    Tag(Tag &&other) { other.swap(*this); }
-    Tag &operator=(Tag &&other) {
-      other.swap(*this);
+    Tag() noexcept = default;
+    Tag(Tag &&other) noexcept { swap(other); }
+    Tag &operator=(Tag &&other) noexcept {
+      swap(other);
       return *this;
     }
     ~Tag() { release(); }

-    void submitAndWait() {
-      mScheduler->submit();
-      mScheduler->wait();
+    void swap(Tag &other) {
+      std::swap(static_cast<TagData &>(*this), static_cast<TagData &>(other));
     }

     Shader getShader(const ShaderKey &key,
                      const ShaderKey *dependedKey = nullptr);

     TagId getReadId() const { return TagId{std::uint64_t(mTagId) - 1}; }
     TagId getWriteId() const { return mTagId; }

-    void swap(Tag &other) {
-      std::swap(mParent, other.mParent);
-      std::swap(mScheduler, other.mScheduler);
-      std::swap(mTagId, other.mTagId);
-      std::swap(mAcquiredResources, other.mAcquiredResources);
-      std::swap(mGraphicsDescriptorSets, other.mGraphicsDescriptorSets);
-      std::swap(mComputeDescriptorSets, other.mComputeDescriptorSets);
-    }
-
     Cache *getCache() const { return mParent; }
     Device *getDevice() const { return mParent->mDevice; }
     Scheduler &getScheduler() const { return *mScheduler; }
     int getVmId() const { return mParent->mVmIm; }

     Buffer getInternalHostVisibleBuffer(std::uint64_t size);
     Buffer getInternalDeviceLocalBuffer(std::uint64_t size);

+    void buildDescriptors(VkDescriptorSet descriptorSet);
+
     Sampler getSampler(const SamplerKey &key);
     Buffer getBuffer(std::uint64_t address, std::uint64_t size, Access access);
     Buffer getInternalBuffer(std::uint64_t size);
     IndexBuffer getIndexBuffer(std::uint64_t address, std::uint32_t indexCount,
                                gnm::PrimitiveType primType,
                                gnm::IndexType indexType);
     Image getImage(const ImageKey &key, Access access);
-    ImageView getImageView(const ImageViewKey &key, Access access);
+    ImageView getImageView(const ImageKey &key, Access access);
     void readMemory(void *target, std::uint64_t address, std::uint64_t size);
     void writeMemory(const void *source, std::uint64_t address,
                      std::uint64_t size);
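MemoryTableSlot packs a 40-bit size and 4-bit access flags into a single 64-bit word next to the guest address and device address, which is the layout the GPU-side memory table consumes. A standalone sketch of the same packing (it mirrors the struct above; the union read of the raw word matches how the codebase treats sizeAndFlags):

#include <cstdint>
#include <cstdio>

// Same bitfield layout as Cache::MemoryTableSlot: low 40 bits carry the
// mapping size, the next 4 bits carry the Access flags.
union SizeAndFlags {
  struct {
    std::uint64_t size : 40;
    std::uint64_t flags : 4;
  };
  std::uint64_t raw;
};

int main() {
  SizeAndFlags s{};
  s.size = 0x12345678; // anything below 2^40 fits
  s.flags = 0b0011;    // e.g. read | write (illustrative values)
  std::printf("raw = %#llx\n", static_cast<unsigned long long>(s.raw));
}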
@@ -232,28 +306,138 @@ struct Cache {
       return getCache()->getComputePipelineLayout();
     }

-    std::array<VkDescriptorSet, kGraphicsStages.size()>
-    createGraphicsDescriptorSets() {
-      auto result = getCache()->createGraphicsDescriptorSets();
-      mGraphicsDescriptorSets.push_back(result);
-      return result;
-    }
+    Buffer getMemoryTable() {
+      if (mAcquiredMemoryTable + 1 == 0) {
+        mAcquiredMemoryTable = mParent->mMemoryTablePool.acquire();
+      }
+
+      auto &buffer = mParent->mMemoryTableBuffer;
+      auto offset = mAcquiredMemoryTable * kMemoryTableSize;
+
+      Buffer result{
+          .offset = offset,
+          .deviceAddress = buffer.getAddress() + offset,
+          .tagId = getReadId(),
+          .data = buffer.getData() + offset,
+      };
+
+      return result;
+    }

-    VkDescriptorSet createComputeDescriptorSet() {
-      auto result = getCache()->createComputeDescriptorSet();
-      mComputeDescriptorSets.push_back(result);
-      return result;
-    }
-
     std::shared_ptr<Entry> findShader(const ShaderKey &key,
                                       const ShaderKey *dependedKey = nullptr);
+
+    friend Cache;
   };

+  struct GraphicsTag : Tag {
+    GraphicsTag() = default;
+    GraphicsTag(GraphicsTag &&other) noexcept { swap(other); }
+    GraphicsTag &operator=(GraphicsTag &&other) noexcept {
+      swap(other);
+      return *this;
+    }
+    ~GraphicsTag() { release(); }
+
+    std::array<VkDescriptorSet, kGraphicsStages.size()> getDescriptorSets() {
+      if (mAcquiredGraphicsDescriptorSet + 1 == 0) {
+        mAcquiredGraphicsDescriptorSet =
+            mParent->mGraphicsDescriptorSetPool.acquire();
+      }
+
+      return mParent->mGraphicsDescriptorSets[mAcquiredGraphicsDescriptorSet];
+    }
+
+    Shader getShader(shader::gcn::Stage stage, const SpiShaderPgm &pgm,
+                     const Registers::Context &context,
+                     gnm::PrimitiveType vsPrimType,
+                     std::span<const VkViewport> viewPorts,
+                     std::span<const shader::gcn::PsVGprInput> psVgprInput);
+
+    Shader getPixelShader(const SpiShaderPgm &pgm,
+                          const Registers::Context &context,
+                          std::span<const VkViewport> viewPorts);
+
+    Shader getVertexShader(shader::gcn::Stage stage, const SpiShaderPgm &pgm,
+                           const Registers::Context &context,
+                           gnm::PrimitiveType vsPrimType,
+                           std::span<const VkViewport> viewPorts);
+    void release();
+
+    void swap(GraphicsTag &other) {
+      Tag::swap(other);
+      std::swap(mAcquiredGraphicsDescriptorSet,
+                other.mAcquiredGraphicsDescriptorSet);
+    }
+
+  private:
+    std::uint32_t mAcquiredGraphicsDescriptorSet = -1;
+  };
+
+  struct ComputeTag : Tag {
+    ComputeTag() = default;
+    ComputeTag(ComputeTag &&other) noexcept { swap(other); }
+    ComputeTag &operator=(ComputeTag &&other) noexcept {
+      swap(other);
+      return *this;
+    }
+    ~ComputeTag() { release(); }
+
+    Shader getShader(const Registers::ComputeConfig &pgm);
+
+    VkDescriptorSet getDescriptorSet() {
+      if (mAcquiredComputeDescriptorSet + 1 == 0) {
+        mAcquiredComputeDescriptorSet =
+            mParent->mComputeDescriptorSetPool.acquire();
+      }
+
+      return mParent->mComputeDescriptorSets[mAcquiredComputeDescriptorSet];
+    }
+
+    void release();
+
+    void swap(ComputeTag &other) {
+      Tag::swap(other);
+      std::swap(mAcquiredComputeDescriptorSet,
+                other.mAcquiredComputeDescriptorSet);
+    }
+
+  private:
+    std::uint32_t mAcquiredComputeDescriptorSet = -1;
+  };
+
+private:
+  template <typename T> T createTagImpl(Scheduler &scheduler) {
+    T result;
+
+    auto id = mNextTagId.load(std::memory_order::acquire);
+    while (!mNextTagId.compare_exchange_weak(
+        id, TagId{static_cast<std::uint64_t>(id) + 2},
+        std::memory_order::release, std::memory_order::relaxed)) {
+    }
+
+    auto storageIndex = mTagStoragePool.acquire();
+
+    // std::println("acquire tag storage {}", storageIndex);
+    result.mStorage = mTagStorages + storageIndex;
+    result.mTagId = id;
+    result.mParent = this;
+    result.mScheduler = &scheduler;
+
+    return result;
+  }
+
+public:
   Cache(Device *device, int vmId);
   ~Cache();
-  Tag createTag(Scheduler &scheduler);
-
-  vk::Buffer &getMemoryTableBuffer() { return mMemoryTableBuffer; }
+  Tag createTag(Scheduler &scheduler) { return createTagImpl<Tag>(scheduler); }
+  GraphicsTag createGraphicsTag(Scheduler &scheduler) {
+    return createTagImpl<GraphicsTag>(scheduler);
+  }
+  ComputeTag createComputeTag(Scheduler &scheduler) {
+    return createTagImpl<ComputeTag>(scheduler);
+  }

   vk::Buffer &getGdsBuffer() { return mGdsBuffer; }

   void addFrameBuffer(Scheduler &scheduler, int index, std::uint64_t address,
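createTagImpl advances the shared tag counter by two, so each tag owns an adjacent id pair: getWriteId() returns the even id and getReadId() the odd id just below it. The compare-exchange loop is equivalent to an atomic fetch-and-add of 2, spelled out with a CAS because TagId is an enum type. The same idiom on a plain integer, as a minimal sketch:

#include <atomic>
#include <cstdint>

// Equivalent of the createTagImpl id allocation on a raw integer: every
// caller gets a unique even id; id - 1 serves as the paired read id.
std::uint64_t allocateTagId(std::atomic<std::uint64_t> &nextTagId) {
  auto id = nextTagId.load(std::memory_order::acquire);
  while (!nextTagId.compare_exchange_weak(id, id + 2,
                                          std::memory_order::release,
                                          std::memory_order::relaxed)) {
    // compare_exchange_weak reloads `id` on failure, so each retry works
    // with the freshly observed counter value.
  }
  return id; // write id; the matching read id is id - 1
}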
@@ -273,21 +457,6 @@ struct Cache {
     flush(scheduler, 0, ~static_cast<std::uint64_t>(0));
   }

-  const std::array<VkDescriptorSetLayout, kGraphicsStages.size()> &
-  getGraphicsDescriptorSetLayouts() const {
-    return mGraphicsDescriptorSetLayouts;
-  }
-
-  VkDescriptorSetLayout
-  getGraphicsDescriptorSetLayout(VkShaderStageFlagBits stage) const {
-    int index = getStageIndex(stage);
-    rx::dieIf(index < 0, "getGraphicsDescriptorSetLayout: unexpected stage");
-    return mGraphicsDescriptorSetLayouts[index];
-  }
-
-  VkDescriptorSetLayout getComputeDescriptorSetLayout() const {
-    return mComputeDescriptorSetLayout;
-  }
   VkPipelineLayout getGraphicsPipelineLayout() const {
     return mGraphicsPipelineLayout;
   }
@@ -296,19 +465,8 @@ struct Cache {
     return mComputePipelineLayout;
   }

-  std::array<VkDescriptorSet, kGraphicsStages.size()>
-  createGraphicsDescriptorSets();
-  VkDescriptorSet createComputeDescriptorSet();
-
-  void destroyGraphicsDescriptorSets(
-      const std::array<VkDescriptorSet, kGraphicsStages.size()> &set) {
-    std::lock_guard lock(mDescriptorMtx);
-    mGraphicsDescriptorSets.push_back(set);
-  }
-
-  void destroyComputeDescriptorSet(VkDescriptorSet set) {
-    std::lock_guard lock(mDescriptorMtx);
-    mComputeDescriptorSets.push_back(set);
-  }
+  auto &getGraphicsDescriptorSetLayouts() const {
+    return mGraphicsDescriptorSetLayouts;
+  }

 private:
@@ -316,21 +474,31 @@ private:

   Device *mDevice;
   int mVmIm;
-  TagId mNextTagId{2};
-  vk::Buffer mMemoryTableBuffer;
+  std::atomic<TagId> mNextTagId{TagId{2}};
   vk::Buffer mGdsBuffer;

-  std::mutex mDescriptorMtx;
+  static constexpr auto kMemoryTableSize = 0x10000;
+  static constexpr auto kMemoryTableCount = 64;
+  static constexpr auto kDescriptorSetCount = 128;
+  static constexpr auto kTagStorageCount = 128;
+
+  rx::ConcurrentBitPool<kMemoryTableCount> mMemoryTablePool;
+  vk::Buffer mMemoryTableBuffer;
+
   std::array<VkDescriptorSetLayout, kGraphicsStages.size()>
       mGraphicsDescriptorSetLayouts{};
   VkDescriptorSetLayout mComputeDescriptorSetLayout{};
   VkPipelineLayout mGraphicsPipelineLayout{};
   VkPipelineLayout mComputePipelineLayout{};
-  VkDescriptorPool mGraphicsDescriptorPool{};
-  VkDescriptorPool mComputeDescriptorPool{};
-  std::vector<std::array<VkDescriptorSet, kGraphicsStages.size()>>
-      mGraphicsDescriptorSets;
-  std::vector<VkDescriptorSet> mComputeDescriptorSets;
+  VkDescriptorPool mDescriptorPool{};
+
+  rx::ConcurrentBitPool<kDescriptorSetCount> mGraphicsDescriptorSetPool;
+  rx::ConcurrentBitPool<kDescriptorSetCount> mComputeDescriptorSetPool;
+  rx::ConcurrentBitPool<kTagStorageCount> mTagStoragePool;
+  std::array<VkDescriptorSet, kGraphicsStages.size()>
+      mGraphicsDescriptorSets[kDescriptorSetCount];
+  VkDescriptorSet mComputeDescriptorSets[kDescriptorSetCount];
+  TagStorage mTagStorages[kTagStorageCount];
   std::map<SamplerKey, VkSampler> mSamplers;

   std::shared_ptr<Entry> mFrameBuffers[10];
@@ -1,9 +1,9 @@
 #pragma once
 #include "Cache.hpp"
+#include "FlipPipeline.hpp"
 #include "Pipe.hpp"
 #include "amdgpu/bridge/bridge.hpp"
 #include "amdgpu/tiler_vulkan.hpp"
-#include "FlipPipeline.hpp"
 #include "rx/MemoryTable.hpp"
 #include "shader/SemanticInfo.hpp"
 #include "shader/SpvConverter.hpp"
@@ -70,6 +70,14 @@ struct Device {
     return caches[vmId].createTag(scheduler);
   }

+  Cache::GraphicsTag getGraphicsTag(int vmId, Scheduler &scheduler) {
+    return caches[vmId].createGraphicsTag(scheduler);
+  }
+
+  Cache::ComputeTag getComputeTag(int vmId, Scheduler &scheduler) {
+    return caches[vmId].createComputeTag(scheduler);
+  }
+
   void mapProcess(std::int64_t pid, int vmId, const char *shmName);
   void unmapProcess(std::int64_t pid);
   void protectMemory(int pid, std::uint64_t address, std::uint64_t size,
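A hedged sketch of the intended call pattern (it mirrors the draw path in Renderer.cpp further down): acquire a typed tag per submission, lease its pooled descriptor sets, and move the tag into an afterSubmit callback so the pooled slots are returned only once the submit has completed. Project types (amdgpu::Device, Scheduler) are assumed to come from the headers above.

// Sketch only; error handling and command recording elided.
void recordDraw(amdgpu::Device *device, int vmId, Scheduler &scheduler) {
  auto tag = device->getGraphicsTag(vmId, scheduler);
  auto descriptorSets = tag.getDescriptorSets(); // leased from the bit pool
  // ... tag.getShader(...), tag.getBuffer(...), vkCmd* recording ...
  scheduler.afterSubmit([tag = std::move(tag)] {}); // release after submit
}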
@@ -242,37 +242,10 @@ FlipPipeline::FlipPipeline() {
 void FlipPipeline::bind(Scheduler &sched, FlipType type, VkImageView imageView,
                         VkSampler sampler) {
   auto cmdBuffer = sched.getCommandBuffer();
+  auto descriptorIndex = descriptorSetPool.acquire();

-  auto allocateDescriptorSetIndex = [this] {
-    auto mask = freeDescriptorSets.load(std::memory_order::acquire);
-
-    while (true) {
-      auto index = std::countr_one(mask);
-      if (index >= std::size(descriptorSets)) {
-        mask = freeDescriptorSets.load(std::memory_order::relaxed);
-        continue;
-      }
-
-      if (!freeDescriptorSets.compare_exchange_weak(
-              mask, mask | (1 << index), std::memory_order::release,
-              std::memory_order::relaxed)) {
-        continue;
-      }
-
-      return index;
-    }
-  };
-
-  auto descriptorIndex = allocateDescriptorSetIndex();
-
-  sched.afterSubmit([this, descriptorIndex] {
-    decltype(freeDescriptorSets)::value_type mask = 1 << descriptorIndex;
-
-    while (!freeDescriptorSets.compare_exchange_weak(
-        mask, mask & ~(1 << descriptorIndex), std::memory_order::release,
-        std::memory_order::acquire)) {
-    }
-  });
+  sched.afterSubmit(
+      [this, descriptorIndex] { descriptorSetPool.release(descriptorIndex); });

   auto descriptorSet = descriptorSets[descriptorIndex];
   VkDescriptorImageInfo imageInfo = {
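The hand-rolled atomic mask is replaced by the new rx::ConcurrentBitPool, and the release is deferred until after the submit. A minimal sketch of the acquire/afterSubmit-release pairing, assuming a pool member and a Scheduler with an afterSubmit(callback) hook as in this codebase:

// Sketch only: a 16-slot pool guarding a matching descriptor-set array.
rx::ConcurrentBitPool<16, std::uint8_t> pool;

void bindOnce(Scheduler &sched, VkDescriptorSet (&sets)[16]) {
  auto index = pool.acquire();       // lease the lowest free slot
  VkDescriptorSet set = sets[index]; // safe to update and bind this set now
  // ... vkUpdateDescriptorSets / vkCmdBindDescriptorSets with `set` ...
  sched.afterSubmit([index] { pool.release(index); }); // return the slot later
}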
@@ -1,8 +1,7 @@
 #pragma once

 #include "Scheduler.hpp"
-#include <atomic>
-#include <cstdint>
+#include <rx/ConcurrentBitPool.hpp>
 #include <vulkan/vulkan.h>

 enum class FlipType {
@@ -11,6 +10,7 @@ enum class FlipType {
 };

 struct FlipPipeline {
+  static constexpr auto kDescriptorSetCount = 16;
   VkShaderModule flipVertShaderModule{};
   VkShaderModule flipFragStdShaderModule{};
   VkShaderModule flipFragAltShaderModule{};
@@ -18,8 +18,8 @@ struct FlipPipeline {
   VkDescriptorSetLayout descriptorSetLayout{};
   VkPipeline pipelines[2]{};
   VkDescriptorPool descriptorPool{};
-  VkDescriptorSet descriptorSets[8]{};
-  std::atomic<std::uint8_t> freeDescriptorSets{0};
+  VkDescriptorSet descriptorSets[kDescriptorSetCount]{};
+  rx::ConcurrentBitPool<kDescriptorSetCount, std::uint8_t> descriptorSetPool;

   FlipPipeline(const FlipPipeline &) = delete;
   FlipPipeline();
@@ -203,6 +203,8 @@ struct PaScRect {
   std::uint16_t top;
   std::uint16_t right;
   std::uint16_t bottom;
+
+  bool isValid() const { return left < right && top < bottom; }
 };

 struct SpiShaderPgm {
@@ -1,8 +1,6 @@
 #include "Renderer.hpp"
 #include "Device.hpp"
-#include "gnm/descriptors.hpp"
 #include "gnm/gnm.hpp"
-#include "rx/MemoryTable.hpp"

 #include <amdgpu/tiler.hpp>
 #include <gnm/constants.hpp>
@@ -14,7 +12,6 @@
 #include <shaders/fill_red.frag.h>
 #include <shaders/rect_list.geom.h>

-#include <bit>
 #include <vulkan/vulkan_core.h>

 using namespace shader;
@@ -35,39 +32,26 @@ VkRect2D toVkRect2D(amdgpu::PaScRect rect) {
   };
 }

-amdgpu::PaScRect intersection(amdgpu::PaScRect rect, amdgpu::PaScRect scissor) {
+amdgpu::PaScRect intersection(amdgpu::PaScRect lhs, amdgpu::PaScRect rhs) {
+  if (!lhs.isValid()) {
+    return rhs;
+  }
+
+  if (!rhs.isValid()) {
+    return lhs;
+  }
+
   amdgpu::PaScRect result{
-      .left = std::max(rect.left, scissor.left),
-      .top = std::max(rect.top, scissor.top),
-      .right = std::min(rect.right, scissor.right),
-      .bottom = std::min(rect.bottom, scissor.bottom),
+      .left = std::max(lhs.left, rhs.left),
+      .top = std::max(lhs.top, rhs.top),
+      .right = std::min(lhs.right, rhs.right),
+      .bottom = std::min(lhs.bottom, rhs.bottom),
   };

-  result.top = std::min(result.top, result.bottom);
-  result.bottom = std::max(result.top, result.bottom);
-  result.left = std::min(result.left, result.right);
-  result.right = std::max(result.left, result.right);
   return result;
 }
 } // namespace gnm

-struct MemoryTableSlot {
-  std::uint64_t address;
-  union {
-    struct {
-      std::uint64_t size : 40;
-      std::uint64_t flags : 4;
-    };
-    std::uint64_t sizeAndFlags;
-  };
-  std::uint64_t deviceAddress;
-};
-struct MemoryTable {
-  std::uint32_t count;
-  std::uint32_t pad;
-  MemoryTableSlot slots[];
-};
-
 static VkShaderEXT getPrimTypeRectGeomShader(amdgpu::Cache &cache) {
   static VkShaderEXT shader = VK_NULL_HANDLE;
   if (shader != VK_NULL_HANDLE) {
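This is the "fix rect calculation" item from the commit message: a zero-initialized scissor register used to clamp everything to an empty rect, whereas now an invalid rect is simply ignored. A worked example under that reading:

// With the fixed intersection, a zero scissor (left == right == 0) fails
// isValid() and no longer collapses the other operand.
amdgpu::PaScRect screen{.left = 0, .top = 0, .right = 1280, .bottom = 720};
amdgpu::PaScRect unsetScissor{}; // all zeros -> isValid() == false

auto r = gnm::intersection(screen, unsetScissor);
// r == screen; the old min/max clamping produced a 0x0 rect here.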
@@ -151,282 +135,6 @@ static VkPrimitiveTopology toVkPrimitiveType(gnm::PrimitiveType type) {
   }
 }

-struct ShaderResources : eval::Evaluator {
-  amdgpu::Cache::Tag *cacheTag;
-  shader::eval::Evaluator evaluator;
-  std::map<std::uint32_t, std::uint32_t> slotResources;
-  std::span<const std::uint32_t> userSgprs;
-
-  std::uint32_t slotOffset = 0;
-  rx::MemoryTableWithPayload<Access> bufferMemoryTable;
-  std::vector<std::pair<std::uint32_t, std::uint64_t>> resourceSlotToAddress;
-  std::vector<amdgpu::Cache::Sampler> samplerResources;
-  std::vector<amdgpu::Cache::ImageView> imageResources[3];
-
-  using Evaluator::eval;
-
-  ShaderResources() = default;
-
-  void loadResources(shader::gcn::Resources &res,
-                     std::span<const std::uint32_t> userSgprs) {
-    this->userSgprs = userSgprs;
-    for (auto &pointer : res.pointers) {
-      auto pointerBase = eval(pointer.base).zExtScalar();
-      auto pointerOffset = eval(pointer.offset).zExtScalar();
-
-      if (!pointerBase || !pointerOffset) {
-        res.dump();
-        rx::die("failed to evaluate pointer");
-      }
-
-      bufferMemoryTable.map(*pointerBase,
-                            *pointerBase + *pointerOffset + pointer.size,
-                            Access::Read);
-      resourceSlotToAddress.push_back(
-          {slotOffset + pointer.resourceSlot, *pointerBase});
-    }
-
-    for (auto &bufferRes : res.buffers) {
-      auto word0 = eval(bufferRes.words[0]).zExtScalar();
-      auto word1 = eval(bufferRes.words[1]).zExtScalar();
-      auto word2 = eval(bufferRes.words[2]).zExtScalar();
-      auto word3 = eval(bufferRes.words[3]).zExtScalar();
-
-      if (!word0 || !word1 || !word2 || !word3) {
-        res.dump();
-        rx::die("failed to evaluate V#");
-      }
-
-      gnm::VBuffer buffer{};
-      std::memcpy(reinterpret_cast<std::uint32_t *>(&buffer), &*word0,
-                  sizeof(std::uint32_t));
-      std::memcpy(reinterpret_cast<std::uint32_t *>(&buffer) + 1, &*word1,
-                  sizeof(std::uint32_t));
-      std::memcpy(reinterpret_cast<std::uint32_t *>(&buffer) + 2, &*word2,
-                  sizeof(std::uint32_t));
-      std::memcpy(reinterpret_cast<std::uint32_t *>(&buffer) + 3, &*word3,
-                  sizeof(std::uint32_t));
-
-      bufferMemoryTable.map(buffer.address(), buffer.address() + buffer.size(),
-                            bufferRes.access);
-      resourceSlotToAddress.push_back(
-          {slotOffset + bufferRes.resourceSlot, buffer.address()});
-    }
-
-    for (auto &texture : res.textures) {
-      auto word0 = eval(texture.words[0]).zExtScalar();
-      auto word1 = eval(texture.words[1]).zExtScalar();
-      auto word2 = eval(texture.words[2]).zExtScalar();
-      auto word3 = eval(texture.words[3]).zExtScalar();
-
-      if (!word0 || !word1 || !word2 || !word3) {
-        res.dump();
-        rx::die("failed to evaluate 128 bit T#");
-      }
-
-      gnm::TBuffer buffer{};
-      std::memcpy(reinterpret_cast<std::uint32_t *>(&buffer), &*word0,
-                  sizeof(std::uint32_t));
-      std::memcpy(reinterpret_cast<std::uint32_t *>(&buffer) + 1, &*word1,
-                  sizeof(std::uint32_t));
-      std::memcpy(reinterpret_cast<std::uint32_t *>(&buffer) + 2, &*word2,
-                  sizeof(std::uint32_t));
-      std::memcpy(reinterpret_cast<std::uint32_t *>(&buffer) + 3, &*word3,
-                  sizeof(std::uint32_t));
-
-      if (texture.words[4] != nullptr) {
-        auto word4 = eval(texture.words[4]).zExtScalar();
-        auto word5 = eval(texture.words[5]).zExtScalar();
-        auto word6 = eval(texture.words[6]).zExtScalar();
-        auto word7 = eval(texture.words[7]).zExtScalar();
-
-        if (!word4 || !word5 || !word6 || !word7) {
-          res.dump();
-          rx::die("failed to evaluate 256 bit T#");
-        }
-
-        std::memcpy(reinterpret_cast<std::uint32_t *>(&buffer) + 4, &*word4,
-                    sizeof(std::uint32_t));
-        std::memcpy(reinterpret_cast<std::uint32_t *>(&buffer) + 5, &*word5,
-                    sizeof(std::uint32_t));
-        std::memcpy(reinterpret_cast<std::uint32_t *>(&buffer) + 6, &*word6,
-                    sizeof(std::uint32_t));
-        std::memcpy(reinterpret_cast<std::uint32_t *>(&buffer) + 7, &*word7,
-                    sizeof(std::uint32_t));
-      }
-
-      std::vector<amdgpu::Cache::ImageView> *resources = nullptr;
-
-      switch (buffer.type) {
-      case gnm::TextureType::Array1D:
-      case gnm::TextureType::Dim1D:
-        resources = &imageResources[0];
-        break;
-      case gnm::TextureType::Dim2D:
-      case gnm::TextureType::Array2D:
-      case gnm::TextureType::Msaa2D:
-      case gnm::TextureType::MsaaArray2D:
-      case gnm::TextureType::Cube:
-        resources = &imageResources[1];
-        break;
-      case gnm::TextureType::Dim3D:
-        resources = &imageResources[2];
-        break;
-      }
-
-      rx::dieIf(resources == nullptr,
-                "ShaderResources: unexpected texture type %u",
-                static_cast<unsigned>(buffer.type));
-
-      slotResources[slotOffset + texture.resourceSlot] = resources->size();
-      resources->push_back(cacheTag->getImageView(
-          amdgpu::ImageViewKey::createFrom(buffer), texture.access));
-    }
-
-    for (auto &sampler : res.samplers) {
-      auto word0 = eval(sampler.words[0]).zExtScalar();
-      auto word1 = eval(sampler.words[1]).zExtScalar();
-      auto word2 = eval(sampler.words[2]).zExtScalar();
-      auto word3 = eval(sampler.words[3]).zExtScalar();
-
-      if (!word0 || !word1 || !word2 || !word3) {
-        res.dump();
-        rx::die("failed to evaluate S#");
-      }
-
-      gnm::SSampler sSampler{};
-      std::memcpy(reinterpret_cast<std::uint32_t *>(&sSampler), &*word0,
-                  sizeof(std::uint32_t));
-      std::memcpy(reinterpret_cast<std::uint32_t *>(&sSampler) + 1, &*word1,
-                  sizeof(std::uint32_t));
-      std::memcpy(reinterpret_cast<std::uint32_t *>(&sSampler) + 2, &*word2,
-                  sizeof(std::uint32_t));
-      std::memcpy(reinterpret_cast<std::uint32_t *>(&sSampler) + 3, &*word3,
-                  sizeof(std::uint32_t));
-
-      if (sampler.unorm) {
-        sSampler.force_unorm_coords = true;
-      }
-
-      slotResources[slotOffset + sampler.resourceSlot] =
-          samplerResources.size();
-      samplerResources.push_back(
-          cacheTag->getSampler(amdgpu::SamplerKey::createFrom(sSampler)));
-    }
-
-    slotOffset += res.slots;
-  }
-
-  void buildMemoryTable(MemoryTable &memoryTable) {
-    memoryTable.count = 0;
-
-    for (auto p : bufferMemoryTable) {
-      auto size = p.endAddress - p.beginAddress;
-      auto buffer = cacheTag->getBuffer(p.beginAddress, size, p.payload);
-
-      auto memoryTableSlot = memoryTable.count;
-      memoryTable.slots[memoryTable.count++] = {
-          .address = p.beginAddress,
-          .size = size,
-          .flags = static_cast<uint8_t>(p.payload),
-          .deviceAddress = buffer.deviceAddress,
-      };
-
-      for (auto [slot, address] : resourceSlotToAddress) {
-        if (address >= p.beginAddress && address < p.endAddress) {
-          slotResources[slot] = memoryTableSlot;
-        }
-      }
-    }
-  }
-
-  std::uint32_t getResourceSlot(std::uint32_t id) {
-    if (auto it = slotResources.find(id); it != slotResources.end()) {
-      return it->second;
-    }
-    return -1;
-  }
-
-  template <typename T> T readPointer(std::uint64_t address) {
-    T result{};
-    cacheTag->readMemory(&result, address, sizeof(result));
-    return result;
-  }
-
-  eval::Value eval(ir::InstructionId instId,
-                   std::span<const ir::Operand> operands) override {
-    if (instId == ir::amdgpu::POINTER) {
-      auto type = operands[0].getAsValue();
-      auto loadSize = *operands[1].getAsInt32();
-      auto base = eval(operands[2]).zExtScalar();
-      auto offset = eval(operands[3]).zExtScalar();
-
-      if (!base || !offset) {
-        rx::die("failed to evaluate pointer dependency");
-      }
-
-      eval::Value result;
-      auto address = *base + *offset;
-
-      switch (loadSize) {
-      case 1:
-        result = readPointer<std::uint8_t>(address);
-        break;
-      case 2:
-        result = readPointer<std::uint16_t>(address);
-        break;
-      case 4:
-        result = readPointer<std::uint32_t>(address);
-        break;
-      case 8:
-        result = readPointer<std::uint64_t>(address);
-        break;
-      case 12:
-        result = readPointer<u32vec3>(address);
-        break;
-      case 16:
-        result = readPointer<u32vec4>(address);
-        break;
-      case 32:
-        result = readPointer<std::array<std::uint32_t, 8>>(address);
-        break;
-      default:
-        rx::die("unexpected pointer load size");
-      }
-
-      return result;
-    }
-
-    if (instId == ir::amdgpu::VBUFFER) {
-      rx::die("resource depends on buffer value");
-    }
-
-    if (instId == ir::amdgpu::TBUFFER) {
-      rx::die("resource depends on texture value");
-    }
-
-    if (instId == ir::amdgpu::SAMPLER) {
-      rx::die("resource depends on sampler value");
-    }
-
-    if (instId == ir::amdgpu::USER_SGPR) {
-      auto index = static_cast<std::uint32_t>(*operands[1].getAsInt32());
-      rx::dieIf(index >= userSgprs.size(), "out of user sgprs");
-      return userSgprs[index];
-    }
-
-    if (instId == ir::amdgpu::IMM) {
-      auto address = static_cast<std::uint64_t>(*operands[1].getAsInt64());
-
-      std::uint32_t result;
-      cacheTag->readMemory(&result, address, sizeof(result));
-      return result;
-    }
-
-    return Evaluator::eval(instId, operands);
-  }
-};
-
 void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
                   std::uint32_t vertexCount, std::uint32_t firstInstance,
                   std::uint32_t instanceCount, std::uint64_t indiciesAddress,
@@ -449,7 +157,7 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
     return;
   }

-  auto cacheTag = pipe.device->getCacheTag(vmId, pipe.scheduler);
+  auto cacheTag = pipe.device->getGraphicsTag(vmId, pipe.scheduler);
   auto targetMask = pipe.context.cbTargetMask.raw;

   VkRenderingAttachmentInfo colorAttachments[8]{};
@@ -460,8 +168,12 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
   VkRect2D viewPortScissors[8]{};
   unsigned renderTargets = 0;

-  VkRenderingAttachmentInfo depthAttachment{};
-  VkRenderingAttachmentInfo stencilAttachment{};
+  VkRenderingAttachmentInfo depthAttachment{
+      .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
+  };
+  VkRenderingAttachmentInfo stencilAttachment{
+      .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
+  };

   auto depthAccess = Access::None;
   auto stencilAccess = Access::None;
@@ -484,12 +196,15 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
     }
   }

+  // FIXME
+  stencilAccess = Access::None;
+
   if (depthAccess != Access::None) {
     auto viewPortScissor = pipe.context.paScScreenScissor;
     auto viewPortRect = gnm::toVkRect2D(viewPortScissor);

     auto imageView = cacheTag.getImageView(
-        {{
+        {
             .readAddress = pipe.context.dbZReadBase,
             .writeAddress = pipe.context.dbZWriteBase,
             .dfmt = gnm::getDataFormat(pipe.context.dbZInfo.format),
@@ -502,7 +217,7 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
             },
             .pitch = viewPortRect.extent.width,
             .kind = ImageKind::Depth,
-        }},
+        },
         depthAccess);

     depthAttachment = {
@@ -533,12 +248,12 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
     }

     auto viewPortScissor = pipe.context.paScScreenScissor;
-    // viewPortScissor = gnm::intersection(
-    //     viewPortScissor, pipe.context.paScVportScissor[renderTargets]);
-    // viewPortScissor =
-    //     gnm::intersection(viewPortScissor, pipe.context.paScWindowScissor);
-    // viewPortScissor =
-    //     gnm::intersection(viewPortScissor, pipe.context.paScGenericScissor);
+    viewPortScissor = gnm::intersection(
+        viewPortScissor, pipe.context.paScVportScissor[renderTargets]);
+    viewPortScissor =
+        gnm::intersection(viewPortScissor, pipe.context.paScWindowScissor);
+    viewPortScissor =
+        gnm::intersection(viewPortScissor, pipe.context.paScGenericScissor);

     auto viewPortRect = gnm::toVkRect2D(viewPortScissor);
@@ -554,7 +269,7 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
     auto vkViewPortScissor = gnm::toVkRect2D(viewPortScissor);
     viewPortScissors[renderTargets] = vkViewPortScissor;

-    ImageViewKey renderTargetInfo{};
+    ImageKey renderTargetInfo{};
     renderTargetInfo.type = gnm::TextureType::Dim2D;
     renderTargetInfo.pitch = vkViewPortScissor.extent.width;
     renderTargetInfo.readAddress = static_cast<std::uint64_t>(cbColor.base)
@@ -572,7 +287,6 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
             cbColor.info.linearGeneral
                 ? TileMode{.raw = 0}
                 : getDefaultTileModes()[cbColor.attrib.tileModeIndex];
-    // std::printf("draw to %lx\n", renderTargetInfo.address);

     auto access = Access::None;
@@ -640,13 +354,6 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
   if (renderTargets == 0) {
     return;
   }

-  // if (pipe.context.cbTargetMask == 0) {
-  //   return;
-  // }
-
-  // auto cache = pipe.device->getCache(vmId);
-
   if (indiciesAddress == 0) {
     indexCount = vertexCount;
   }
@@ -659,244 +366,34 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
   VkShaderEXT shaders[stages.size()]{};

   auto pipelineLayout = cacheTag.getGraphicsPipelineLayout();
-  auto descriptorSets = cacheTag.createGraphicsDescriptorSets();
-
-  std::vector<std::uint32_t *> descriptorBuffers;
-  auto &memoryTableBuffer = cacheTag.getCache()->getMemoryTableBuffer();
-  std::uint64_t memoryTableAddress = memoryTableBuffer.getAddress();
-  auto memoryTable = std::bit_cast<MemoryTable *>(memoryTableBuffer.getData());
-
-  std::uint64_t gdsAddress = cacheTag.getCache()->getGdsBuffer().getAddress();
-  ShaderResources shaderResources;
-  shaderResources.cacheTag = &cacheTag;
-
-  struct MemoryTableConfigSlot {
-    std::uint32_t bufferIndex;
-    std::uint32_t configIndex;
-    std::uint32_t resourceSlot;
-  };
-  std::vector<MemoryTableConfigSlot> memoryTableConfigSlots;
-
-  auto addShader = [&](const SpiShaderPgm &pgm, shader::gcn::Stage stage) {
-    shader::gcn::Environment env{
-        .vgprCount = pgm.rsrc1.getVGprCount(),
-        .sgprCount = pgm.rsrc1.getSGprCount(),
-        .userSgprs = std::span(pgm.userData.data(), pgm.rsrc2.userSgpr),
-        .supportsBarycentric = vk::context->supportsBarycentric,
-        .supportsInt8 = vk::context->supportsInt8,
-        .supportsInt64Atomics = vk::context->supportsInt64Atomics,
-    };
-
-    auto shader = cacheTag.getShader({
-        .address = pgm.address << 8,
-        .stage = stage,
-        .env = env,
-    });
-
-    std::uint32_t slotOffset = shaderResources.slotOffset;
-
-    shaderResources.loadResources(
-        shader.info->resources,
-        std::span(pgm.userData.data(), pgm.rsrc2.userSgpr));
-
-    const auto &configSlots = shader.info->configSlots;
-
-    auto configSize = configSlots.size() * sizeof(std::uint32_t);
-    auto configBuffer = cacheTag.getInternalBuffer(configSize);
-
-    auto configPtr = reinterpret_cast<std::uint32_t *>(configBuffer.data);
-
-    shader::gcn::PsVGprInput
-        psVgprInput[static_cast<std::size_t>(shader::gcn::PsVGprInput::Count)];
-    std::size_t psVgprInputs = 0;
-
-    if (stage == shader::gcn::Stage::Ps) {
-      SpiPsInput spiInputAddr = pipe.context.spiPsInputAddr;
-
-      if (spiInputAddr.perspSampleEna) {
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::IPerspSample;
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::JPerspSample;
-      }
-      if (spiInputAddr.perspCenterEna) {
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::IPerspCenter;
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::JPerspCenter;
-      }
-      if (spiInputAddr.perspCentroidEna) {
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::IPerspCentroid;
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::JPerspCentroid;
-      }
-      if (spiInputAddr.perspPullModelEna) {
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::IW;
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::JW;
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::_1W;
-      }
-      if (spiInputAddr.linearSampleEna) {
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::ILinearSample;
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::JLinearSample;
-      }
-      if (spiInputAddr.linearCenterEna) {
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::ILinearCenter;
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::JLinearCenter;
-      }
-      if (spiInputAddr.linearCentroidEna) {
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::ILinearCentroid;
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::JLinearCentroid;
-      }
-      if (spiInputAddr.posXFloatEna) {
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::X;
-      }
-      if (spiInputAddr.posYFloatEna) {
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::Y;
-      }
-      if (spiInputAddr.posZFloatEna) {
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::Z;
-      }
-      if (spiInputAddr.posWFloatEna) {
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::W;
-      }
-      if (spiInputAddr.frontFaceEna) {
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::FrontFace;
-      }
-      if (spiInputAddr.ancillaryEna) {
-        rx::die("unimplemented ancillary fs input");
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::Ancillary;
-      }
-      if (spiInputAddr.sampleCoverageEna) {
-        rx::die("unimplemented sample coverage fs input");
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::SampleCoverage;
-      }
-      if (spiInputAddr.posFixedPtEna) {
-        rx::die("unimplemented pos fixed fs input");
-        psVgprInput[psVgprInputs++] = shader::gcn::PsVGprInput::PosFixed;
-      }
-    }
-
-    for (std::size_t index = 0; const auto &slot : configSlots) {
-      switch (slot.type) {
-      case shader::gcn::ConfigType::Imm:
-        cacheTag.readMemory(&configPtr[index], slot.data,
-                            sizeof(std::uint32_t));
-        break;
-      case shader::gcn::ConfigType::UserSgpr:
-        configPtr[index] = pgm.userData[slot.data];
-        break;
-      case shader::gcn::ConfigType::ViewPortOffsetX:
-        configPtr[index] = std::bit_cast<std::uint32_t>(
-            pipe.context.paClVports[slot.data].xOffset /
-                (viewPorts[0].width / 2.f) -
-            1);
-        break;
-      case shader::gcn::ConfigType::ViewPortOffsetY:
-        configPtr[index] = std::bit_cast<std::uint32_t>(
-            pipe.context.paClVports[slot.data].yOffset /
-                (viewPorts[slot.data].height / 2.f) -
-            1);
-        break;
-      case shader::gcn::ConfigType::ViewPortOffsetZ:
-        configPtr[index] = std::bit_cast<std::uint32_t>(
-            pipe.context.paClVports[slot.data].zOffset);
-        break;
-      case shader::gcn::ConfigType::ViewPortScaleX:
-        configPtr[index] = std::bit_cast<std::uint32_t>(
-            pipe.context.paClVports[slot.data].xScale /
-            (viewPorts[slot.data].width / 2.f));
-        break;
-      case shader::gcn::ConfigType::ViewPortScaleY:
-        configPtr[index] = std::bit_cast<std::uint32_t>(
-            pipe.context.paClVports[slot.data].yScale /
-            (viewPorts[slot.data].height / 2.f));
-        break;
-      case shader::gcn::ConfigType::ViewPortScaleZ:
-        configPtr[index] = std::bit_cast<std::uint32_t>(
-            pipe.context.paClVports[slot.data].zScale);
-        break;
-      case shader::gcn::ConfigType::PsInputVGpr:
-        if (slot.data > psVgprInputs) {
-          configPtr[index] = ~0;
-        } else {
-          configPtr[index] =
-              std::bit_cast<std::uint32_t>(psVgprInput[slot.data]);
-        }
-        break;
-      case shader::gcn::ConfigType::VsPrimType:
-        if (indexBuffer.handle == VK_NULL_HANDLE &&
-            pipe.uConfig.vgtPrimitiveType != indexBuffer.primType) {
-          configPtr[index] =
-              static_cast<std::uint32_t>(pipe.uConfig.vgtPrimitiveType.value);
-        } else {
-          configPtr[index] = 0;
-        }
-        break;
-
-      case shader::gcn::ConfigType::ResourceSlot:
-        memoryTableConfigSlots.push_back({
-            .bufferIndex = static_cast<std::uint32_t>(descriptorBuffers.size()),
-            .configIndex = static_cast<std::uint32_t>(index),
-            .resourceSlot = static_cast<std::uint32_t>(slotOffset + slot.data),
-        });
-        break;
-
-      case shader::gcn::ConfigType::MemoryTable:
-        if (slot.data == 0) {
-          configPtr[index] = static_cast<std::uint32_t>(memoryTableAddress);
-        } else {
-          configPtr[index] =
-              static_cast<std::uint32_t>(memoryTableAddress >> 32);
-        }
-        break;
-      case shader::gcn::ConfigType::Gds:
-        if (slot.data == 0) {
-          configPtr[index] = static_cast<std::uint32_t>(gdsAddress);
-        } else {
-          configPtr[index] = static_cast<std::uint32_t>(gdsAddress >> 32);
-        }
-        break;
-
-      case shader::gcn::ConfigType::CbCompSwap:
-        configPtr[index] = std::bit_cast<std::uint32_t>(
-            pipe.context.cbColor[slot.data].info.compSwap);
-        break;
-      }
-
-      ++index;
-    }
-
-    VkDescriptorBufferInfo bufferInfo{
-        .buffer = configBuffer.handle,
-        .offset = configBuffer.offset,
-        .range = configSize,
-    };
-
-    auto stageIndex = Cache::getStageIndex(shader.stage);
-
-    VkWriteDescriptorSet writeDescSet{
-        .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
-        .dstSet = descriptorSets[stageIndex],
-        .dstBinding = 0,
-        .descriptorCount = 1,
-        .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
-        .pBufferInfo = &bufferInfo,
-    };
-
-    vkUpdateDescriptorSets(vk::context->device, 1, &writeDescSet, 0, nullptr);
-
-    shaders[stageIndex] = shader.handle
-                              ? shader.handle
-                              : getFillRedFragShader(*cacheTag.getCache());
-    descriptorBuffers.push_back(configPtr);
-  };
+  auto descriptorSets = cacheTag.getDescriptorSets();
+  Cache::Shader vertexShader;

   if (pipe.context.vgtShaderStagesEn.vsEn == amdgpu::VsStage::VsReal) {
-    addShader(pipe.sh.spiShaderPgmVs, shader::gcn::Stage::VsVs);
+    gnm::PrimitiveType vsPrimType = {};
+    if (indexBuffer.handle == VK_NULL_HANDLE &&
+        pipe.uConfig.vgtPrimitiveType != indexBuffer.primType) {
+      vsPrimType = pipe.uConfig.vgtPrimitiveType.value;
+    }
+
+    vertexShader =
+        cacheTag.getVertexShader(gcn::Stage::VsVs, pipe.sh.spiShaderPgmVs,
+                                 pipe.context, vsPrimType, viewPorts);
   }

-  if (true) {
-    addShader(pipe.sh.spiShaderPgmPs, shader::gcn::Stage::Ps);
-  } else {
+  auto pixelShader =
+      cacheTag.getPixelShader(pipe.sh.spiShaderPgmPs, pipe.context, viewPorts);

+  if (pixelShader.handle == nullptr) {
+    shaders[Cache::getStageIndex(VK_SHADER_STAGE_FRAGMENT_BIT)] =
+        getFillRedFragShader(*cacheTag.getCache());
+  }
+
+  shaders[Cache::getStageIndex(VK_SHADER_STAGE_VERTEX_BIT)] =
+      vertexShader.handle;
+  shaders[Cache::getStageIndex(VK_SHADER_STAGE_FRAGMENT_BIT)] =
+      pixelShader.handle;

   if (pipe.uConfig.vgtPrimitiveType == gnm::PrimitiveType::RectList) {
     shaders[Cache::getStageIndex(VK_SHADER_STAGE_GEOMETRY_BIT)] =
         getPrimTypeRectGeomShader(*cacheTag.getCache());
@@ -906,18 +403,24 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
     vertexCount = indexBuffer.indexCount;
   }

-  auto commandBuffer = pipe.scheduler.getCommandBuffer();
-
   VkRenderingInfo renderInfo{
       .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
       .renderArea = gnm::toVkRect2D(pipe.context.paScScreenScissor),
       .layerCount = 1,
       .colorAttachmentCount = renderTargets,
      .pColorAttachments = colorAttachments,
-      .pDepthAttachment = &depthAttachment,
-      // .pStencilAttachment = &stencilAttachment,
+      .pDepthAttachment =
+          depthAccess != Access::None ? &depthAttachment : nullptr,
+      .pStencilAttachment =
+          stencilAccess != Access::None ? &stencilAttachment : nullptr,
   };

+  cacheTag.buildDescriptors(descriptorSets[0]);
+
+  pipe.scheduler.afterSubmit([cacheTag = std::move(cacheTag)] {});
+
+  auto commandBuffer = pipe.scheduler.getCommandBuffer();
+
   vkCmdBeginRendering(commandBuffer, &renderInfo);
   vkCmdSetRasterizerDiscardEnable(commandBuffer, VK_FALSE);
@@ -991,57 +494,6 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,

   vk::CmdBindShadersEXT(commandBuffer, stages.size(), stages.data(), shaders);

-  shaderResources.buildMemoryTable(*memoryTable);
-
-  for (auto &sampler : shaderResources.samplerResources) {
-    uint32_t index = &sampler - shaderResources.samplerResources.data();
-
-    VkDescriptorImageInfo samplerInfo{.sampler = sampler.handle};
-
-    VkWriteDescriptorSet writeDescSet{
-        .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
-        .dstSet = descriptorSets[0],
-        .dstBinding = Cache::getDescriptorBinding(VK_DESCRIPTOR_TYPE_SAMPLER),
-        .dstArrayElement = index,
-        .descriptorCount = 1,
-        .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER,
-        .pImageInfo = &samplerInfo,
-    };
-
-    vkUpdateDescriptorSets(vk::context->device, 1, &writeDescSet, 0, nullptr);
-  }
-
-  for (auto &imageResources : shaderResources.imageResources) {
-    auto dim = (&imageResources - shaderResources.imageResources) + 1;
-    for (auto &image : imageResources) {
-      uint32_t index = &image - imageResources.data();
-
-      VkDescriptorImageInfo imageInfo{
-          .imageView = image.handle,
-          .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
-      };
-
-      VkWriteDescriptorSet writeDescSet{
-          .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
-          .dstSet = descriptorSets[0],
-          .dstBinding = static_cast<uint32_t>(Cache::getDescriptorBinding(
-              VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, dim)),
-          .dstArrayElement = index,
-          .descriptorCount = 1,
-          .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
-          .pImageInfo = &imageInfo,
-      };
-
-      vkUpdateDescriptorSets(vk::context->device, 1, &writeDescSet, 0, nullptr);
-    }
-  }
-
-  for (auto &mtConfig : memoryTableConfigSlots) {
-    auto config = descriptorBuffers[mtConfig.bufferIndex];
-    config[mtConfig.configIndex] =
-        shaderResources.getResourceSlot(mtConfig.resourceSlot);
-  }
-
   if (indexBuffer.handle != VK_NULL_HANDLE) {
     vkCmdBindIndexBuffer(commandBuffer, indexBuffer.handle, indexBuffer.offset,
                          gnm::toVkIndexType(indexBuffer.indexType));
@@ -1054,10 +506,19 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,

   vkCmdEndRendering(commandBuffer);
-  pipe.scheduler.submit();
-  pipe.scheduler.then([=, cacheTag = std::move(cacheTag),
-                       shaderResources = std::move(shaderResources)] {});
 }

+// void amdgpu::dispatch(Scheduler &sched,
+//                       amdgpu::Registers::ComputeConfig &computeConfig, int
+//                       vmId, std::uint32_t groupCountX, std::uint32_t
+//                       groupCountY, std::uint32_t groupCountZ) {
+//   vkCmdDispatch(sched.getCommandBuffer(), groupCountX, groupCountY,
+//                 groupCountZ);
+
+//   sched.submit();
+// }
+
 static void
 transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,
                       VkImageLayout oldLayout, VkImageLayout newLayout,
@@ -1115,10 +576,10 @@ transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,

 void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer,
                   VkExtent2D targetExtent, std::uint64_t address,
-                  VkImageView target, VkExtent2D imageExtent,
-                  FlipType type, TileMode tileMode, gnm::DataFormat dfmt,
+                  VkImageView target, VkExtent2D imageExtent, FlipType type,
+                  TileMode tileMode, gnm::DataFormat dfmt,
                   gnm::NumericFormat nfmt) {
-  ImageViewKey framebuffer{};
+  ImageKey framebuffer{};
   framebuffer.readAddress = address;
   framebuffer.type = gnm::TextureType::Dim2D;
   framebuffer.dfmt = dfmt;
@@ -1181,7 +642,8 @@ void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer,

   vkCmdBeginRendering(commandBuffer, &renderInfo);

-  cacheTag.getDevice()->flipPipeline.bind(cacheTag.getScheduler(), type, imageView.handle, sampler.handle);
+  cacheTag.getDevice()->flipPipeline.bind(cacheTag.getScheduler(), type,
+                                          imageView.handle, sampler.handle);

   vkCmdSetViewportWithCount(commandBuffer, 1, viewPorts);
   vkCmdSetScissorWithCount(commandBuffer, 1, viewPortScissors);
@@ -1,9 +1,9 @@
 #include "amdgpu/tiler_vulkan.hpp"
 #include "Scheduler.hpp"
 #include "amdgpu/tiler.hpp"
-#include <bit>
 #include <cstring>
 #include <memory>
+#include <rx/ConcurrentBitPool.hpp>
 #include <vk.hpp>

 #include <shaders/detiler1d.comp.h>
@@ -72,11 +72,11 @@ struct TilerShader {
 };

 struct amdgpu::GpuTiler::Impl {
+  static constexpr auto kDescriptorSetCount = 32;
   TilerDecriptorSetLayout descriptorSetLayout;
-  std::mutex descriptorMtx;
-  VkDescriptorSet descriptorSets[32]{};
+  rx::ConcurrentBitPool<kDescriptorSetCount, std::uint32_t> descriptorSetPool;
+  VkDescriptorSet descriptorSets[kDescriptorSetCount]{};
   VkDescriptorPool descriptorPool;
-  std::uint32_t inUseDescriptorSets = 0;

   vk::Buffer configData;
   TilerShader detilerLinear{descriptorSetLayout, spirv_detilerLinear_comp};
@@ -156,20 +156,10 @@ struct amdgpu::GpuTiler::Impl {
                      vk::context->allocator);
   }

-  std::uint32_t allocateDescriptorSlot() {
-    std::lock_guard lock(descriptorMtx);
-
-    auto result = std::countl_one(inUseDescriptorSets);
-    rx::dieIf(result >= std::size(descriptorSets),
-              "out of tiler descriptor sets");
-    inUseDescriptorSets |= (1 << result);
-
-    return result;
-  }
+  std::uint32_t allocateDescriptorSlot() { return descriptorSetPool.acquire(); }

   void releaseDescriptorSlot(std::uint32_t slot) {
-    std::lock_guard lock(descriptorMtx);
-    inUseDescriptorSets &= ~(1u << slot);
+    descriptorSetPool.release(slot);
   }
 };
@@ -10,6 +10,7 @@ public:
   virtual ~Evaluator() = default;

   void invalidate(ir::Value node) { values.erase(node); }
+  void invalidate() { values.clear(); }
   void setValue(ir::Value node, Value value) { values[node] = value; }

   Value eval(const ir::Operand &op, ir::Value type = nullptr);
@@ -74,7 +74,7 @@ VkFormat gnm::toVkFormat(DataFormat dfmt, NumericFormat nfmt) {
   case kDataFormat5_6_5:
     switch (nfmt) {
     case kNumericFormatUNorm:
-      return VK_FORMAT_R5G6B5_UNORM_PACK16;
+      return VK_FORMAT_B5G6R5_UNORM_PACK16;

     default:
      break;
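This is the "fixed 5_6_5 format swizzling" item: mapping the 5_6_5 data format to VK_FORMAT_B5G6R5_UNORM_PACK16 implies that red occupies the low 5 bits of the packed word (Vulkan defines that format as B in bits 15..11, G in bits 10..5, R in bits 4..0). A small decode sketch of that layout for reference:

#include <cstdint>

struct Rgb { float r, g, b; };

// Decode one texel per the VK_FORMAT_B5G6R5_UNORM_PACK16 bit layout.
Rgb decodeB5G6R5(std::uint16_t texel) {
  return {
      .r = static_cast<float>(texel & 0x1f) / 31.0f,         // bits 4..0
      .g = static_cast<float>((texel >> 5) & 0x3f) / 63.0f,  // bits 10..5
      .b = static_cast<float>((texel >> 11) & 0x1f) / 31.0f, // bits 15..11
  };
}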
@@ -98,12 +98,12 @@ public:

     wait();

-    for (auto &&fn : mAfterSubmitTasks) {
-      fn();
+    while (!mAfterSubmitTasks.empty()) {
+      auto task = std::move(mAfterSubmitTasks.back());
+      mAfterSubmitTasks.pop_back();
+      std::move(task)();
     }

-    mAfterSubmitTasks.clear();
-
     std::vector<std::move_only_function<void()>> taskList;

     for (auto it = mTasks.begin(); it != mTasks.end(); it = mTasks.erase(it)) {
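This is likely the "possible crash in scheduler" fix: iterating the vector with a range-for breaks if a task enqueues another after-submit task (or frees resources that re-enter the scheduler), because the push can reallocate the vector and dangle the loop's iterators. Popping each task off before invoking it keeps the container consistent across reentrant calls. A reduced sketch of the hazard, using std::function for simplicity:

#include <functional>
#include <vector>

std::vector<std::function<void()>> tasks;

void drain() {
  // Unsafe: if fn() pushes into `tasks`, a reallocation invalidates the
  // range-for iterators.
  // for (auto &&fn : tasks) fn();

  // Safe: detach each task before running it; tasks appended by a running
  // task are picked up by the same loop.
  while (!tasks.empty()) {
    auto task = std::move(tasks.back());
    tasks.pop_back();
    task();
  }
}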
rx/include/rx/ConcurrentBitPool.hpp (new file, 72 lines)
@@ -0,0 +1,72 @@
+#pragma once
+
+#include <array>
+#include <atomic>
+#include <bit>
+#include <cstddef>
+#include <cstdint>
+
+namespace rx {
+namespace detail {
+template <std::size_t Count> auto pickBitSetBaseType() {
+  if constexpr (Count <= 8) {
+    return std::array<std::atomic<std::uint8_t>, 1>{};
+  } else if constexpr (Count <= 16) {
+    return std::array<std::atomic<std::uint16_t>, 1>{};
+  } else if constexpr (Count <= 32) {
+    return std::array<std::atomic<std::uint32_t>, 1>{};
+  } else {
+    return std::array<std::atomic<std::uint64_t>, (Count + 63) / 64>();
+  }
+}
+
+template <std::size_t Count>
+using ConcurrentBitPoolBaseType = decltype(pickBitSetBaseType<Count>());
+} // namespace detail
+
+template <std::size_t BitCount, typename ElementType = std::size_t>
+class ConcurrentBitPool {
+  detail::ConcurrentBitPoolBaseType<BitCount> mStorage{{}};
+  using WordType = std::remove_cvref_t<decltype(mStorage[0])>::value_type;
+  static constexpr auto kWordBitWidth = sizeof(WordType) * 8;
+
+public:
+  ElementType acquire() {
+    while (true) {
+      for (auto &node : mStorage) {
+        auto mask = node.load(std::memory_order::acquire);
+
+        auto bitIndex = std::countr_one(mask);
+        if (bitIndex >= kWordBitWidth) {
+          continue;
+        }
+
+        auto pattern = static_cast<WordType>(1) << bitIndex;
+
+        if (!node.compare_exchange_strong(mask, mask | pattern,
+                                          std::memory_order::release,
+                                          std::memory_order::relaxed)) {
+          continue;
+        }
+
+        auto wordIndex = &node - mStorage.data();
+        return static_cast<ElementType>(kWordBitWidth * wordIndex + bitIndex);
+      }
+    }
+  }
+
+  void release(ElementType index) {
+    auto rawIndex = static_cast<std::size_t>(index);
+    auto bitIndex = rawIndex % kWordBitWidth;
+    auto wordIndex = rawIndex / kWordBitWidth;
+
+    WordType pattern = static_cast<WordType>(1) << bitIndex;
+    WordType mask = pattern;
+
+    while (!mStorage[wordIndex].compare_exchange_weak(
+        mask, mask & ~pattern, std::memory_order::release,
+        std::memory_order::acquire)) {
+    }
+  }
+};
+} // namespace rx
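The new bit pool hands out the lowest clear bit of a small atomic bitset: acquire scans each word with std::countr_one for the first zero bit and claims it with a CAS, retrying (and spinning if the pool is momentarily full) until it wins; release clears the bit with its own CAS loop, so both sides are lock-free. A short usage sketch:

#include <rx/ConcurrentBitPool.hpp>
#include <cstdint>
#include <cstdio>

int main() {
  // 128 slots -> two 64-bit atomic words internally; indices are uint32_t.
  rx::ConcurrentBitPool<128, std::uint32_t> pool;

  auto a = pool.acquire(); // 0: lowest clear bit of word 0
  auto b = pool.acquire(); // 1
  pool.release(a);
  auto c = pool.acquire(); // 0 again: the lowest free index is reused

  std::printf("%u %u %u\n", a, b, c);
}

Note the design choice this commit leans on: because acquire blocks by spinning rather than failing, callers such as the descriptor-set pools size BitCount to the worst-case number of in-flight submissions and rely on afterSubmit callbacks to return slots promptly.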