#pragma once #include "Pipe.hpp" #include "amdgpu/tiler.hpp" #include "gnm/constants.hpp" #include "rx/die.hpp" #include "shader/Access.hpp" #include "shader/Evaluator.hpp" #include "shader/GcnConverter.hpp" #include #include #include #include #include #include #include #include #include #include namespace amdgpu { using Access = shader::Access; struct ShaderKey { std::uint64_t address; shader::gcn::Stage stage; shader::gcn::Environment env; }; enum class ImageKind { Color, Depth, Stencil }; struct ImageKey { std::uint64_t readAddress; std::uint64_t writeAddress; gnm::TextureType type; gnm::DataFormat dfmt; gnm::NumericFormat nfmt; TileMode tileMode = {}; VkOffset3D offset = {}; VkExtent3D extent = {1, 1, 1}; std::uint32_t pitch = 1; unsigned baseMipLevel = 0; unsigned mipCount = 1; unsigned baseArrayLayer = 0; unsigned arrayLayerCount = 1; ImageKind kind = ImageKind::Color; bool pow2pad = false; static ImageKey createFrom(const gnm::TBuffer &tbuffer); }; struct SamplerKey { VkFilter magFilter; VkFilter minFilter; VkSamplerMipmapMode mipmapMode; VkSamplerAddressMode addressModeU; VkSamplerAddressMode addressModeV; VkSamplerAddressMode addressModeW; float mipLodBias; float maxAnisotropy; VkCompareOp compareOp; float minLod; float maxLod; VkBorderColor borderColor; bool anisotropyEnable; bool compareEnable; bool unnormalizedCoordinates; static SamplerKey createFrom(const gnm::SSampler &sampler); auto operator<=>(const SamplerKey &other) const = default; }; struct Cache { static constexpr std::array kGraphicsStages = { VK_SHADER_STAGE_VERTEX_BIT, VK_SHADER_STAGE_GEOMETRY_BIT, VK_SHADER_STAGE_FRAGMENT_BIT, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, }; static constexpr std::array kDescriptorBindings = { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_DESCRIPTOR_TYPE_SAMPLER, VkDescriptorType(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE + 1 * 1000), VkDescriptorType(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE + 2 * 1000), VkDescriptorType(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE + 3 * 1000), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, }; static constexpr int getStageIndex(VkShaderStageFlagBits stage) { if (stage == VK_SHADER_STAGE_COMPUTE_BIT) { return 0; } auto it = std::find(kGraphicsStages.begin(), kGraphicsStages.end(), stage); if (it == kGraphicsStages.end()) { return -1; } return it - kGraphicsStages.begin(); } static constexpr int getDescriptorBinding(VkDescriptorType type, int dim = 0) { auto it = std::find(kDescriptorBindings.begin(), kDescriptorBindings.end(), type + dim * 1000); if (it == kDescriptorBindings.end()) { return -1; } return it - kDescriptorBindings.begin(); } enum class TagId : std::uint64_t {}; struct Entry; int vmId = -1; struct Shader { VkShaderEXT handle = VK_NULL_HANDLE; shader::gcn::ShaderInfo *info; VkShaderStageFlagBits stage; }; struct Sampler { VkSampler handle = VK_NULL_HANDLE; }; struct Buffer { VkBuffer handle = VK_NULL_HANDLE; std::uint64_t offset; std::uint64_t deviceAddress; TagId tagId; std::byte *data; }; struct IndexBuffer { VkBuffer handle = VK_NULL_HANDLE; std::uint64_t offset; std::uint32_t indexCount; gnm::PrimitiveType primType; gnm::IndexType indexType; }; struct Image { VkImage handle = VK_NULL_HANDLE; VkImageSubresourceRange subresource; }; struct ImageView { VkImageView handle = VK_NULL_HANDLE; VkImage imageHandle; VkImageSubresourceRange subresource; }; class Tag; private: struct MemoryTableSlot { std::uint64_t address; union { struct { std::uint64_t size : 40; std::uint64_t flags : 4; }; std::uint64_t sizeAndFlags; }; std::uint64_t deviceAddress; }; struct MemoryTable { std::uint32_t count; std::uint32_t pad; MemoryTableSlot slots[]; }; struct ShaderResources : shader::eval::Evaluator { std::map slotResources; std::span userSgprs; Tag *cacheTag = nullptr; std::uint32_t slotOffset = 0; rx::MemoryTableWithPayload bufferMemoryTable; std::vector> resourceSlotToAddress; std::vector samplerResources; std::vector imageResources[3]; using Evaluator::eval; void clear() { slotResources.clear(); userSgprs = {}; cacheTag = nullptr; slotOffset = 0; bufferMemoryTable.clear(); resourceSlotToAddress.clear(); samplerResources.clear(); for (auto &res : imageResources) { res.clear(); } Evaluator::invalidate(); } void loadResources(shader::gcn::Resources &res, std::span userSgprs); void buildMemoryTable(MemoryTable &memoryTable); std::uint32_t getResourceSlot(std::uint32_t id); template T readPointer(std::uint64_t address) { T result{}; cacheTag->readMemory(&result, address, sizeof(result)); return result; } shader::eval::Value eval(shader::ir::InstructionId instId, std::span operands) override; }; struct TagStorage { struct MemoryTableConfigSlot { std::uint32_t bufferIndex; std::uint32_t configIndex; std::uint32_t resourceSlot; }; std::vector> mAcquiredResources; std::vector memoryTableConfigSlots; std::vector descriptorBuffers; ShaderResources shaderResources; TagStorage() = default; TagStorage(const TagStorage &) = delete; void clear() { mAcquiredResources.clear(); memoryTableConfigSlots.clear(); descriptorBuffers.clear(); shaderResources.clear(); } }; struct TagData { TagStorage *mStorage = nullptr; Scheduler *mScheduler = nullptr; Cache *mParent = nullptr; TagId mTagId{}; std::uint32_t mAcquiredMemoryTable = -1; }; public: struct Tag : protected TagData { Tag(const Tag &) = delete; Tag() noexcept = default; Tag(Tag &&other) noexcept { swap(other); } Tag &operator=(Tag &&other) noexcept { swap(other); return *this; } ~Tag() { release(); } void swap(Tag &other) { std::swap(static_cast(*this), static_cast(other)); } Shader getShader(const ShaderKey &key, const ShaderKey *dependedKey = nullptr); TagId getReadId() const { return TagId{std::uint64_t(mTagId) - 1}; } TagId getWriteId() const { return mTagId; } Cache *getCache() const { return mParent; } Device *getDevice() const { return mParent->mDevice; } Scheduler &getScheduler() const { return *mScheduler; } int getVmId() const { return mParent->mVmIm; } Buffer getInternalHostVisibleBuffer(std::uint64_t size); Buffer getInternalDeviceLocalBuffer(std::uint64_t size); void buildDescriptors(VkDescriptorSet descriptorSet); Sampler getSampler(const SamplerKey &key); Buffer getBuffer(std::uint64_t address, std::uint64_t size, Access access); IndexBuffer getIndexBuffer(std::uint64_t address, std::uint32_t indexCount, gnm::PrimitiveType primType, gnm::IndexType indexType); Image getImage(const ImageKey &key, Access access); ImageView getImageView(const ImageKey &key, Access access); void readMemory(void *target, std::uint64_t address, std::uint64_t size); void writeMemory(const void *source, std::uint64_t address, std::uint64_t size); int compareMemory(const void *source, std::uint64_t address, std::uint64_t size); void release(); VkPipelineLayout getGraphicsPipelineLayout() const { return getCache()->getGraphicsPipelineLayout(); } VkPipelineLayout getComputePipelineLayout() const { return getCache()->getComputePipelineLayout(); } Buffer getMemoryTable() { if (mAcquiredMemoryTable + 1 == 0) { mAcquiredMemoryTable = mParent->mMemoryTablePool.acquire(); } auto &buffer = mParent->mMemoryTableBuffer; auto offset = mAcquiredMemoryTable * kMemoryTableSize; Buffer result{ .offset = offset, .deviceAddress = buffer.getAddress() + offset, .tagId = getReadId(), .data = buffer.getData() + offset, }; return result; } std::shared_ptr findShader(const ShaderKey &key, const ShaderKey *dependedKey = nullptr); friend Cache; }; struct GraphicsTag : Tag { GraphicsTag() = default; GraphicsTag(GraphicsTag &&other) noexcept { swap(other); } GraphicsTag &operator=(GraphicsTag &&other) noexcept { swap(other); return *this; } ~GraphicsTag() { release(); } std::array getDescriptorSets() { if (mAcquiredGraphicsDescriptorSet + 1 == 0) { mAcquiredGraphicsDescriptorSet = mParent->mGraphicsDescriptorSetPool.acquire(); } return mParent->mGraphicsDescriptorSets[mAcquiredGraphicsDescriptorSet]; } Shader getShader(shader::gcn::Stage stage, const SpiShaderPgm &pgm, const Registers::Context &context, gnm::PrimitiveType vsPrimType, std::span viewPorts, std::span psVgprInput); Shader getPixelShader(const SpiShaderPgm &pgm, const Registers::Context &context, std::span viewPorts); Shader getVertexShader(shader::gcn::Stage stage, const SpiShaderPgm &pgm, const Registers::Context &context, gnm::PrimitiveType vsPrimType, std::span viewPorts); void release(); void swap(GraphicsTag &other) { Tag::swap(other); std::swap(mAcquiredGraphicsDescriptorSet, other.mAcquiredGraphicsDescriptorSet); } private: std::uint32_t mAcquiredGraphicsDescriptorSet = -1; }; struct ComputeTag : Tag { ComputeTag() = default; ComputeTag(ComputeTag &&other) noexcept { swap(other); } ComputeTag &operator=(ComputeTag &&other) noexcept { swap(other); return *this; } ~ComputeTag() { release(); } Shader getShader(const Registers::ComputeConfig &pgm); VkDescriptorSet getDescriptorSet() { if (mAcquiredComputeDescriptorSet + 1 == 0) { mAcquiredComputeDescriptorSet = mParent->mComputeDescriptorSetPool.acquire(); } return mParent->mComputeDescriptorSets[mAcquiredComputeDescriptorSet]; } void release(); void swap(ComputeTag &other) { Tag::swap(other); std::swap(mAcquiredComputeDescriptorSet, other.mAcquiredComputeDescriptorSet); } private: std::uint32_t mAcquiredComputeDescriptorSet = -1; }; private: template T createTagImpl(Scheduler &scheduler) { T result; auto id = mNextTagId.load(std::memory_order::acquire); while (!mNextTagId.compare_exchange_weak( id, TagId{static_cast(id) + 2}, std::memory_order::release, std::memory_order::relaxed)) { } auto storageIndex = mTagStoragePool.acquire(); // std::println("acquire tag storage {}", storageIndex); result.mStorage = mTagStorages + storageIndex; result.mTagId = id; result.mParent = this; result.mScheduler = &scheduler; return result; } public: Cache(Device *device, int vmId); ~Cache(); Tag createTag(Scheduler &scheduler) { return createTagImpl(scheduler); } GraphicsTag createGraphicsTag(Scheduler &scheduler) { return createTagImpl(scheduler); } ComputeTag createComputeTag(Scheduler &scheduler) { return createTagImpl(scheduler); } vk::Buffer &getGdsBuffer() { return mGdsBuffer; } void addFrameBuffer(Scheduler &scheduler, int index, std::uint64_t address, std::uint32_t width, std::uint32_t height, int format, TileMode tileMode); void removeFrameBuffer(Scheduler &scheduler, int index); VkImage getFrameBuffer(Scheduler &scheduler, int index); void invalidate(Scheduler &scheduler, std::uint64_t address, std::uint64_t size); void invalidate(Scheduler &scheduler) { invalidate(scheduler, 0, ~static_cast(0)); } void flush(Scheduler &scheduler, std::uint64_t address, std::uint64_t size); void flush(Scheduler &scheduler) { flush(scheduler, 0, ~static_cast(0)); } VkPipelineLayout getGraphicsPipelineLayout() const { return mGraphicsPipelineLayout; } VkPipelineLayout getComputePipelineLayout() const { return mComputePipelineLayout; } auto &getGraphicsDescriptorSetLayouts() const { return mGraphicsDescriptorSetLayouts; } private: TagId getSyncTag(std::uint64_t address, std::uint64_t size, TagId currentTag); Device *mDevice; int mVmIm; std::atomic mNextTagId{TagId{2}}; vk::Buffer mGdsBuffer; static constexpr auto kMemoryTableSize = 0x10000; static constexpr auto kMemoryTableCount = 64; static constexpr auto kDescriptorSetCount = 128; static constexpr auto kTagStorageCount = 128; rx::ConcurrentBitPool mMemoryTablePool; vk::Buffer mMemoryTableBuffer; std::array mGraphicsDescriptorSetLayouts{}; VkDescriptorSetLayout mComputeDescriptorSetLayout{}; VkPipelineLayout mGraphicsPipelineLayout{}; VkPipelineLayout mComputePipelineLayout{}; VkDescriptorPool mDescriptorPool{}; rx::ConcurrentBitPool mGraphicsDescriptorSetPool; rx::ConcurrentBitPool mComputeDescriptorSetPool; rx::ConcurrentBitPool mTagStoragePool; std::array mGraphicsDescriptorSets[kDescriptorSetCount]; VkDescriptorSet mComputeDescriptorSets[kDescriptorSetCount]; TagStorage mTagStorages[kTagStorageCount]; std::map mSamplers; std::shared_ptr mFrameBuffers[10]; rx::MemoryTableWithPayload> mBuffers; rx::MemoryTableWithPayload> mIndexBuffers; rx::MemoryTableWithPayload> mImages; rx::MemoryTableWithPayload> mShaders; rx::MemoryTableWithPayload> mSyncTable; }; } // namespace amdgpu