mirror of
https://github.com/RPCSX/rpcsx.git
synced 2026-01-05 08:10:10 +01:00
gpu rewrite initial commit
This commit is contained in:
parent
0d4ed51cd9
commit
4cf808facd
|
|
@ -3,7 +3,8 @@ project(rpcsx)
|
|||
|
||||
set(CMAKE_CXX_EXTENSIONS off)
|
||||
set(CMAKE_CXX_STANDARD 23)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED on)
|
||||
set(CMAKE_BUILD_RPATH_USE_ORIGIN on)
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
||||
|
||||
|
|
@ -41,7 +42,7 @@ function(add_precompiled_vulkan_spirv target)
|
|||
|
||||
add_custom_command(
|
||||
OUTPUT ${outputpath}
|
||||
COMMAND $<TARGET_FILE:glslang::glslang-standalone> -V --target-env vulkan1.3 --vn "${varname}" -o "${outputpath}" "${CMAKE_CURRENT_SOURCE_DIR}/${input}"
|
||||
COMMAND $<TARGET_FILE:glslang::glslang-standalone> -V --target-env vulkan1.2 --vn "${varname}" -o "${outputpath}" "${CMAKE_CURRENT_SOURCE_DIR}/${input}"
|
||||
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${input}" glslang::glslang-standalone
|
||||
COMMENT "Generating ${outputname}..."
|
||||
)
|
||||
|
|
@ -65,6 +66,7 @@ add_subdirectory(tools)
|
|||
add_subdirectory(orbis-kernel)
|
||||
add_subdirectory(rpcsx-os)
|
||||
add_subdirectory(rpcsx-gpu)
|
||||
add_subdirectory(rpcsx-gpu2)
|
||||
add_subdirectory(hw/amdgpu)
|
||||
add_subdirectory(rx)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#version 450
|
||||
|
||||
layout (triangles) in;
|
||||
layout (triangles, invocations = 1) in;
|
||||
layout (triangle_strip, max_vertices = 4) out;
|
||||
|
||||
void main(void)
|
||||
|
|
|
|||
36
rpcsx-gpu2/CMakeLists.txt
Normal file
36
rpcsx-gpu2/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
find_package(glfw3 3.3 REQUIRED)
|
||||
|
||||
add_precompiled_vulkan_spirv(rpcsx-gpu-shaders
|
||||
shaders/fill_red.frag.glsl
|
||||
shaders/flip.frag.glsl
|
||||
shaders/flip.vert.glsl
|
||||
shaders/rect_list.geom.glsl
|
||||
)
|
||||
|
||||
add_executable(rpcsx-gpu2
|
||||
Cache.cpp
|
||||
main.cpp
|
||||
Device.cpp
|
||||
Pipe.cpp
|
||||
Registers.cpp
|
||||
Renderer.cpp
|
||||
)
|
||||
|
||||
target_link_libraries(rpcsx-gpu2
|
||||
PUBLIC
|
||||
rpcsx-gpu-shaders
|
||||
amdgpu::bridge
|
||||
rx
|
||||
gcn-shader
|
||||
glfw
|
||||
amdgpu::tiler::cpu
|
||||
amdgpu::tiler::vulkan
|
||||
rdna-semantic-spirv
|
||||
gnm::vulkan
|
||||
gnm
|
||||
)
|
||||
|
||||
install(TARGETS rpcsx-gpu2 RUNTIME DESTINATION bin)
|
||||
set_target_properties(rpcsx-gpu2 PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||
|
||||
add_subdirectory(lib)
|
||||
1109
rpcsx-gpu2/Cache.cpp
Normal file
1109
rpcsx-gpu2/Cache.cpp
Normal file
File diff suppressed because it is too large
Load diff
333
rpcsx-gpu2/Cache.hpp
Normal file
333
rpcsx-gpu2/Cache.hpp
Normal file
|
|
@ -0,0 +1,333 @@
|
|||
#pragma once
|
||||
|
||||
#include "Pipe.hpp"
|
||||
#include "amdgpu/tiler.hpp"
|
||||
#include "gnm/constants.hpp"
|
||||
#include "rx/die.hpp"
|
||||
#include "shader/Access.hpp"
|
||||
#include "shader/GcnConverter.hpp"
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <rx/MemoryTable.hpp>
|
||||
#include <shader/gcn.hpp>
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
namespace amdgpu {
|
||||
using Access = shader::Access;
|
||||
|
||||
struct ShaderKey {
|
||||
std::uint64_t address;
|
||||
shader::gcn::Stage stage;
|
||||
shader::gcn::Environment env;
|
||||
};
|
||||
|
||||
struct ImageKey {
|
||||
std::uint64_t address;
|
||||
gnm::TextureType type;
|
||||
gnm::DataFormat dfmt;
|
||||
gnm::NumericFormat nfmt;
|
||||
TileMode tileMode = {};
|
||||
VkOffset3D offset = {};
|
||||
VkExtent3D extent = {1, 1, 1};
|
||||
std::uint32_t pitch = 1;
|
||||
unsigned baseMipLevel = 0;
|
||||
unsigned mipCount = 1;
|
||||
unsigned baseArrayLayer = 0;
|
||||
unsigned arrayLayerCount = 1;
|
||||
bool pow2pad = false;
|
||||
|
||||
static ImageKey createFrom(const gnm::TBuffer &tbuffer);
|
||||
};
|
||||
|
||||
struct ImageViewKey : ImageKey {
|
||||
gnm::Swizzle R = gnm::Swizzle::R;
|
||||
gnm::Swizzle G = gnm::Swizzle::G;
|
||||
gnm::Swizzle B = gnm::Swizzle::B;
|
||||
gnm::Swizzle A = gnm::Swizzle::A;
|
||||
|
||||
static ImageViewKey createFrom(const gnm::TBuffer &tbuffer);
|
||||
};
|
||||
|
||||
struct SamplerKey {
|
||||
VkFilter magFilter;
|
||||
VkFilter minFilter;
|
||||
VkSamplerMipmapMode mipmapMode;
|
||||
VkSamplerAddressMode addressModeU;
|
||||
VkSamplerAddressMode addressModeV;
|
||||
VkSamplerAddressMode addressModeW;
|
||||
float mipLodBias;
|
||||
float maxAnisotropy;
|
||||
VkCompareOp compareOp;
|
||||
float minLod;
|
||||
float maxLod;
|
||||
VkBorderColor borderColor;
|
||||
bool anisotropyEnable;
|
||||
bool compareEnable;
|
||||
bool unnormalizedCoordinates;
|
||||
|
||||
static SamplerKey createFrom(const gnm::SSampler &sampler);
|
||||
|
||||
auto operator<=>(const SamplerKey &other) const = default;
|
||||
};
|
||||
|
||||
struct Cache {
|
||||
static constexpr std::array kGraphicsStages = {
|
||||
VK_SHADER_STAGE_VERTEX_BIT,
|
||||
VK_SHADER_STAGE_GEOMETRY_BIT,
|
||||
VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||
VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
|
||||
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
|
||||
};
|
||||
|
||||
static constexpr std::array kDescriptorBindings = {
|
||||
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
VK_DESCRIPTOR_TYPE_SAMPLER,
|
||||
VkDescriptorType(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE + 1 * 1000),
|
||||
VkDescriptorType(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE + 2 * 1000),
|
||||
VkDescriptorType(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE + 3 * 1000),
|
||||
VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
};
|
||||
|
||||
static constexpr int getStageIndex(VkShaderStageFlagBits stage) {
|
||||
auto it = std::find(kGraphicsStages.begin(), kGraphicsStages.end(), stage);
|
||||
|
||||
if (it == kGraphicsStages.end()) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return it - kGraphicsStages.begin();
|
||||
}
|
||||
|
||||
static constexpr int getDescriptorBinding(VkDescriptorType type, int dim = 0) {
|
||||
auto it =
|
||||
std::find(kDescriptorBindings.begin(), kDescriptorBindings.end(), type + dim * 1000);
|
||||
|
||||
if (it == kDescriptorBindings.end()) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return it - kDescriptorBindings.begin();
|
||||
}
|
||||
|
||||
enum class TagId : std::uint64_t {};
|
||||
struct Entry;
|
||||
|
||||
int vmId = -1;
|
||||
|
||||
struct Shader {
|
||||
VkShaderEXT handle;
|
||||
shader::gcn::ShaderInfo *info;
|
||||
VkShaderStageFlagBits stage;
|
||||
};
|
||||
|
||||
struct Sampler {
|
||||
VkSampler handle;
|
||||
};
|
||||
|
||||
struct Buffer {
|
||||
VkBuffer handle;
|
||||
std::uint64_t offset;
|
||||
std::uint64_t deviceAddress;
|
||||
TagId tagId;
|
||||
std::byte *data;
|
||||
};
|
||||
|
||||
struct IndexBuffer {
|
||||
VkBuffer handle;
|
||||
std::uint64_t offset;
|
||||
std::uint32_t indexCount;
|
||||
gnm::PrimitiveType primType;
|
||||
gnm::IndexType indexType;
|
||||
};
|
||||
|
||||
struct Image {
|
||||
VkImage handle;
|
||||
};
|
||||
|
||||
struct ImageView {
|
||||
VkImageView handle;
|
||||
VkImage imageHandle;
|
||||
};
|
||||
|
||||
class Tag {
|
||||
Cache *mParent = nullptr;
|
||||
Scheduler *mScheduler = nullptr;
|
||||
TagId mTagId{};
|
||||
|
||||
std::vector<std::shared_ptr<Entry>> mAcquiredResources;
|
||||
std::vector<std::array<VkDescriptorSet, kGraphicsStages.size()>>
|
||||
mGraphicsDescriptorSets;
|
||||
|
||||
std::vector<VkDescriptorSet> mComputeDescriptorSets;
|
||||
|
||||
public:
|
||||
Tag() = default;
|
||||
Tag(Cache *parent, Scheduler &scheduler, TagId id)
|
||||
: mParent(parent), mScheduler(&scheduler), mTagId(id) {}
|
||||
Tag(const Tag &) = delete;
|
||||
Tag(Tag &&other) { other.swap(*this); }
|
||||
Tag &operator=(Tag &&other) {
|
||||
other.swap(*this);
|
||||
return *this;
|
||||
}
|
||||
|
||||
void submitAndWait() {
|
||||
mScheduler->submit();
|
||||
mScheduler->wait();
|
||||
}
|
||||
|
||||
~Tag() { release(); }
|
||||
|
||||
TagId getReadId() const { return TagId{std::uint64_t(mTagId) - 1}; }
|
||||
TagId getWriteId() const { return mTagId; }
|
||||
|
||||
void swap(Tag &other) {
|
||||
std::swap(mParent, other.mParent);
|
||||
std::swap(mScheduler, other.mScheduler);
|
||||
std::swap(mTagId, other.mTagId);
|
||||
std::swap(mAcquiredResources, other.mAcquiredResources);
|
||||
std::swap(mGraphicsDescriptorSets, other.mGraphicsDescriptorSets);
|
||||
std::swap(mComputeDescriptorSets, other.mComputeDescriptorSets);
|
||||
}
|
||||
|
||||
Cache *getCache() const { return mParent; }
|
||||
Device *getDevice() const { return mParent->mDevice; }
|
||||
int getVmId() const { return mParent->mVmIm; }
|
||||
|
||||
Shader getShader(const ShaderKey &key,
|
||||
const ShaderKey *dependedKey = nullptr);
|
||||
Sampler getSampler(const SamplerKey &key);
|
||||
Buffer getBuffer(std::uint64_t address, std::uint64_t size, Access access);
|
||||
Buffer getInternalBuffer(std::uint64_t size);
|
||||
IndexBuffer getIndexBuffer(std::uint64_t address, std::uint32_t indexCount,
|
||||
gnm::PrimitiveType primType,
|
||||
gnm::IndexType indexType);
|
||||
Image getImage(const ImageKey &key, Access access);
|
||||
ImageView getImageView(const ImageViewKey &key, Access access);
|
||||
void readMemory(void *target, std::uint64_t address, std::uint64_t size);
|
||||
void writeMemory(const void *source, std::uint64_t address,
|
||||
std::uint64_t size);
|
||||
int compareMemory(const void *source, std::uint64_t address,
|
||||
std::uint64_t size);
|
||||
void release();
|
||||
|
||||
VkPipelineLayout getGraphicsPipelineLayout() const {
|
||||
return getCache()->getGraphicsPipelineLayout();
|
||||
}
|
||||
|
||||
VkPipelineLayout getComputePipelineLayout() const {
|
||||
return getCache()->getComputePipelineLayout();
|
||||
}
|
||||
|
||||
std::array<VkDescriptorSet, kGraphicsStages.size()>
|
||||
createGraphicsDescriptorSets() {
|
||||
auto result = getCache()->createGraphicsDescriptorSets();
|
||||
mGraphicsDescriptorSets.push_back(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
VkDescriptorSet createComputeDescriptorSet() {
|
||||
auto result = getCache()->createComputeDescriptorSet();
|
||||
mComputeDescriptorSets.push_back(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
std::shared_ptr<Entry> findShader(const ShaderKey &key,
|
||||
const ShaderKey *dependedKey = nullptr);
|
||||
};
|
||||
|
||||
Cache(Device *device, int vmId);
|
||||
~Cache();
|
||||
Tag createTag(Scheduler &scheduler);
|
||||
|
||||
vk::Buffer &getMemoryTableBuffer() { return mMemoryTableBuffer; }
|
||||
vk::Buffer &getGdsBuffer() { return mGdsBuffer; }
|
||||
|
||||
void addFrameBuffer(Scheduler &scheduler, int index, std::uint64_t address,
|
||||
std::uint32_t width, std::uint32_t height, int format,
|
||||
TileMode tileMode);
|
||||
void removeFrameBuffer(Scheduler &scheduler, int index);
|
||||
VkImage getFrameBuffer(Scheduler &scheduler, int index);
|
||||
void invalidate(Scheduler &scheduler, std::uint64_t address,
|
||||
std::uint64_t size);
|
||||
|
||||
void invalidate(Scheduler &scheduler) {
|
||||
invalidate(scheduler, 0, ~static_cast<std::uint64_t>(0));
|
||||
}
|
||||
|
||||
void flush(Scheduler &scheduler, std::uint64_t address, std::uint64_t size);
|
||||
void flush(Scheduler &scheduler) {
|
||||
flush(scheduler, 0, ~static_cast<std::uint64_t>(0));
|
||||
}
|
||||
|
||||
const std::array<VkDescriptorSetLayout, kGraphicsStages.size()> &
|
||||
getGraphicsDescriptorSetLayouts() const {
|
||||
return mGraphicsDescriptorSetLayouts;
|
||||
}
|
||||
|
||||
VkDescriptorSetLayout
|
||||
getGraphicsDescriptorSetLayout(VkShaderStageFlagBits stage) const {
|
||||
int index = getStageIndex(stage);
|
||||
rx::dieIf(index < 0, "getGraphicsDescriptorSetLayout: unexpected stage");
|
||||
return mGraphicsDescriptorSetLayouts[index];
|
||||
}
|
||||
|
||||
VkDescriptorSetLayout getComputeDescriptorSetLayout() const {
|
||||
return mComputeDescriptorSetLayout;
|
||||
}
|
||||
VkPipelineLayout getGraphicsPipelineLayout() const {
|
||||
return mGraphicsPipelineLayout;
|
||||
}
|
||||
|
||||
VkPipelineLayout getComputePipelineLayout() const {
|
||||
return mComputePipelineLayout;
|
||||
}
|
||||
|
||||
std::array<VkDescriptorSet, kGraphicsStages.size()>
|
||||
createGraphicsDescriptorSets();
|
||||
VkDescriptorSet createComputeDescriptorSet();
|
||||
|
||||
void destroyGraphicsDescriptorSets(
|
||||
const std::array<VkDescriptorSet, kGraphicsStages.size()> &set) {
|
||||
std::lock_guard lock(mDescriptorMtx);
|
||||
mGraphicsDescriptorSets.push_back(set);
|
||||
}
|
||||
|
||||
void destroyComputeDescriptorSet(VkDescriptorSet set) {
|
||||
std::lock_guard lock(mDescriptorMtx);
|
||||
mComputeDescriptorSets.push_back(set);
|
||||
}
|
||||
|
||||
private:
|
||||
TagId getSyncTag(std::uint64_t address, std::uint64_t size, TagId currentTag);
|
||||
|
||||
Device *mDevice;
|
||||
int mVmIm;
|
||||
TagId mNextTagId{2};
|
||||
vk::Buffer mMemoryTableBuffer;
|
||||
vk::Buffer mGdsBuffer;
|
||||
|
||||
std::mutex mDescriptorMtx;
|
||||
std::array<VkDescriptorSetLayout, kGraphicsStages.size()>
|
||||
mGraphicsDescriptorSetLayouts{};
|
||||
VkDescriptorSetLayout mComputeDescriptorSetLayout{};
|
||||
VkPipelineLayout mGraphicsPipelineLayout{};
|
||||
VkPipelineLayout mComputePipelineLayout{};
|
||||
VkDescriptorPool mGraphicsDescriptorPool{};
|
||||
VkDescriptorPool mComputeDescriptorPool{};
|
||||
std::vector<std::array<VkDescriptorSet, kGraphicsStages.size()>>
|
||||
mGraphicsDescriptorSets;
|
||||
std::vector<VkDescriptorSet> mComputeDescriptorSets;
|
||||
std::map<SamplerKey, VkSampler> mSamplers;
|
||||
|
||||
std::shared_ptr<Entry> mFrameBuffers[10];
|
||||
|
||||
rx::MemoryTableWithPayload<std::shared_ptr<Entry>> mBuffers;
|
||||
rx::MemoryTableWithPayload<std::shared_ptr<Entry>> mIndexBuffers;
|
||||
rx::MemoryTableWithPayload<std::shared_ptr<Entry>> mImages;
|
||||
rx::MemoryTableWithPayload<std::shared_ptr<Entry>> mShaders;
|
||||
|
||||
rx::MemoryTableWithPayload<std::shared_ptr<Entry>> mSyncTable;
|
||||
};
|
||||
} // namespace amdgpu
|
||||
508
rpcsx-gpu2/Device.cpp
Normal file
508
rpcsx-gpu2/Device.cpp
Normal file
|
|
@ -0,0 +1,508 @@
|
|||
#include "Device.hpp"
|
||||
#include "Renderer.hpp"
|
||||
#include "amdgpu/tiler.hpp"
|
||||
#include "gnm/constants.hpp"
|
||||
#include "gnm/pm4.hpp"
|
||||
#include "rx/bits.hpp"
|
||||
#include "rx/die.hpp"
|
||||
#include "rx/mem.hpp"
|
||||
#include "shader/spv.hpp"
|
||||
#include "shaders/rdna-semantic-spirv.hpp"
|
||||
#include "vk.hpp"
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
using namespace amdgpu;
|
||||
|
||||
Device::Device() {
|
||||
if (!shader::spv::validate(g_rdna_semantic_spirv)) {
|
||||
shader::spv::dump(g_rdna_semantic_spirv, true);
|
||||
rx::die("builtin semantic validation failed");
|
||||
}
|
||||
|
||||
if (auto sem = shader::spv::deserialize(
|
||||
shaderSemanticContext, g_rdna_semantic_spirv,
|
||||
shaderSemanticContext.getUnknownLocation())) {
|
||||
auto shaderSemantic = *sem;
|
||||
shader::gcn::canonicalizeSemantic(shaderSemanticContext, shaderSemantic);
|
||||
shader::gcn::collectSemanticModuleInfo(gcnSemanticModuleInfo,
|
||||
shaderSemantic);
|
||||
gcnSemantic = shader::gcn::collectSemanticInfo(gcnSemanticModuleInfo);
|
||||
} else {
|
||||
rx::die("failed to deserialize builtin semantics\n");
|
||||
}
|
||||
|
||||
for (int index = 0; auto &cache : caches) {
|
||||
cache.vmId = index++;
|
||||
}
|
||||
|
||||
for (auto &pipe : graphicsPipes) {
|
||||
pipe.device = this;
|
||||
}
|
||||
|
||||
// for (auto &pipe : computePipes) {
|
||||
// pipe.device = this;
|
||||
// }
|
||||
}
|
||||
|
||||
Device::~Device() {
|
||||
for (auto fd : dmemFd) {
|
||||
if (fd >= 0) {
|
||||
::close(fd);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &[pid, info] : processInfo) {
|
||||
if (info.vmFd >= 0) {
|
||||
::close(info.vmFd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Device::mapProcess(std::int64_t pid, int vmId, const char *shmName) {
|
||||
auto &process = processInfo[pid];
|
||||
process.vmId = vmId;
|
||||
|
||||
auto memory = amdgpu::RemoteMemory{vmId};
|
||||
|
||||
std::string pidVmName = shmName;
|
||||
pidVmName += '-';
|
||||
pidVmName += std::to_string(pid);
|
||||
int memoryFd = ::shm_open(pidVmName.c_str(), O_RDWR, S_IRUSR | S_IWUSR);
|
||||
process.vmFd = memoryFd;
|
||||
|
||||
if (memoryFd < 0) {
|
||||
std::printf("failed to process %x shared memory\n", (int)pid);
|
||||
std::abort();
|
||||
}
|
||||
|
||||
for (auto [startAddress, endAddress, slot] : process.vmTable) {
|
||||
auto gpuProt = slot.prot >> 4;
|
||||
if (gpuProt == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto devOffset = slot.offset + startAddress - slot.baseAddress;
|
||||
int mapFd = memoryFd;
|
||||
|
||||
if (slot.memoryType >= 0) {
|
||||
mapFd = dmemFd[slot.memoryType];
|
||||
}
|
||||
|
||||
auto mmapResult =
|
||||
::mmap(memory.getPointer(startAddress), endAddress - startAddress,
|
||||
gpuProt, MAP_FIXED | MAP_SHARED, mapFd, devOffset);
|
||||
|
||||
if (mmapResult == MAP_FAILED) {
|
||||
std::printf("failed to map process %x memory, address %lx-%lx, type %x\n",
|
||||
(int)pid, startAddress, endAddress, slot.memoryType);
|
||||
std::abort();
|
||||
}
|
||||
|
||||
handleProtectChange(vmId, startAddress, endAddress - startAddress,
|
||||
slot.prot);
|
||||
}
|
||||
}
|
||||
|
||||
void Device::unmapProcess(std::int64_t pid) {
|
||||
auto &process = processInfo[pid];
|
||||
auto startAddress = static_cast<std::uint64_t>(process.vmId) << 40;
|
||||
auto size = static_cast<std::uint64_t>(1) << 40;
|
||||
rx::mem::reserve(reinterpret_cast<void *>(startAddress), size);
|
||||
|
||||
::close(process.vmFd);
|
||||
process.vmFd = -1;
|
||||
process.vmId = -1;
|
||||
}
|
||||
|
||||
void Device::protectMemory(int pid, std::uint64_t address, std::uint64_t size,
|
||||
int prot) {
|
||||
auto &process = processInfo[pid];
|
||||
|
||||
auto vmSlotIt = process.vmTable.queryArea(address);
|
||||
if (vmSlotIt == process.vmTable.end()) {
|
||||
std::abort();
|
||||
}
|
||||
|
||||
auto vmSlot = (*vmSlotIt).payload;
|
||||
|
||||
process.vmTable.map(address, address + size,
|
||||
VmMapSlot{
|
||||
.memoryType = vmSlot.memoryType,
|
||||
.prot = static_cast<int>(prot),
|
||||
.offset = vmSlot.offset,
|
||||
.baseAddress = vmSlot.baseAddress,
|
||||
});
|
||||
|
||||
if (process.vmId >= 0) {
|
||||
auto memory = amdgpu::RemoteMemory{process.vmId};
|
||||
rx::mem::protect(memory.getPointer(address), size, prot >> 4);
|
||||
handleProtectChange(process.vmId, address, size, prot);
|
||||
}
|
||||
}
|
||||
|
||||
void Device::onCommandBuffer(std::int64_t pid, int cmdHeader,
|
||||
std::uint64_t address, std::uint64_t size) {
|
||||
auto &process = processInfo[pid];
|
||||
if (process.vmId < 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto memory = RemoteMemory{process.vmId};
|
||||
|
||||
auto op = rx::getBits(cmdHeader, 15, 8);
|
||||
|
||||
if (op == gnm::IT_INDIRECT_BUFFER_CNST) {
|
||||
graphicsPipes[0].setCeQueue(Queue::createFromRange(
|
||||
process.vmId, memory.getPointer<std::uint32_t>(address),
|
||||
size / sizeof(std::uint32_t)));
|
||||
} else if (op == gnm::IT_INDIRECT_BUFFER) {
|
||||
graphicsPipes[0].setDeQueue(
|
||||
Queue::createFromRange(process.vmId,
|
||||
memory.getPointer<std::uint32_t>(address),
|
||||
size / sizeof(std::uint32_t)),
|
||||
1);
|
||||
} else {
|
||||
rx::die("unimplemented command buffer %x", cmdHeader);
|
||||
}
|
||||
}
|
||||
|
||||
bool Device::processPipes() {
|
||||
bool allProcessed = true;
|
||||
|
||||
// for (auto &pipe : computePipes) {
|
||||
// if (!pipe.processAllRings()) {
|
||||
// allProcessed = false;
|
||||
// }
|
||||
// }
|
||||
|
||||
for (auto &pipe : graphicsPipes) {
|
||||
if (!pipe.processAllRings()) {
|
||||
allProcessed = false;
|
||||
}
|
||||
}
|
||||
|
||||
return allProcessed;
|
||||
}
|
||||
|
||||
static void
|
||||
transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,
|
||||
VkImageLayout oldLayout, VkImageLayout newLayout,
|
||||
const VkImageSubresourceRange &subresourceRange) {
|
||||
VkImageMemoryBarrier barrier{};
|
||||
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
|
||||
barrier.oldLayout = oldLayout;
|
||||
barrier.newLayout = newLayout;
|
||||
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.image = image;
|
||||
barrier.subresourceRange = subresourceRange;
|
||||
|
||||
auto layoutToStageAccess = [](VkImageLayout layout)
|
||||
-> std::pair<VkPipelineStageFlags, VkAccessFlags> {
|
||||
switch (layout) {
|
||||
case VK_IMAGE_LAYOUT_UNDEFINED:
|
||||
case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
|
||||
case VK_IMAGE_LAYOUT_GENERAL:
|
||||
return {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0};
|
||||
|
||||
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT};
|
||||
|
||||
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT};
|
||||
|
||||
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT};
|
||||
|
||||
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT};
|
||||
|
||||
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT};
|
||||
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
};
|
||||
|
||||
auto [sourceStage, sourceAccess] = layoutToStageAccess(oldLayout);
|
||||
auto [destinationStage, destinationAccess] = layoutToStageAccess(newLayout);
|
||||
|
||||
barrier.srcAccessMask = sourceAccess;
|
||||
barrier.dstAccessMask = destinationAccess;
|
||||
|
||||
vkCmdPipelineBarrier(commandBuffer, sourceStage, destinationStage, 0, 0,
|
||||
nullptr, 0, nullptr, 1, &barrier);
|
||||
}
|
||||
|
||||
bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
|
||||
VkCommandBuffer commandBuffer, VkImage swapchainImage,
|
||||
VkImageView swapchainImageView, VkFence fence) {
|
||||
auto &pipe = graphicsPipes[0];
|
||||
auto &scheduler = pipe.scheduler;
|
||||
auto &process = processInfo[pid];
|
||||
if (process.vmId < 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto &buffer = process.buffers[bufferIndex];
|
||||
auto &bufferAttr = process.bufferAttributes[buffer.attrId];
|
||||
|
||||
gnm::DataFormat dfmt;
|
||||
gnm::NumericFormat nfmt;
|
||||
CbCompSwap compSwap;
|
||||
switch (bufferAttr.pixelFormat) {
|
||||
case 0x80000000:
|
||||
// bgra
|
||||
dfmt = gnm::kDataFormat8_8_8_8;
|
||||
nfmt = gnm::kNumericFormatSNormNoZero;
|
||||
compSwap = CbCompSwap::Alt;
|
||||
break;
|
||||
|
||||
case 0x80002200:
|
||||
// rgba
|
||||
dfmt = gnm::kDataFormat8_8_8_8;
|
||||
nfmt = gnm::kNumericFormatSNormNoZero;
|
||||
compSwap = CbCompSwap::Std;
|
||||
break;
|
||||
|
||||
case 0x88060000:
|
||||
// bgra
|
||||
dfmt = gnm::kDataFormat2_10_10_10;
|
||||
nfmt = gnm::kNumericFormatSNormNoZero;
|
||||
compSwap = CbCompSwap::Alt;
|
||||
break;
|
||||
|
||||
default:
|
||||
rx::die("unimplemented color buffer format %x", bufferAttr.pixelFormat);
|
||||
}
|
||||
|
||||
// std::printf("displaying buffer %lx\n", buffer.address);
|
||||
VkCommandBufferBeginInfo beginInfo{};
|
||||
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
||||
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
|
||||
|
||||
vkBeginCommandBuffer(commandBuffer, &beginInfo);
|
||||
|
||||
auto cacheTag = getCacheTag(process.vmId, scheduler);
|
||||
|
||||
if (true) {
|
||||
transitionImageLayout(commandBuffer, swapchainImage,
|
||||
VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
|
||||
{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.levelCount = 1,
|
||||
.layerCount = 1,
|
||||
});
|
||||
|
||||
amdgpu::flip(cacheTag, commandBuffer, vk::context->swapchainExtent,
|
||||
buffer.address, swapchainImageView,
|
||||
{bufferAttr.width, bufferAttr.height}, compSwap,
|
||||
getDefaultTileModes()[13], dfmt, nfmt);
|
||||
|
||||
transitionImageLayout(commandBuffer, swapchainImage,
|
||||
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
|
||||
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
|
||||
{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.levelCount = 1,
|
||||
.layerCount = 1,
|
||||
});
|
||||
} else {
|
||||
ImageKey frameKey{
|
||||
.address = buffer.address,
|
||||
.type = gnm::TextureType::Dim2D,
|
||||
.dfmt = dfmt,
|
||||
.nfmt = nfmt,
|
||||
.tileMode = getDefaultTileModes()[13],
|
||||
.extent =
|
||||
{
|
||||
.width = bufferAttr.width,
|
||||
.height = bufferAttr.height,
|
||||
.depth = 1,
|
||||
},
|
||||
.pitch = bufferAttr.width,
|
||||
.mipCount = 1,
|
||||
.arrayLayerCount = 1,
|
||||
};
|
||||
|
||||
auto image = cacheTag.getImage(frameKey, Access::Read);
|
||||
|
||||
scheduler.submit();
|
||||
scheduler.wait();
|
||||
|
||||
transitionImageLayout(commandBuffer, swapchainImage,
|
||||
VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
|
||||
{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.levelCount = 1,
|
||||
.layerCount = 1,
|
||||
});
|
||||
|
||||
VkImageBlit region{
|
||||
.srcSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1},
|
||||
.srcOffsets = {{},
|
||||
{static_cast<int32_t>(bufferAttr.width),
|
||||
static_cast<int32_t>(bufferAttr.height), 1}},
|
||||
.dstSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1},
|
||||
.dstOffsets =
|
||||
{{},
|
||||
{static_cast<int32_t>(vk::context->swapchainExtent.width),
|
||||
static_cast<int32_t>(vk::context->swapchainExtent.height), 1}},
|
||||
};
|
||||
|
||||
vkCmdBlitImage(commandBuffer, image.handle, VK_IMAGE_LAYOUT_GENERAL,
|
||||
swapchainImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
|
||||
®ion, VK_FILTER_LINEAR);
|
||||
|
||||
transitionImageLayout(commandBuffer, swapchainImage,
|
||||
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
|
||||
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
|
||||
{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.levelCount = 1,
|
||||
.layerCount = 1,
|
||||
});
|
||||
}
|
||||
|
||||
auto submitCompleteTask = scheduler.createExternalSubmit();
|
||||
|
||||
{
|
||||
vkEndCommandBuffer(commandBuffer);
|
||||
|
||||
VkSemaphoreSubmitInfo signalSemSubmitInfos[] = {
|
||||
{
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
|
||||
.semaphore = vk::context->renderCompleteSemaphore,
|
||||
.value = 1,
|
||||
.stageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
|
||||
},
|
||||
{
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
|
||||
.semaphore = scheduler.getSemaphoreHandle(),
|
||||
.value = submitCompleteTask,
|
||||
.stageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
|
||||
},
|
||||
};
|
||||
|
||||
VkSemaphoreSubmitInfo waitSemSubmitInfos[] = {
|
||||
{
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
|
||||
.semaphore = vk::context->presentCompleteSemaphore,
|
||||
.value = 1,
|
||||
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||
},
|
||||
{
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
|
||||
.semaphore = scheduler.getSemaphoreHandle(),
|
||||
.value = submitCompleteTask - 1,
|
||||
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||
},
|
||||
};
|
||||
|
||||
VkCommandBufferSubmitInfo cmdBufferSubmitInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
|
||||
.commandBuffer = commandBuffer,
|
||||
};
|
||||
|
||||
VkSubmitInfo2 submitInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
|
||||
.waitSemaphoreInfoCount = 1,
|
||||
.pWaitSemaphoreInfos = waitSemSubmitInfos,
|
||||
.commandBufferInfoCount = 1,
|
||||
.pCommandBufferInfos = &cmdBufferSubmitInfo,
|
||||
.signalSemaphoreInfoCount = 2,
|
||||
.pSignalSemaphoreInfos = signalSemSubmitInfos,
|
||||
};
|
||||
|
||||
vkQueueSubmit2(vk::context->presentQueue, 1, &submitInfo, fence);
|
||||
// vkQueueWaitIdle(queue);
|
||||
}
|
||||
|
||||
scheduler.then([=, this, cacheTag = std::move(cacheTag)] {
|
||||
bridge->flipBuffer[process.vmId] = bufferIndex;
|
||||
bridge->flipArg[process.vmId] = arg;
|
||||
bridge->flipCount[process.vmId] = bridge->flipCount[process.vmId] + 1;
|
||||
|
||||
auto mem = RemoteMemory{process.vmId};
|
||||
auto bufferInUse =
|
||||
mem.getPointer<std::uint64_t>(bridge->bufferInUseAddress[process.vmId]);
|
||||
if (bufferInUse != nullptr) {
|
||||
bufferInUse[bufferIndex] = 0;
|
||||
}
|
||||
});
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void Device::mapMemory(std::int64_t pid, std::uint64_t address,
|
||||
std::uint64_t size, int memoryType, int dmemIndex,
|
||||
int prot, std::int64_t offset) {
|
||||
auto &process = processInfo[pid];
|
||||
|
||||
process.vmTable.map(address, address + size,
|
||||
VmMapSlot{
|
||||
.memoryType = memoryType >= 0 ? dmemIndex : -1,
|
||||
.prot = prot,
|
||||
.offset = offset,
|
||||
.baseAddress = address,
|
||||
});
|
||||
|
||||
if (process.vmId < 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto memory = amdgpu::RemoteMemory{process.vmId};
|
||||
|
||||
int mapFd = process.vmFd;
|
||||
|
||||
if (memoryType >= 0) {
|
||||
mapFd = dmemFd[dmemIndex];
|
||||
}
|
||||
|
||||
auto mmapResult = ::mmap(memory.getPointer(address), size, prot >> 4,
|
||||
MAP_FIXED | MAP_SHARED, mapFd, offset);
|
||||
|
||||
if (mmapResult == MAP_FAILED) {
|
||||
rx::die("failed to map process %x memory, address %lx-%lx, type %x",
|
||||
(int)pid, address, address + size, memoryType);
|
||||
}
|
||||
|
||||
handleProtectChange(process.vmId, address, size, prot);
|
||||
}
|
||||
|
||||
void Device::registerBuffer(std::int64_t pid, bridge::CmdBuffer buffer) {
|
||||
auto &process = processInfo[pid];
|
||||
|
||||
if (buffer.attrId >= 10 || buffer.index >= 10) {
|
||||
rx::die("out of buffers %u, %u", buffer.attrId, buffer.index);
|
||||
}
|
||||
|
||||
process.buffers[buffer.index] = buffer;
|
||||
}
|
||||
|
||||
void Device::registerBufferAttribute(std::int64_t pid,
|
||||
bridge::CmdBufferAttribute attr) {
|
||||
auto &process = processInfo[pid];
|
||||
if (attr.attrId >= 10) {
|
||||
rx::die("out of buffer attributes %u", attr.attrId);
|
||||
}
|
||||
|
||||
process.bufferAttributes[attr.attrId] = attr;
|
||||
}
|
||||
|
||||
void Device::handleProtectChange(int vmId, std::uint64_t address,
|
||||
std::uint64_t size, int prot) {}
|
||||
91
rpcsx-gpu2/Device.hpp
Normal file
91
rpcsx-gpu2/Device.hpp
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
#pragma once
|
||||
#include "Cache.hpp"
|
||||
#include "Pipe.hpp"
|
||||
#include "amdgpu/bridge/bridge.hpp"
|
||||
#include "amdgpu/tiler_vulkan.hpp"
|
||||
#include "gnm/descriptors.hpp"
|
||||
#include "rx/MemoryTable.hpp"
|
||||
#include "shader/SemanticInfo.hpp"
|
||||
#include "shader/SpvConverter.hpp"
|
||||
#include "shader/gcn.hpp"
|
||||
#include <unordered_map>
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
namespace amdgpu {
|
||||
|
||||
struct VmMapSlot {
|
||||
int memoryType;
|
||||
int prot;
|
||||
std::int64_t offset;
|
||||
std::uint64_t baseAddress;
|
||||
|
||||
auto operator<=>(const VmMapSlot &) const = default;
|
||||
};
|
||||
|
||||
struct ProcessInfo {
|
||||
int vmId = -1;
|
||||
int vmFd = -1;
|
||||
amdgpu::bridge::CmdBufferAttribute bufferAttributes[10];
|
||||
amdgpu::bridge::CmdBuffer buffers[10];
|
||||
rx::MemoryTableWithPayload<VmMapSlot> vmTable;
|
||||
};
|
||||
|
||||
struct RemoteMemory {
|
||||
int vmId;
|
||||
|
||||
template <typename T = void> T *getPointer(std::uint64_t address) const {
|
||||
return address ? reinterpret_cast<T *>(
|
||||
static_cast<std::uint64_t>(vmId) << 40 | address)
|
||||
: nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
// Top-level GPU emulation object: owns the command pipes, per-process
// state, shader translation context and per-vm caches, and services
// requests arriving over the bridge shared-memory header.
struct Device {
  static constexpr auto kComputePipeCount = 8;
  static constexpr auto kGfxPipeCount = 2;

  // Shared GCN -> SPIR-V translation state used by all converted shaders.
  shader::SemanticInfo gcnSemantic;
  shader::spv::Context shaderSemanticContext;
  shader::gcn::SemanticModuleInfo gcnSemanticModuleInfo;
  // Shared-memory header used to exchange commands with the OS side.
  amdgpu::bridge::BridgeHeader *bridge;

  // Device-global (non per-pipe) Config register bank.
  Registers::Config config;

  GpuTiler tiler;

  GraphicsPipe graphicsPipes[kGfxPipeCount]{0, 1};
  // ComputePipe computePipes[kComputePipeCount]{0, 1, 2, 3, 4, 5, 6, 7};

  // One fd per direct-memory pool; -1 while unopened.
  int dmemFd[3] = {-1, -1, -1};
  // Keyed by guest pid.
  std::unordered_map<std::int64_t, ProcessInfo> processInfo;

  // One cache per vm slot, indexed by vmId.
  Cache caches[6]{
      {this, 0}, {this, 1}, {this, 2}, {this, 3}, {this, 4}, {this, 5},
  };

  Device();
  ~Device();

  Cache::Tag getCacheTag(int vmId, Scheduler &scheduler) {
    return caches[vmId].createTag(scheduler);
  }

  void mapProcess(std::int64_t pid, int vmId, const char *shmName);
  void unmapProcess(std::int64_t pid);
  void protectMemory(int pid, std::uint64_t address, std::uint64_t size,
                     int prot);
  void onCommandBuffer(std::int64_t pid, int cmdHeader, std::uint64_t address,
                       std::uint64_t size);
  // Presumably drives all pipes once; returns whether rings were drained —
  // see Device.cpp for the authoritative behavior.
  bool processPipes();
  bool flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
            VkCommandBuffer commandBuffer, VkImage swapchainImage,
            VkImageView swapchainImageView, VkFence fence);
  void mapMemory(std::int64_t pid, std::uint64_t address, std::uint64_t size,
                 int memoryType, int dmemIndex, int prot, std::int64_t offset);
  void registerBuffer(std::int64_t pid, bridge::CmdBuffer buffer);
  void registerBufferAttribute(std::int64_t pid,
                               bridge::CmdBufferAttribute attr);
  // Currently a no-op stub (see Device.cpp).
  void handleProtectChange(int vmId, std::uint64_t address, std::uint64_t size,
                           int prot);
};
|
||||
} // namespace amdgpu
|
||||
987
rpcsx-gpu2/Pipe.cpp
Normal file
987
rpcsx-gpu2/Pipe.cpp
Normal file
|
|
@ -0,0 +1,987 @@
|
|||
#include "Pipe.hpp"
|
||||
#include "Device.hpp"
|
||||
#include "Registers.hpp"
|
||||
#include "Renderer.hpp"
|
||||
#include "gnm/mmio.hpp"
|
||||
#include "gnm/pm4.hpp"
|
||||
#include "vk.hpp"
|
||||
#include <cstdio>
|
||||
#include <rx/bits.hpp>
|
||||
#include <rx/die.hpp>
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
using namespace amdgpu;
|
||||
|
||||
// Builds the scheduler for a graphics pipe. Pipe 0 always runs on the
// present queue; later pipes prefer a graphics queue from a different
// queue family when one exists, falling back to the present queue.
static Scheduler createGfxScheduler(int index) {
  auto queue = vk::context->presentQueue;
  auto family = vk::context->presentQueueFamily;

  if (index != 0) {
    // pick the last graphics queue whose family differs from present
    for (auto [otherQueue, otherFamily] : vk::context->graphicsQueues) {
      if (family != otherFamily) {
        queue = otherQueue;
        family = otherFamily;
      }
    }
  }

  return Scheduler{queue, family};
}
|
||||
|
||||
// Picks a compute queue for the given pipe index, distributing pipes over
// the available compute queues round-robin.
static Scheduler createComputeScheduler(int index) {
  auto &queues = vk::context->computeQueues;
  auto [queue, family] = queues[index % queues.size()];
  return Scheduler{queue, family};
}
|
||||
|
||||
// Evaluates a PM4 compare function (WAIT_REG_MEM / COND_WRITE semantics)
// on a polled value against a reference, both restricted to `mask`.
// Function 0 always passes; unknown function codes never pass.
static bool compare(int cmpFn, std::uint32_t poll, std::uint32_t mask,
                    std::uint32_t ref) {
  const std::uint32_t lhs = poll & mask;
  const std::uint32_t rhs = ref & mask;

  switch (cmpFn) {
  case 0: // always
    return true;
  case 1: // less than
    return lhs < rhs;
  case 2: // less or equal
    return lhs <= rhs;
  case 3: // equal
    return lhs == rhs;
  case 4: // not equal
    return lhs != rhs;
  case 5: // greater or equal
    return lhs >= rhs;
  case 6: // greater
    return lhs > rhs;
  default:
    return false;
  }
}
|
||||
|
||||
ComputePipe::ComputePipe(int index) : scheduler(createComputeScheduler(index)) {
|
||||
for (auto &handler : commandHandlers) {
|
||||
handler = &ComputePipe::unknownPacket;
|
||||
}
|
||||
|
||||
commandHandlers[gnm::IT_NOP] = &ComputePipe::handleNop;
|
||||
}
|
||||
|
||||
bool ComputePipe::processAllRings() {
|
||||
bool allProcessed = true;
|
||||
|
||||
for (auto &ring : queues) {
|
||||
processRing(ring);
|
||||
|
||||
if (ring.rptr != ring.wptr) {
|
||||
allProcessed = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return allProcessed;
|
||||
}
|
||||
|
||||
// Drains one compute ring. Type-3 packets are dispatched through
// commandHandlers; type-2 packets are single-dword padding. A handler
// returning false leaves rptr on the current packet so that it is retried
// on the next call (wait-style packets).
void ComputePipe::processRing(Queue &queue) {
  while (queue.rptr != queue.wptr) {
    // wrap the read pointer at the end of the ring buffer
    if (queue.rptr >= queue.base + queue.size) {
      queue.rptr = queue.base;
    }

    auto header = *queue.rptr;
    auto type = rx::getBits(header, 31, 30);

    if (type == 3) {
      auto op = rx::getBits(header, 15, 8);
      // total packet size in dwords: header + (count field + 1) payload
      auto len = rx::getBits(header, 29, 16) + 2;

      // std::fprintf(stderr, "queue %d: %s\n", queue.indirectLevel,
      // gnm::pm4OpcodeToString(op));

      if (op == gnm::IT_COND_EXEC) {
        rx::die("unimplemented COND_EXEC");
      }

      auto handler = commandHandlers[op];
      if (!(this->*handler)(queue)) {
        // handler stalled; keep rptr in place and retry later
        return;
      }

      queue.rptr += len;
      continue;
    }

    if (type == 2) {
      // type-2: one-dword filler packet
      ++queue.rptr;
      continue;
    }

    rx::die("unexpected pm4 packet type %u", type);
  }
}
|
||||
|
||||
bool ComputePipe::unknownPacket(Queue &queue) {
|
||||
auto op = rx::getBits(queue.rptr[0], 15, 8);
|
||||
|
||||
rx::die("unimplemented compute pm4 packet: %s, queue %u\n",
|
||||
gnm::pm4OpcodeToString(op), queue.indirectLevel);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ComputePipe::handleNop(Queue &queue) { return true; }
|
||||
|
||||
GraphicsPipe::GraphicsPipe(int index) : scheduler(createGfxScheduler(index)) {
|
||||
for (auto &processorHandlers : commandHandlers) {
|
||||
for (auto &handler : processorHandlers) {
|
||||
handler = &GraphicsPipe::unknownPacket;
|
||||
}
|
||||
|
||||
processorHandlers[gnm::IT_NOP] = &GraphicsPipe::handleNop;
|
||||
}
|
||||
|
||||
auto &dataHandlers = commandHandlers[2];
|
||||
auto &deHandlers = commandHandlers[1];
|
||||
auto &ceHandlers = commandHandlers[0];
|
||||
|
||||
deHandlers[gnm::IT_SET_BASE] = &GraphicsPipe::setBase;
|
||||
deHandlers[gnm::IT_CLEAR_STATE] = &GraphicsPipe::clearState;
|
||||
|
||||
deHandlers[gnm::IT_INDEX_BUFFER_SIZE] = &GraphicsPipe::indexBufferSize;
|
||||
deHandlers[gnm::IT_DISPATCH_DIRECT] = &GraphicsPipe::dispatchDirect;
|
||||
deHandlers[gnm::IT_DISPATCH_INDIRECT] = &GraphicsPipe::dispatchIndirect;
|
||||
|
||||
// IT_ATOMIC_GDS
|
||||
// IT_OCCLUSION_QUERY
|
||||
deHandlers[gnm::IT_SET_PREDICATION] = &GraphicsPipe::setPredication;
|
||||
|
||||
// IT_REG_RMW
|
||||
|
||||
// IT_COND_EXEC
|
||||
// IT_PRED_EXEC
|
||||
|
||||
deHandlers[gnm::IT_DRAW_INDIRECT] = &GraphicsPipe::drawIndirect;
|
||||
deHandlers[gnm::IT_DRAW_INDEX_INDIRECT] = &GraphicsPipe::drawIndexIndirect;
|
||||
deHandlers[gnm::IT_INDEX_BASE] = &GraphicsPipe::indexBase;
|
||||
deHandlers[gnm::IT_DRAW_INDEX_2] = &GraphicsPipe::drawIndex2;
|
||||
|
||||
deHandlers[gnm::IT_CONTEXT_CONTROL] = &GraphicsPipe::contextControl;
|
||||
|
||||
deHandlers[gnm::IT_INDEX_TYPE] = &GraphicsPipe::indexType;
|
||||
// IT_DRAW_INDIRECT_MULTI
|
||||
deHandlers[gnm::IT_DRAW_INDEX_AUTO] = &GraphicsPipe::drawIndexAuto;
|
||||
deHandlers[gnm::IT_NUM_INSTANCES] = &GraphicsPipe::numInstances;
|
||||
deHandlers[gnm::IT_DRAW_INDEX_MULTI_AUTO] = &GraphicsPipe::drawIndexMultiAuto;
|
||||
|
||||
// IT_INDIRECT_BUFFER_CNST
|
||||
// IT_STRMOUT_BUFFER_UPDATE
|
||||
|
||||
deHandlers[gnm::IT_DRAW_INDEX_OFFSET_2] = &GraphicsPipe::drawIndexOffset2;
|
||||
deHandlers[gnm::IT_DRAW_PREAMBLE] = &GraphicsPipe::drawPreamble;
|
||||
|
||||
deHandlers[gnm::IT_WRITE_DATA] = &GraphicsPipe::writeData;
|
||||
deHandlers[gnm::IT_MEM_SEMAPHORE] = &GraphicsPipe::memSemaphore;
|
||||
// IT_COPY_DW
|
||||
deHandlers[gnm::IT_WAIT_REG_MEM] = &GraphicsPipe::waitRegMem;
|
||||
deHandlers[gnm::IT_INDIRECT_BUFFER] = &GraphicsPipe::indirectBuffer;
|
||||
// IT_COPY_DATA
|
||||
deHandlers[gnm::IT_PFP_SYNC_ME] = &GraphicsPipe::pfpSyncMe;
|
||||
// IT_SURFACE_SYNC
|
||||
deHandlers[gnm::IT_COND_WRITE] = &GraphicsPipe::condWrite;
|
||||
deHandlers[gnm::IT_EVENT_WRITE] = &GraphicsPipe::eventWrite;
|
||||
deHandlers[gnm::IT_EVENT_WRITE_EOP] = &GraphicsPipe::eventWriteEop;
|
||||
deHandlers[gnm::IT_EVENT_WRITE_EOS] = &GraphicsPipe::eventWriteEos;
|
||||
deHandlers[gnm::IT_RELEASE_MEM] = &GraphicsPipe::releaseMem;
|
||||
// IT_PREAMBLE_CNTL
|
||||
deHandlers[gnm::IT_DMA_DATA] = &GraphicsPipe::dmaData;
|
||||
deHandlers[gnm::IT_ACQUIRE_MEM] = &GraphicsPipe::acquireMem;
|
||||
// IT_REWIND
|
||||
|
||||
// IT_LOAD_UCONFIG_REG
|
||||
// IT_LOAD_SH_REG
|
||||
// IT_LOAD_CONFIG_REG
|
||||
// IT_LOAD_CONTEXT_REG
|
||||
deHandlers[gnm::IT_SET_CONFIG_REG] = &GraphicsPipe::setConfigReg;
|
||||
deHandlers[gnm::IT_SET_CONTEXT_REG] = &GraphicsPipe::setContextReg;
|
||||
// IT_SET_CONTEXT_REG_INDIRECT
|
||||
deHandlers[gnm::IT_SET_SH_REG] = &GraphicsPipe::setShReg;
|
||||
// IT_SET_SH_REG_OFFSET
|
||||
// IT_SET_QUEUE_REG
|
||||
deHandlers[gnm::IT_SET_UCONFIG_REG] = &GraphicsPipe::setUConfigReg;
|
||||
// IT_SCRATCH_RAM_WRITE
|
||||
// IT_SCRATCH_RAM_READ
|
||||
deHandlers[gnm::IT_INCREMENT_DE_COUNTER] = &GraphicsPipe::incrementDeCounter;
|
||||
deHandlers[gnm::IT_WAIT_ON_CE_COUNTER] = &GraphicsPipe::waitOnCeCounter;
|
||||
deHandlers[gnm::IT_SET_CE_DE_COUNTERS] = &GraphicsPipe::setCeDeCounters;
|
||||
// IT_WAIT_ON_AVAIL_BUFFER
|
||||
// IT_SWITCH_BUFFER
|
||||
// IT_SET_RESOURCES
|
||||
// IT_MAP_PROCESS
|
||||
// IT_MAP_QUEUES
|
||||
// IT_UNMAP_QUEUES
|
||||
// IT_QUERY_STATUS
|
||||
// IT_RUN_LIST
|
||||
// IT_DISPATCH_DRAW_PREAMBLE
|
||||
// IT_DISPATCH_DRAW
|
||||
|
||||
ceHandlers[gnm::IT_WAIT_ON_DE_COUNTER_DIFF] =
|
||||
&GraphicsPipe::waitOnDeCounterDiff;
|
||||
ceHandlers[gnm::IT_INCREMENT_CE_COUNTER] = &GraphicsPipe::incrementCeCounter;
|
||||
ceHandlers[gnm::IT_LOAD_CONST_RAM] = &GraphicsPipe::loadConstRam;
|
||||
ceHandlers[gnm::IT_WRITE_CONST_RAM] = &GraphicsPipe::writeConstRam;
|
||||
ceHandlers[gnm::IT_DUMP_CONST_RAM] = &GraphicsPipe::dumpConstRam;
|
||||
}
|
||||
|
||||
void GraphicsPipe::setCeQueue(Queue queue) {
|
||||
queue.indirectLevel = -1;
|
||||
ceQueue = queue;
|
||||
}
|
||||
|
||||
void GraphicsPipe::setDeQueue(Queue queue, int ring) {
|
||||
rx::dieIf(ring > 2, "out of indirect gfx rings, %u", ring);
|
||||
queue.indirectLevel = 2 - ring;
|
||||
deQueues[ring] = queue;
|
||||
}
|
||||
|
||||
// Resolves a memory-mapped register dword address into a pointer inside
// this pipe's shadowed register banks (ShaderConfig, UConfig or Context).
// Addresses outside every known bank are fatal.
std::uint32_t *GraphicsPipe::getMmRegister(std::uint32_t dwAddress) {
  // if (dwAddress >= Registers::Config::kMmioOffset &&
  //     dwAddress < Registers::Config::kMmioOffset +
  //     sizeof(Registers::Config) / sizeof(std::uint32_t)) {
  //   return reinterpret_cast<std::uint32_t *>(&config) + (dwAddress -
  //   Registers::Config::kMmioOffset);
  // }

  if (dwAddress >= Registers::ShaderConfig::kMmioOffset &&
      dwAddress < Registers::ShaderConfig::kMmioOffset +
                      sizeof(Registers::ShaderConfig) / sizeof(std::uint32_t)) {
    return reinterpret_cast<std::uint32_t *>(&sh) +
           (dwAddress - Registers::ShaderConfig::kMmioOffset);
  }

  if (dwAddress >= Registers::UConfig::kMmioOffset &&
      dwAddress < Registers::UConfig::kMmioOffset +
                      sizeof(Registers::UConfig) / sizeof(std::uint32_t)) {
    return reinterpret_cast<std::uint32_t *>(&uConfig) +
           (dwAddress - Registers::UConfig::kMmioOffset);
  }

  if (dwAddress >= Registers::Context::kMmioOffset &&
      dwAddress < Registers::Context::kMmioOffset +
                      sizeof(Registers::Context) / sizeof(std::uint32_t)) {
    return reinterpret_cast<std::uint32_t *>(&context) +
           (dwAddress - Registers::Context::kMmioOffset);
  }

  rx::die("unexpected memory mapped register address %x, %s", dwAddress,
          gnm::mmio::registerName(dwAddress));
}
|
||||
|
||||
bool GraphicsPipe::processAllRings() {
|
||||
bool allProcessed = true;
|
||||
|
||||
if (ceQueue.rptr != ceQueue.wptr) {
|
||||
processRing(ceQueue);
|
||||
|
||||
if (ceQueue.rptr != ceQueue.wptr) {
|
||||
allProcessed = false;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
auto &queue = deQueues[i];
|
||||
processRing(queue);
|
||||
|
||||
if (queue.rptr != queue.wptr) {
|
||||
allProcessed = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return allProcessed;
|
||||
}
|
||||
|
||||
// Drains one graphics queue. The handler table is chosen from the queue's
// indirect level: -1 selects the constant-engine table (cp == 0), level 2
// the second-level indirect table (cp == 2), everything else the draw
// engine (cp == 1). A handler returning false leaves rptr in place so the
// packet is retried; after an INDIRECT_BUFFER packet this loop exits so
// the newly chained queue runs first.
void GraphicsPipe::processRing(Queue &queue) {
  auto cp = 1;
  if (queue.indirectLevel < 0) {
    cp = 0;
  } else if (queue.indirectLevel == 2) {
    cp = 2;
  }

  while (queue.rptr != queue.wptr) {
    // wrap the read pointer at the end of the ring buffer
    if (queue.rptr >= queue.base + queue.size) {
      queue.rptr = queue.base;
    }

    auto header = *queue.rptr;
    auto type = rx::getBits(header, 31, 30);

    if (type == 3) {
      auto op = rx::getBits(header, 15, 8);
      // total packet size in dwords: header + (count field + 1) payload
      auto len = rx::getBits(header, 29, 16) + 2;

      // std::fprintf(stderr, "queue %d: %s\n", queue.indirectLevel,
      // gnm::pm4OpcodeToString(op));

      if (op == gnm::IT_COND_EXEC) {
        rx::die("unimplemented COND_EXEC");
      }

      auto handler = commandHandlers[cp][op];
      if (!(this->*handler)(queue)) {
        // handler stalled; keep rptr in place and retry later
        return;
      }

      queue.rptr += len;

      if (op == gnm::IT_INDIRECT_BUFFER || op == gnm::IT_INDIRECT_BUFFER_CNST) {
        // give the nested command buffer a chance to execute first
        break;
      }

      continue;
    }

    if (type == 2) {
      // type-2: one-dword filler packet
      ++queue.rptr;
      continue;
    }

    rx::die("unexpected pm4 packet type %u", type);
  }
}
|
||||
|
||||
bool GraphicsPipe::handleNop(Queue &queue) { return true; }
|
||||
|
||||
// SET_BASE: latches one of the base addresses/indices consumed by later
// packets. Index 0 = display-list patch base, 1 = draw-index-indirect
// patch base, 2 = GDS partition bases, 3 = CE partition bases.
bool GraphicsPipe::setBase(Queue &queue) {
  auto baseIndex = queue.rptr[1] & 0xf;

  switch (baseIndex) {
  case 0: {
    // 48-bit address: low dword (4-byte aligned) + 16 high bits
    auto address0 = queue.rptr[2] & ~3;
    auto address1 = queue.rptr[3] & ((1 << 16) - 1);

    displayListPatchBase =
        address0 | (static_cast<std::uint64_t>(address1) << 32);
    break;
  }
  case 1: {
    auto address0 = queue.rptr[2] & ~3;
    auto address1 = queue.rptr[3] & ((1 << 16) - 1);

    drawIndexIndirPatchBase =
        address0 | (static_cast<std::uint64_t>(address1) << 32);
    break;
  }

  case 2: {
    // per-shader-stage GDS partition bases (16-bit indices)
    auto cs1Index = queue.rptr[2] & ((1 << 16) - 1);
    auto cs2Index = queue.rptr[3] & ((1 << 16) - 1);
    gdsPartitionBases[0] = cs1Index;
    gdsPartitionBases[1] = cs2Index;
    break;
  }

  case 3: {
    auto cs1Index = queue.rptr[2] & ((1 << 16) - 1);
    auto cs2Index = queue.rptr[3] & ((1 << 16) - 1);
    cePartitionBases[0] = cs1Index;
    cePartitionBases[1] = cs2Index;
    break;
  }

  default:
    rx::die("pm4: unknown SET_BASE index %u", baseIndex);
  }

  return true;
}
|
||||
|
||||
bool GraphicsPipe::clearState(Queue &queue) {
|
||||
context = Registers::Context::Default;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GraphicsPipe::contextControl(Queue &queue) { return true; }
|
||||
bool GraphicsPipe::acquireMem(Queue &queue) { return true; }
|
||||
// RELEASE_MEM: end-of-pipe release — optionally writes a fence value or a
// timestamp to guest memory, selected by dataSel.
//
// Packet layout (dwords after the header):
//   1: event control  2: data control  3: address lo  4: address hi
//   5: data lo        6: data hi
bool GraphicsPipe::releaseMem(Queue &queue) {
  auto eventCntl = queue.rptr[1];
  auto dataCntl = queue.rptr[2];
  auto addressLo = queue.rptr[3] & ~3;
  // FIX: the high address half lives in dword 4 (the original re-read
  // dword 3, producing a garbage high half), and the data dwords follow
  // at 5/6, not 4/5.
  auto addressHi = queue.rptr[4] & ((1 << 16) - 1);
  auto dataLo = queue.rptr[5];
  auto dataHi = queue.rptr[6];

  // NOTE(review): eventIndex and intSel are decoded but not acted upon yet.
  auto eventIndex = rx::getBits(eventCntl, 11, 8);
  auto eventType = rx::getBits(eventCntl, 5, 0);
  auto dataSel = rx::getBits(dataCntl, 31, 29);
  auto intSel = rx::getBits(dataCntl, 25, 24);

  auto address = addressLo | (static_cast<std::uint64_t>(addressHi) << 32);
  auto pointer = RemoteMemory{queue.vmId}.getPointer<std::uint64_t>(address);

  context.vgtEventInitiator = eventType;

  switch (dataSel) {
  case 0: // none
    break;
  case 1: // 32 bit, low
    *reinterpret_cast<std::uint32_t *>(pointer) = dataLo;
    break;
  case 2: // 64 bit
    *pointer = dataLo | (static_cast<std::uint64_t>(dataHi) << 32);
    break;
  case 3: // 64 bit, global GPU clock
    *pointer = std::chrono::duration_cast<std::chrono::nanoseconds>(
                   std::chrono::system_clock::now().time_since_epoch())
                   .count();
    break;
  case 4: // 64 bit, perf counter
    *pointer = std::chrono::duration_cast<std::chrono::nanoseconds>(
                   std::chrono::steady_clock::now().time_since_epoch())
                   .count();
    break;

  default:
    rx::die("unimplemented event release mem data %#x", dataSel);
  }

  return true;
}
|
||||
|
||||
bool GraphicsPipe::drawPreamble(Queue &queue) { return true; }
|
||||
|
||||
bool GraphicsPipe::indexBufferSize(Queue &queue) {
|
||||
vgtIndexBufferSize = queue.rptr[1];
|
||||
return true;
|
||||
}
|
||||
bool GraphicsPipe::dispatchDirect(Queue &queue) {
|
||||
auto dimX = queue.rptr[1];
|
||||
auto dimY = queue.rptr[2];
|
||||
auto dimZ = queue.rptr[3];
|
||||
auto dispatchInitiator = queue.rptr[4];
|
||||
sh.compute.computeDispatchInitiator = dispatchInitiator;
|
||||
|
||||
// FIXME
|
||||
return true;
|
||||
}
|
||||
bool GraphicsPipe::dispatchIndirect(Queue &queue) {
|
||||
auto offset = queue.rptr[1];
|
||||
auto dispatchInitiator = queue.rptr[2];
|
||||
|
||||
sh.compute.computeDispatchInitiator = dispatchInitiator;
|
||||
auto buffer = RemoteMemory{queue.vmId}.getPointer<std::uint32_t>(
|
||||
drawIndexIndirPatchBase + offset);
|
||||
|
||||
auto dimX = buffer[0];
|
||||
auto dimY = buffer[1];
|
||||
auto dimZ = buffer[2];
|
||||
|
||||
// FIXME
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GraphicsPipe::setPredication(Queue &queue) {
|
||||
auto startAddressLo = queue.rptr[1] & ~0xf;
|
||||
auto predProperties = queue.rptr[2];
|
||||
|
||||
auto startAddressHi = rx::getBits(predProperties, 15, 0);
|
||||
auto predBool = rx::getBit(predProperties, 8);
|
||||
auto hint = rx::getBit(predProperties, 12);
|
||||
auto predOp = rx::getBits(predProperties, 18, 16);
|
||||
auto cont = rx::getBit(predProperties, 31);
|
||||
|
||||
switch (predOp) {
|
||||
case 0: // clear predicate
|
||||
case 1: // set ZPass predicate
|
||||
case 2: // set PrimCount predicate
|
||||
break;
|
||||
}
|
||||
|
||||
// TODO
|
||||
|
||||
return true;
|
||||
}
|
||||
bool GraphicsPipe::drawIndirect(Queue &queue) {
|
||||
auto dataOffset = queue.rptr[1];
|
||||
auto baseVtxLoc = queue.rptr[2] & ((1 << 16) - 1);
|
||||
auto startInstLoc = queue.rptr[3] & ((1 << 16) - 1);
|
||||
auto drawInitiator = queue.rptr[4];
|
||||
|
||||
context.vgtDrawInitiator = drawInitiator;
|
||||
|
||||
auto buffer = RemoteMemory{queue.vmId}.getPointer<std::uint32_t>(
|
||||
drawIndexIndirPatchBase + dataOffset);
|
||||
|
||||
std::uint32_t vertexCountPerInstance = buffer[0];
|
||||
std::uint32_t instanceCount = buffer[1];
|
||||
std::uint32_t startVertexLocation = buffer[2];
|
||||
std::uint32_t startInstanceLocation = buffer[3];
|
||||
|
||||
// FIXME
|
||||
rx::die("drawIndirect");
|
||||
return true;
|
||||
}
|
||||
bool GraphicsPipe::drawIndexIndirect(Queue &queue) {
|
||||
auto dataOffset = queue.rptr[1];
|
||||
auto baseVtxLoc = queue.rptr[2] & ((1 << 16) - 1);
|
||||
auto drawInitiator = queue.rptr[3];
|
||||
|
||||
auto buffer = RemoteMemory{queue.vmId}.getPointer<std::uint32_t>(
|
||||
drawIndexIndirPatchBase + dataOffset);
|
||||
|
||||
context.vgtDrawInitiator = drawInitiator;
|
||||
|
||||
std::uint32_t indexCountPerInstance = buffer[0];
|
||||
std::uint32_t instanceCount = buffer[1];
|
||||
std::uint32_t startIndexLocation = buffer[2];
|
||||
std::uint32_t baseVertexLocation = buffer[3];
|
||||
std::uint32_t startInstanceLocation = buffer[4];
|
||||
|
||||
// FIXME
|
||||
rx::die("drawIndexIndirect");
|
||||
return true;
|
||||
}
|
||||
// INDEX_BASE: latches the index buffer base address for later
// DRAW_INDEX_* packets. Dword 1 carries address bits [31:1] in place
// (the buffer is 2-byte aligned, bit 0 reserved); dword 2 carries the
// high 16 bits.
bool GraphicsPipe::indexBase(Queue &queue) {
  // FIX: mask the reserved low bit instead of shifting — the original
  // `<< 1` doubled the address and dropped its top bit.
  auto addressLo = queue.rptr[1] & ~1;
  auto addressHi = queue.rptr[2] & ((1 << 16) - 1);
  auto address = addressLo | (static_cast<std::uint64_t>(addressHi) << 32);
  vgtIndexBase = address;
  return true;
}
|
||||
bool GraphicsPipe::drawIndex2(Queue &queue) {
|
||||
auto maxSize = queue.rptr[1];
|
||||
auto indexOffset = queue.rptr[2];
|
||||
auto indexCount = queue.rptr[3];
|
||||
auto drawInitiator = queue.rptr[4];
|
||||
|
||||
context.vgtDrawInitiator = drawInitiator;
|
||||
uConfig.vgtNumIndices = indexCount;
|
||||
|
||||
draw(*this, queue.vmId, 0, indexCount, 0, uConfig.vgtNumInstances,
|
||||
vgtIndexBase + indexOffset, maxSize);
|
||||
return true;
|
||||
}
|
||||
bool GraphicsPipe::indexType(Queue &queue) {
|
||||
uConfig.vgtIndexType = static_cast<gnm::IndexType>(queue.rptr[1] & 1);
|
||||
return true;
|
||||
}
|
||||
bool GraphicsPipe::drawIndexAuto(Queue &queue) {
|
||||
auto indexCount = queue.rptr[1];
|
||||
auto drawInitiator = queue.rptr[2];
|
||||
|
||||
uConfig.vgtNumIndices = indexCount;
|
||||
context.vgtDrawInitiator = drawInitiator;
|
||||
|
||||
draw(*this, queue.vmId, 0, indexCount, 0, uConfig.vgtNumInstances, 0, 0);
|
||||
return true;
|
||||
}
|
||||
bool GraphicsPipe::numInstances(Queue &queue) {
|
||||
uConfig.vgtNumInstances = std::max(queue.rptr[1], 1u);
|
||||
return true;
|
||||
}
|
||||
bool GraphicsPipe::drawIndexMultiAuto(Queue &queue) {
|
||||
auto primCount = queue.rptr[1];
|
||||
auto drawInitiator = queue.rptr[2];
|
||||
auto control = queue.rptr[3];
|
||||
|
||||
auto indexOffset = rx::getBits(control, 15, 0);
|
||||
auto primType = rx::getBits(control, 20, 16);
|
||||
auto indexCount = rx::getBits(control, 31, 21);
|
||||
|
||||
context.vgtDrawInitiator = drawInitiator;
|
||||
uConfig.vgtPrimitiveType = static_cast<gnm::PrimitiveType>(primType);
|
||||
uConfig.vgtNumIndices = indexCount;
|
||||
|
||||
// FIXME
|
||||
return true;
|
||||
}
|
||||
bool GraphicsPipe::drawIndexOffset2(Queue &queue) {
|
||||
auto maxSize = queue.rptr[1];
|
||||
auto indexOffset = queue.rptr[2];
|
||||
auto indexCount = queue.rptr[3];
|
||||
auto drawInitiator = queue.rptr[4];
|
||||
|
||||
context.vgtDrawInitiator = drawInitiator;
|
||||
// FIXME
|
||||
return true;
|
||||
}
|
||||
// WRITE_DATA: copies `len` inline payload dwords either into a memory
// mapped register (dstSel 0) or into guest memory (dstSel 1/5). With
// wrOneAddress set, every dword is written to the same destination.
// NOTE(review): len is taken as (count field - 1); with a 3-dword prefix
// (control + two address dwords) before the payload it looks like it
// should be (count - 2) — confirm against the PM4 WRITE_DATA spec.
// NOTE(review): engineSel and wrConfirm are decoded but unused.
bool GraphicsPipe::writeData(Queue &queue) {
  auto len = rx::getBits(queue.rptr[0], 29, 16) - 1;
  auto control = queue.rptr[1];
  auto dstAddressLo = queue.rptr[2];
  auto dstAddressHi = queue.rptr[3];
  auto data = queue.rptr + 4;

  auto engineSel = rx::getBits(control, 31, 30);
  auto wrConfirm = rx::getBit(control, 20);
  auto wrOneAddress = rx::getBit(control, 16);
  auto dstSel = rx::getBits(control, 11, 8);

  std::uint32_t *dstPointer = nullptr;

  switch (dstSel) {
  case 0: // memory mapped register
    dstPointer = getMmRegister(dstAddressLo & ((1 << 16) - 1));
    break;

  case 1: // memory sync
  case 5: { // memory async
    auto address =
        (dstAddressLo & ~3) | (static_cast<std::uint64_t>(dstAddressHi) << 32);
    dstPointer = RemoteMemory{queue.vmId}.getPointer<std::uint32_t>(address);
    break;
  }

  default:
    rx::die("unimplemented write data, dst sel = %#x", dstSel);
  }

  if (wrOneAddress) {
    // repeatedly write every payload dword to a single destination
    for (std::uint32_t i = 0; i < len; ++i) {
      *dstPointer = data[i];
    }
  } else {
    std::memcpy(dstPointer, data, len * sizeof(std::uint32_t));
  }

  return true;
}
|
||||
bool GraphicsPipe::memSemaphore(Queue &queue) {
|
||||
// FIXME
|
||||
return true;
|
||||
}
|
||||
// WAIT_REG_MEM: polls either a memory-mapped register (memSpace == 0) or
// guest memory and compares the masked value against a reference. Returns
// false — stalling the ring at this packet — until the compare passes.
// NOTE(review): engine select and pollInterval are decoded but ignored.
bool GraphicsPipe::waitRegMem(Queue &queue) {
  auto engine = rx::getBit(queue.rptr[1], 8);
  auto memSpace = rx::getBit(queue.rptr[1], 4);
  auto function = rx::getBits(queue.rptr[1], 2, 0);
  auto pollAddressLo = queue.rptr[2];
  auto pollAddressHi = queue.rptr[3] & ((1 << 16) - 1);
  auto reference = queue.rptr[4];
  auto mask = queue.rptr[5];
  auto pollInterval = queue.rptr[6];

  std::uint32_t pollData;

  if (memSpace == 0) {
    // register space: low 16 bits are the dword register address
    pollData = *getMmRegister(pollAddressLo & ((1 << 16) - 1));
  } else {
    auto pollAddress = (pollAddressLo & ~3) |
                       (static_cast<std::uint64_t>(pollAddressHi) << 32);
    pollData = *RemoteMemory{queue.vmId}.getPointer<std::uint32_t>(pollAddress);
  }

  return compare(function, pollData, mask, reference);
}
|
||||
// INDIRECT_BUFFER: chains execution into a nested command buffer one
// indirect level deeper. processRing stops after this packet so the
// nested queue is consumed first. Only legal from ring queues, not from
// the constant engine.
bool GraphicsPipe::indirectBuffer(Queue &queue) {
  rx::dieIf(queue.indirectLevel < 0, "unexpected indirect buffer from CP");

  auto addressLo = queue.rptr[1] & ~3;
  auto addressHi = queue.rptr[2] & ((1 << 16) - 1);
  auto vmId = queue.rptr[3] >> 24; // NOTE(review): decoded but unused — the
                                   // parent queue's vmId is used instead
  auto ibSize = queue.rptr[4] & ((1 << 20) - 1);
  auto address = addressLo | (static_cast<std::uint64_t>(addressHi) << 32);

  auto rptr = RemoteMemory{queue.vmId}.getPointer<std::uint32_t>(address);
  setDeQueue(Queue::createFromRange(queue.vmId, rptr, ibSize),
             queue.indirectLevel + 1);
  return true;
}
|
||||
bool GraphicsPipe::pfpSyncMe(Queue &queue) {
|
||||
// TODO
|
||||
return true;
|
||||
}
|
||||
bool GraphicsPipe::condWrite(Queue &queue) {
|
||||
auto writeSpace = rx::getBit(queue.rptr[1], 8);
|
||||
auto pollSpace = rx::getBit(queue.rptr[1], 4);
|
||||
auto function = rx::getBits(queue.rptr[1], 2, 0);
|
||||
auto pollAddressLo = queue.rptr[2];
|
||||
auto pollAddressHi = queue.rptr[3] & ((1 << 16) - 1);
|
||||
auto reference = queue.rptr[4];
|
||||
auto mask = queue.rptr[5];
|
||||
auto writeAddressLo = queue.rptr[6];
|
||||
auto writeAddressHi = queue.rptr[7] & ((1 << 16) - 1);
|
||||
auto writeData = queue.rptr[8];
|
||||
|
||||
std::uint32_t pollData;
|
||||
|
||||
if (pollSpace == 0) {
|
||||
pollData = *getMmRegister(pollAddressLo & ((1 << 16) - 1));
|
||||
} else {
|
||||
auto pollAddress = (pollAddressLo & ~3) |
|
||||
(static_cast<std::uint64_t>(pollAddressHi) << 32);
|
||||
pollData = *RemoteMemory{queue.vmId}.getPointer<std::uint32_t>(pollAddress);
|
||||
}
|
||||
|
||||
if (compare(function, pollData, mask, reference)) {
|
||||
if (writeSpace == 0) {
|
||||
*getMmRegister(writeAddressLo & ((1 << 16) - 1)) = writeData;
|
||||
} else {
|
||||
auto writeAddress = (writeAddressLo & ~3) |
|
||||
(static_cast<std::uint64_t>(writeAddressHi) << 32);
|
||||
|
||||
*RemoteMemory{queue.vmId}.getPointer<std::uint32_t>(writeAddress) =
|
||||
writeData;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GraphicsPipe::eventWrite(Queue &queue) {
|
||||
enum {
|
||||
kEventZPassDone = 1,
|
||||
kEventSamplePipelineStat = 2,
|
||||
kEventSampleStreamOutStat = 3,
|
||||
kEventPartialFlush = 4,
|
||||
};
|
||||
|
||||
auto eventCntl = queue.rptr[1];
|
||||
auto invL2 = rx::getBit(eventCntl, 20);
|
||||
auto eventIndex = rx::getBits(eventCntl, 11, 8);
|
||||
auto eventType = rx::getBits(eventCntl, 5, 0);
|
||||
|
||||
context.vgtEventInitiator = eventType;
|
||||
|
||||
if (eventIndex == kEventZPassDone || eventIndex == kEventSamplePipelineStat ||
|
||||
eventIndex == kEventSampleStreamOutStat) {
|
||||
auto addressLo = queue.rptr[2] & ~7;
|
||||
auto addressHi = queue.rptr[3] & ((1 << 16) - 1);
|
||||
auto address = addressLo | (static_cast<std::uint64_t>(addressHi) << 32);
|
||||
rx::die("unimplemented event write, event index %#x, address %lx",
|
||||
eventIndex, address);
|
||||
return true;
|
||||
}
|
||||
|
||||
// FIXME
|
||||
return true;
|
||||
}
|
||||
|
||||
// EVENT_WRITE_EOP: signals an end-of-pipe event and optionally writes a
// fence value or timestamp to guest memory, selected by dataSel. Unlike
// RELEASE_MEM, the high address bits are packed into the data-control
// dword (bits 15:0).
// NOTE(review): intSel, eventIndex and invL2 are decoded but not acted
// upon yet (no interrupt delivery / cache invalidation).
bool GraphicsPipe::eventWriteEop(Queue &queue) {
  auto eventCntl = queue.rptr[1];
  auto addressLo = queue.rptr[2] & ~3;
  auto dataCntl = queue.rptr[3];
  auto dataLo = queue.rptr[4];
  auto dataHi = queue.rptr[5];

  auto invL2 = rx::getBit(eventCntl, 20);
  auto eventIndex = rx::getBits(eventCntl, 11, 8);
  auto eventType = rx::getBits(eventCntl, 5, 0);
  auto dataSel = rx::getBits(dataCntl, 31, 29);
  auto intSel = rx::getBits(dataCntl, 25, 24);
  auto addressHi = rx::getBits(dataCntl, 15, 0);

  auto address = addressLo | (static_cast<std::uint64_t>(addressHi) << 32);
  auto pointer = RemoteMemory{queue.vmId}.getPointer<std::uint64_t>(address);

  context.vgtEventInitiator = eventType;

  switch (dataSel) {
  case 0: // none
    break;
  case 1: // 32 bit, low
    *reinterpret_cast<std::uint32_t *>(pointer) = dataLo;
    break;
  case 2: // 64 bit
    *pointer = dataLo | (static_cast<std::uint64_t>(dataHi) << 32);
    break;
  case 3: // 64 bit, global GPU clock
    *pointer = std::chrono::duration_cast<std::chrono::nanoseconds>(
                   std::chrono::system_clock::now().time_since_epoch())
                   .count();
    break;
  case 4: // 64 bit, perf counter
    *pointer = std::chrono::duration_cast<std::chrono::nanoseconds>(
                   std::chrono::steady_clock::now().time_since_epoch())
                   .count();
    break;

  default:
    rx::die("unimplemented event write eop data %#x", dataSel);
  }

  return true;
}
|
||||
|
||||
bool GraphicsPipe::eventWriteEos(Queue &queue) {
|
||||
auto eventCntl = queue.rptr[1];
|
||||
auto addressLo = queue.rptr[2] & ~3;
|
||||
auto cmdInfo = queue.rptr[3];
|
||||
auto dataInfo = queue.rptr[4];
|
||||
|
||||
auto eventIndex = rx::getBits(eventCntl, 11, 8);
|
||||
auto eventType = rx::getBits(eventCntl, 5, 0);
|
||||
auto cmd = rx::getBits(cmdInfo, 31, 29);
|
||||
auto addressHi = rx::getBits(cmdInfo, 15, 0);
|
||||
|
||||
auto address = addressLo | (static_cast<std::uint64_t>(addressHi) << 32);
|
||||
auto pointer = RemoteMemory{queue.vmId}.getPointer<std::uint32_t>(address);
|
||||
|
||||
context.vgtEventInitiator = eventType;
|
||||
|
||||
switch (cmd) {
|
||||
case 1: { // store GDS data to memory
|
||||
auto sizeDw = rx::getBits(dataInfo, 31, 16);
|
||||
auto gdsIndexDw = rx::getBits(dataInfo, 15, 0);
|
||||
rx::die("unimplemented event write eos gds data");
|
||||
break;
|
||||
}
|
||||
|
||||
case 2: // after GDS writes confirm, store 32 bit DATA to memory as fence
|
||||
*pointer = dataInfo;
|
||||
break;
|
||||
|
||||
default:
|
||||
rx::die("unexpected event write eos command: %#x", cmd);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GraphicsPipe::dmaData(Queue &queue) {
|
||||
// FIXME
|
||||
return true;
|
||||
}
|
||||
|
||||
// SET_CONFIG_REG: copies `len` payload dwords into the device-global
// Config register block starting at `offset`. Only legal from the primary
// DE ring (indirect level 0); out-of-range writes are fatal.
bool GraphicsPipe::setConfigReg(Queue &queue) {
  rx::dieIf(queue.indirectLevel != 0, "setConfigReg from queue %d",
            queue.indirectLevel);

  auto len = rx::getBits(queue.rptr[0], 29, 16);
  auto offset = queue.rptr[1];
  auto data = queue.rptr + 2;

  rx::dieIf(
      (offset + len) * sizeof(std::uint32_t) > sizeof(device->config),
      "out of Config regs, offset: %u, count %u, %s\n", offset, len,
      gnm::mmio::registerName(decltype(device->config)::kMmioOffset + offset));

  std::memcpy(reinterpret_cast<std::uint32_t *>(&device->config) + offset, data,
              sizeof(std::uint32_t) * len);

  return true;
}
|
||||
|
||||
bool GraphicsPipe::setShReg(Queue &queue) {
|
||||
auto len = rx::getBits(queue.rptr[0], 29, 16);
|
||||
auto offset = queue.rptr[1];
|
||||
auto data = queue.rptr + 2;
|
||||
|
||||
rx::dieIf((offset + len) * sizeof(std::uint32_t) > sizeof(sh),
|
||||
"out of SH regs, offset: %u, count %u, %s\n", offset, len,
|
||||
gnm::mmio::registerName(decltype(sh)::kMmioOffset + offset));
|
||||
|
||||
std::memcpy(reinterpret_cast<std::uint32_t *>(&sh) + offset, data,
|
||||
sizeof(std::uint32_t) * len);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Implements PM4 SET_UCONFIG_REG: copies the packet payload into the
// user-config register bank at the requested dword offset.
bool GraphicsPipe::setUConfigReg(Queue &queue) {
  auto count = rx::getBits(queue.rptr[0], 29, 16);
  auto regOffset = queue.rptr[1];
  auto payload = queue.rptr + 2;

  // Refuse writes that would run past the end of the UConfig block.
  rx::dieIf(
      (regOffset + count) * sizeof(std::uint32_t) > sizeof(uConfig),
      "out of UConfig regs, offset: %u, count %u, %s\n", regOffset, count,
      gnm::mmio::registerName(decltype(uConfig)::kMmioOffset + regOffset));

  auto dst = reinterpret_cast<std::uint32_t *>(&uConfig) + regOffset;
  std::memcpy(dst, payload, count * sizeof(std::uint32_t));
  return true;
}
|
||||
|
||||
// Implements PM4 SET_CONTEXT_REG: copies the packet payload into the draw
// context register bank at the requested dword offset.
bool GraphicsPipe::setContextReg(Queue &queue) {
  auto count = rx::getBits(queue.rptr[0], 29, 16);
  auto regOffset = queue.rptr[1];
  auto payload = queue.rptr + 2;

  // Refuse writes that would run past the end of the context block.
  rx::dieIf(
      (regOffset + count) * sizeof(std::uint32_t) > sizeof(context),
      "out of Context regs, offset: %u, count %u, %s\n", regOffset, count,
      gnm::mmio::registerName(decltype(context)::kMmioOffset + regOffset));

  auto dst = reinterpret_cast<std::uint32_t *>(&context) + regOffset;
  std::memcpy(dst, payload, count * sizeof(std::uint32_t));

  // Debug aid: dump each register write.
  // for (std::size_t i = 0; i < count; ++i) {
  //   std::fprintf(stderr, "writing to %s value %x\n",
  //                gnm::mmio::registerName(decltype(context)::kMmioOffset +
  //                                        regOffset + i),
  //                payload[i]);
  // }
  return true;
}
|
||||
|
||||
// PM4 SET_CE_DE_COUNTERS: loads both the constant-engine and draw-engine
// counters with the same 64-bit value from the packet body.
bool GraphicsPipe::setCeDeCounters(Queue &queue) {
  std::uint64_t value = queue.rptr[1];
  value |= static_cast<std::uint64_t>(queue.rptr[2]) << 32;
  deCounter = value;
  ceCounter = value;
  return true;
}
|
||||
|
||||
// PM4 WAIT_ON_CE_COUNTER: completes once deCounter has reached the packet's
// 64-bit target value; a false return keeps the packet pending.
bool GraphicsPipe::waitOnCeCounter(Queue &queue) {
  std::uint64_t target = queue.rptr[1];
  target |= static_cast<std::uint64_t>(queue.rptr[2]) << 32;
  return deCounter >= target;
}
|
||||
|
||||
// PM4 WAIT_ON_DE_COUNTER_DIFF: completes while the CE is fewer than
// rptr[1] increments ahead of the DE; a false return keeps the packet
// pending.
bool GraphicsPipe::waitOnDeCounterDiff(Queue &queue) {
  return ceCounter - deCounter < queue.rptr[1];
}
|
||||
|
||||
// PM4 INCREMENT_CE_COUNTER: advances the constant-engine counter by one.
bool GraphicsPipe::incrementCeCounter(Queue &queue) {
  ++ceCounter;
  return true;
}
|
||||
|
||||
// PM4 INCREMENT_DE_COUNTER: advances the draw-engine counter by one.
bool GraphicsPipe::incrementDeCounter(Queue &queue) {
  ++deCounter;
  return true;
}
|
||||
|
||||
// PM4 LOAD_CONST_RAM: copies numDw dwords from guest memory at the packet's
// 64-bit address into CE constant RAM at the given byte offset.
bool GraphicsPipe::loadConstRam(Queue &queue) {
  std::uint32_t addressLo = queue.rptr[1];
  std::uint32_t addressHi = queue.rptr[2];
  std::uint32_t numDw = queue.rptr[3] & ((1 << 15) - 1);
  std::uint32_t offset =
      (queue.rptr[4] & ((1 << 16) - 1)) / sizeof(std::uint32_t);
  auto address = addressLo | (static_cast<std::uint64_t>(addressHi) << 32);

  // The raw fields can describe up to 64 KiB of destination range while CE
  // RAM is only 48 KiB; fail loudly instead of overrunning constantMemory
  // into adjacent pipe state.
  rx::dieIf((offset + numDw) * sizeof(std::uint32_t) > sizeof(constantMemory),
            "loadConstRam out of CE RAM, offset: %u, count %u", offset, numDw);

  std::memcpy(constantMemory + offset,
              RemoteMemory{queue.vmId}.getPointer(address),
              numDw * sizeof(std::uint32_t));

  return true;
}
|
||||
|
||||
// PM4 WRITE_CONST_RAM: writes one dword of immediate data into CE constant
// RAM at the byte offset carried in the packet (low 16 bits).
bool GraphicsPipe::writeConstRam(Queue &queue) {
  std::uint32_t offset =
      (queue.rptr[1] & ((1 << 16) - 1)) / sizeof(std::uint32_t);
  std::uint32_t data = queue.rptr[2];

  // The raw offset field can address up to 64 KiB while CE RAM is only
  // 48 KiB; fail loudly instead of corrupting adjacent pipe state.
  rx::dieIf((offset + 1) * sizeof(std::uint32_t) > sizeof(constantMemory),
            "writeConstRam out of CE RAM, offset: %u", offset);

  std::memcpy(constantMemory + offset, &data, sizeof(std::uint32_t));
  return true;
}
|
||||
|
||||
// PM4 DUMP_CONST_RAM: copies numDw dwords out of CE constant RAM (from the
// given byte offset) into guest memory at the packet's 64-bit address.
bool GraphicsPipe::dumpConstRam(Queue &queue) {
  std::uint32_t offset =
      (queue.rptr[1] & ((1 << 16) - 1)) / sizeof(std::uint32_t);
  std::uint32_t numDw = queue.rptr[2] & ((1 << 15) - 1);
  std::uint32_t addressLo = queue.rptr[3];
  std::uint32_t addressHi = queue.rptr[4];
  auto address = addressLo | (static_cast<std::uint64_t>(addressHi) << 32);

  // The raw fields can describe up to 64 KiB of source range while CE RAM
  // is only 48 KiB; fail loudly instead of leaking adjacent pipe state.
  rx::dieIf((offset + numDw) * sizeof(std::uint32_t) > sizeof(constantMemory),
            "dumpConstRam out of CE RAM, offset: %u, count %u", offset, numDw);

  std::memcpy(RemoteMemory{queue.vmId}.getPointer(address),
              constantMemory + offset, numDw * sizeof(std::uint32_t));

  return true;
}
|
||||
|
||||
// Fallback handler for PM4 opcodes without a dedicated implementation:
// extracts the opcode from the packet header and aborts with its name.
// NOTE(review): the function has no return statement — this assumes rx::die
// is marked [[noreturn]]; confirm, otherwise this is UB / -Wreturn-type.
bool GraphicsPipe::unknownPacket(Queue &queue) {
  auto op = rx::getBits(queue.rptr[0], 15, 8);

  rx::die("unimplemented gfx pm4 packet: %s, queue %u\n",
          gnm::pm4OpcodeToString(op), queue.indirectLevel);
}
|
||||
135
rpcsx-gpu2/Pipe.hpp
Normal file
135
rpcsx-gpu2/Pipe.hpp
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
#pragma once
|
||||
#include "Registers.hpp"
|
||||
#include "Scheduler.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
namespace amdgpu {
|
||||
class Device;
|
||||
|
||||
// A single command ring (primary ring or nested indirect buffer) as seen by
// the command processor.
struct Queue {
  int vmId = -1;             // guest VM id the ring's memory belongs to
  int indirectLevel = -1;    // 0 = primary ring, >0 = indirect buffer depth
  std::uint32_t *doorbell{}; // doorbell slot; null for createFromRange rings
  std::uint32_t *base{};     // first dword of the ring
  std::uint64_t size{};      // ring size in dwords (wptr = base + size)
  std::uint32_t *rptr{};     // read pointer: next dword to process
  std::uint32_t *wptr{};     // write pointer: end of pending commands

  // Builds a queue over [base, base + size). rptr starts at base and wptr at
  // base + size, i.e. the whole range is treated as already-written commands
  // (suits one-shot indirect buffers).
  static Queue createFromRange(int vmId, std::uint32_t *base,
                               std::uint64_t size, int indirectLevel = 0,
                               std::uint32_t *doorbell = nullptr) {
    Queue result;
    result.vmId = vmId;
    result.indirectLevel = indirectLevel;
    result.doorbell = doorbell;
    result.base = base;
    result.size = size;
    result.rptr = base;
    result.wptr = base + size;
    return result;
  }
};
|
||||
|
||||
// One hardware compute pipe: dispatches packets from up to eight mapped
// queues through a per-opcode handler table.
struct ComputePipe {
  Device *device;
  Scheduler scheduler;

  // Packet handlers are member-function pointers indexed by PM4 opcode.
  using CommandHandler = bool (ComputePipe::*)(Queue &);
  CommandHandler commandHandlers[255];
  Queue queues[8];
  Registers::ComputeConfig computeConfig;

  ComputePipe(int index);

  bool processAllRings();
  void processRing(Queue &queue);
  void mapQueue(int queueId, Queue queue);

  // Handlers; presumably a false return means the packet could not complete
  // yet and is retried — confirm against processRing.
  bool setShReg(Queue &queue);
  bool unknownPacket(Queue &queue);
  bool handleNop(Queue &queue);
};
|
||||
|
||||
// The graphics command processor pipe: three draw-engine (DE) rings plus a
// constant-engine (CE) ring, per-indirect-level PM4 handler tables, and the
// MMIO register banks that the packets operate on.
struct GraphicsPipe {
  Device *device;
  Scheduler scheduler;

  // CE/DE synchronization counters (SET_CE_DE_COUNTERS and friends).
  std::uint64_t ceCounter = 0;
  std::uint64_t deCounter = 0;
  // Base addresses installed by PM4 SET_BASE.
  std::uint64_t displayListPatchBase = 0;
  std::uint64_t drawIndexIndirPatchBase = 0;
  std::uint64_t gdsPartitionBases[2]{};
  std::uint64_t cePartitionBases[2]{};
  // Index buffer state (INDEX_BASE / INDEX_BUFFER_SIZE packets).
  std::uint64_t vgtIndexBase = 0;
  std::uint32_t vgtIndexBufferSize = 0;

  // 48 KiB of CE constant RAM (LOAD/WRITE/DUMP_CONST_RAM).
  std::uint32_t constantMemory[(48 * 1024) / sizeof(std::uint32_t)]{};

  // Register banks addressed by the SET_*_REG packet families.
  Registers::ShaderConfig sh;
  Registers::Context context;
  Registers::UConfig uConfig;

  Queue deQueues[3];
  Queue ceQueue;

  // Handler table indexed by [indirect level][PM4 opcode].
  using CommandHandler = bool (GraphicsPipe::*)(Queue &);
  CommandHandler commandHandlers[3][255];

  GraphicsPipe(int index);

  void setCeQueue(Queue queue);
  void setDeQueue(Queue queue, int ring);

  bool processAllRings();
  void processRing(Queue &queue);

  // PM4 packet handlers; presumably a false return means the packet could
  // not complete yet (waits/semaphores) and is retried — confirm against
  // processRing.
  bool drawPreamble(Queue &queue);
  bool indexBufferSize(Queue &queue);
  bool handleNop(Queue &queue);
  bool contextControl(Queue &queue);
  bool acquireMem(Queue &queue);
  bool releaseMem(Queue &queue);
  bool dispatchDirect(Queue &queue);
  bool dispatchIndirect(Queue &queue);
  bool writeData(Queue &queue);
  bool memSemaphore(Queue &queue);
  bool waitRegMem(Queue &queue);
  bool indirectBuffer(Queue &queue);
  bool condWrite(Queue &queue);
  bool eventWrite(Queue &queue);
  bool eventWriteEop(Queue &queue);
  bool eventWriteEos(Queue &queue);
  bool dmaData(Queue &queue);
  bool setBase(Queue &queue);
  bool clearState(Queue &queue);
  bool setPredication(Queue &queue);
  bool drawIndirect(Queue &queue);
  bool drawIndexIndirect(Queue &queue);
  bool indexBase(Queue &queue);
  bool drawIndex2(Queue &queue);
  bool indexType(Queue &queue);
  bool drawIndexAuto(Queue &queue);
  bool numInstances(Queue &queue);
  bool drawIndexMultiAuto(Queue &queue);
  bool drawIndexOffset2(Queue &queue);
  bool pfpSyncMe(Queue &queue);
  bool setCeDeCounters(Queue &queue);
  bool waitOnCeCounter(Queue &queue);
  bool waitOnDeCounterDiff(Queue &queue);
  bool incrementCeCounter(Queue &queue);
  bool incrementDeCounter(Queue &queue);
  bool loadConstRam(Queue &queue);
  bool writeConstRam(Queue &queue);
  bool dumpConstRam(Queue &queue);
  bool setConfigReg(Queue &queue);
  bool setShReg(Queue &queue);
  bool setUConfigReg(Queue &queue);
  bool setContextReg(Queue &queue);

  bool unknownPacket(Queue &queue);

  // Resolves a register dword address to a pointer into one of the banks
  // above.
  std::uint32_t *getMmRegister(std::uint32_t dwAddress);
};
|
||||
} // namespace amdgpu
|
||||
52
rpcsx-gpu2/Registers.cpp
Normal file
52
rpcsx-gpu2/Registers.cpp
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
#include "Registers.hpp"
|
||||
|
||||
// Power-on/default values for the draw context register bank. The magic
// constants mirror hardware reset state; unk_* registers are defaults whose
// meaning has not been reverse-engineered yet.
amdgpu::Registers::Context amdgpu::Registers::Context::Default = [] {
  amdgpu::Registers::Context result{};
  // Scissors default to the maximum 16K x 16K guard region.
  result.paScScreenScissor.bottom = 0x4000;
  result.paScScreenScissor.right = 0x4000;

  result.paScWindowScissor.top = 0x8000;
  result.paScWindowScissor.bottom = 0x4000;
  result.paScWindowScissor.right = 0x4000;

  for (auto &clipRect : result.paScClipRect) {
    clipRect.bottom = 0x4000;
    clipRect.right = 0x4000;
  }

  result.unk_8c = 0xaa99aaaa;
  result.paScGenericScissor.top = 0x8000;
  result.paScGenericScissor.bottom = 0x4000;
  result.paScGenericScissor.right = 0x4000;

  for (auto &vportScissor : result.paScVportScissor) {
    vportScissor.top = 0x8000;
    vportScissor.bottom = 0x4000;
    vportScissor.right = 0x4000;
  }

  // Viewport depth range defaults to [0, 1].
  for (auto &vportZ : result.paScVportZ) {
    vportZ.min = 0.0f;
    vportZ.max = 1.0f;
  }

  result.unk_d4 = 0x2a00161a;
  result.spiPsInControl = 2;
  result.paClClipCntl = 0x0009'0000;
  result.paSuScModeCntl.polyMode = gnm::PolyMode::Dual;
  result.vgtGsPerEs = 256;
  result.vgtEsPerGs = 128;
  result.vgtGsPerVs = 2;
  result.iaMultiVgtParam = 0xff;
  result.unk_2f7 = 0x00001000;
  // Half-pixel center with round-to-even vertex quantization.
  result.paSuVtxCntl.pixCenterHalf = true;
  result.paSuVtxCntl.roundMode = gnm::RoundMode::RoundToEven;
  // Guard-band clip/discard adjust defaults (no guard band).
  result.paClGbVertClipAdj = 1.0f;
  result.paClGbVertDiscAdj = 1.0f;
  result.paClGbHorzClipAdj = 1.0f;
  result.paClGbHorzDiscAdj = 1.0f;
  result.unk_316 = 0xe;
  result.vgtOutDeallocCntl = 0x10;
  return result;
}();
|
||||
|
||||
931
rpcsx-gpu2/Registers.hpp
Normal file
931
rpcsx-gpu2/Registers.hpp
Normal file
|
|
@ -0,0 +1,931 @@
|
|||
#pragma once
|
||||
|
||||
#include "amdgpu/tiler.hpp"
|
||||
#include "gnm/constants.hpp"
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <type_traits>
|
||||
|
||||
namespace amdgpu {
|
||||
// Command-processor engine that sources/executes a packet.
enum class Engine {
  ME,  // micro engine (draw engine)
  PFP, // pre-fetch parser
  CE,  // constant engine
};

// EVENT_WRITE "event index" field values.
// NOTE(review): SAMAPE_PIPELINE_STAT looks like a typo of
// SAMPLE_PIPELINE_STAT, but renaming would break every user of this enum.
enum class EventIndex {
  OTHER,
  ZPASS_DONE,
  SAMAPE_PIPELINE_STAT,
  SAMPLE_STREAM_OUT_STATS,
  CS_VS_PS_PARTIAL_FLUSH,
  ANY_EOP_TIMESTAMP,
  CS_PS_EOS,
};

// Direction of the access that raised a VM protection fault.
enum class ProtectionFaultAccess : std::uint32_t {
  Read = 0,
  Write = 1,
};
|
||||
|
||||
namespace detail {
#pragma pack(push, 1)
// Reserves Count unused dwords so that a Register<Offset, T> member lands
// at the correct dword offset inside a register-bank union.
template <std::size_t Count> struct Padding {
private:
  std::uint32_t _[Count];
};
} // namespace detail

// A register placed Offset dwords into its bank. For class-type payloads the
// register inherits ImplT so its fields are accessed directly.
template <std::size_t Offset, typename ImplT = std::uint32_t>
struct Register : detail::Padding<Offset>, ImplT {
  Register() = default;
  Register(const Register &) = default;
  Register &operator=(const Register &) = default;

  // Assigning the payload type writes the register value in place.
  Register &operator=(const ImplT &newValue) {
    *static_cast<ImplT *>(this) = newValue;
    return *this;
  }
};
|
||||
|
||||
// Specialization for scalar payloads (integers, floats, enums), which cannot
// be inherited from: the value is stored in a member and the register
// converts implicitly to it.
template <std::size_t Offset, typename ImplT>
  requires(std::is_integral_v<ImplT> || std::is_floating_point_v<ImplT> ||
           std::is_enum_v<ImplT>)
struct Register<Offset, ImplT> : detail::Padding<Offset> {
  ImplT value;

  Register() = default;
  Register(const Register &) = default;
  Register &operator=(const Register &) = default;
  Register &operator=(ImplT newValue) {
    value = newValue;
    return *this;
  }

  operator ImplT() { return value; }
};
|
||||
|
||||
// CB_COLOR*_ATTRIB: tiling and multisample attributes of a color target.
struct CbColorAttrib {
  union {
    struct {
      std::uint32_t tileModeIndex : 5;
      std::uint32_t fmaskTileModeIndex : 4;
      std::uint32_t : 3;
      std::uint32_t numSamples : 3;
      std::uint32_t numFragments : 2;
      std::uint32_t forceDstAlpha1 : 1;
    };

    std::uint32_t raw;
  };
};

// CB_COLOR*_VIEW: slice range of the color target that is rendered to.
struct CbColorView {
  union {
    struct {
      std::uint32_t sliceStart : 11;
      std::uint32_t : 2;
      std::uint32_t sliceMax : 11;
    };
    std::uint32_t raw;
  };
};
|
||||
|
||||
// CB_COLOR_CONTROL: global color-block mode and raster operation.
struct CbColorControl {
  union {
    struct {
      std::uint32_t : 3;
      std::uint32_t degammaEnable : 1;
      gnm::CbMode mode : 3;
      std::uint32_t : 9;
      std::uint32_t rop3 : 8; // raster op, 0xCC = copy
    };
    std::uint32_t raw;
  };
};

// CB_SHADER_MASK: per-output 4-bit component write masks for MRT 0..7.
struct CbShaderMask {
  union {
    struct {
      std::uint32_t output0Enable : 4;
      std::uint32_t output1Enable : 4;
      std::uint32_t output2Enable : 4;
      std::uint32_t output3Enable : 4;
      std::uint32_t output4Enable : 4;
      std::uint32_t output5Enable : 4;
      std::uint32_t output6Enable : 4;
      std::uint32_t output7Enable : 4;
    };
    std::uint32_t raw;
  };
};

// CB_TARGET_MASK: per-render-target 4-bit component write masks.
struct CbTargetMask {
  union {
    struct {
      std::uint32_t target0Enable : 4;
      std::uint32_t target1Enable : 4;
      std::uint32_t target2Enable : 4;
      std::uint32_t target3Enable : 4;
      std::uint32_t target4Enable : 4;
      std::uint32_t target5Enable : 4;
      std::uint32_t target6Enable : 4;
      std::uint32_t target7Enable : 4;
    };
    std::uint32_t raw;
  };
};
|
||||
|
||||
// Component swizzle selector for a color target.
enum class CbCompSwap : std::uint32_t {
  Std,
  Alt,
  StdRev,
  AltRev,
};

// CB_COLOR*_INFO: format and compression controls of a color target.
struct CbColorInfo {
  union {
    struct {
      std::uint32_t endian : 2;
      gnm::DataFormat dfmt : 5;
      std::uint32_t linearGeneral : 1;
      gnm::NumericFormat nfmt : 3;
      CbCompSwap compSwap : 2;
      std::uint32_t fastClear : 1;
      std::uint32_t compression : 1;
      std::uint32_t blendClamp : 1;
      std::uint32_t blendBypass : 1;
      std::uint32_t simpleFloat : 1;
      std::uint32_t roundMode : 1;
      std::uint32_t cmaskIsLinear : 1;
      std::uint32_t blendOptDontRdDst : 3;
      std::uint32_t blendOptDiscardPixel : 3;
    };

    std::uint32_t raw;
  };
};
|
||||
|
||||
// One render target's CB_COLOR* register block (15 consecutive dwords:
// base .. clearWord3).
struct CbColor {
  std::uint32_t base; // surface address >> 8
  std::uint32_t pitch;
  std::uint32_t slice;
  CbColorView view;
  CbColorInfo info;
  CbColorAttrib attrib;
  std::uint32_t dccBase;
  std::uint32_t cmask;
  // Only the low 14 bits of the CMASK_SLICE dword are meaningful; the
  // remaining bits of this dword are padding.
  std::uint32_t cmaskSlice : 14;
  std::uint32_t fmask;
  std::uint32_t fmaskSlice;
  std::uint32_t clearWord0;
  std::uint32_t clearWord1;
  std::uint32_t clearWord2;
  std::uint32_t clearWord3;
};
|
||||
|
||||
// PA_CL_VPORT_*: viewport transform scale/offset for x, y, z.
struct PaClVport {
  float xScale;
  float xOffset;
  float yScale;
  float yOffset;
  float zScale;
  float zOffset;
};

// PA_SC_VPORT_Z*: per-viewport depth range.
struct PaScVportZ {
  float min;
  float max;
};

// Generic scissor/clip rectangle register pair (two packed dwords).
struct PaScRect {
  std::uint16_t left;
  std::uint16_t top;
  std::uint16_t right;
  std::uint16_t bottom;
};
|
||||
|
||||
// SPI_SHADER_PGM_* block for one shader stage: program address, the RSRC1/2
// resource descriptors, and the 16 user-data SGPR slots. The rsrc unions
// overlay stage-specific fields on the common layout.
struct SpiShaderPgm {
  std::uint32_t rsrc3;
  std::uint64_t address; // shader program GPU address (dword-shifted)

  union {
    // Fields common to all stages.
    struct {
      std::uint32_t vgprs : 6; // allocated VGPRs = (vgprs + 1) * 4
      std::uint32_t sgprs : 4; // allocated SGPRs = (sgprs + 1) * 8
      std::uint32_t priority : 2;
      std::uint32_t floatMode : 8;
      std::uint32_t priv : 1;
      std::uint32_t dx10Clamp : 1;
      std::uint32_t debugMode : 1;
      std::uint32_t ieeeMode : 1;
    };

    struct {
      std::uint32_t : 24;
      std::uint32_t cuGroupEnable : 1;
    } es;

    struct {
      std::uint32_t : 24;
      std::uint32_t cuGroupEnable : 1;
    } gs;

    struct {
      std::uint32_t : 24;
      std::uint32_t vgprCompCnt : 2;
    } ls;

    struct {
      std::uint32_t : 24;
      std::uint32_t cuGroupDisable : 1;
    } ps;

    struct {
      std::uint32_t : 24;
      std::uint32_t vgprCompCnt : 2;
      std::uint32_t cuGroupEnable : 1;
    } vs;

    std::uint8_t getVGprCount() const { return (vgprs + 1) * 4; }
    std::uint8_t getSGprCount() const { return (sgprs + 1) * 8; }

    std::uint32_t raw;
  } rsrc1;

  union {
    // Fields common to all stages.
    struct {
      std::uint32_t scratchEn : 1;
      std::uint32_t userSgpr : 5; // number of user-data SGPRs loaded
      std::uint32_t trapPresent : 1;
    };

    struct {
      std::uint32_t : 7;
      std::uint32_t ocLdsEn : 1;
      std::uint32_t soBase0En : 1;
      std::uint32_t soBase1En : 1;
      std::uint32_t soBase2En : 1;
      std::uint32_t soBase3En : 1;
      std::uint32_t soEn : 1;
      std::uint32_t excpEn : 7;
    } vs;

    struct {
      std::uint32_t : 7;
      std::uint32_t ocLdsEn : 1;
      std::uint32_t excpEn : 7;
    } es;

    struct {
      std::uint32_t : 7;
      std::uint32_t excpEn : 7;
    } gs;

    struct {
      std::uint32_t : 7;
      std::uint32_t ocLdsEn : 1;
      std::uint32_t tgSizeEn : 1;
      std::uint32_t excpEn : 7;
    } hs;

    struct {
      std::uint32_t : 7;
      std::uint32_t ldsSize : 9;
      std::uint32_t excpEn : 7;
    } ls;
    std::uint32_t raw;
  } rsrc2;

  std::array<std::uint32_t, 16> userData;
};
|
||||
|
||||
// VM_CONTEXT*_PROTECTION_FAULT_STATUS bit layout.
struct VmProtectionFault {
  std::uint32_t protection : 8;
  std::uint32_t : 4;
  std::uint32_t client : 8; // memory client that faulted
  std::uint32_t : 4;
  ProtectionFaultAccess rw : 1;
  std::uint32_t vmid : 4;
  std::uint32_t : 3;
};

// VGT_SHADER_STAGES_EN.LS_EN values.
enum class LsStage : std::uint32_t {
  LsOff,
  LsOn,
  CsOn,
};

// VGT_SHADER_STAGES_EN.ES_EN values.
enum class EsStage : std::uint32_t {
  EsOff,
  EsDs,
  EsReal,
};

// VGT_SHADER_STAGES_EN.VS_EN values.
enum class VsStage : std::uint32_t {
  VsReal,
  VsDs,
  VsCopy,
};

// VGT_SHADER_STAGES_EN: which geometry pipeline stages are active.
struct VgtShaderStagesEn {
  union {
    struct {
      LsStage lsEn : 2;
      bool hsEn : 1;
      EsStage esEn : 2;
      bool gsEn : 1;
      VsStage vsEn : 2;
      bool dynamicHs : 1;
    };
    std::uint32_t raw;
  };
};
|
||||
|
||||
// Framebuffer location descriptor (MC_VM_FB_LOCATION-style packing).
struct FbInfo {
  std::uint16_t base; // address >> 24
  std::uint16_t unk;
};

// DB_DEPTH_CONTROL: depth/stencil test configuration.
struct DbDepthControl {
  union {
    struct {
      bool stencilEnable : 1;
      bool depthEnable : 1;
      bool depthWriteEnable : 1;
      bool depthBoundsEnable : 1;
      gnm::CompareFunc zFunc : 3;
      bool backFaceEnable : 1;
      gnm::CompareFunc stencilFunc : 3;
      std::uint32_t : 9;
      gnm::CompareFunc stencilFuncBackFace : 3;
      std::uint32_t : 7;
      bool enableColorWritesOnDepthFail : 1;
      bool disableColorWritesOnDepthPass : 1;
    };

    std::uint32_t raw;
  };
};

// DB_Z_INFO: depth surface format and tiling.
struct DbZInfo {
  union {
    struct {
      gnm::ZFormat format : 2;
      std::uint32_t numSamples : 2;
      std::uint32_t : 16;
      std::uint32_t tileModeIndex : 3;
      std::uint32_t : 4;
      bool allowExpClear : 1;
      std::uint32_t readSize : 1; // 0 - 256 bit, 1 - 512 bit
      bool tileSurfaceEnable : 1;
      std::uint32_t : 1;
      bool zRangePrecision : 1;
    };

    std::uint32_t raw;
  };
};

// DB_RENDER_CONTROL: depth/stencil clear and copy operations.
struct DbRenderControl {
  union {
    struct {
      bool depthClearEnable : 1;
      bool stencilClearEnable : 1;
      bool depthCopy : 1;
      bool stencilCopy : 1;
      bool resummarizeEnable : 1;
      bool stencilCompressDisable : 1;
      bool depthCompressDisable : 1;
      bool copyCentroid : 1;
      std::uint32_t copySample : 4;
    };

    std::uint32_t raw;
  };
};
|
||||
|
||||
// CB_BLEND*_CONTROL: per-render-target blend equation.
struct CbBlendControl {
  union {
    struct {
      gnm::BlendMultiplier colorSrcBlend : 5;
      gnm::BlendFunc colorCombFcn : 3;
      gnm::BlendMultiplier colorDstBlend : 5;
      std::uint32_t : 3;
      gnm::BlendMultiplier alphaSrcBlend : 5;
      gnm::BlendFunc alphaCombFcn : 3;
      gnm::BlendMultiplier alphaDstBlend : 5;

      bool separateAlphaBlend : 1;
      bool enable : 1;
      bool disableRop3 : 1;
    };

    std::uint32_t raw;
  };
};

// PA_SU_SC_MODE_CNTL: culling, polygon mode and offset enables.
struct PaSuScModeCntl {
  union {
    struct {
      bool cullFront : 1;
      bool cullBack : 1;
      gnm::Face face : 1;
      gnm::PolyMode polyMode : 2;
      gnm::PolyModePtype polyModeFrontPtype : 3;
      gnm::PolyModePtype polyModeBackPtype : 3;
      bool polyOffsetFrontEnable : 1;
      bool polyOffsetBackEnable : 1;
      bool polyOffsetParaEnable : 1;
      std::uint32_t : 2;
      bool vtxWindowOffsetEnable : 1;
      std::uint32_t : 2;
      bool provokingVtxLast : 1;
      bool perspCorrDis : 1;
      bool multiPrimIbEna : 1;
    };

    std::uint32_t raw;
  };
};

// PA_SU_VTX_CNTL: vertex quantization and pixel-center convention.
struct PaSuVtxCntl {
  union {
    struct {
      bool pixCenterHalf : 1;
      gnm::RoundMode roundMode : 2;
      gnm::QuantMode quantMode : 3;
    };

    std::uint32_t raw;
  };
};
|
||||
|
||||
// SPI_PS_INPUT_ENA / SPI_PS_INPUT_ADDR: which interpolants and built-ins are
// provided to the pixel shader.
struct SpiPsInput {
  union {
    struct {
      bool perspSampleEna : 1;
      bool perspCenterEna : 1;
      bool perspCentroidEna : 1;
      bool perspPullModelEna : 1;
      bool linearSampleEna : 1;
      bool linearCenterEna : 1;
      bool linearCentroidEna : 1;
      bool lineStippleTexEna : 1;
      bool posXFloatEna : 1;
      bool posYFloatEna : 1;
      bool posZFloatEna : 1;
      bool posWFloatEna : 1;
      bool frontFaceEna : 1;
      bool ancillaryEna : 1;
      bool sampleCoverageEna : 1;
      bool posFixedPtEna : 1;
    };

    std::uint32_t raw;
  };
};

// Constant substituted for a PS input when useDefaultVal is set.
enum class SpiPsDefaultVal : std::uint8_t {
  X0_Y0_Z0_W0,
  X0_Y0_Z0_W1,
  X1_Y1_Z1_W0,
  X1_Y1_Z1_W1,
};

// SPI_PS_INPUT_CNTL_*: per-interpolant routing for the pixel shader.
struct SpiPsInputCntl {
  union {
    struct {
      std::uint32_t offset : 4; // VS export slot feeding this input
      bool useDefaultVal : 1;
      std::uint32_t : 3;
      SpiPsDefaultVal defaultVal : 2;
      bool flatShade : 1;
      std::uint32_t : 2;
      std::uint32_t cylWrap : 4;
      bool ptSpriteTex : 1;
    };

    std::uint32_t raw;
  };
};
|
||||
struct Registers {
|
||||
static constexpr auto kRegisterCount = 0xf000;
|
||||
|
||||
struct Config {
|
||||
static constexpr auto kMmioOffset = 0x2000;
|
||||
|
||||
Register<0xad, std::array<std::uint32_t, 3>> cpPrtLodStatsCntls;
|
||||
Register<0x1c0> cpRbRptr;
|
||||
Register<0x1bf> cpRb1Rptr;
|
||||
Register<0x1be> cpRb2Rptr;
|
||||
Register<0x232> vgtEsGsRingSize;
|
||||
Register<0x233> vgtGsVsRingSize;
|
||||
Register<0x262> vgtTfRingSize;
|
||||
Register<0x26e> vgtTfMemoryBase;
|
||||
Register<0x3c0, std::array<std::uint32_t, 4>> sqBufRsrcWords;
|
||||
Register<0x3c4, std::array<std::uint32_t, 7>> sqImgRsrcWords;
|
||||
Register<0x3cc, std::array<std::uint32_t, 4>> sqImgSampWords;
|
||||
Register<0x644, std::array<TileMode, 32>> gbTileModes;
|
||||
Register<0x664, std::array<MacroTileMode, 16>> gbMacroTileModes;
|
||||
};
|
||||
|
||||
struct ComputeConfig {
|
||||
static constexpr auto kMmioOffset = 0x2e00;
|
||||
|
||||
std::uint32_t computeDispatchInitiator;
|
||||
std::uint32_t _pad0[6];
|
||||
std::uint32_t computeNumThreadX;
|
||||
std::uint32_t computeNumThreadY;
|
||||
std::uint32_t computeNumThreadZ;
|
||||
std::uint32_t _pad1[2];
|
||||
std::uint32_t computePgmLo;
|
||||
std::uint32_t computePgmHi;
|
||||
std::uint32_t _pad2[4];
|
||||
std::uint32_t computePgmRsrc1;
|
||||
std::uint32_t computePgmRsrc2;
|
||||
std::uint32_t _pad3[1];
|
||||
std::uint32_t computeResourceLimits;
|
||||
std::uint32_t computeStaticThreadMgmtSe0;
|
||||
std::uint32_t computeStaticThreadMgmtSe1;
|
||||
std::uint32_t computeTmpRingSize;
|
||||
std::uint32_t _pad4[39];
|
||||
std::array<std::uint32_t, 16> userData;
|
||||
};
|
||||
|
||||
struct ShaderConfig {
|
||||
static constexpr auto kMmioOffset = 0x2c00;
|
||||
|
||||
union {
|
||||
Register<0x7, SpiShaderPgm> spiShaderPgmPs;
|
||||
Register<0x47, SpiShaderPgm> spiShaderPgmVs;
|
||||
Register<0x87, SpiShaderPgm> spiShaderPgmGs;
|
||||
Register<0xc7, SpiShaderPgm> spiShaderPgmEs;
|
||||
Register<0x107, SpiShaderPgm> spiShaderPgmHs;
|
||||
Register<0x147, SpiShaderPgm> spiShaderPgmLs;
|
||||
Register<0x200, ComputeConfig> compute;
|
||||
};
|
||||
};
|
||||
|
||||
struct Context {
|
||||
static constexpr auto kMmioOffset = 0xa000;
|
||||
static Context Default;
|
||||
|
||||
union {
|
||||
Register<0x0, DbRenderControl> dbRenderControl;
|
||||
Register<0x1> dbCountControl;
|
||||
Register<0x2> dbDepthView;
|
||||
Register<0x3> dbRenderOverride;
|
||||
Register<0x4> dbRenderOverride2;
|
||||
Register<0x5> dbHTileDataBase;
|
||||
Register<0x8, float> dbDepthBoundsMin;
|
||||
Register<0x9, float> dbDepthBoundsMax;
|
||||
Register<0xa> dbStencilClear;
|
||||
Register<0xb, float> dbDepthClear;
|
||||
Register<0xc, PaScRect> paScScreenScissor;
|
||||
Register<0xf> dbDepthInfo;
|
||||
Register<0x10, DbZInfo> dbZInfo;
|
||||
Register<0x11> dbStencilInfo;
|
||||
Register<0x12> dbZReadBase;
|
||||
Register<0x13> dbStencilReadBase;
|
||||
Register<0x14> dbZWriteBase;
|
||||
Register<0x15> dbStencilWriteBase;
|
||||
Register<0x16> dbDepthSize;
|
||||
Register<0x17> dbDepthSlice;
|
||||
Register<0x20> taBcBaseAddr;
|
||||
Register<0x80> paScWindowOffset;
|
||||
Register<0x81, PaScRect> paScWindowScissor;
|
||||
Register<0x83> paScClipRectRule;
|
||||
Register<0x84, std::array<PaScRect, 4>> paScClipRect;
|
||||
Register<0x8c> unk_8c;
|
||||
Register<0x8d> paSuHardwareScreenOffset;
|
||||
Register<0x8e, CbTargetMask> cbTargetMask;
|
||||
Register<0x8f, CbShaderMask> cbShaderMask;
|
||||
Register<0x90, PaScRect> paScGenericScissor;
|
||||
Register<0x94, std::array<PaScRect, 16>> paScVportScissor;
|
||||
Register<0xb4, std::array<PaScVportZ, 16>> paScVportZ;
|
||||
Register<0xd4> unk_d4;
|
||||
Register<0xd8> cpPerfMonCntxCntl;
|
||||
Register<0x100> vgtMaxVtxIndx;
|
||||
Register<0x101> vgtMinVtxIndx;
|
||||
Register<0x102> vgtIndxOffset;
|
||||
Register<0x103> vgtMultiPrimIbResetIndx;
|
||||
Register<0x105, float> cbBlendRed;
|
||||
Register<0x106, float> cbBlendGreen;
|
||||
Register<0x107, float> cbBlendBlue;
|
||||
Register<0x108, float> cbBlendAlpha;
|
||||
Register<0x10b> dbStencilControl;
|
||||
Register<0x10c> dbStencilRefMask;
|
||||
Register<0x10d> dbStencilRefMaskBf;
|
||||
Register<0x10f, std::array<PaClVport, 16>> paClVports;
|
||||
Register<0x16f> paClUcp0X;
|
||||
Register<0x170> paClUcp0Y;
|
||||
Register<0x171> paClUcp0Z;
|
||||
Register<0x172> paClUcp0W;
|
||||
Register<0x191, std::array<SpiPsInputCntl, 32>> spiPsInputCntl;
|
||||
Register<0x1b1> spiVsOutConfig;
|
||||
Register<0x1b3, SpiPsInput> spiPsInputEna;
|
||||
Register<0x1b4, SpiPsInput> spiPsInputAddr;
|
||||
Register<0x1b6> spiPsInControl;
|
||||
Register<0x1b8> spiBarycCntl;
|
||||
Register<0x1ba> spiTmpRingSize;
|
||||
Register<0x1c3> spiShaderPosFormat;
|
||||
Register<0x1c4> spiShaderZFormat;
|
||||
Register<0x1c5> spiShaderColFormat;
|
||||
Register<0x1e0, std::array<CbBlendControl, 8>> cbBlendControl;
|
||||
Register<0x1f9> vgtDmaBaseHi;
|
||||
Register<0x1fa> vgtDmaBase;
|
||||
Register<0x1fc> vgtDrawInitiator;
|
||||
Register<0x1fd> vgtImmedData;
|
||||
Register<0x200, DbDepthControl> dbDepthControl;
|
||||
Register<0x201> dbEqaa;
|
||||
Register<0x202, CbColorControl> cbColorControl;
|
||||
Register<0x203> dbShaderControl;
|
||||
Register<0x204> paClClipCntl;
|
||||
Register<0x205, PaSuScModeCntl> paSuScModeCntl;
|
||||
Register<0x206> paClVteCntl;
|
||||
Register<0x207> paClVsOutCntl;
|
||||
Register<0x280> paSuPointSize;
|
||||
Register<0x281> paSuPointMinmax;
|
||||
Register<0x282> paSuLineCntl;
|
||||
Register<0x284> vgtOutputPathCntl;
|
||||
Register<0x286> vgtHosMaxTessLevel;
|
||||
Register<0x287> vgtHosMinTessLevel;
|
||||
Register<0x290> vgtGsMode;
|
||||
Register<0x291> vgtGsOnChipCntl;
|
||||
Register<0x292> paScModeCntl0;
|
||||
Register<0x293> paScModeCntl1;
|
||||
Register<0x295> vgtGsPerEs;
|
||||
Register<0x296> vgtEsPerGs;
|
||||
Register<0x297> vgtGsPerVs;
|
||||
Register<0x298, std::array<std::uint32_t, 3>> vgtGsVsRingOffsets;
|
||||
Register<0x29b> vgtGsOutPrimType;
|
||||
Register<0x29d> vgtDmaSize;
|
||||
Register<0x29e> vgtDmaMaxSize;
|
||||
Register<0x29f> vgtDmaIndexType;
|
||||
Register<0x2a1> vgtPrimitiveIdEn;
|
||||
Register<0x2a2> vgtDmaNumInstances;
|
||||
Register<0x2a4> vgtEventInitiator;
|
||||
Register<0x2a5> vgtMultiPrimIbResetEn;
|
||||
Register<0x2a8> vgtInstanceStepRate0;
|
||||
Register<0x2a9> vgtInstanceStepRate1;
|
||||
Register<0x2aa> iaMultiVgtParam;
|
||||
Register<0x2ab> vgtEsGsRingItemSize;
|
||||
Register<0x2ac> vgtGsVsRingItemSize;
|
||||
Register<0x2ad> vgtReuseOff;
|
||||
Register<0x2ae> vgtVtxCntEn;
|
||||
Register<0x2af> dbHTileSurface;
|
||||
Register<0x2b0> dbSResultsCompareState0;
|
||||
Register<0x2b1> dbSResultsCompareState1;
|
||||
Register<0x2b4> vgtStrmOutBufferSize0;
|
||||
Register<0x2b5> vgtStrmOutVtxStride0;
|
||||
Register<0x2b8> vgtStrmOutBufferSize1;
|
||||
Register<0x2b9> vgtStrmOutVtxStride1;
|
||||
Register<0x2bc> vgtStrmOutBufferSize2;
|
||||
Register<0x2bd> vgtStrmOutVtxStride2;
|
||||
Register<0x2c0> vgtStrmOutBufferSize3;
|
||||
Register<0x2c1> vgtStrmOutVtxStride3;
|
||||
Register<0x2ca> vgtStrmOutDrawOpaqueOffset;
|
||||
Register<0x2cb> vgtStrmOutDrawOpaqueBufferFilledSize;
|
||||
Register<0x2cc> vgtStrmOutDrawOpaqueVertexStride;
|
||||
Register<0x2ce> vgtGsMaxVertOut;
|
||||
Register<0x2d5, VgtShaderStagesEn> vgtShaderStagesEn;
|
||||
Register<0x2d6> vgtLsHsConfig;
|
||||
Register<0x2d7, std::array<std::uint32_t, 4>> vgtGsVertItemSizes;
|
||||
Register<0x2db> vgtTfParam;
|
||||
Register<0x2dc> dbAlphaToMask;
|
||||
Register<0x2dd> vgtDispatchDrawIndex;
|
||||
Register<0x2de> paSuPolyOffsetDbFmtCntl;
|
||||
Register<0x2df> paSuPolyOffsetClamp;
|
||||
Register<0x2e0> paSuPolyOffsetFrontScale;
|
||||
Register<0x2e1> paSuPolyOffsetFrontOffset;
|
||||
Register<0x2e2> paSuPolyOffsetBackScale;
|
||||
Register<0x2e3> paSuPolyOffsetBackOffset;
|
||||
Register<0x2e4> vgtGsInstanceCnt;
|
||||
Register<0x2e5> vgtStrmOutConfig;
|
||||
Register<0x2e6> vgtStrmOutBufferConfig;
|
||||
Register<0x2f5> paScCentroidPriority0;
|
||||
Register<0x2f6> paScCentroidPriority1;
|
||||
Register<0x2f7> unk_2f7;
|
||||
Register<0x2f8> paScAaConfig;
|
||||
Register<0x2f9, PaSuVtxCntl> paSuVtxCntl;
|
||||
Register<0x2fa, float> paClGbVertClipAdj;
|
||||
Register<0x2fb, float> paClGbVertDiscAdj;
|
||||
Register<0x2fc, float> paClGbHorzClipAdj;
|
||||
Register<0x2fd, float> paClGbHorzDiscAdj;
|
||||
Register<0x2fe, std::array<std::uint32_t, 4>> paScAaSampleLocsPixelX0Y0;
|
||||
Register<0x302, std::array<std::uint32_t, 4>> paScAaSampleLocsPixelX1Y0;
|
||||
Register<0x306, std::array<std::uint32_t, 4>> paScAaSampleLocsPixelX0Y1;
|
||||
Register<0x30a, std::array<std::uint32_t, 4>> paScAaSampleLocsPixelX1Y1;
|
||||
Register<0x30e> paScAaMaskX0Y0_X1Y0;
|
||||
Register<0x30f> paScAaMaskX0Y1_X1Y1;
|
||||
Register<0x316> unk_316;
|
||||
Register<0x317> vgtOutDeallocCntl;
|
||||
Register<0x318, std::array<CbColor, 8>> cbColor;
|
||||
};
|
||||
};
|
||||
|
||||
struct UConfig {
  // User-config (UCONFIG) register space, mapped at dword offset 0xc000.
  static constexpr auto kMmioOffset = 0xc000;

  union {
    // CP stream-out / surface-coherency control registers.
    Register<0x3f> cpStrmOutCntl;
    Register<0x79> cpCoherBaseHi;
    Register<0x7d> cpCoherSize;
    Register<0x7e> cpCoherBase;
    Register<0x8b> cpDmaReadTags;
    Register<0x8c> cpCoherSizeHi;
    // GRBM broadcast/index selector.
    Register<0x200> grbmGfxIndex;
    // VGT draw-state registers written per draw call.
    Register<0x242, gnm::PrimitiveType> vgtPrimitiveType;
    Register<0x243, gnm::IndexType> vgtIndexType;
    Register<0x24c> vgtNumIndices;
    Register<0x24d> vgtNumInstances;
    // Thread-trace user markers (4 consecutive dwords).
    Register<0x340, std::array<std::uint32_t, 4>> sqThreadTraceUserdata;
    // GDS ordered-append unit.
    Register<0x41d> gdsOaCntl;
    Register<0x41e> gdsOaCounter;
    Register<0x41f> gdsOaAddress;
  };
};
|
||||
|
||||
struct Counters {
  // Performance-counter register space, mapped at dword offset 0xd000.
  // Counter values are read here; the *Select registers below choose which
  // event each hardware counter accumulates.
  static constexpr auto kMmioOffset = 0xd000;

  union {
    // Command-processor (CPG/CPC/CPF) 64-bit counter values.
    Register<0x0, std::uint64_t> cpgPerfCounter1;
    Register<0x2, std::uint64_t> cpgPerfCounter0;
    Register<0x4, std::uint64_t> cpcPerfCounter1;
    Register<0x6, std::uint64_t> cpcPerfCounter0;
    Register<0x8, std::uint64_t> cpfPerfCounter1;
    Register<0xa, std::uint64_t> cpfPerfCounter0;
    // Geometry front-end (WD/IA/VGT) counter banks.
    Register<0x80, std::array<std::uint64_t, 4>> wdPerfCounters;
    Register<0x88, std::array<std::uint64_t, 4>> iaPerfCounters;
    Register<0x90, std::array<std::uint64_t, 4>> vgtPerfCounters;
    // Primitive assembly (PA_SU / PA_SC) counter banks.
    Register<0x100, std::array<std::uint64_t, 4>> paSuPerfCounters;
    Register<0x140, std::array<std::uint64_t, 8>> paScPerfCounters;
    // SPI counters are exposed as separate Hi/Lo dwords rather than u64 pairs.
    Register<0x180> spiPerfCounter0Hi;
    Register<0x181> spiPerfCounter0Lo;
    Register<0x182> spiPerfCounter1Hi;
    Register<0x183> spiPerfCounter1Lo;
    Register<0x184> spiPerfCounter2Hi;
    Register<0x185> spiPerfCounter2Lo;
    Register<0x186> spiPerfCounter3Hi;
    Register<0x187> spiPerfCounter3Lo;
    Register<0x188> spiPerfCounter4Hi;
    Register<0x189> spiPerfCounter4Lo;
    Register<0x18a> spiPerfCounter5Hi;
    Register<0x18b> spiPerfCounter5Lo;
    // Shader/export/memory-block counter banks.
    Register<0x1c0, std::array<std::uint64_t, 16>> sqPerfCounters;
    Register<0x240, std::array<std::uint64_t, 4>> sxPerfCounters;
    Register<0x280, std::array<std::uint64_t, 4>> gdsPerfCounters;
    Register<0x2c0, std::array<std::uint64_t, 2>> taPerfCounters;
    Register<0x300, std::array<std::uint64_t, 2>> tdPerfCounters;
    Register<0x340, std::array<std::uint64_t, 4>> tcpPerfCounters;
    Register<0x380, std::array<std::uint64_t, 4>> tccPerfCounters;
    Register<0x390, std::array<std::uint64_t, 4>> tcaPerfCounters;
    Register<0x3a0, std::array<std::uint64_t, 4>> tcsPerfCounters;
    Register<0x406, std::array<std::uint64_t, 4>> cbPerfCounters;
    Register<0x440, std::array<std::uint64_t, 4>> dbPerfCounters;
    // Event-select registers (one per counter; *Select1 are the secondary
    // select words where the hardware provides them).
    Register<0x800> cpgPerfCounter1Select;
    Register<0x801> cpgPerfCounter0Select1;
    Register<0x802> cpgPerfCounter0Select;
    Register<0x803> cpcPerfCounter1Select;
    Register<0x804> cpcPerfCounter0Select1;
    Register<0x805> cpfPerfCounter1Select;
    Register<0x806> cpfPerfCounter0Select1;
    Register<0x807> cpfPerfCounter0Select;
    Register<0x808> cpPerfMonCntl;
    Register<0x809> cpcPerfCounter0Select;
    Register<0x880> wdPerfCounter0Select;
    Register<0x881> wdPerfCounter1Select;
    Register<0x882> wdPerfCounter2Select;
    Register<0x883> wdPerfCounter3Select;
    Register<0x884> iaPerfCounter0Select;
    Register<0x885> iaPerfCounter1Select;
    Register<0x886> iaPerfCounter2Select;
    Register<0x887> iaPerfCounter3Select;
    Register<0x888> iaPerfCounter0Select1;
    Register<0x88c> vgtPerfCounter0Select;
    Register<0x88d> vgtPerfCounter1Select;
    Register<0x88e> vgtPerfCounter2Select;
    Register<0x88f> vgtPerfCounter3Select;
    Register<0x890> vgtPerfCounter0Select1;
    Register<0x891> vgtPerfCounter1Select1;
    Register<0x900> paSuPerfCounter0Select;
    Register<0x901> paSuPerfCounter0Select1;
    Register<0x902> paSuPerfCounter1Select;
    Register<0x903> paSuPerfCounter1Select1;
    Register<0x904> paSuPerfCounter2Select;
    Register<0x905> paSuPerfCounter3Select;
    Register<0x940> paScPerfCounter0Select;
    Register<0x941> paScPerfCounter0Select1;
    Register<0x942> paScPerfCounter1Select;
    Register<0x943> paScPerfCounter2Select;
    Register<0x944> paScPerfCounter3Select;
    Register<0x945> paScPerfCounter4Select;
    Register<0x946> paScPerfCounter5Select;
    Register<0x947> paScPerfCounter6Select;
    Register<0x948> paScPerfCounter7Select;
    Register<0x980> spiPerfCounter0Select;
    Register<0x981> spiPerfCounter1Select;
    Register<0x982> spiPerfCounter2Select;
    Register<0x983> spiPerfCounter3Select;
    Register<0x984> spiPerfCounter0Select1;
    Register<0x985> spiPerfCounter1Select1;
    Register<0x986> spiPerfCounter2Select1;
    Register<0x987> spiPerfCounter3Select1;
    Register<0x988> spiPerfCounter4Select;
    Register<0x989> spiPerfCounter5Select;
    Register<0x98a> spiPerfCounterBins;
    Register<0x9c0, std::array<std::uint32_t, 16>> sqPerfCountersSelect;
    Register<0x9e0> sqPerfCounterCtrl;
    Register<0xa40> sxPerfCounter0Select;
    Register<0xa41> sxPerfCounter1Select;
    Register<0xa42> sxPerfCounter2Select;
    Register<0xa43> sxPerfCounter3Select;
    Register<0xa44> sxPerfCounter0Select1;
    Register<0xa45> sxPerfCounter1Select1;
    Register<0xa80> gdsPerfCounter0Select;
    Register<0xa81> gdsPerfCounter1Select;
    Register<0xa82> gdsPerfCounter2Select;
    Register<0xa83> gdsPerfCounter3Select;
    Register<0xa84> gdsPerfCounter0Select1;
    Register<0xac0> taPerfCounter0Select;
    Register<0xac1> taPerfCounter0Select1;
    Register<0xac2> taPerfCounter1Select;
    Register<0xb00> tdPerfCounter0Select;
    Register<0xb01> tdPerfCounter0Select1;
    Register<0xb02> tdPerfCounter1Select;
    Register<0xb40> tcpPerfCounter0Select;
    Register<0xb41> tcpPerfCounter0Select1;
    Register<0xb42> tcpPerfCounter1Select;
    Register<0xb43> tcpPerfCounter1Select1;
    Register<0xb44> tcpPerfCounter2Select;
    Register<0xb45> tcpPerfCounter3Select;
    Register<0xb80> tccPerfCounter0Select;
    Register<0xb81> tccPerfCounter0Select1;
    Register<0xb82> tccPerfCounter1Select;
    Register<0xb83> tccPerfCounter1Select1;
    Register<0xb84> tccPerfCounter2Select;
    Register<0xb85> tccPerfCounter3Select;
    Register<0xb90> tcaPerfCounter0Select;
    Register<0xb91> tcaPerfCounter0Select1;
    Register<0xb92> tcaPerfCounter1Select;
    Register<0xb93> tcaPerfCounter1Select1;
    Register<0xb94> tcaPerfCounter2Select;
    Register<0xb95> tcaPerfCounter3Select;
    Register<0xba0> tcsPerfCounter0Select;
    Register<0xba1> tcsPerfCounter0Select1;
    Register<0xba2> tcsPerfCounter1Select;
    Register<0xba3> tcsPerfCounter2Select;
    Register<0xba4> tcsPerfCounter3Select;
    Register<0xc00> cbPerfCounterFilter;
    Register<0xc01> cbPerfCounter0Select;
    Register<0xc02> cbPerfCounter0Select1;
    Register<0xc03> cbPerfCounter1Select;
    Register<0xc04> cbPerfCounter2Select;
    Register<0xc05> cbPerfCounter3Select;
    Register<0xc40> dbPerfCounter0Select;
    Register<0xc41> dbPerfCounter0Select1;
    Register<0xc42> dbPerfCounter1Select;
    Register<0xc43> dbPerfCounter1Select1;
    Register<0xc44> dbPerfCounter2Select;
    // NOTE(review): 0xc45 is skipped here (dbPerfCounter3Select sits at
    // 0xc46) — presumably matches the hardware map; verify against docs.
    Register<0xc46> dbPerfCounter3Select;
  };
};
|
||||
|
||||
union {
  // VM per-context protection-fault reporting.
  Register<0x50c, std::uint32_t> vmContext0ProtectionIntrCtl;
  Register<0x50d, std::uint32_t> vmContext1ProtectionIntrCtl;
  Register<0x536, VmProtectionFault> vmContext0ProtectionFault;
  Register<0x537, VmProtectionFault> vmContext1ProtectionFault;
  Register<0x53e, std::uint32_t>
      vmContext0ProtectionFaultPage; // address >> 12
  Register<0x53f, std::uint32_t>
      vmContext1ProtectionFaultPage; // address >> 12
  Register<0x809, FbInfo> fbInfo;
  // Interrupt ring read/write pointers.
  Register<0xf82, std::uint32_t> ihRptr;
  Register<0xf83, std::uint32_t> ihWptr;

  // Sub-spaces overlaid at their fixed MMIO offsets.
  Register<Config::kMmioOffset, Config> config;
  Register<ShaderConfig::kMmioOffset, ShaderConfig> sh;

  // CP ring-buffer write pointers and interrupt control/status.
  Register<0x3045> cpRbWptr;
  Register<0x3064> cpRb1Wptr;
  Register<0x3069> cpRb2Wptr;
  Register<0x3049> cpIntCntl;
  Register<0x304a> cpIntStatus;
  Register<0x306a, std::array<std::uint32_t, 3>> cpIntCntlRings;
  Register<0x306d, std::array<std::uint32_t, 3>> cpIntStatusRings;
  Register<0x324b> cpHqdQueuePriority;
  Register<0x324c> cpHqdQuantum;

  Register<Context::kMmioOffset, Context> context;
  Register<UConfig::kMmioOffset, UConfig> uconfig;
  Register<Counters::kMmioOffset, Counters> counters;

  // Flat dword view of the whole register file.
  std::uint32_t raw[kRegisterCount];
};
|
||||
};
|
||||
|
||||
#pragma pack(pop)
|
||||
} // namespace amdgpu
|
||||
1273
rpcsx-gpu2/Renderer.cpp
Normal file
1273
rpcsx-gpu2/Renderer.cpp
Normal file
File diff suppressed because it is too large
Load diff
17
rpcsx-gpu2/Renderer.hpp
Normal file
17
rpcsx-gpu2/Renderer.hpp
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
#pragma once

#include "Cache.hpp"
#include "Pipe.hpp"
#include <cstdint>
#include <vulkan/vulkan_core.h>

namespace amdgpu {
/// Executes one draw on the given graphics pipe using its current register
/// state. When indiciesAddress is non-zero the draw is indexed with indexCount
/// indices read from that guest address; otherwise vertexCount vertices are
/// drawn directly. (NOTE(review): "indicies" is a typo for "indices", but the
/// name is part of the public interface.)
void draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
          std::uint32_t vertexCount, std::uint32_t firstInstance,
          std::uint32_t instanceCount, std::uint64_t indiciesAddress,
          std::uint32_t indexCount);
/// Presents the framebuffer at the guest address: records commands into
/// commandBuffer that blit the (possibly tiled) source image of imageExtent
/// into the swapchain image view `target` of targetExtent, honoring the
/// color-buffer component swap and pixel format.
void flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer,
          VkExtent2D targetExtent, std::uint64_t address, VkImageView target,
          VkExtent2D imageExtent, CbCompSwap compSwap, TileMode tileMode,
          gnm::DataFormat dfmt, gnm::NumericFormat nfmt);
} // namespace amdgpu
|
||||
4
rpcsx-gpu2/lib/CMakeLists.txt
Normal file
4
rpcsx-gpu2/lib/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
# Support libraries for the GPU rewrite: surface tiler, GCN shader
# translator, Vulkan helpers and Gnm definitions.
add_subdirectory(amdgpu-tiler)
add_subdirectory(gcn-shader)
add_subdirectory(vk)
add_subdirectory(gnm)
|
||||
22
rpcsx-gpu2/lib/amdgpu-tiler/CMakeLists.txt
Normal file
22
rpcsx-gpu2/lib/amdgpu-tiler/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
# Compile the (de)tiling compute shaders to SPIR-V headers consumed by the
# Vulkan tiler backend.
add_precompiled_vulkan_spirv(amdgpu_tiler_vulkan_shaders
  shaders/tiler1d.comp.glsl
  shaders/tiler2d.comp.glsl
  shaders/tilerLinear.comp.glsl
  shaders/detiler1d.comp.glsl
  shaders/detiler2d.comp.glsl
  shaders/detilerLinear.comp.glsl
)

# Core address computation library, plus a CPU and a Vulkan (compute shader)
# implementation built on top of it.
add_library(amdgpu_tiler STATIC src/tiler.cpp)
target_include_directories(amdgpu_tiler PUBLIC include)

add_library(amdgpu_tiler_cpu STATIC src/tiler_cpu.cpp)
add_library(amdgpu_tiler_vulkan STATIC src/tiler_vulkan.cpp)

target_link_libraries(amdgpu_tiler PUBLIC gnm)
target_link_libraries(amdgpu_tiler_cpu PUBLIC amdgpu_tiler)
target_link_libraries(amdgpu_tiler_vulkan PUBLIC amdgpu_tiler amdgpu_tiler_vulkan_shaders vk)

# Namespaced aliases for consumers.
add_library(amdgpu::tiler ALIAS amdgpu_tiler)
add_library(amdgpu::tiler::cpu ALIAS amdgpu_tiler_cpu)
add_library(amdgpu::tiler::vulkan ALIAS amdgpu_tiler_vulkan)
|
||||
505
rpcsx-gpu2/lib/amdgpu-tiler/include/amdgpu/tiler.hpp
Normal file
505
rpcsx-gpu2/lib/amdgpu-tiler/include/amdgpu/tiler.hpp
Normal file
|
|
@ -0,0 +1,505 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
#include <bit>
#include <cstdint>
#include <cstdlib>
#include <gnm/constants.hpp>
#include <gnm/descriptors.hpp>
||||
|
||||
namespace amdgpu {
|
||||
// GCN micro-tiles are fixed 8x8 elements.
inline constexpr uint32_t kMicroTileWidth = 8;
inline constexpr uint32_t kMicroTileHeight = 8;
// DRAM row granularity in bytes (used for tile-split sizing).
inline constexpr uint32_t kDramRowSize = 0x400;
// Bytes of contiguous data assigned to one pipe before interleaving.
inline constexpr uint32_t kPipeInterleaveBytes = 256;
|
||||
|
||||
// Macro/micro tiling layout selector (ARRAY_MODE field of GB_TILE_MODE).
// "Thin" = 1 slice per micro-tile, "Thick" = 4, "XThick" = 8; "Prt" marks
// partially-resident-texture variants.
enum ArrayMode {
  kArrayModeLinearGeneral = 0x00000000,
  kArrayModeLinearAligned = 0x00000001,
  kArrayMode1dTiledThin = 0x00000002,
  kArrayMode1dTiledThick = 0x00000003,
  kArrayMode2dTiledThin = 0x00000004,
  kArrayModeTiledThinPrt = 0x00000005,
  kArrayMode2dTiledThinPrt = 0x00000006,
  kArrayMode2dTiledThick = 0x00000007,
  kArrayMode2dTiledXThick = 0x00000008,
  kArrayModeTiledThickPrt = 0x00000009,
  kArrayMode2dTiledThickPrt = 0x0000000a,
  kArrayMode3dTiledThinPrt = 0x0000000b,
  kArrayMode3dTiledThin = 0x0000000c,
  kArrayMode3dTiledThick = 0x0000000d,
  kArrayMode3dTiledXThick = 0x0000000e,
  kArrayMode3dTiledThickPrt = 0x0000000f,
};

// Element ordering inside a micro-tile (MICRO_TILE_MODE field).
enum MicroTileMode {
  kMicroTileModeDisplay = 0x00000000,
  kMicroTileModeThin = 0x00000001,
  kMicroTileModeDepth = 0x00000002,
  kMicroTileModeRotated = 0x00000003,
  kMicroTileModeThick = 0x00000004,
};

// Pipe configuration (PIPE_CONFIG field); only the values the target
// hardware uses are modeled here.
enum PipeConfig {
  kPipeConfigP8_32x32_8x16 = 0x0000000a,
  kPipeConfigP8_32x32_16x16 = 0x0000000c,
  kPipeConfigP16 = 0x00000012,
};

// Tile-split size, encoded as log2(bytes / 64).
enum TileSplit {
  kTileSplit64B = 0x00000000,
  kTileSplit128B = 0x00000001,
  kTileSplit256B = 0x00000002,
  kTileSplit512B = 0x00000003,
  kTileSplit1KB = 0x00000004,
  kTileSplit2KB = 0x00000005,
  kTileSplit4KB = 0x00000006,
};

// Sample-split factor, encoded as log2(samples).
enum SampleSplit {
  kSampleSplit1 = 0x00000000,
  kSampleSplit2 = 0x00000001,
  kSampleSplit4 = 0x00000002,
  kSampleSplit8 = 0x00000003,
};

// The four fields below are all encoded as log2(count/size).
enum NumBanks {
  kNumBanks2 = 0x00000000,
  kNumBanks4 = 0x00000001,
  kNumBanks8 = 0x00000002,
  kNumBanks16 = 0x00000003,
};

enum BankWidth {
  kBankWidth1 = 0x00000000,
  kBankWidth2 = 0x00000001,
  kBankWidth4 = 0x00000002,
  kBankWidth8 = 0x00000003,
};

enum BankHeight {
  kBankHeight1 = 0x00000000,
  kBankHeight2 = 0x00000001,
  kBankHeight4 = 0x00000002,
  kBankHeight8 = 0x00000003,
};

enum MacroTileAspect {
  kMacroTileAspect1 = 0x00000000,
  kMacroTileAspect2 = 0x00000001,
  kMacroTileAspect4 = 0x00000002,
  kMacroTileAspect8 = 0x00000003,
};
|
||||
|
||||
struct TileMode {
|
||||
std::uint32_t raw;
|
||||
|
||||
constexpr ArrayMode arrayMode() const {
|
||||
return ArrayMode((raw & 0x0000003c) >> 2);
|
||||
}
|
||||
constexpr PipeConfig pipeConfig() const {
|
||||
return PipeConfig((raw & 0x000007c0) >> 6);
|
||||
}
|
||||
constexpr TileSplit tileSplit() const {
|
||||
return TileSplit((raw & 0x00003800) >> 11);
|
||||
}
|
||||
constexpr MicroTileMode microTileMode() const {
|
||||
return MicroTileMode((raw & 0x01c00000) >> 22);
|
||||
}
|
||||
constexpr SampleSplit sampleSplit() const {
|
||||
return SampleSplit((raw & 0x06000000) >> 25);
|
||||
}
|
||||
constexpr std::uint32_t altPipeConfig() const {
|
||||
return (raw & 0xf8000000) >> 27;
|
||||
}
|
||||
|
||||
constexpr TileMode &arrayMode(ArrayMode mode) {
|
||||
raw = (raw & ~0x0000003c) |
|
||||
(static_cast<std::uint32_t>(mode) << 2) & 0x0000003c;
|
||||
return *this;
|
||||
}
|
||||
constexpr TileMode &pipeConfig(PipeConfig mode) {
|
||||
raw = (raw & ~0x000007c0) |
|
||||
(static_cast<std::uint32_t>(mode) << 6) & 0x000007c0;
|
||||
return *this;
|
||||
}
|
||||
constexpr TileMode &tileSplit(TileSplit mode) {
|
||||
raw = (raw & ~0x00003800) |
|
||||
(static_cast<std::uint32_t>(mode) << 11) & 0x00003800;
|
||||
return *this;
|
||||
}
|
||||
constexpr TileMode µTileMode(MicroTileMode mode) {
|
||||
raw = (raw & ~0x01c00000) |
|
||||
(static_cast<std::uint32_t>(mode) << 22) & 0x01c00000;
|
||||
return *this;
|
||||
}
|
||||
constexpr TileMode &sampleSplit(SampleSplit mode) {
|
||||
raw = (raw & ~0x06000000) |
|
||||
(static_cast<std::uint32_t>(mode) << 25) & 0x06000000;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
/// Decoded view of a GB_MACROTILE_MODE register value. All fields are the
/// raw log2-encoded values (matching the BankWidth/BankHeight/NumBanks enum
/// encodings); actual counts are 1 << value.
struct MacroTileMode {
  std::uint32_t raw;

  constexpr std::uint32_t bankWidth() const { return (raw & 0x00000003) >> 0; }
  constexpr std::uint32_t bankHeight() const { return (raw & 0x0000000c) >> 2; }
  constexpr MacroTileAspect macroTileAspect() const {
    return MacroTileAspect((raw & 0x00000030) >> 4);
  }
  constexpr std::uint32_t numBanks() const { return (raw & 0x000000c0) >> 6; }

  // "alt" variants of the fields above — presumably the alternate bank
  // configuration used by some chips; verify against the register spec.
  constexpr std::uint32_t altBankHeight() const {
    return (raw & 0x00000300) >> 8;
  }
  constexpr std::uint32_t altMacroTileAspect() const {
    return (raw & 0x00000c00) >> 10;
  }
  constexpr std::uint32_t altNumBanks() const {
    return (raw & 0x00003000) >> 12;
  }
};
|
||||
|
||||
/// Computed layout of a (possibly mipmapped, arrayed) surface: overall
/// dimensions plus per-mip placement, as produced by computeSurfaceInfo().
struct SurfaceInfo {
  std::uint32_t width;
  std::uint32_t height;
  std::uint32_t depth;
  std::uint32_t pitch;        // row pitch in elements
  int arrayLayerCount;
  int numFragments;
  int bitsPerElement;
  std::uint64_t totalSize;    // total byte size of the tiled surface

  // Placement of one mip level within the surface.
  struct SubresourceInfo {
    std::uint32_t dataWidth;  // padded extent actually stored
    std::uint32_t dataHeight;
    std::uint32_t dataDepth;
    std::uint64_t offset;     // byte offset of this mip from surface start
    std::uint64_t tiledSize;  // byte size in tiled layout
    std::uint64_t linearSize; // byte size after detiling
  };

  // One entry per mip level; 16 is the hardware maximum mip count.
  SubresourceInfo subresources[16];

  // No bounds check: caller must keep 0 <= mipLevel < 16.
  void setSubresourceInfo(int mipLevel, const SubresourceInfo &subresource) {
    subresources[mipLevel] = subresource;
  }

  const SubresourceInfo &getSubresourceInfo(int mipLevel) const {
    return subresources[mipLevel];
  }
};
|
||||
|
||||
/// Number of depth slices stored in one micro-tile for the given array mode:
/// 1 for linear/thin layouts, 4 for thick, 8 for x-thick. Aborts on a value
/// outside the ArrayMode enumeration.
constexpr uint32_t getMicroTileThickness(ArrayMode arrayMode) {
  switch (arrayMode) {
  case kArrayModeLinearGeneral:
  case kArrayModeLinearAligned:
  case kArrayMode1dTiledThin:
  case kArrayMode2dTiledThin:
  case kArrayModeTiledThinPrt:
  case kArrayMode2dTiledThinPrt:
  case kArrayMode3dTiledThinPrt:
  case kArrayMode3dTiledThin:
    return 1;

  case kArrayMode1dTiledThick:
  case kArrayMode2dTiledThick:
  case kArrayMode3dTiledThick:
  case kArrayModeTiledThickPrt:
  case kArrayMode2dTiledThickPrt:
  case kArrayMode3dTiledThickPrt:
    return 4;

  case kArrayMode2dTiledXThick:
  case kArrayMode3dTiledXThick:
    return 8;
  }

  std::abort();
}
|
||||
|
||||
/// True when the array mode uses macro-tiling (bank/pipe swizzling on top of
/// micro-tiles); only linear and 1D-tiled layouts do not. Aborts on a value
/// outside the ArrayMode enumeration.
constexpr bool isMacroTiled(ArrayMode arrayMode) {
  switch (arrayMode) {
  case kArrayMode2dTiledThin:
  case kArrayMode2dTiledThick:
  case kArrayMode2dTiledXThick:
  case kArrayMode2dTiledThinPrt:
  case kArrayMode2dTiledThickPrt:
  case kArrayMode3dTiledThin:
  case kArrayMode3dTiledThick:
  case kArrayMode3dTiledXThick:
  case kArrayMode3dTiledThinPrt:
  case kArrayMode3dTiledThickPrt:
  case kArrayModeTiledThinPrt:
  case kArrayModeTiledThickPrt:
    return true;

  case kArrayModeLinearGeneral:
  case kArrayModeLinearAligned:
  case kArrayMode1dTiledThin:
  case kArrayMode1dTiledThick:
    return false;
  }

  std::abort();
}
|
||||
|
||||
/// True for partially-resident-texture (PRT) array modes. Aborts on a value
/// outside the ArrayMode enumeration.
constexpr bool isPrt(ArrayMode arrayMode) {
  switch (arrayMode) {
  case kArrayModeTiledThinPrt:
  case kArrayModeTiledThickPrt:
  case kArrayMode2dTiledThinPrt:
  case kArrayMode2dTiledThickPrt:
  case kArrayMode3dTiledThinPrt:
  case kArrayMode3dTiledThickPrt:
    return true;

  case kArrayModeLinearGeneral:
  case kArrayModeLinearAligned:
  case kArrayMode1dTiledThin:
  case kArrayMode1dTiledThick:
  case kArrayMode2dTiledThin:
  case kArrayMode2dTiledThick:
  case kArrayMode2dTiledXThick:
  case kArrayMode3dTiledThin:
  case kArrayMode3dTiledThick:
  case kArrayMode3dTiledXThick:
    return false;
  }

  std::abort();
}
|
||||
|
||||
/// Default GB_MACROTILE_MODE register table (16 entries, indexed by
/// macro-tile-mode index). Raw values taken as-is — presumably the console's
/// power-on defaults; verify against hardware docs before editing.
constexpr std::array<MacroTileMode, 16> getDefaultMacroTileModes() {
  return {{
      {.raw = 0x26e8},
      {.raw = 0x26d4},
      {.raw = 0x21d0},
      {.raw = 0x21d0},
      {.raw = 0x2080},
      {.raw = 0x2040},
      {.raw = 0x1000},
      {.raw = 0x0000},
      {.raw = 0x36ec},
      {.raw = 0x26e8},
      {.raw = 0x21d4},
      {.raw = 0x20d0},
      {.raw = 0x1080},
      {.raw = 0x1040},
      {.raw = 0x0000},
      {.raw = 0x0000},
  }};
}
|
||||
|
||||
/// Default GB_TILE_MODE register table (32 entries, indexed by tile-mode
/// index). Raw values taken as-is — presumably the console's power-on
/// defaults; verify against hardware docs before editing.
constexpr std::array<TileMode, 32> getDefaultTileModes() {
  return {{
      {.raw = 0x90800310}, {.raw = 0x90800b10}, {.raw = 0x90801310},
      {.raw = 0x90801b10}, {.raw = 0x90802310}, {.raw = 0x90800308},
      {.raw = 0x90801318}, {.raw = 0x90802318}, {.raw = 0x90000304},
      {.raw = 0x90000308}, {.raw = 0x92000310}, {.raw = 0x92000294},
      {.raw = 0x92000318}, {.raw = 0x90400308}, {.raw = 0x92400310},
      {.raw = 0x924002b0}, {.raw = 0x92400294}, {.raw = 0x92400318},
      {.raw = 0x9240032c}, {.raw = 0x9100030c}, {.raw = 0x9100031c},
      {.raw = 0x910002b4}, {.raw = 0x910002a4}, {.raw = 0x91000328},
      {.raw = 0x910002bc}, {.raw = 0x91000320}, {.raw = 0x910002b8},
      {.raw = 0x90c00308}, {.raw = 0x92c00310}, {.raw = 0x92c00294},
      {.raw = 0x92c00318}, {.raw = 0x00000000},
  }};
}
|
||||
|
||||
/// Swizzled index of element (x, y, z) inside one micro-tile. Only the low
/// 3 bits of x/y (and of z for thick modes) participate — the micro-tile is
/// 8x8(x4/8). The bit interleave depends on the micro-tile mode and, for the
/// z bits, on the array-mode thickness. Aborts on unsupported combinations.
constexpr std::uint32_t getElementIndex(std::uint32_t x, std::uint32_t y,
                                        std::uint32_t z,
                                        std::uint32_t bitsPerElement,
                                        MicroTileMode microTileMode,
                                        ArrayMode arrayMode) {
  std::uint32_t elem = 0;

  if (microTileMode == kMicroTileModeDisplay) {
    // Display swizzle: x/y bit interleave varies with the element size.
    switch (bitsPerElement) {
    case 8:
      elem |= ((x >> 0) & 0x1) << 0;
      elem |= ((x >> 1) & 0x1) << 1;
      elem |= ((x >> 2) & 0x1) << 2;
      elem |= ((y >> 1) & 0x1) << 3;
      elem |= ((y >> 0) & 0x1) << 4;
      elem |= ((y >> 2) & 0x1) << 5;
      break;
    case 16:
      elem |= ((x >> 0) & 0x1) << 0;
      elem |= ((x >> 1) & 0x1) << 1;
      elem |= ((x >> 2) & 0x1) << 2;
      elem |= ((y >> 0) & 0x1) << 3;
      elem |= ((y >> 1) & 0x1) << 4;
      elem |= ((y >> 2) & 0x1) << 5;
      break;
    case 32:
      elem |= ((x >> 0) & 0x1) << 0;
      elem |= ((x >> 1) & 0x1) << 1;
      elem |= ((y >> 0) & 0x1) << 2;
      elem |= ((x >> 2) & 0x1) << 3;
      elem |= ((y >> 1) & 0x1) << 4;
      elem |= ((y >> 2) & 0x1) << 5;
      break;
    case 64:
      elem |= ((x >> 0) & 0x1) << 0;
      elem |= ((y >> 0) & 0x1) << 1;
      elem |= ((x >> 1) & 0x1) << 2;
      elem |= ((x >> 2) & 0x1) << 3;
      elem |= ((y >> 1) & 0x1) << 4;
      elem |= ((y >> 2) & 0x1) << 5;
      break;
    default:
      // 128-bit display elements are not supported here.
      std::abort();
    }
  } else if (microTileMode == kMicroTileModeThin ||
             microTileMode == kMicroTileModeDepth) {
    // Thin/depth swizzle: fixed x/y interleave in bits 0..5; thick modes
    // append z bits above them.
    elem |= ((x >> 0) & 0x1) << 0;
    elem |= ((y >> 0) & 0x1) << 1;
    elem |= ((x >> 1) & 0x1) << 2;
    elem |= ((y >> 1) & 0x1) << 3;
    elem |= ((x >> 2) & 0x1) << 4;
    elem |= ((y >> 2) & 0x1) << 5;

    switch (arrayMode) {
    case kArrayMode2dTiledXThick:
    case kArrayMode3dTiledXThick:
      elem |= ((z >> 2) & 0x1) << 8;
      // intentional fallthrough: x-thick also stores the two low z bits
    case kArrayMode1dTiledThick:
    case kArrayMode2dTiledThick:
    case kArrayMode3dTiledThick:
    case kArrayModeTiledThickPrt:
    case kArrayMode2dTiledThickPrt:
    case kArrayMode3dTiledThickPrt:
      elem |= ((z >> 0) & 0x1) << 6;
      elem |= ((z >> 1) & 0x1) << 7;
      // intentional fallthrough: thin modes carry no z bits
    default:
      break;
    }
  } else if (microTileMode == kMicroTileModeThick) {
    // Thick swizzle is only valid for thick/x-thick array modes.
    switch (arrayMode) {
    case kArrayMode2dTiledXThick:
    case kArrayMode3dTiledXThick:
      elem |= ((z >> 2) & 0x1) << 8;
      // intentional fallthrough: shared x/y/z interleave below

    case kArrayMode1dTiledThick:
    case kArrayMode2dTiledThick:
    case kArrayMode3dTiledThick:
    case kArrayModeTiledThickPrt:
    case kArrayMode2dTiledThickPrt:
    case kArrayMode3dTiledThickPrt:
      // Position of the two low z bits shifts down as elements grow.
      if (bitsPerElement == 8 || bitsPerElement == 16) {
        elem |= ((x >> 0) & 0x1) << 0;
        elem |= ((y >> 0) & 0x1) << 1;
        elem |= ((x >> 1) & 0x1) << 2;
        elem |= ((y >> 1) & 0x1) << 3;
        elem |= ((z >> 0) & 0x1) << 4;
        elem |= ((z >> 1) & 0x1) << 5;
        elem |= ((x >> 2) & 0x1) << 6;
        elem |= ((y >> 2) & 0x1) << 7;
      } else if (bitsPerElement == 32) {
        elem |= ((x >> 0) & 0x1) << 0;
        elem |= ((y >> 0) & 0x1) << 1;
        elem |= ((x >> 1) & 0x1) << 2;
        elem |= ((z >> 0) & 0x1) << 3;
        elem |= ((y >> 1) & 0x1) << 4;
        elem |= ((z >> 1) & 0x1) << 5;
        elem |= ((x >> 2) & 0x1) << 6;
        elem |= ((y >> 2) & 0x1) << 7;
      } else if (bitsPerElement == 64 || bitsPerElement == 128) {
        elem |= ((x >> 0) & 0x1) << 0;
        elem |= ((y >> 0) & 0x1) << 1;
        elem |= ((z >> 0) & 0x1) << 2;
        elem |= ((x >> 1) & 0x1) << 3;
        elem |= ((y >> 1) & 0x1) << 4;
        elem |= ((z >> 1) & 0x1) << 5;
        elem |= ((x >> 2) & 0x1) << 6;
        elem |= ((y >> 2) & 0x1) << 7;
      } else {
        std::abort();
      }
      break;
    default:
      std::abort();
    }
  }
  // NOTE(review): kMicroTileModeRotated falls through all branches and
  // returns 0 — confirm whether that mode is intentionally unsupported.
  return elem;
}
|
||||
|
||||
/// Pipe index for element coordinates (x, y): XORs of mid bits of x and y
/// per pipe configuration (3 pipe bits for the P8 configs, 4 for P16).
/// Aborts on an unsupported pipe configuration.
constexpr uint32_t getPipeIndex(uint32_t x, uint32_t y, PipeConfig pipeCfg) {
  uint32_t pipe = 0;
  switch (pipeCfg) {
  case kPipeConfigP8_32x32_8x16:
    pipe |= (((x >> 4) ^ (y >> 3) ^ (x >> 5)) & 0x1) << 0;
    pipe |= (((x >> 3) ^ (y >> 4)) & 0x1) << 1;
    pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
    break;
  case kPipeConfigP8_32x32_16x16:
    pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0;
    pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1;
    pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
    break;
  case kPipeConfigP16:
    pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0;
    pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1;
    pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
    pipe |= (((x >> 6) ^ (y >> 5)) & 0x1) << 3;
    break;
  default:
    std::abort();
  }
  return pipe;
}
|
||||
|
||||
constexpr uint32_t getBankIndex(std::uint32_t x, std::uint32_t y,
|
||||
std::uint32_t bank_width,
|
||||
std::uint32_t bank_height,
|
||||
std::uint32_t num_banks,
|
||||
std::uint32_t num_pipes) {
|
||||
std::uint32_t x_shift_offset = std::countr_zero(bank_width * num_pipes);
|
||||
std::uint32_t y_shift_offset = std::countr_zero(bank_height);
|
||||
std::uint32_t xs = x >> x_shift_offset;
|
||||
std::uint32_t ys = y >> y_shift_offset;
|
||||
std::uint32_t bank = 0;
|
||||
switch (num_banks) {
|
||||
case 2:
|
||||
bank |= (((xs >> 3) ^ (ys >> 3)) & 0x1) << 0;
|
||||
break;
|
||||
case 4:
|
||||
bank |= (((xs >> 3) ^ (ys >> 4)) & 0x1) << 0;
|
||||
bank |= (((xs >> 4) ^ (ys >> 3)) & 0x1) << 1;
|
||||
break;
|
||||
case 8:
|
||||
bank |= (((xs >> 3) ^ (ys >> 5)) & 0x1) << 0;
|
||||
bank |= (((xs >> 4) ^ (ys >> 4) ^ (ys >> 5)) & 0x1) << 1;
|
||||
bank |= (((xs >> 5) ^ (ys >> 3)) & 0x1) << 2;
|
||||
break;
|
||||
case 16:
|
||||
bank |= (((xs >> 3) ^ (ys >> 6)) & 0x1) << 0;
|
||||
bank |= (((xs >> 4) ^ (ys >> 5) ^ (ys >> 6)) & 0x1) << 1;
|
||||
bank |= (((xs >> 5) ^ (ys >> 4)) & 0x1) << 2;
|
||||
bank |= (((xs >> 6) ^ (ys >> 3)) & 0x1) << 3;
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
|
||||
return bank;
|
||||
}
|
||||
|
||||
/// Number of pipes implied by the pipe configuration (8 for the P8 configs,
/// 16 for P16). Aborts on an unsupported configuration.
constexpr std::uint32_t getPipeCount(PipeConfig pipeConfig) {
  if (pipeConfig == kPipeConfigP16) {
    return 16;
  }
  if (pipeConfig == kPipeConfigP8_32x32_8x16 ||
      pipeConfig == kPipeConfigP8_32x32_16x16) {
    return 8;
  }
  std::abort();
}
|
||||
|
||||
/// Computes the full layout (per-mip offsets and sizes) of a surface from
/// explicit parameters. pow2pad requests power-of-two padding of mip extents.
SurfaceInfo computeSurfaceInfo(TileMode tileMode, gnm::TextureType type,
                               gnm::DataFormat dfmt, std::uint32_t width,
                               std::uint32_t height, std::uint32_t depth,
                               std::uint32_t pitch, int baseArrayLayer,
                               int arrayCount, int baseMipLevel, int mipCount,
                               bool pow2pad);
/// Convenience overload deriving the parameters from a texture descriptor.
SurfaceInfo computeSurfaceInfo(const gnm::TBuffer &tbuffer, TileMode tileMode);
|
||||
} // namespace amdgpu
|
||||
14
rpcsx-gpu2/lib/amdgpu-tiler/include/amdgpu/tiler_cpu.hpp
Normal file
14
rpcsx-gpu2/lib/amdgpu-tiler/include/amdgpu/tiler_cpu.hpp
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
#pragma once

#include "gnm/constants.hpp"
#include "tiler.hpp"
#include <cstdint>

namespace amdgpu {
/// CPU reference implementation: byte offset of element (x, y, z, fragment)
/// of the given mip level / array slice within a tiled surface described by
/// the tile and macro-tile modes.
std::uint64_t getTiledOffset(gnm::TextureType texType, bool isPow2Padded,
                             int numFragments, gnm::DataFormat dfmt,
                             amdgpu::TileMode tileMode,
                             amdgpu::MacroTileMode macroTileMode, int mipLevel,
                             int arraySlice, int width, int height, int depth,
                             int pitch, int x, int y, int z, int fragmentIndex);
}
|
||||
24
rpcsx-gpu2/lib/amdgpu-tiler/include/amdgpu/tiler_vulkan.hpp
Normal file
24
rpcsx-gpu2/lib/amdgpu-tiler/include/amdgpu/tiler_vulkan.hpp
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
#pragma once
#include "tiler.hpp"
#include <Scheduler.hpp>
#include <memory>

namespace amdgpu {
/// GPU-side (de)tiler: records compute-shader dispatches on the scheduler to
/// convert between tiled and linear layouts. Pimpl hides the Vulkan state.
struct GpuTiler {
  struct Impl;
  GpuTiler();
  ~GpuTiler();

  /// Detiles mip `mipLevel`, array layers [baseArray, baseArray+arrayCount),
  /// from the tiled source address into the linear destination.
  void detile(Scheduler &scheduler, const amdgpu::SurfaceInfo &info,
              amdgpu::TileMode tileMode, std::uint64_t srcTiledAddress,
              std::uint64_t dstLinearAddress, int mipLevel, int baseArray,
              int arrayCount);
  /// Inverse of detile: linear source into tiled destination.
  void tile(Scheduler &scheduler, const amdgpu::SurfaceInfo &info,
            amdgpu::TileMode tileMode, std::uint64_t srcLinearAddress,
            std::uint64_t dstTiledAddress, int mipLevel, int baseArray,
            int arrayCount);

private:
  std::unique_ptr<Impl> mImpl;
};
} // namespace amdgpu
|
||||
76
rpcsx-gpu2/lib/amdgpu-tiler/shaders/detiler1d.comp.glsl
Normal file
76
rpcsx-gpu2/lib/amdgpu-tiler/shaders/detiler1d.comp.glsl
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
#version 460

#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_shader_atomic_int64 : enable
#extension GL_EXT_shader_atomic_float : enable
#extension GL_EXT_shader_image_load_formatted : enable
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_EXT_shared_memory_block : enable
#extension GL_EXT_scalar_block_layout : enable
#extension GL_EXT_null_initializer : enable
#extension GL_EXT_buffer_reference2 : enable
#extension GL_EXT_buffer_reference_uvec2 : enable

// `config` (addresses, sizes, tile mode) and the buffer_reference_* helper
// types come from the shared include.
#include "tiler.glsl"

// 1D detiler: one invocation copies one element from the tiled source
// buffer to the linear destination buffer.
void main() {
  uvec3 pos = gl_GlobalInvocationID;
  uint64_t tiledSliceOffset = 0;
  uint64_t linearSliceOffset = 0;
  // Non-zero tiledSurfaceSize means Z indexes whole array slices/depth
  // planes: convert it to per-slice byte offsets and address within slice 0.
  if (config.tiledSurfaceSize != 0) {
    tiledSliceOffset = pos.z * config.tiledSurfaceSize;
    linearSliceOffset = pos.z * config.linearSurfaceSize;
    pos.z = 0;
  }

  // Swizzled source location (bit offset -> bytes).
  uint64_t tiledByteOffset = getTiledBitOffset1D(
    config.tileMode,
    pos,
    config.dataSize,
    config.bitsPerElement
  ) / 8;

  tiledByteOffset += tiledSliceOffset;

  // Row-major destination location.
  // NOTE(review): numFragments is passed as 1 << config.numFragments —
  // presumably the field is log2; confirm against the CPU tiler.
  uint64_t linearByteOffset = computeLinearElementByteOffset(
    pos,
    0,
    config.dataSize.x,
    config.dataSize.x * config.dataSize.y,
    config.bitsPerElement,
    1 << config.numFragments
  );

  linearByteOffset += linearSliceOffset;

  // Copy one element; width is rounded up to whole bytes.
  switch ((config.bitsPerElement + 7) / 8) {
  case 1:
    buffer_reference_uint8_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint8_t(config.srcAddress + tiledByteOffset).data;
    break;

  case 2:
    buffer_reference_uint16_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint16_t(config.srcAddress + tiledByteOffset).data;
    break;

  case 4:
    buffer_reference_uint32_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint32_t(config.srcAddress + tiledByteOffset).data;
    break;

  case 8:
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset).data;
    break;

  case 16:
    // 128-bit and 256-bit elements are copied as consecutive u64 words.
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset).data;
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 8).data;
    break;

  case 32:
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset).data;
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 8).data;
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 16).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 16).data;
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 24).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 24).data;
    break;
  }
}
|
||||
76
rpcsx-gpu2/lib/amdgpu-tiler/shaders/detiler2d.comp.glsl
Normal file
76
rpcsx-gpu2/lib/amdgpu-tiler/shaders/detiler2d.comp.glsl
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
#version 460
|
||||
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types : enable
|
||||
#extension GL_EXT_shader_atomic_int64 : enable
|
||||
#extension GL_EXT_shader_atomic_float : enable
|
||||
#extension GL_EXT_shader_image_load_formatted : enable
|
||||
#extension GL_KHR_memory_scope_semantics : enable
|
||||
#extension GL_EXT_shared_memory_block : enable
|
||||
#extension GL_EXT_scalar_block_layout : enable
|
||||
#extension GL_EXT_null_initializer : enable
|
||||
#extension GL_EXT_buffer_reference2 : enable
|
||||
#extension GL_EXT_buffer_reference_uvec2 : enable
|
||||
|
||||
#include "tiler.glsl"
|
||||
|
||||
void main() {
|
||||
uvec3 pos = gl_GlobalInvocationID;
|
||||
uint64_t tiledSliceOffset = 0;
|
||||
uint64_t linearSliceOffset = 0;
|
||||
if (config.tiledSurfaceSize != 0) {
|
||||
tiledSliceOffset = pos.z * config.tiledSurfaceSize;
|
||||
linearSliceOffset = pos.z * config.linearSurfaceSize;
|
||||
pos.z = 0;
|
||||
}
|
||||
|
||||
uint64_t tiledByteOffset = getTiledBitOffset1D(
|
||||
config.tileMode,
|
||||
pos,
|
||||
config.dataSize,
|
||||
config.bitsPerElement
|
||||
) / 8;
|
||||
|
||||
tiledByteOffset += tiledSliceOffset;
|
||||
|
||||
uint64_t linearByteOffset = computeLinearElementByteOffset(
|
||||
pos,
|
||||
0,
|
||||
config.dataSize.x,
|
||||
config.dataSize.x * config.dataSize.y,
|
||||
config.bitsPerElement,
|
||||
1 << config.numFragments
|
||||
);
|
||||
|
||||
linearByteOffset += linearSliceOffset;
|
||||
|
||||
switch ((config.bitsPerElement + 7) / 8) {
|
||||
case 1:
|
||||
buffer_reference_uint8_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint8_t(config.srcAddress + tiledByteOffset).data;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
buffer_reference_uint16_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint16_t(config.srcAddress + tiledByteOffset).data;
|
||||
break;
|
||||
|
||||
case 4:
|
||||
buffer_reference_uint32_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint32_t(config.srcAddress + tiledByteOffset).data;
|
||||
break;
|
||||
|
||||
case 8:
|
||||
buffer_reference_uint64_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset).data;
|
||||
break;
|
||||
|
||||
case 16:
|
||||
buffer_reference_uint64_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset).data;
|
||||
buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 8).data;
|
||||
break;
|
||||
|
||||
case 32:
|
||||
buffer_reference_uint64_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset).data;
|
||||
buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 8).data;
|
||||
buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 16).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 16).data;
|
||||
buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 24).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 24).data;
|
||||
break;
|
||||
}
|
||||
}
|
||||
76
rpcsx-gpu2/lib/amdgpu-tiler/shaders/detilerLinear.comp.glsl
Normal file
76
rpcsx-gpu2/lib/amdgpu-tiler/shaders/detilerLinear.comp.glsl
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
#version 460
|
||||
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types : enable
|
||||
#extension GL_EXT_shader_atomic_int64 : enable
|
||||
#extension GL_EXT_shader_atomic_float : enable
|
||||
#extension GL_EXT_shader_image_load_formatted : enable
|
||||
#extension GL_KHR_memory_scope_semantics : enable
|
||||
#extension GL_EXT_shared_memory_block : enable
|
||||
#extension GL_EXT_scalar_block_layout : enable
|
||||
#extension GL_EXT_null_initializer : enable
|
||||
#extension GL_EXT_buffer_reference2 : enable
|
||||
#extension GL_EXT_buffer_reference_uvec2 : enable
|
||||
|
||||
#include "tiler.glsl"
|
||||
|
||||
void main() {
|
||||
uvec3 pos = gl_GlobalInvocationID;
|
||||
uint64_t tiledSliceOffset = 0;
|
||||
uint64_t linearSliceOffset = 0;
|
||||
if (config.tiledSurfaceSize != 0) {
|
||||
tiledSliceOffset = pos.z * config.tiledSurfaceSize;
|
||||
linearSliceOffset = pos.z * config.linearSurfaceSize;
|
||||
pos.z = 0;
|
||||
}
|
||||
|
||||
uint64_t tiledByteOffset = computeLinearOffset(
|
||||
config.bitsPerElement,
|
||||
config.dataSize.y,
|
||||
config.dataSize.x,
|
||||
pos
|
||||
) / 8;
|
||||
|
||||
tiledByteOffset += tiledSliceOffset;
|
||||
|
||||
uint64_t linearByteOffset = computeLinearElementByteOffset(
|
||||
pos,
|
||||
0,
|
||||
config.dataSize.x,
|
||||
config.dataSize.x * config.dataSize.y,
|
||||
config.bitsPerElement,
|
||||
1 << config.numFragments
|
||||
);
|
||||
|
||||
linearByteOffset += linearSliceOffset;
|
||||
|
||||
switch ((config.bitsPerElement + 7) / 8) {
|
||||
case 1:
|
||||
buffer_reference_uint8_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint8_t(config.srcAddress + tiledByteOffset).data;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
buffer_reference_uint16_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint16_t(config.srcAddress + tiledByteOffset).data;
|
||||
break;
|
||||
|
||||
case 4:
|
||||
buffer_reference_uint32_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint32_t(config.srcAddress + tiledByteOffset).data;
|
||||
break;
|
||||
|
||||
case 8:
|
||||
buffer_reference_uint64_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset).data;
|
||||
break;
|
||||
|
||||
case 16:
|
||||
buffer_reference_uint64_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset).data;
|
||||
buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 8).data;
|
||||
break;
|
||||
|
||||
case 32:
|
||||
buffer_reference_uint64_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset).data;
|
||||
buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 8).data;
|
||||
buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 16).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 16).data;
|
||||
buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 24).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 24).data;
|
||||
break;
|
||||
}
|
||||
}
|
||||
716
rpcsx-gpu2/lib/amdgpu-tiler/shaders/tiler.glsl
Normal file
716
rpcsx-gpu2/lib/amdgpu-tiler/shaders/tiler.glsl
Normal file
|
|
@ -0,0 +1,716 @@
|
|||
|
||||
#define FOR_ALL_BASE_TYPES(OP) \
|
||||
OP(int8_t) \
|
||||
OP(uint8_t) \
|
||||
OP(int16_t) \
|
||||
OP(uint16_t) \
|
||||
OP(float16_t) \
|
||||
OP(int32_t) \
|
||||
OP(uint32_t) \
|
||||
OP(float32_t) \
|
||||
OP(int64_t) \
|
||||
OP(uint64_t) \
|
||||
OP(float64_t) \
|
||||
|
||||
#define DEFINE_BUFFER_REFERENCE(TYPE) \
|
||||
layout(buffer_reference) buffer buffer_reference_##TYPE { \
|
||||
TYPE data; \
|
||||
}; \
|
||||
|
||||
FOR_ALL_BASE_TYPES(DEFINE_BUFFER_REFERENCE)
|
||||
|
||||
#define U32ARRAY_FETCH_BITS(ARRAY, START, BITCOUNT) ((ARRAY[(START) >> 5] >> ((START) & 31)) & ((1 << (BITCOUNT)) - 1))
|
||||
#define U64ARRAY_FETCH_BITS(ARRAY, START, BITCOUNT) ((ARRAY[(START) >> 6] >> ((START) & 63)) & ((uint64_t(1) << (BITCOUNT)) - 1))
|
||||
|
||||
uint64_t tbuffer_base(u64vec4 tbuffer) {
|
||||
return U64ARRAY_FETCH_BITS(tbuffer, 0, 38);
|
||||
}
|
||||
uint32_t tbuffer_mtype_L2(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 38, 2));
|
||||
}
|
||||
uint32_t tbuffer_min_lod(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 40, 12));
|
||||
}
|
||||
uint32_t tbuffer_dfmt(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 52, 6));
|
||||
}
|
||||
uint32_t tbuffer_nfmt(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 58, 4));
|
||||
}
|
||||
uint32_t tbuffer_mtype_l1(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 62, 2) | (U64ARRAY_FETCH_BITS(tbuffer, 122, 1) << 2));
|
||||
}
|
||||
uint32_t tbuffer_width(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 64, 14));
|
||||
}
|
||||
uint32_t tbuffer_height(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 78, 14));
|
||||
}
|
||||
uint32_t tbuffer_perfMod(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 92, 3));
|
||||
}
|
||||
bool tbuffer_interlaced(u64vec4 tbuffer) {
|
||||
return U64ARRAY_FETCH_BITS(tbuffer, 95, 1) != 0;
|
||||
}
|
||||
uint32_t tbuffer_dst_sel_x(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 96, 3));
|
||||
}
|
||||
uint32_t tbuffer_dst_sel_y(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 99, 3));
|
||||
}
|
||||
uint32_t tbuffer_dst_sel_z(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 102, 3));
|
||||
}
|
||||
uint32_t tbuffer_dst_sel_w(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 105, 3));
|
||||
}
|
||||
uint32_t tbuffer_base_level(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 108, 4));
|
||||
}
|
||||
uint32_t tbuffer_last_level(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 112, 4));
|
||||
}
|
||||
uint32_t tbuffer_tiling_idx(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 116, 5));
|
||||
}
|
||||
bool tbuffer_pow2pad(u64vec4 tbuffer) {
|
||||
return U64ARRAY_FETCH_BITS(tbuffer, 121, 1) != 0;
|
||||
}
|
||||
uint32_t tbuffer_type(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 124, 4));
|
||||
}
|
||||
uint32_t tbuffer_depth(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 128, 13));
|
||||
}
|
||||
uint32_t tbuffer_pitch(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 141, 14));
|
||||
}
|
||||
uint32_t tbuffer_base_array(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 160, 13));
|
||||
}
|
||||
uint32_t tbuffer_last_array(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 173, 13));
|
||||
}
|
||||
uint32_t tbuffer_min_lod_warn(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 192, 12));
|
||||
}
|
||||
uint32_t tbuffer_counter_bank_id(u64vec4 tbuffer) {
|
||||
return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 204, 8));
|
||||
}
|
||||
bool tbuffer_LOD_hdw_cnt_en(u64vec4 tbuffer) {
|
||||
return U64ARRAY_FETCH_BITS(tbuffer, 212, 1) != 0;
|
||||
}
|
||||
|
||||
const int kTextureType1D = 8;
|
||||
const int kTextureType2D = 9;
|
||||
const int kTextureType3D = 10;
|
||||
const int kTextureTypeCube = 11;
|
||||
const int kTextureTypeArray1D = 12;
|
||||
const int kTextureTypeArray2D = 13;
|
||||
const int kTextureTypeMsaa2D = 14;
|
||||
const int kTextureTypeMsaaArray2D = 15;
|
||||
|
||||
const uint32_t kMicroTileWidth = 8;
|
||||
const uint32_t kMicroTileHeight = 8;
|
||||
const uint32_t kDramRowSize = 0x400;
|
||||
const uint32_t kPipeInterleaveBytes = 256;
|
||||
|
||||
|
||||
const uint32_t kDataFormatInvalid = 0x00000000;
|
||||
const uint32_t kDataFormat8 = 0x00000001;
|
||||
const uint32_t kDataFormat16 = 0x00000002;
|
||||
const uint32_t kDataFormat8_8 = 0x00000003;
|
||||
const uint32_t kDataFormat32 = 0x00000004;
|
||||
const uint32_t kDataFormat16_16 = 0x00000005;
|
||||
const uint32_t kDataFormat10_11_11 = 0x00000006;
|
||||
const uint32_t kDataFormat11_11_10 = 0x00000007;
|
||||
const uint32_t kDataFormat10_10_10_2 = 0x00000008;
|
||||
const uint32_t kDataFormat2_10_10_10 = 0x00000009;
|
||||
const uint32_t kDataFormat8_8_8_8 = 0x0000000a;
|
||||
const uint32_t kDataFormat32_32 = 0x0000000b;
|
||||
const uint32_t kDataFormat16_16_16_16 = 0x0000000c;
|
||||
const uint32_t kDataFormat32_32_32 = 0x0000000d;
|
||||
const uint32_t kDataFormat32_32_32_32 = 0x0000000e;
|
||||
const uint32_t kDataFormat5_6_5 = 0x00000010;
|
||||
const uint32_t kDataFormat1_5_5_5 = 0x00000011;
|
||||
const uint32_t kDataFormat5_5_5_1 = 0x00000012;
|
||||
const uint32_t kDataFormat4_4_4_4 = 0x00000013;
|
||||
const uint32_t kDataFormat8_24 = 0x00000014;
|
||||
const uint32_t kDataFormat24_8 = 0x00000015;
|
||||
const uint32_t kDataFormatX24_8_32 = 0x00000016;
|
||||
const uint32_t kDataFormatGB_GR = 0x00000020;
|
||||
const uint32_t kDataFormatBG_RG = 0x00000021;
|
||||
const uint32_t kDataFormat5_9_9_9 = 0x00000022;
|
||||
const uint32_t kDataFormatBc1 = 0x00000023;
|
||||
const uint32_t kDataFormatBc2 = 0x00000024;
|
||||
const uint32_t kDataFormatBc3 = 0x00000025;
|
||||
const uint32_t kDataFormatBc4 = 0x00000026;
|
||||
const uint32_t kDataFormatBc5 = 0x00000027;
|
||||
const uint32_t kDataFormatBc6 = 0x00000028;
|
||||
const uint32_t kDataFormatBc7 = 0x00000029;
|
||||
const uint32_t kDataFormatFmask8_S2_F1 = 0x0000002C;
|
||||
const uint32_t kDataFormatFmask8_S4_F1 = 0x0000002D;
|
||||
const uint32_t kDataFormatFmask8_S8_F1 = 0x0000002E;
|
||||
const uint32_t kDataFormatFmask8_S2_F2 = 0x0000002F;
|
||||
const uint32_t kDataFormatFmask8_S4_F2 = 0x00000030;
|
||||
const uint32_t kDataFormatFmask8_S4_F4 = 0x00000031;
|
||||
const uint32_t kDataFormatFmask16_S16_F1 = 0x00000032;
|
||||
const uint32_t kDataFormatFmask16_S8_F2 = 0x00000033;
|
||||
const uint32_t kDataFormatFmask32_S16_F2 = 0x00000034;
|
||||
const uint32_t kDataFormatFmask32_S8_F4 = 0x00000035;
|
||||
const uint32_t kDataFormatFmask32_S8_F8 = 0x00000036;
|
||||
const uint32_t kDataFormatFmask64_S16_F4 = 0x00000037;
|
||||
const uint32_t kDataFormatFmask64_S16_F8 = 0x00000038;
|
||||
const uint32_t kDataFormat4_4 = 0x00000039;
|
||||
const uint32_t kDataFormat6_5_5 = 0x0000003A;
|
||||
const uint32_t kDataFormat1 = 0x0000003B;
|
||||
const uint32_t kDataFormat1Reversed = 0x0000003C;
|
||||
|
||||
const uint32_t kNumericFormatUNorm = 0x00000000;
|
||||
const uint32_t kNumericFormatSNorm = 0x00000001;
|
||||
const uint32_t kNumericFormatUScaled = 0x00000002;
|
||||
const uint32_t kNumericFormatSScaled = 0x00000003;
|
||||
const uint32_t kNumericFormatUInt = 0x00000004;
|
||||
const uint32_t kNumericFormatSInt = 0x00000005;
|
||||
const uint32_t kNumericFormatSNormNoZero = 0x00000006;
|
||||
const uint32_t kNumericFormatFloat = 0x00000007;
|
||||
const uint32_t kNumericFormatSrgb = 0x00000009;
|
||||
const uint32_t kNumericFormatUBNorm = 0x0000000A;
|
||||
const uint32_t kNumericFormatUBNormNoZero = 0x0000000B;
|
||||
const uint32_t kNumericFormatUBInt = 0x0000000C;
|
||||
const uint32_t kNumericFormatUBScaled = 0x0000000D;
|
||||
|
||||
const uint32_t kArrayModeLinearGeneral = 0x00000000;
|
||||
const uint32_t kArrayModeLinearAligned = 0x00000001;
|
||||
const uint32_t kArrayMode1dTiledThin = 0x00000002;
|
||||
const uint32_t kArrayMode1dTiledThick = 0x00000003;
|
||||
const uint32_t kArrayMode2dTiledThin = 0x00000004;
|
||||
const uint32_t kArrayModeTiledThinPrt = 0x00000005;
|
||||
const uint32_t kArrayMode2dTiledThinPrt = 0x00000006;
|
||||
const uint32_t kArrayMode2dTiledThick = 0x00000007;
|
||||
const uint32_t kArrayMode2dTiledXThick = 0x00000008;
|
||||
const uint32_t kArrayModeTiledThickPrt = 0x00000009;
|
||||
const uint32_t kArrayMode2dTiledThickPrt = 0x0000000a;
|
||||
const uint32_t kArrayMode3dTiledThinPrt = 0x0000000b;
|
||||
const uint32_t kArrayMode3dTiledThin = 0x0000000c;
|
||||
const uint32_t kArrayMode3dTiledThick = 0x0000000d;
|
||||
const uint32_t kArrayMode3dTiledXThick = 0x0000000e;
|
||||
const uint32_t kArrayMode3dTiledThickPrt = 0x0000000f;
|
||||
|
||||
const uint32_t kMicroTileModeDisplay = 0x00000000;
|
||||
const uint32_t kMicroTileModeThin = 0x00000001;
|
||||
const uint32_t kMicroTileModeDepth = 0x00000002;
|
||||
const uint32_t kMicroTileModeRotated = 0x00000003;
|
||||
const uint32_t kMicroTileModeThick = 0x00000004;
|
||||
|
||||
const uint32_t kPipeConfigP8_32x32_8x16 = 0x0000000a;
|
||||
const uint32_t kPipeConfigP8_32x32_16x16 = 0x0000000c;
|
||||
const uint32_t kPipeConfigP16 = 0x00000012;
|
||||
|
||||
|
||||
|
||||
uint32_t getMicroTileThickness(uint32_t arrayMode) {
|
||||
switch (arrayMode) {
|
||||
case kArrayMode1dTiledThick:
|
||||
case kArrayMode2dTiledThick:
|
||||
case kArrayMode3dTiledThick:
|
||||
case kArrayModeTiledThickPrt:
|
||||
case kArrayMode2dTiledThickPrt:
|
||||
case kArrayMode3dTiledThickPrt:
|
||||
return 4;
|
||||
case kArrayMode2dTiledXThick:
|
||||
case kArrayMode3dTiledXThick:
|
||||
return 8;
|
||||
case kArrayModeLinearGeneral:
|
||||
case kArrayModeLinearAligned:
|
||||
case kArrayMode1dTiledThin:
|
||||
case kArrayMode2dTiledThin:
|
||||
case kArrayModeTiledThinPrt:
|
||||
case kArrayMode2dTiledThinPrt:
|
||||
case kArrayMode3dTiledThinPrt:
|
||||
case kArrayMode3dTiledThin:
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
bool isMacroTiled(uint32_t arrayMode) {
|
||||
switch (arrayMode) {
|
||||
case kArrayModeLinearGeneral:
|
||||
case kArrayModeLinearAligned:
|
||||
case kArrayMode1dTiledThin:
|
||||
case kArrayMode1dTiledThick:
|
||||
return false;
|
||||
case kArrayMode2dTiledThin:
|
||||
case kArrayModeTiledThinPrt:
|
||||
case kArrayMode2dTiledThinPrt:
|
||||
case kArrayMode2dTiledThick:
|
||||
case kArrayMode2dTiledXThick:
|
||||
case kArrayModeTiledThickPrt:
|
||||
case kArrayMode2dTiledThickPrt:
|
||||
case kArrayMode3dTiledThinPrt:
|
||||
case kArrayMode3dTiledThin:
|
||||
case kArrayMode3dTiledThick:
|
||||
case kArrayMode3dTiledXThick:
|
||||
case kArrayMode3dTiledThickPrt:
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isPrt(uint32_t arrayMode) {
|
||||
switch (arrayMode) {
|
||||
case kArrayModeLinearGeneral:
|
||||
case kArrayModeLinearAligned:
|
||||
case kArrayMode1dTiledThin:
|
||||
case kArrayMode1dTiledThick:
|
||||
case kArrayMode2dTiledThin:
|
||||
case kArrayMode2dTiledThick:
|
||||
case kArrayMode2dTiledXThick:
|
||||
case kArrayMode3dTiledThin:
|
||||
case kArrayMode3dTiledThick:
|
||||
case kArrayMode3dTiledXThick:
|
||||
return false;
|
||||
|
||||
case kArrayModeTiledThinPrt:
|
||||
case kArrayMode2dTiledThinPrt:
|
||||
case kArrayModeTiledThickPrt:
|
||||
case kArrayMode2dTiledThickPrt:
|
||||
case kArrayMode3dTiledThinPrt:
|
||||
case kArrayMode3dTiledThickPrt:
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
int getTexelsPerElement(uint32_t dfmt) {
|
||||
switch (dfmt) {
|
||||
case kDataFormatBc1:
|
||||
case kDataFormatBc2:
|
||||
case kDataFormatBc3:
|
||||
case kDataFormatBc4:
|
||||
case kDataFormatBc5:
|
||||
case kDataFormatBc6:
|
||||
case kDataFormatBc7:
|
||||
return 16;
|
||||
case kDataFormat1:
|
||||
case kDataFormat1Reversed:
|
||||
return 8;
|
||||
case kDataFormatGB_GR:
|
||||
case kDataFormatBG_RG:
|
||||
return 2;
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
int getBitsPerElement(uint32_t dfmt) {
|
||||
switch (dfmt) {
|
||||
case kDataFormatInvalid:
|
||||
return 0;
|
||||
case kDataFormat8:
|
||||
return 8;
|
||||
case kDataFormat16:
|
||||
return 16;
|
||||
case kDataFormat8_8:
|
||||
return 16;
|
||||
case kDataFormat32:
|
||||
return 32;
|
||||
case kDataFormat16_16:
|
||||
return 32;
|
||||
case kDataFormat10_11_11:
|
||||
return 32;
|
||||
case kDataFormat11_11_10:
|
||||
return 32;
|
||||
case kDataFormat10_10_10_2:
|
||||
return 32;
|
||||
case kDataFormat2_10_10_10:
|
||||
return 32;
|
||||
case kDataFormat8_8_8_8:
|
||||
return 32;
|
||||
case kDataFormat32_32:
|
||||
return 64;
|
||||
case kDataFormat16_16_16_16:
|
||||
return 64;
|
||||
case kDataFormat32_32_32:
|
||||
return 96;
|
||||
case kDataFormat32_32_32_32:
|
||||
return 128;
|
||||
case kDataFormat5_6_5:
|
||||
return 16;
|
||||
case kDataFormat1_5_5_5:
|
||||
return 16;
|
||||
case kDataFormat5_5_5_1:
|
||||
return 16;
|
||||
case kDataFormat4_4_4_4:
|
||||
return 16;
|
||||
case kDataFormat8_24:
|
||||
return 32;
|
||||
case kDataFormat24_8:
|
||||
return 32;
|
||||
case kDataFormatX24_8_32:
|
||||
return 64;
|
||||
case kDataFormatGB_GR:
|
||||
return 16;
|
||||
case kDataFormatBG_RG:
|
||||
return 16;
|
||||
case kDataFormat5_9_9_9:
|
||||
return 32;
|
||||
case kDataFormatBc1:
|
||||
return 4;
|
||||
case kDataFormatBc2:
|
||||
return 8;
|
||||
case kDataFormatBc3:
|
||||
return 8;
|
||||
case kDataFormatBc4:
|
||||
return 4;
|
||||
case kDataFormatBc5:
|
||||
return 8;
|
||||
case kDataFormatBc6:
|
||||
return 8;
|
||||
case kDataFormatBc7:
|
||||
return 8;
|
||||
case kDataFormatFmask8_S2_F1:
|
||||
return 8;
|
||||
case kDataFormatFmask8_S4_F1:
|
||||
return 8;
|
||||
case kDataFormatFmask8_S8_F1:
|
||||
return 8;
|
||||
case kDataFormatFmask8_S2_F2:
|
||||
return 8;
|
||||
case kDataFormatFmask8_S4_F2:
|
||||
return 8;
|
||||
case kDataFormatFmask8_S4_F4:
|
||||
return 8;
|
||||
case kDataFormatFmask16_S16_F1:
|
||||
return 16;
|
||||
case kDataFormatFmask16_S8_F2:
|
||||
return 16;
|
||||
case kDataFormatFmask32_S16_F2:
|
||||
return 32;
|
||||
case kDataFormatFmask32_S8_F4:
|
||||
return 32;
|
||||
case kDataFormatFmask32_S8_F8:
|
||||
return 32;
|
||||
case kDataFormatFmask64_S16_F4:
|
||||
return 64;
|
||||
case kDataFormatFmask64_S16_F8:
|
||||
return 64;
|
||||
case kDataFormat4_4:
|
||||
return 8;
|
||||
case kDataFormat6_5_5:
|
||||
return 16;
|
||||
case kDataFormat1:
|
||||
return 1;
|
||||
case kDataFormat1Reversed:
|
||||
return 1;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
int getTotalBitsPerElement(uint32_t dfmt) {
|
||||
return getBitsPerElement(dfmt) * getTexelsPerElement(dfmt);
|
||||
}
|
||||
|
||||
int getNumComponentsPerElement(uint32_t dfmt) {
|
||||
switch (dfmt) {
|
||||
case kDataFormatInvalid:
|
||||
return 0;
|
||||
case kDataFormat8:
|
||||
return 1;
|
||||
case kDataFormat16:
|
||||
return 1;
|
||||
case kDataFormat8_8:
|
||||
return 2;
|
||||
case kDataFormat32:
|
||||
return 1;
|
||||
case kDataFormat16_16:
|
||||
return 2;
|
||||
case kDataFormat10_11_11:
|
||||
return 3;
|
||||
case kDataFormat11_11_10:
|
||||
return 3;
|
||||
case kDataFormat10_10_10_2:
|
||||
return 4;
|
||||
case kDataFormat2_10_10_10:
|
||||
return 4;
|
||||
case kDataFormat8_8_8_8:
|
||||
return 4;
|
||||
case kDataFormat32_32:
|
||||
return 2;
|
||||
case kDataFormat16_16_16_16:
|
||||
return 4;
|
||||
case kDataFormat32_32_32:
|
||||
return 3;
|
||||
case kDataFormat32_32_32_32:
|
||||
return 4;
|
||||
case kDataFormat5_6_5:
|
||||
return 3;
|
||||
case kDataFormat1_5_5_5:
|
||||
return 4;
|
||||
case kDataFormat5_5_5_1:
|
||||
return 4;
|
||||
case kDataFormat4_4_4_4:
|
||||
return 4;
|
||||
case kDataFormat8_24:
|
||||
return 2;
|
||||
case kDataFormat24_8:
|
||||
return 2;
|
||||
case kDataFormatX24_8_32:
|
||||
return 2;
|
||||
case kDataFormatGB_GR:
|
||||
return 3;
|
||||
case kDataFormatBG_RG:
|
||||
return 3;
|
||||
case kDataFormat5_9_9_9:
|
||||
return 3;
|
||||
case kDataFormatBc1:
|
||||
return 4;
|
||||
case kDataFormatBc2:
|
||||
return 4;
|
||||
case kDataFormatBc3:
|
||||
return 4;
|
||||
case kDataFormatBc4:
|
||||
return 1;
|
||||
case kDataFormatBc5:
|
||||
return 2;
|
||||
case kDataFormatBc6:
|
||||
return 3;
|
||||
case kDataFormatBc7:
|
||||
return 4;
|
||||
case kDataFormatFmask8_S2_F1:
|
||||
return 2;
|
||||
case kDataFormatFmask8_S4_F1:
|
||||
return 2;
|
||||
case kDataFormatFmask8_S8_F1:
|
||||
return 2;
|
||||
case kDataFormatFmask8_S2_F2:
|
||||
return 2;
|
||||
case kDataFormatFmask8_S4_F2:
|
||||
return 2;
|
||||
case kDataFormatFmask8_S4_F4:
|
||||
return 2;
|
||||
case kDataFormatFmask16_S16_F1:
|
||||
return 2;
|
||||
case kDataFormatFmask16_S8_F2:
|
||||
return 2;
|
||||
case kDataFormatFmask32_S16_F2:
|
||||
return 2;
|
||||
case kDataFormatFmask32_S8_F4:
|
||||
return 2;
|
||||
case kDataFormatFmask32_S8_F8:
|
||||
return 2;
|
||||
case kDataFormatFmask64_S16_F4:
|
||||
return 2;
|
||||
case kDataFormatFmask64_S16_F8:
|
||||
return 2;
|
||||
case kDataFormat4_4:
|
||||
return 2;
|
||||
case kDataFormat6_5_5:
|
||||
return 3;
|
||||
case kDataFormat1:
|
||||
return 1;
|
||||
case kDataFormat1Reversed:
|
||||
return 1;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint32_t tileMode_getArrayMode(uint32_t tileMode) {
|
||||
return (tileMode & 0x0000003c) >> 2;
|
||||
}
|
||||
uint32_t tileMode_getPipeConfig(uint32_t tileMode) {
|
||||
return (tileMode & 0x000007c0) >> 6;
|
||||
}
|
||||
uint32_t tileMode_getTileSplit(uint32_t tileMode) {
|
||||
return (tileMode & 0x00003800) >> 11;
|
||||
}
|
||||
uint32_t tileMode_getMicroTileMode(uint32_t tileMode) {
|
||||
return (tileMode & 0x01c00000) >> 22;
|
||||
}
|
||||
uint32_t tileMode_getSampleSplit(uint32_t tileMode) {
|
||||
return (tileMode & 0x06000000) >> 25;
|
||||
}
|
||||
|
||||
uint32_t bit_ceil(uint32_t x) {
|
||||
x = x - 1;
|
||||
x |= x >> 1;
|
||||
x |= x >> 2;
|
||||
x |= x >> 4;
|
||||
x |= x >> 8;
|
||||
x |= x >> 16;
|
||||
return x + 1;
|
||||
}
|
||||
|
||||
uint32_t getElementIndex(uvec3 pos, uint32_t bitsPerElement, uint32_t microTileMode, uint32_t arrayMode) {
|
||||
uint32_t elem = 0;
|
||||
|
||||
if (microTileMode == kMicroTileModeDisplay) {
|
||||
switch (bitsPerElement) {
|
||||
case 8:
|
||||
elem |= ((pos.x >> 0) & 0x1) << 0;
|
||||
elem |= ((pos.x >> 1) & 0x1) << 1;
|
||||
elem |= ((pos.x >> 2) & 0x1) << 2;
|
||||
elem |= ((pos.y >> 1) & 0x1) << 3;
|
||||
elem |= ((pos.y >> 0) & 0x1) << 4;
|
||||
elem |= ((pos.y >> 2) & 0x1) << 5;
|
||||
break;
|
||||
case 16:
|
||||
elem |= ((pos.x >> 0) & 0x1) << 0;
|
||||
elem |= ((pos.x >> 1) & 0x1) << 1;
|
||||
elem |= ((pos.x >> 2) & 0x1) << 2;
|
||||
elem |= ((pos.y >> 0) & 0x1) << 3;
|
||||
elem |= ((pos.y >> 1) & 0x1) << 4;
|
||||
elem |= ((pos.y >> 2) & 0x1) << 5;
|
||||
break;
|
||||
case 32:
|
||||
elem |= ((pos.x >> 0) & 0x1) << 0;
|
||||
elem |= ((pos.x >> 1) & 0x1) << 1;
|
||||
elem |= ((pos.y >> 0) & 0x1) << 2;
|
||||
elem |= ((pos.x >> 2) & 0x1) << 3;
|
||||
elem |= ((pos.y >> 1) & 0x1) << 4;
|
||||
elem |= ((pos.y >> 2) & 0x1) << 5;
|
||||
break;
|
||||
case 64:
|
||||
elem |= ((pos.x >> 0) & 0x1) << 0;
|
||||
elem |= ((pos.y >> 0) & 0x1) << 1;
|
||||
elem |= ((pos.x >> 1) & 0x1) << 2;
|
||||
elem |= ((pos.x >> 2) & 0x1) << 3;
|
||||
elem |= ((pos.y >> 1) & 0x1) << 4;
|
||||
elem |= ((pos.y >> 2) & 0x1) << 5;
|
||||
break;
|
||||
}
|
||||
} else if (microTileMode == kMicroTileModeThin ||
|
||||
microTileMode == kMicroTileModeDepth) {
|
||||
elem |= ((pos.x >> 0) & 0x1) << 0;
|
||||
elem |= ((pos.y >> 0) & 0x1) << 1;
|
||||
elem |= ((pos.x >> 1) & 0x1) << 2;
|
||||
elem |= ((pos.y >> 1) & 0x1) << 3;
|
||||
elem |= ((pos.x >> 2) & 0x1) << 4;
|
||||
elem |= ((pos.y >> 2) & 0x1) << 5;
|
||||
|
||||
switch (arrayMode) {
|
||||
case kArrayMode2dTiledXThick:
|
||||
case kArrayMode3dTiledXThick:
|
||||
elem |= ((pos.z >> 2) & 0x1) << 8;
|
||||
case kArrayMode1dTiledThick:
|
||||
case kArrayMode2dTiledThick:
|
||||
case kArrayMode3dTiledThick:
|
||||
case kArrayModeTiledThickPrt:
|
||||
case kArrayMode2dTiledThickPrt:
|
||||
case kArrayMode3dTiledThickPrt:
|
||||
elem |= ((pos.z >> 0) & 0x1) << 6;
|
||||
elem |= ((pos.z >> 1) & 0x1) << 7;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else if (microTileMode == kMicroTileModeThick) {
|
||||
switch (arrayMode) {
|
||||
case kArrayMode2dTiledXThick:
|
||||
case kArrayMode3dTiledXThick:
|
||||
elem |= ((pos.z >> 2) & 0x1) << 8;
|
||||
|
||||
case kArrayMode1dTiledThick:
|
||||
case kArrayMode2dTiledThick:
|
||||
case kArrayMode3dTiledThick:
|
||||
case kArrayModeTiledThickPrt:
|
||||
case kArrayMode2dTiledThickPrt:
|
||||
case kArrayMode3dTiledThickPrt:
|
||||
if (bitsPerElement == 8 || bitsPerElement == 16) {
|
||||
elem |= ((pos.x >> 0) & 0x1) << 0;
|
||||
elem |= ((pos.y >> 0) & 0x1) << 1;
|
||||
elem |= ((pos.x >> 1) & 0x1) << 2;
|
||||
elem |= ((pos.y >> 1) & 0x1) << 3;
|
||||
elem |= ((pos.z >> 0) & 0x1) << 4;
|
||||
elem |= ((pos.z >> 1) & 0x1) << 5;
|
||||
elem |= ((pos.x >> 2) & 0x1) << 6;
|
||||
elem |= ((pos.y >> 2) & 0x1) << 7;
|
||||
} else if (bitsPerElement == 32) {
|
||||
elem |= ((pos.x >> 0) & 0x1) << 0;
|
||||
elem |= ((pos.y >> 0) & 0x1) << 1;
|
||||
elem |= ((pos.x >> 1) & 0x1) << 2;
|
||||
elem |= ((pos.z >> 0) & 0x1) << 3;
|
||||
elem |= ((pos.y >> 1) & 0x1) << 4;
|
||||
elem |= ((pos.z >> 1) & 0x1) << 5;
|
||||
elem |= ((pos.x >> 2) & 0x1) << 6;
|
||||
elem |= ((pos.y >> 2) & 0x1) << 7;
|
||||
} else if (bitsPerElement == 64 || bitsPerElement == 128) {
|
||||
elem |= ((pos.x >> 0) & 0x1) << 0;
|
||||
elem |= ((pos.y >> 0) & 0x1) << 1;
|
||||
elem |= ((pos.z >> 0) & 0x1) << 2;
|
||||
elem |= ((pos.x >> 1) & 0x1) << 3;
|
||||
elem |= ((pos.y >> 1) & 0x1) << 4;
|
||||
elem |= ((pos.z >> 1) & 0x1) << 5;
|
||||
elem |= ((pos.x >> 2) & 0x1) << 6;
|
||||
elem |= ((pos.y >> 2) & 0x1) << 7;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return elem;
|
||||
}
|
||||
|
||||
uint64_t computeLinearElementByteOffset(
|
||||
uvec3 pos, uint32_t fragmentIndex, uint32_t pitch,
|
||||
uint32_t slicePitchElems, uint32_t bitsPerElement,
|
||||
uint32_t numFragmentsPerPixel) {
|
||||
uint64_t absoluteElementIndex = pos.z * slicePitchElems + pos.y * pitch + pos.x;
|
||||
return ((absoluteElementIndex * bitsPerElement * numFragmentsPerPixel) +
|
||||
(bitsPerElement * fragmentIndex)) / 8;
|
||||
}
|
||||
|
||||
uint64_t computeLinearOffset(uint32_t bitsPerElement, uint height, uint pitch, uvec3 pos) {
|
||||
uint paddedHeight = height;
|
||||
uint paddedWidth = pitch;
|
||||
|
||||
if (bitsPerElement == 1) {
|
||||
bitsPerElement *= 8;
|
||||
paddedWidth = max((paddedWidth + 7) / 8, 1);
|
||||
}
|
||||
|
||||
uint64_t tiledRowSizeBits = uint64_t(bitsPerElement) * paddedWidth;
|
||||
uint64_t tiledSliceBits = uint64_t(paddedWidth) * paddedHeight * bitsPerElement;
|
||||
return tiledSliceBits * pos.z + tiledRowSizeBits * pos.y + bitsPerElement * pos.x;
|
||||
}
|
||||
|
||||
uint64_t getTiledBitOffset1D(uint32_t tileMode, uvec3 pos, uvec2 dataSize, uint32_t bitsPerElement) {
|
||||
uint32_t arrayMode = tileMode_getArrayMode(tileMode);
|
||||
|
||||
uint32_t paddedWidth = dataSize.x;
|
||||
uint32_t paddedHeight = dataSize.y;
|
||||
|
||||
int tileThickness = (arrayMode == kArrayMode1dTiledThick) ? 4 : 1;
|
||||
|
||||
uint64_t tileBytes = (kMicroTileWidth * kMicroTileHeight * tileThickness * bitsPerElement + 7) / 8;
|
||||
uint32_t tilesPerRow = paddedWidth / kMicroTileWidth;
|
||||
uint32_t tilesPerSlice = max(tilesPerRow * (paddedHeight / kMicroTileHeight), 1);
|
||||
|
||||
uint64_t elementIndex = getElementIndex(pos, bitsPerElement,
|
||||
tileMode_getMicroTileMode(tileMode), arrayMode);
|
||||
|
||||
uint64_t sliceOffset = (pos.z / tileThickness) * tilesPerSlice * tileBytes;
|
||||
|
||||
uint64_t tileRowIndex = pos.y / kMicroTileHeight;
|
||||
uint64_t tileColumnIndex = pos.x / kMicroTileWidth;
|
||||
uint64_t tileOffset =
|
||||
(tileRowIndex * tilesPerRow + tileColumnIndex) * tileBytes;
|
||||
|
||||
uint64_t elementOffset = elementIndex * bitsPerElement;
|
||||
return (sliceOffset + tileOffset) * 8 + elementOffset;
|
||||
}
|
||||
|
||||
layout(binding=0) uniform Config {
|
||||
uint64_t srcAddress;
|
||||
uint64_t dstAddress;
|
||||
uvec2 dataSize;
|
||||
uint32_t tileMode;
|
||||
uint32_t numFragments;
|
||||
uint32_t bitsPerElement;
|
||||
uint32_t tiledSurfaceSize;
|
||||
uint32_t linearSurfaceSize;
|
||||
} config;
|
||||
76
rpcsx-gpu2/lib/amdgpu-tiler/shaders/tiler1d.comp.glsl
Normal file
76
rpcsx-gpu2/lib/amdgpu-tiler/shaders/tiler1d.comp.glsl
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
#version 460
|
||||
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types : enable
|
||||
#extension GL_EXT_shader_atomic_int64 : enable
|
||||
#extension GL_EXT_shader_atomic_float : enable
|
||||
#extension GL_EXT_shader_image_load_formatted : enable
|
||||
#extension GL_KHR_memory_scope_semantics : enable
|
||||
#extension GL_EXT_shared_memory_block : enable
|
||||
#extension GL_EXT_scalar_block_layout : enable
|
||||
#extension GL_EXT_null_initializer : enable
|
||||
#extension GL_EXT_buffer_reference2 : enable
|
||||
#extension GL_EXT_buffer_reference_uvec2 : enable
|
||||
|
||||
#include "tiler.glsl"
|
||||
|
||||
void main() {
|
||||
uvec3 pos = gl_GlobalInvocationID;
|
||||
uint64_t tiledSliceOffset = 0;
|
||||
uint64_t linearSliceOffset = 0;
|
||||
if (config.tiledSurfaceSize != 0) {
|
||||
tiledSliceOffset = pos.z * config.tiledSurfaceSize;
|
||||
linearSliceOffset = pos.z * config.linearSurfaceSize;
|
||||
pos.z = 0;
|
||||
}
|
||||
|
||||
uint64_t tiledByteOffset = getTiledBitOffset1D(
|
||||
config.tileMode,
|
||||
pos,
|
||||
config.dataSize,
|
||||
config.bitsPerElement
|
||||
) / 8;
|
||||
|
||||
tiledByteOffset += tiledSliceOffset;
|
||||
|
||||
uint64_t linearByteOffset = computeLinearElementByteOffset(
|
||||
pos,
|
||||
0,
|
||||
config.dataSize.x,
|
||||
config.dataSize.x * config.dataSize.y,
|
||||
config.bitsPerElement,
|
||||
1 << config.numFragments
|
||||
);
|
||||
|
||||
linearByteOffset += linearSliceOffset;
|
||||
|
||||
switch ((config.bitsPerElement + 7) / 8) {
|
||||
case 1:
|
||||
buffer_reference_uint8_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint8_t(config.srcAddress + linearByteOffset).data;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
buffer_reference_uint16_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint16_t(config.srcAddress + linearByteOffset).data;
|
||||
break;
|
||||
|
||||
case 4:
|
||||
buffer_reference_uint32_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint32_t(config.srcAddress + linearByteOffset).data;
|
||||
break;
|
||||
|
||||
case 8:
|
||||
buffer_reference_uint64_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset).data;
|
||||
break;
|
||||
|
||||
case 16:
|
||||
buffer_reference_uint64_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset).data;
|
||||
buffer_reference_uint64_t(config.dstAddress + tiledByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset + 8).data;
|
||||
break;
|
||||
|
||||
case 32:
|
||||
buffer_reference_uint64_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset).data;
|
||||
buffer_reference_uint64_t(config.dstAddress + tiledByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset + 8).data;
|
||||
buffer_reference_uint64_t(config.dstAddress + tiledByteOffset + 16).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset + 16).data;
|
||||
buffer_reference_uint64_t(config.dstAddress + tiledByteOffset + 24).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset + 24).data;
|
||||
break;
|
||||
}
|
||||
}
|
||||
76
rpcsx-gpu2/lib/amdgpu-tiler/shaders/tiler2d.comp.glsl
Normal file
76
rpcsx-gpu2/lib/amdgpu-tiler/shaders/tiler2d.comp.glsl
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
#version 460
|
||||
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types : enable
|
||||
#extension GL_EXT_shader_atomic_int64 : enable
|
||||
#extension GL_EXT_shader_atomic_float : enable
|
||||
#extension GL_EXT_shader_image_load_formatted : enable
|
||||
#extension GL_KHR_memory_scope_semantics : enable
|
||||
#extension GL_EXT_shared_memory_block : enable
|
||||
#extension GL_EXT_scalar_block_layout : enable
|
||||
#extension GL_EXT_null_initializer : enable
|
||||
#extension GL_EXT_buffer_reference2 : enable
|
||||
#extension GL_EXT_buffer_reference_uvec2 : enable
|
||||
|
||||
#include "tiler.glsl"
|
||||
|
||||
void main() {
|
||||
uvec3 pos = gl_GlobalInvocationID;
|
||||
uint64_t tiledSliceOffset = 0;
|
||||
uint64_t linearSliceOffset = 0;
|
||||
if (config.tiledSurfaceSize != 0) {
|
||||
tiledSliceOffset = pos.z * config.tiledSurfaceSize;
|
||||
linearSliceOffset = pos.z * config.linearSurfaceSize;
|
||||
pos.z = 0;
|
||||
}
|
||||
|
||||
uint64_t tiledByteOffset = getTiledBitOffset1D(
|
||||
config.tileMode,
|
||||
pos,
|
||||
config.dataSize,
|
||||
config.bitsPerElement
|
||||
) / 8;
|
||||
|
||||
tiledByteOffset += tiledSliceOffset;
|
||||
|
||||
uint64_t linearByteOffset = computeLinearElementByteOffset(
|
||||
pos,
|
||||
0,
|
||||
config.dataSize.x,
|
||||
config.dataSize.x * config.dataSize.y,
|
||||
config.bitsPerElement,
|
||||
1 << config.numFragments
|
||||
);
|
||||
|
||||
linearByteOffset += linearSliceOffset;
|
||||
|
||||
switch ((config.bitsPerElement + 7) / 8) {
|
||||
case 1:
|
||||
buffer_reference_uint8_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint8_t(config.srcAddress + linearByteOffset).data;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
buffer_reference_uint16_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint16_t(config.srcAddress + linearByteOffset).data;
|
||||
break;
|
||||
|
||||
case 4:
|
||||
buffer_reference_uint32_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint32_t(config.srcAddress + linearByteOffset).data;
|
||||
break;
|
||||
|
||||
case 8:
|
||||
buffer_reference_uint64_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset).data;
|
||||
break;
|
||||
|
||||
case 16:
|
||||
buffer_reference_uint64_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset).data;
|
||||
buffer_reference_uint64_t(config.dstAddress + tiledByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset + 8).data;
|
||||
break;
|
||||
|
||||
case 32:
|
||||
buffer_reference_uint64_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset).data;
|
||||
buffer_reference_uint64_t(config.dstAddress + tiledByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset + 8).data;
|
||||
buffer_reference_uint64_t(config.dstAddress + tiledByteOffset + 16).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset + 16).data;
|
||||
buffer_reference_uint64_t(config.dstAddress + tiledByteOffset + 24).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset + 24).data;
|
||||
break;
|
||||
}
|
||||
}
|
||||
76
rpcsx-gpu2/lib/amdgpu-tiler/shaders/tilerLinear.comp.glsl
Normal file
76
rpcsx-gpu2/lib/amdgpu-tiler/shaders/tilerLinear.comp.glsl
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
#version 460
|
||||
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types : enable
|
||||
#extension GL_EXT_shader_atomic_int64 : enable
|
||||
#extension GL_EXT_shader_atomic_float : enable
|
||||
#extension GL_EXT_shader_image_load_formatted : enable
|
||||
#extension GL_KHR_memory_scope_semantics : enable
|
||||
#extension GL_EXT_shared_memory_block : enable
|
||||
#extension GL_EXT_scalar_block_layout : enable
|
||||
#extension GL_EXT_null_initializer : enable
|
||||
#extension GL_EXT_buffer_reference2 : enable
|
||||
#extension GL_EXT_buffer_reference_uvec2 : enable
|
||||
|
||||
#include "tiler.glsl"
|
||||
|
||||
void main() {
|
||||
uvec3 pos = gl_GlobalInvocationID;
|
||||
uint64_t tiledSliceOffset = 0;
|
||||
uint64_t linearSliceOffset = 0;
|
||||
if (config.tiledSurfaceSize != 0) {
|
||||
tiledSliceOffset = pos.z * config.tiledSurfaceSize;
|
||||
linearSliceOffset = pos.z * config.linearSurfaceSize;
|
||||
pos.z = 0;
|
||||
}
|
||||
|
||||
uint64_t tiledByteOffset = computeLinearOffset(
|
||||
config.bitsPerElement,
|
||||
config.dataSize.y,
|
||||
config.dataSize.x,
|
||||
pos
|
||||
) / 8;
|
||||
|
||||
tiledByteOffset += tiledSliceOffset;
|
||||
|
||||
uint64_t linearByteOffset = computeLinearElementByteOffset(
|
||||
pos,
|
||||
0,
|
||||
config.dataSize.x,
|
||||
config.dataSize.x * config.dataSize.y,
|
||||
config.bitsPerElement,
|
||||
1 << config.numFragments
|
||||
);
|
||||
|
||||
linearByteOffset += linearSliceOffset;
|
||||
|
||||
switch ((config.bitsPerElement + 7) / 8) {
|
||||
case 1:
|
||||
buffer_reference_uint8_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint8_t(config.srcAddress + linearByteOffset).data;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
buffer_reference_uint16_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint16_t(config.srcAddress + linearByteOffset).data;
|
||||
break;
|
||||
|
||||
case 4:
|
||||
buffer_reference_uint32_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint32_t(config.srcAddress + linearByteOffset).data;
|
||||
break;
|
||||
|
||||
case 8:
|
||||
buffer_reference_uint64_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset).data;
|
||||
break;
|
||||
|
||||
case 16:
|
||||
buffer_reference_uint64_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset).data;
|
||||
buffer_reference_uint64_t(config.dstAddress + tiledByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset + 8).data;
|
||||
break;
|
||||
|
||||
case 32:
|
||||
buffer_reference_uint64_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset).data;
|
||||
buffer_reference_uint64_t(config.dstAddress + tiledByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset + 8).data;
|
||||
buffer_reference_uint64_t(config.dstAddress + tiledByteOffset + 16).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset + 16).data;
|
||||
buffer_reference_uint64_t(config.dstAddress + tiledByteOffset + 24).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset + 24).data;
|
||||
break;
|
||||
}
|
||||
}
|
||||
387
rpcsx-gpu2/lib/amdgpu-tiler/src/tiler.cpp
Normal file
387
rpcsx-gpu2/lib/amdgpu-tiler/src/tiler.cpp
Normal file
|
|
@ -0,0 +1,387 @@
|
|||
#include "gnm/constants.hpp"
|
||||
#include <amdgpu/tiler.hpp>
|
||||
#include <gnm/gnm.hpp>
|
||||
#include <bit>
|
||||
|
||||
using namespace amdgpu;
|
||||
|
||||
static constexpr SurfaceInfo
|
||||
computeTexture1dInfo(ArrayMode arrayMode, gnm::TextureType type,
|
||||
gnm::DataFormat dfmt, std::uint32_t width,
|
||||
std::uint32_t height, std::uint32_t depth,
|
||||
std::uint32_t pitch, int baseArrayLayer, int arrayCount,
|
||||
int baseMipLevel, int mipCount, bool pow2pad) {
|
||||
bool isCubemap = type == gnm::TextureType::Cube;
|
||||
bool isVolume = type == gnm::TextureType::Dim3D;
|
||||
|
||||
auto bitsPerFragment = getBitsPerElement(dfmt);
|
||||
std::uint32_t arraySliceCount = depth;
|
||||
|
||||
if (isCubemap) {
|
||||
arraySliceCount *= 6;
|
||||
} else if (isVolume) {
|
||||
arraySliceCount = 1;
|
||||
}
|
||||
|
||||
int numFragments = (type == gnm::TextureType::Msaa2D ||
|
||||
type == gnm::TextureType::MsaaArray2D)
|
||||
? (baseArrayLayer + arrayCount - 1)
|
||||
: 0;
|
||||
|
||||
auto numFragmentsPerPixel = 1 << numFragments;
|
||||
auto isBlockCompressed = getTexelsPerElement(dfmt) > 1;
|
||||
|
||||
auto bitsPerElement = bitsPerFragment;
|
||||
depth = isVolume ? depth : 1;
|
||||
|
||||
if (isBlockCompressed) {
|
||||
switch (bitsPerFragment) {
|
||||
case 1:
|
||||
bitsPerElement *= 8;
|
||||
break;
|
||||
case 4:
|
||||
case 8:
|
||||
bitsPerElement *= 16;
|
||||
break;
|
||||
case 16:
|
||||
std::abort();
|
||||
break;
|
||||
|
||||
default:
|
||||
std::abort();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (pow2pad) {
|
||||
arraySliceCount = std::bit_ceil(arraySliceCount);
|
||||
}
|
||||
|
||||
std::uint64_t surfaceOffset = 0;
|
||||
std::uint64_t surfaceSize = 0;
|
||||
|
||||
SurfaceInfo result;
|
||||
result.width = width;
|
||||
result.height = height;
|
||||
result.depth = depth;
|
||||
result.pitch = pitch;
|
||||
result.numFragments = numFragments;
|
||||
result.bitsPerElement = bitsPerElement;
|
||||
result.arrayLayerCount = arraySliceCount;
|
||||
|
||||
auto thickness = getMicroTileThickness(arrayMode);
|
||||
|
||||
for (int mipLevel = 0; mipLevel < baseMipLevel + mipCount; mipLevel++) {
|
||||
std::uint32_t elemWidth = std::max<std::uint64_t>(width >> mipLevel, 1);
|
||||
std::uint32_t elemPitch = std::max<std::uint64_t>(pitch >> mipLevel, 1);
|
||||
std::uint32_t elemHeight = std::max<std::uint64_t>(height >> mipLevel, 1);
|
||||
std::uint32_t elemDepth = std::max<std::uint64_t>(depth >> mipLevel, 1);
|
||||
|
||||
std::uint32_t linearPitch = elemPitch;
|
||||
std::uint32_t linearWidth = elemWidth;
|
||||
std::uint32_t linearHeight = elemHeight;
|
||||
std::uint32_t linearDepth = elemDepth;
|
||||
|
||||
if (isBlockCompressed) {
|
||||
switch (bitsPerFragment) {
|
||||
case 1:
|
||||
linearWidth = std::max<std::uint64_t>((linearWidth + 7) / 8, 1);
|
||||
linearPitch = std::max<std::uint64_t>((linearPitch + 7) / 8, 1);
|
||||
break;
|
||||
case 4:
|
||||
case 8:
|
||||
linearWidth = std::max<std::uint64_t>((linearWidth + 3) / 4, 1);
|
||||
linearPitch = std::max<std::uint64_t>((linearPitch + 3) / 4, 1);
|
||||
linearHeight = std::max<std::uint64_t>((linearHeight + 3) / 4, 1);
|
||||
break;
|
||||
case 16:
|
||||
std::abort();
|
||||
break;
|
||||
|
||||
default:
|
||||
std::abort();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (pow2pad) {
|
||||
linearPitch = std::bit_ceil(linearPitch);
|
||||
linearWidth = std::bit_ceil(linearWidth);
|
||||
linearHeight = std::bit_ceil(linearHeight);
|
||||
linearDepth = std::bit_ceil(linearDepth);
|
||||
}
|
||||
|
||||
if (mipLevel > 0 && pitch > 0) {
|
||||
linearPitch = linearWidth;
|
||||
}
|
||||
|
||||
std::uint32_t paddedPitch =
|
||||
(linearPitch + kMicroTileWidth - 1) & ~(kMicroTileWidth - 1);
|
||||
std::uint32_t paddedHeight =
|
||||
(linearHeight + kMicroTileHeight - 1) & ~(kMicroTileHeight - 1);
|
||||
std::uint32_t paddedDepth = linearDepth;
|
||||
|
||||
if (!isCubemap || (mipLevel > 0 && linearDepth > 1)) {
|
||||
if (isCubemap) {
|
||||
linearDepth = std::bit_ceil(linearDepth);
|
||||
}
|
||||
|
||||
paddedDepth = (linearDepth + thickness - 1) & ~(thickness - 1);
|
||||
}
|
||||
|
||||
std::uint32_t tempPitch = paddedPitch;
|
||||
std::uint64_t logicalSliceSizeBytes = std::uint64_t(tempPitch) *
|
||||
paddedHeight * bitsPerElement *
|
||||
numFragmentsPerPixel;
|
||||
logicalSliceSizeBytes = (logicalSliceSizeBytes + 7) / 8;
|
||||
|
||||
uint64_t physicalSliceSizeBytes = logicalSliceSizeBytes * thickness;
|
||||
while ((physicalSliceSizeBytes % kPipeInterleaveBytes) != 0) {
|
||||
tempPitch += kMicroTileWidth;
|
||||
logicalSliceSizeBytes = std::uint64_t(tempPitch) * paddedHeight *
|
||||
bitsPerElement * numFragmentsPerPixel;
|
||||
logicalSliceSizeBytes = (logicalSliceSizeBytes + 7) / 8;
|
||||
physicalSliceSizeBytes = logicalSliceSizeBytes * thickness;
|
||||
}
|
||||
|
||||
surfaceSize = logicalSliceSizeBytes * paddedDepth;
|
||||
auto linearSize =
|
||||
linearDepth *
|
||||
(linearPitch * linearHeight * bitsPerElement * numFragmentsPerPixel +
|
||||
7) /
|
||||
8;
|
||||
|
||||
result.setSubresourceInfo(mipLevel, {
|
||||
.dataWidth = linearPitch,
|
||||
.dataHeight = linearHeight,
|
||||
.dataDepth = linearDepth,
|
||||
.offset = surfaceOffset,
|
||||
.tiledSize = surfaceSize,
|
||||
.linearSize = linearSize,
|
||||
});
|
||||
|
||||
surfaceOffset += arraySliceCount * surfaceSize;
|
||||
}
|
||||
|
||||
result.totalSize = surfaceOffset;
|
||||
return result;
|
||||
}
|
||||
|
||||
static constexpr SurfaceInfo computeTextureLinearInfo(
|
||||
ArrayMode arrayMode, gnm::TextureType type, gnm::DataFormat dfmt,
|
||||
std::uint32_t width, std::uint32_t height, std::uint32_t depth,
|
||||
std::uint32_t pitch, int baseArrayLayer, int arrayCount, int baseMipLevel,
|
||||
int mipCount, bool pow2pad) {
|
||||
bool isCubemap = type == gnm::TextureType::Cube;
|
||||
bool isVolume = type == gnm::TextureType::Dim3D;
|
||||
|
||||
auto bitsPerFragment = getBitsPerElement(dfmt);
|
||||
std::uint32_t arraySliceCount = depth;
|
||||
|
||||
if (isCubemap) {
|
||||
arraySliceCount *= 6;
|
||||
} else if (isVolume) {
|
||||
arraySliceCount = 1;
|
||||
}
|
||||
|
||||
int numFragments = (type == gnm::TextureType::Msaa2D ||
|
||||
type == gnm::TextureType::MsaaArray2D)
|
||||
? (baseArrayLayer + arrayCount - 1)
|
||||
: 0;
|
||||
|
||||
auto numFragmentsPerPixel = 1 << numFragments;
|
||||
auto isBlockCompressed = getTexelsPerElement(dfmt) > 1;
|
||||
|
||||
auto bitsPerElement = bitsPerFragment;
|
||||
depth = isVolume ? depth : 1;
|
||||
|
||||
if (isBlockCompressed) {
|
||||
switch (bitsPerFragment) {
|
||||
case 1:
|
||||
bitsPerElement *= 8;
|
||||
break;
|
||||
case 4:
|
||||
case 8:
|
||||
bitsPerElement *= 16;
|
||||
break;
|
||||
case 16:
|
||||
std::abort();
|
||||
break;
|
||||
|
||||
default:
|
||||
std::abort();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (pow2pad) {
|
||||
arraySliceCount = std::bit_ceil(arraySliceCount);
|
||||
}
|
||||
|
||||
std::uint64_t surfaceOffset = 0;
|
||||
std::uint64_t surfaceSize = 0;
|
||||
|
||||
SurfaceInfo result;
|
||||
result.width = width;
|
||||
result.height = height;
|
||||
result.depth = depth;
|
||||
result.pitch = pitch;
|
||||
result.numFragments = numFragments;
|
||||
result.bitsPerElement = bitsPerElement;
|
||||
result.arrayLayerCount = arraySliceCount;
|
||||
|
||||
for (int mipLevel = 0; mipLevel < baseMipLevel + mipCount; mipLevel++) {
|
||||
std::uint32_t elemWidth = std::max<std::uint64_t>(width >> mipLevel, 1);
|
||||
std::uint32_t elemPitch = std::max<std::uint64_t>(pitch >> mipLevel, 1);
|
||||
std::uint32_t elemHeight = std::max<std::uint64_t>(height >> mipLevel, 1);
|
||||
std::uint32_t elemDepth = std::max<std::uint64_t>(depth >> mipLevel, 1);
|
||||
|
||||
std::uint32_t linearPitch = elemPitch;
|
||||
std::uint32_t linearWidth = elemWidth;
|
||||
std::uint32_t linearHeight = elemHeight;
|
||||
std::uint32_t linearDepth = elemDepth;
|
||||
|
||||
if (isBlockCompressed) {
|
||||
switch (bitsPerFragment) {
|
||||
case 1:
|
||||
linearWidth = std::max<std::uint64_t>((linearWidth + 7) / 8, 1);
|
||||
linearPitch = std::max<std::uint64_t>((linearPitch + 7) / 8, 1);
|
||||
break;
|
||||
case 4:
|
||||
case 8:
|
||||
linearWidth = std::max<std::uint64_t>((linearWidth + 3) / 4, 1);
|
||||
linearPitch = std::max<std::uint64_t>((linearPitch + 3) / 4, 1);
|
||||
linearHeight = std::max<std::uint64_t>((linearHeight + 3) / 4, 1);
|
||||
break;
|
||||
case 16:
|
||||
std::abort();
|
||||
break;
|
||||
|
||||
default:
|
||||
std::abort();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (pow2pad) {
|
||||
linearPitch = std::bit_ceil(linearPitch);
|
||||
linearWidth = std::bit_ceil(linearWidth);
|
||||
linearHeight = std::bit_ceil(linearHeight);
|
||||
linearDepth = std::bit_ceil(linearDepth);
|
||||
}
|
||||
|
||||
if (mipLevel > 0 && pitch > 0) {
|
||||
linearPitch = linearWidth;
|
||||
}
|
||||
|
||||
if (arrayMode == kArrayModeLinearGeneral) {
|
||||
surfaceSize = (static_cast<uint64_t>(linearPitch) *
|
||||
(linearHeight)*bitsPerElement * numFragmentsPerPixel +
|
||||
7) /
|
||||
8;
|
||||
surfaceSize *= linearDepth;
|
||||
|
||||
result.setSubresourceInfo(mipLevel, {
|
||||
.dataWidth = linearPitch,
|
||||
.dataHeight = linearHeight,
|
||||
.dataDepth = linearDepth,
|
||||
.offset = surfaceOffset,
|
||||
.tiledSize = surfaceSize,
|
||||
.linearSize = surfaceSize,
|
||||
});
|
||||
} else {
|
||||
if (mipLevel > 0 && pitch > 0) {
|
||||
linearPitch = linearWidth;
|
||||
}
|
||||
|
||||
auto pitchAlign = std::max(8UL, 64UL / ((bitsPerElement + 7) / 8UL));
|
||||
std::uint32_t paddedPitch =
|
||||
(linearPitch + pitchAlign - 1) & ~(pitchAlign - 1);
|
||||
std::uint32_t paddedHeight = linearHeight;
|
||||
std::uint32_t paddedDepth = linearDepth;
|
||||
|
||||
if (!isCubemap || (mipLevel > 0 && linearDepth > 1)) {
|
||||
if (isCubemap) {
|
||||
linearDepth = std::bit_ceil(linearDepth);
|
||||
}
|
||||
|
||||
auto thickness = getMicroTileThickness(arrayMode);
|
||||
paddedDepth = (linearDepth + thickness - 1) & ~(thickness - 1);
|
||||
}
|
||||
|
||||
std::uint32_t pixelsPerPipeInterleave =
|
||||
kPipeInterleaveBytes / ((bitsPerElement + 7) / 8);
|
||||
std::uint32_t sliceAlignInPixel =
|
||||
pixelsPerPipeInterleave < 64 ? 64 : pixelsPerPipeInterleave;
|
||||
auto pixelsPerSlice = static_cast<uint64_t>(paddedPitch) * paddedHeight *
|
||||
numFragmentsPerPixel;
|
||||
while (pixelsPerSlice % sliceAlignInPixel) {
|
||||
paddedPitch += pitchAlign;
|
||||
pixelsPerSlice = static_cast<uint64_t>(paddedPitch) * paddedHeight *
|
||||
numFragmentsPerPixel;
|
||||
}
|
||||
|
||||
surfaceSize = (pixelsPerSlice * bitsPerElement + 7) / 8 * paddedDepth;
|
||||
|
||||
result.setSubresourceInfo(mipLevel, {
|
||||
.dataWidth = paddedPitch,
|
||||
.dataHeight = paddedHeight,
|
||||
.dataDepth = paddedDepth,
|
||||
.offset = surfaceOffset,
|
||||
.tiledSize = surfaceSize,
|
||||
.linearSize = surfaceSize,
|
||||
});
|
||||
}
|
||||
|
||||
surfaceOffset += arraySliceCount * surfaceSize;
|
||||
}
|
||||
|
||||
result.totalSize = surfaceOffset;
|
||||
return result;
|
||||
}
|
||||
|
||||
SurfaceInfo amdgpu::computeSurfaceInfo(
|
||||
TileMode tileMode, gnm::TextureType type, gnm::DataFormat dfmt,
|
||||
std::uint32_t width, std::uint32_t height, std::uint32_t depth,
|
||||
std::uint32_t pitch, int baseArrayLayer, int arrayCount, int baseMipLevel,
|
||||
int mipCount, bool pow2pad) {
|
||||
switch (tileMode.arrayMode()) {
|
||||
case kArrayModeLinearGeneral:
|
||||
case kArrayModeLinearAligned:
|
||||
return computeTextureLinearInfo(
|
||||
tileMode.arrayMode(), type, dfmt, width, height, depth, pitch,
|
||||
baseArrayLayer, arrayCount, baseMipLevel, mipCount, pow2pad);
|
||||
|
||||
case kArrayMode1dTiledThin:
|
||||
case kArrayMode1dTiledThick:
|
||||
return computeTexture1dInfo(tileMode.arrayMode(), type, dfmt, width, height,
|
||||
depth, pitch, baseArrayLayer, arrayCount,
|
||||
baseMipLevel, mipCount, pow2pad);
|
||||
|
||||
case kArrayMode2dTiledThin:
|
||||
case kArrayMode2dTiledThick:
|
||||
case kArrayMode2dTiledXThick:
|
||||
case kArrayMode3dTiledThin:
|
||||
case kArrayMode3dTiledThick:
|
||||
case kArrayMode3dTiledXThick:
|
||||
case kArrayModeTiledThinPrt:
|
||||
case kArrayModeTiledThickPrt:
|
||||
case kArrayMode2dTiledThinPrt:
|
||||
case kArrayMode2dTiledThickPrt:
|
||||
case kArrayMode3dTiledThinPrt:
|
||||
case kArrayMode3dTiledThickPrt:
|
||||
std::abort();
|
||||
}
|
||||
|
||||
std::abort();
|
||||
}
|
||||
|
||||
SurfaceInfo amdgpu::computeSurfaceInfo(const gnm::TBuffer &tbuffer,
|
||||
TileMode tileMode) {
|
||||
return computeSurfaceInfo(
|
||||
tileMode, tbuffer.type, tbuffer.dfmt, tbuffer.width + 1,
|
||||
tbuffer.height + 1, tbuffer.depth + 1, tbuffer.pitch + 1,
|
||||
tbuffer.base_array, tbuffer.last_array - tbuffer.base_array + 1,
|
||||
tbuffer.base_level, tbuffer.last_level - tbuffer.base_level + 1,
|
||||
tbuffer.pow2pad != 0);
|
||||
}
|
||||
441
rpcsx-gpu2/lib/amdgpu-tiler/src/tiler_cpu.cpp
Normal file
441
rpcsx-gpu2/lib/amdgpu-tiler/src/tiler_cpu.cpp
Normal file
|
|
@ -0,0 +1,441 @@
|
|||
#include "amdgpu/tiler_cpu.hpp"
|
||||
#include "amdgpu/tiler.hpp"
|
||||
#include "gnm/gnm.hpp"
|
||||
|
||||
constexpr std::uint64_t
|
||||
getTiledOffset1D(gnm::TextureType texType, bool isPow2Padded,
|
||||
gnm::DataFormat dfmt, amdgpu::TileMode tileMode, int mipLevel,
|
||||
int arraySlice, int numFragments, int width, int height,
|
||||
int depth, int pitch, int x, int y, int z) {
|
||||
|
||||
using namespace amdgpu;
|
||||
bool isCubemap = texType == gnm::TextureType::Cube;
|
||||
bool isVolume = texType == gnm::TextureType::Dim3D;
|
||||
|
||||
auto bitsPerFragment = getBitsPerElement(dfmt);
|
||||
uint32_t arraySliceCount = depth;
|
||||
|
||||
if (isCubemap) {
|
||||
arraySliceCount *= 6;
|
||||
} else if (isVolume) {
|
||||
arraySliceCount = 1;
|
||||
}
|
||||
|
||||
auto numFragmentsPerPixel = 1 << numFragments;
|
||||
auto isBlockCompressed = getTexelsPerElement(dfmt) > 1;
|
||||
auto arrayMode = tileMode.arrayMode();
|
||||
|
||||
auto bitsPerElement = bitsPerFragment;
|
||||
auto paddedWidth = std::max((mipLevel != 0 ? pitch : width) >> mipLevel, 1);
|
||||
auto paddedHeight = std::max(height >> mipLevel, 1);
|
||||
|
||||
auto tileThickness = (arrayMode == amdgpu::kArrayMode1dTiledThick) ? 4 : 1;
|
||||
|
||||
if (isBlockCompressed) {
|
||||
switch (bitsPerFragment) {
|
||||
case 1:
|
||||
bitsPerElement *= 8;
|
||||
paddedWidth = std::max((paddedWidth + 7) / 8, 1);
|
||||
break;
|
||||
case 4:
|
||||
case 8:
|
||||
bitsPerElement *= 16;
|
||||
paddedWidth = std::max((paddedWidth + 3) / 4, 1);
|
||||
paddedHeight = std::max((paddedHeight + 3) / 4, 1);
|
||||
break;
|
||||
case 16:
|
||||
std::abort();
|
||||
break;
|
||||
|
||||
default:
|
||||
std::abort();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (isPow2Padded) {
|
||||
arraySliceCount = std::bit_ceil(arraySliceCount);
|
||||
paddedWidth = std::bit_ceil(unsigned(paddedWidth));
|
||||
paddedHeight = std::bit_ceil(unsigned(paddedHeight));
|
||||
}
|
||||
|
||||
uint64_t finalSurfaceOffset = 0;
|
||||
uint64_t finalSurfaceSize = 0;
|
||||
|
||||
auto thickness = getMicroTileThickness(arrayMode);
|
||||
|
||||
for (int i = 0; i <= mipLevel; i++) {
|
||||
finalSurfaceOffset += arraySliceCount * finalSurfaceSize;
|
||||
|
||||
std::uint32_t elemWidth =
|
||||
std::max<std::uint64_t>((i > 0 ? pitch : width) >> i, 1);
|
||||
std::uint32_t elemHeight = std::max<std::uint64_t>(height >> i, 1);
|
||||
std::uint32_t elemDepth =
|
||||
std::max<std::uint64_t>((isVolume ? depth : 1) >> i, 1);
|
||||
|
||||
if (isBlockCompressed) {
|
||||
switch (bitsPerFragment) {
|
||||
case 1:
|
||||
elemWidth = std::max<std::uint64_t>((elemWidth + 7) / 8, 1);
|
||||
break;
|
||||
case 4:
|
||||
case 8:
|
||||
elemWidth = std::max<std::uint64_t>((elemWidth + 3) / 4, 1);
|
||||
elemHeight = std::max<std::uint64_t>((elemHeight + 3) / 4, 1);
|
||||
break;
|
||||
case 16:
|
||||
std::abort();
|
||||
break;
|
||||
|
||||
default:
|
||||
std::abort();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (isPow2Padded) {
|
||||
elemWidth = std::bit_ceil(elemWidth);
|
||||
elemHeight = std::bit_ceil(elemHeight);
|
||||
elemDepth = std::bit_ceil(elemDepth);
|
||||
}
|
||||
|
||||
elemWidth = (elemWidth + kMicroTileWidth - 1) & ~(kMicroTileWidth - 1);
|
||||
elemHeight = (elemHeight + kMicroTileHeight - 1) & ~(kMicroTileHeight - 1);
|
||||
elemDepth = (elemDepth + thickness - 1) & ~(thickness - 1);
|
||||
|
||||
std::uint32_t tempPitch = elemWidth;
|
||||
std::uint64_t logicalSliceSizeBytes = std::uint64_t(tempPitch) *
|
||||
elemHeight * bitsPerElement *
|
||||
numFragmentsPerPixel;
|
||||
logicalSliceSizeBytes = (logicalSliceSizeBytes + 7) / 8;
|
||||
|
||||
uint64_t physicalSliceSizeBytes = logicalSliceSizeBytes * thickness;
|
||||
while ((physicalSliceSizeBytes % kPipeInterleaveBytes) != 0) {
|
||||
tempPitch += 8;
|
||||
logicalSliceSizeBytes = std::uint64_t(tempPitch) * elemHeight *
|
||||
bitsPerElement * numFragmentsPerPixel;
|
||||
logicalSliceSizeBytes = (logicalSliceSizeBytes + 7) / 8;
|
||||
physicalSliceSizeBytes = logicalSliceSizeBytes * thickness;
|
||||
}
|
||||
|
||||
finalSurfaceSize = logicalSliceSizeBytes * elemDepth;
|
||||
}
|
||||
|
||||
finalSurfaceOffset += finalSurfaceSize * (uint64_t)arraySlice;
|
||||
|
||||
auto tileBytes =
|
||||
(kMicroTileWidth * kMicroTileHeight * tileThickness * bitsPerElement +
|
||||
7) /
|
||||
8;
|
||||
auto tilesPerRow = paddedWidth / kMicroTileWidth;
|
||||
auto tilesPerSlice =
|
||||
std::max(tilesPerRow * (paddedHeight / kMicroTileHeight), 1U);
|
||||
|
||||
uint64_t elementIndex = getElementIndex(x, y, z, bitsPerElement,
|
||||
tileMode.microTileMode(), arrayMode);
|
||||
|
||||
uint64_t sliceOffset = (z / tileThickness) * tilesPerSlice * tileBytes;
|
||||
|
||||
uint64_t tileRowIndex = y / kMicroTileHeight;
|
||||
uint64_t tileColumnIndex = x / kMicroTileWidth;
|
||||
uint64_t tileOffset =
|
||||
(tileRowIndex * tilesPerRow + tileColumnIndex) * tileBytes;
|
||||
|
||||
uint64_t elementOffset = elementIndex * bitsPerElement;
|
||||
uint64_t finalOffset = (sliceOffset + tileOffset) * 8 + elementOffset;
|
||||
|
||||
return finalOffset + finalSurfaceOffset * 8;
|
||||
}
|
||||
|
||||
constexpr std::uint64_t getTiledOffsetLinear(gnm::DataFormat dfmt, int height,
|
||||
int pitch, int x, int y, int z) {
|
||||
auto bitsPerFragment = getBitsPerElement(dfmt);
|
||||
|
||||
auto bitsPerElement = bitsPerFragment;
|
||||
auto paddedHeight = height;
|
||||
auto paddedWidth = pitch;
|
||||
|
||||
if (bitsPerFragment == 1) {
|
||||
bitsPerElement *= 8;
|
||||
paddedWidth = std::max((paddedWidth + 7) / 8, 1);
|
||||
}
|
||||
|
||||
uint64_t tiledRowSizeBits = bitsPerElement * paddedWidth;
|
||||
uint64_t tiledSliceBits = paddedWidth * paddedHeight * bitsPerElement;
|
||||
return tiledSliceBits * z + tiledRowSizeBits * y + bitsPerElement * x;
|
||||
}
|
||||
|
||||
constexpr std::uint64_t
|
||||
getTiledOffset2D(gnm::TextureType texType, bool isPow2Padded,
|
||||
gnm::DataFormat dfmt, amdgpu::TileMode tileMode,
|
||||
amdgpu::MacroTileMode macroTileMode, int mipLevel,
|
||||
int arraySlice, int numFragments, int width, int height,
|
||||
int depth, int pitch, int x, int y, int z, int fragmentIndex) {
|
||||
using namespace amdgpu;
|
||||
|
||||
bool isCubemap = texType == gnm::TextureType::Cube;
|
||||
bool isVolume = texType == gnm::TextureType::Dim3D;
|
||||
auto m_bitsPerFragment = getBitsPerElement(dfmt);
|
||||
|
||||
auto m_isBlockCompressed = getTexelsPerElement(dfmt) > 1;
|
||||
auto tileSwizzleMask = 0;
|
||||
auto numFragmentsPerPixel = 1 << numFragments;
|
||||
auto arrayMode = tileMode.arrayMode();
|
||||
|
||||
auto tileThickness = 1;
|
||||
|
||||
switch (arrayMode) {
|
||||
case amdgpu::kArrayMode2dTiledThin:
|
||||
case amdgpu::kArrayMode3dTiledThin:
|
||||
case amdgpu::kArrayModeTiledThinPrt:
|
||||
case amdgpu::kArrayMode2dTiledThinPrt:
|
||||
case amdgpu::kArrayMode3dTiledThinPrt:
|
||||
tileThickness = 1;
|
||||
break;
|
||||
case amdgpu::kArrayMode1dTiledThick:
|
||||
case amdgpu::kArrayMode2dTiledThick:
|
||||
case amdgpu::kArrayMode3dTiledThick:
|
||||
case amdgpu::kArrayModeTiledThickPrt:
|
||||
case amdgpu::kArrayMode2dTiledThickPrt:
|
||||
case amdgpu::kArrayMode3dTiledThickPrt:
|
||||
tileThickness = 4;
|
||||
break;
|
||||
case amdgpu::kArrayMode2dTiledXThick:
|
||||
case amdgpu::kArrayMode3dTiledXThick:
|
||||
tileThickness = 8;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
auto bitsPerElement = m_bitsPerFragment;
|
||||
auto paddedWidth = pitch;
|
||||
auto paddedHeight = height;
|
||||
|
||||
if (m_isBlockCompressed) {
|
||||
switch (m_bitsPerFragment) {
|
||||
case 1:
|
||||
bitsPerElement *= 8;
|
||||
paddedWidth = std::max((paddedWidth + 7) / 8, 1);
|
||||
break;
|
||||
case 4:
|
||||
case 8:
|
||||
bitsPerElement *= 16;
|
||||
paddedWidth = std::max((paddedWidth + 3) / 4, 1);
|
||||
paddedHeight = std::max((paddedHeight + 3) / 4, 1);
|
||||
break;
|
||||
case 16:
|
||||
std::abort();
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
auto bankWidthHW = macroTileMode.bankWidth();
|
||||
auto bankHeightHW = macroTileMode.bankHeight();
|
||||
auto macroAspectHW = macroTileMode.macroTileAspect();
|
||||
auto numBanksHW = macroTileMode.numBanks();
|
||||
|
||||
auto bankWidth = 1 << bankWidthHW;
|
||||
auto bankHeight = 1 << bankHeightHW;
|
||||
unsigned numBanks = 2 << numBanksHW;
|
||||
auto macroTileAspect = 1 << macroAspectHW;
|
||||
|
||||
uint32_t tileBytes1x =
|
||||
(tileThickness * bitsPerElement * kMicroTileWidth * kMicroTileHeight +
|
||||
7) /
|
||||
8;
|
||||
|
||||
auto sampleSplitHw = tileMode.sampleSplit();
|
||||
auto tileSplitHw = tileMode.tileSplit();
|
||||
uint32_t sampleSplit = 1 << sampleSplitHw;
|
||||
uint32_t tileSplitC =
|
||||
(tileMode.microTileMode() == amdgpu::kMicroTileModeDepth)
|
||||
? (64 << tileSplitHw)
|
||||
: std::max(256U, tileBytes1x * sampleSplit);
|
||||
|
||||
auto tileSplitBytes = std::min(kDramRowSize, tileSplitC);
|
||||
|
||||
auto numPipes = getPipeCount(tileMode.pipeConfig());
|
||||
auto pipeInterleaveBits = std::countr_zero(kPipeInterleaveBytes);
|
||||
auto pipeInterleaveMask = (1 << pipeInterleaveBits) - 1;
|
||||
auto pipeBits = std::countr_zero(numPipes);
|
||||
auto bankBits = std::countr_zero(numBanks);
|
||||
// auto pipeMask = (numPipes - 1) << pipeInterleaveBits;
|
||||
auto bankSwizzleMask = tileSwizzleMask;
|
||||
auto pipeSwizzleMask = 0;
|
||||
auto macroTileWidth =
|
||||
(kMicroTileWidth * bankWidth * numPipes) * macroTileAspect;
|
||||
auto macroTileHeight =
|
||||
(kMicroTileHeight * bankHeight * numBanks) / macroTileAspect;
|
||||
|
||||
auto microTileMode = tileMode.microTileMode();
|
||||
|
||||
uint64_t elementIndex =
|
||||
getElementIndex(x, y, z, bitsPerElement, microTileMode, arrayMode);
|
||||
|
||||
uint32_t xh = x, yh = y;
|
||||
if (arrayMode == amdgpu::kArrayModeTiledThinPrt ||
|
||||
arrayMode == amdgpu::kArrayModeTiledThickPrt) {
|
||||
xh %= macroTileWidth;
|
||||
yh %= macroTileHeight;
|
||||
}
|
||||
uint64_t pipe = getPipeIndex(xh, yh, tileMode.pipeConfig());
|
||||
uint64_t bank =
|
||||
getBankIndex(xh, yh, bankWidth, bankHeight, numBanks, numPipes);
|
||||
|
||||
uint32_t tileBytes = (kMicroTileWidth * kMicroTileHeight * tileThickness *
|
||||
bitsPerElement * numFragmentsPerPixel +
|
||||
7) /
|
||||
8;
|
||||
|
||||
uint64_t elementOffset = 0;
|
||||
if (microTileMode == amdgpu::kMicroTileModeDepth) {
|
||||
uint64_t pixelOffset = elementIndex * bitsPerElement * numFragmentsPerPixel;
|
||||
elementOffset = pixelOffset + (fragmentIndex * bitsPerElement);
|
||||
} else {
|
||||
uint64_t fragmentOffset =
|
||||
fragmentIndex * (tileBytes / numFragmentsPerPixel) * 8;
|
||||
elementOffset = fragmentOffset + (elementIndex * bitsPerElement);
|
||||
}
|
||||
|
||||
uint64_t slicesPerTile = 1;
|
||||
uint64_t tileSplitSlice = 0;
|
||||
if (tileBytes > tileSplitBytes && tileThickness == 1) {
|
||||
slicesPerTile = tileBytes / tileSplitBytes;
|
||||
tileSplitSlice = elementOffset / (tileSplitBytes * 8);
|
||||
elementOffset %= (tileSplitBytes * 8);
|
||||
tileBytes = tileSplitBytes;
|
||||
}
|
||||
|
||||
uint64_t macroTileBytes = (macroTileWidth / kMicroTileWidth) *
|
||||
(macroTileHeight / kMicroTileHeight) * tileBytes /
|
||||
(numPipes * numBanks);
|
||||
uint64_t macroTilesPerRow = paddedWidth / macroTileWidth;
|
||||
uint64_t macroTileRowIndex = y / macroTileHeight;
|
||||
uint64_t macroTileColumnIndex = x / macroTileWidth;
|
||||
uint64_t macroTileIndex =
|
||||
(macroTileRowIndex * macroTilesPerRow) + macroTileColumnIndex;
|
||||
uint64_t macro_tile_offset = macroTileIndex * macroTileBytes;
|
||||
uint64_t macroTilesPerSlice =
|
||||
macroTilesPerRow * (paddedHeight / macroTileHeight);
|
||||
uint64_t sliceBytes = macroTilesPerSlice * macroTileBytes;
|
||||
|
||||
uint32_t slice = z;
|
||||
uint64_t sliceOffset =
|
||||
(tileSplitSlice + slicesPerTile * slice / tileThickness) * sliceBytes;
|
||||
if (arraySlice != 0) {
|
||||
slice = arraySlice;
|
||||
}
|
||||
|
||||
uint64_t tileRowIndex = (y / kMicroTileHeight) % bankHeight;
|
||||
uint64_t tileColumnIndex = ((x / kMicroTileWidth) / numPipes) % bankWidth;
|
||||
uint64_t tileIndex = (tileRowIndex * bankWidth) + tileColumnIndex;
|
||||
uint64_t tileOffset = tileIndex * tileBytes;
|
||||
|
||||
uint64_t bankSwizzle = bankSwizzleMask;
|
||||
uint64_t pipeSwizzle = pipeSwizzleMask;
|
||||
|
||||
uint64_t pipeSliceRotation = 0;
|
||||
switch (arrayMode) {
|
||||
case amdgpu::kArrayMode3dTiledThin:
|
||||
case amdgpu::kArrayMode3dTiledThick:
|
||||
case amdgpu::kArrayMode3dTiledXThick:
|
||||
pipeSliceRotation =
|
||||
std::max(1UL, (numPipes / 2UL) - 1UL) * (slice / tileThickness);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
pipeSwizzle += pipeSliceRotation;
|
||||
pipeSwizzle &= (numPipes - 1);
|
||||
pipe = pipe ^ pipeSwizzle;
|
||||
|
||||
uint32_t sliceRotation = 0;
|
||||
switch (arrayMode) {
|
||||
case amdgpu::kArrayMode2dTiledThin:
|
||||
case amdgpu::kArrayMode2dTiledThick:
|
||||
case amdgpu::kArrayMode2dTiledXThick:
|
||||
sliceRotation = ((numBanks / 2) - 1) * (slice / tileThickness);
|
||||
break;
|
||||
case amdgpu::kArrayMode3dTiledThin:
|
||||
case amdgpu::kArrayMode3dTiledThick:
|
||||
case amdgpu::kArrayMode3dTiledXThick:
|
||||
sliceRotation = std::max(1UL, (numPipes / 2UL) - 1UL) *
|
||||
(slice / tileThickness) / numPipes;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
uint64_t tileSplitSliceRotation = 0;
|
||||
switch (arrayMode) {
|
||||
case amdgpu::kArrayMode2dTiledThin:
|
||||
case amdgpu::kArrayMode3dTiledThin:
|
||||
case amdgpu::kArrayMode2dTiledThinPrt:
|
||||
case amdgpu::kArrayMode3dTiledThinPrt:
|
||||
tileSplitSliceRotation = ((numBanks / 2) + 1) * tileSplitSlice;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
bank ^= bankSwizzle + sliceRotation;
|
||||
bank ^= tileSplitSliceRotation;
|
||||
bank &= (numBanks - 1);
|
||||
|
||||
uint64_t totalOffset =
|
||||
(sliceOffset + macro_tile_offset + tileOffset) * 8 + elementOffset;
|
||||
uint64_t bitOffset = totalOffset & 0x7;
|
||||
totalOffset /= 8;
|
||||
|
||||
uint64_t pipeInterleaveOffset = totalOffset & pipeInterleaveMask;
|
||||
uint64_t offset = totalOffset >> pipeInterleaveBits;
|
||||
|
||||
uint64_t finalByteOffset =
|
||||
pipeInterleaveOffset | (pipe << (pipeInterleaveBits)) |
|
||||
(bank << (pipeInterleaveBits + pipeBits)) |
|
||||
(offset << (pipeInterleaveBits + pipeBits + bankBits));
|
||||
return (finalByteOffset << 3) | bitOffset;
|
||||
}
|
||||
|
||||
/// Computes the bit offset of texel (x, y, z) inside a tiled surface by
/// dispatching to the layout-specific helper for the surface's array mode.
std::uint64_t amdgpu::getTiledOffset(gnm::TextureType texType,
                                     bool isPow2Padded, int numFragments,
                                     gnm::DataFormat dfmt,
                                     amdgpu::TileMode tileMode,
                                     amdgpu::MacroTileMode macroTileMode,
                                     int mipLevel, int arraySlice, int width,
                                     int height, int depth, int pitch, int x,
                                     int y, int z, int fragmentIndex) {
  auto arrayMode = tileMode.arrayMode();

  switch (arrayMode) {
  // Linear layouts need no macro/micro tile math.
  case amdgpu::kArrayModeLinearGeneral:
  case amdgpu::kArrayModeLinearAligned:
    return getTiledOffsetLinear(dfmt, height, pitch, x, y, z);

  // 1D micro-tiled layouts (no bank/pipe swizzling).
  case amdgpu::kArrayMode1dTiledThin:
  case amdgpu::kArrayMode1dTiledThick:
    return getTiledOffset1D(texType, isPow2Padded, dfmt, tileMode, mipLevel,
                            arraySlice, numFragments, width, height, depth,
                            pitch, x, y, z);

  // 2D/3D macro-tiled layouts, including all PRT variants.
  case amdgpu::kArrayMode2dTiledThin:
  case amdgpu::kArrayMode2dTiledThick:
  case amdgpu::kArrayMode2dTiledXThick:
  case amdgpu::kArrayMode3dTiledThin:
  case amdgpu::kArrayMode3dTiledThick:
  case amdgpu::kArrayMode3dTiledXThick:
  case amdgpu::kArrayModeTiledThinPrt:
  case amdgpu::kArrayModeTiledThickPrt:
  case amdgpu::kArrayMode2dTiledThinPrt:
  case amdgpu::kArrayMode2dTiledThickPrt:
  case amdgpu::kArrayMode3dTiledThinPrt:
  case amdgpu::kArrayMode3dTiledThickPrt:
    return getTiledOffset2D(texType, isPow2Padded, dfmt, tileMode,
                            macroTileMode, mipLevel, arraySlice, numFragments,
                            width, height, depth, pitch, x, y, z,
                            fragmentIndex);

  default:
    // Unknown array mode: unreachable for well-formed tile modes.
    std::abort();
  }
}
|
||||
354
rpcsx-gpu2/lib/amdgpu-tiler/src/tiler_vulkan.cpp
Normal file
354
rpcsx-gpu2/lib/amdgpu-tiler/src/tiler_vulkan.cpp
Normal file
|
|
@ -0,0 +1,354 @@
|
|||
#include "amdgpu/tiler_vulkan.hpp"
|
||||
#include "Scheduler.hpp"
|
||||
#include "amdgpu/tiler.hpp"
|
||||
#include <bit>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <vk.hpp>
|
||||
|
||||
#include <shaders/detiler1d.comp.h>
|
||||
#include <shaders/detiler2d.comp.h>
|
||||
#include <shaders/detilerLinear.comp.h>
|
||||
#include <shaders/tiler1d.comp.h>
|
||||
#include <shaders/tiler2d.comp.h>
|
||||
#include <shaders/tilerLinear.comp.h>
|
||||
|
||||
struct TilerDecriptorSetLayout {
|
||||
VkDescriptorSetLayout layout;
|
||||
|
||||
TilerDecriptorSetLayout() {
|
||||
std::vector<VkDescriptorSetLayoutBinding> bindings{{
|
||||
.binding = 0,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
}};
|
||||
|
||||
VkDescriptorSetLayoutCreateInfo layoutInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||
.bindingCount = static_cast<uint32_t>(bindings.size()),
|
||||
.pBindings = bindings.data(),
|
||||
};
|
||||
|
||||
VK_VERIFY(vkCreateDescriptorSetLayout(vk::context->device, &layoutInfo,
|
||||
nullptr, &layout));
|
||||
}
|
||||
|
||||
~TilerDecriptorSetLayout() {
|
||||
vkDestroyDescriptorSetLayout(vk::context->device, layout,
|
||||
vk::context->allocator);
|
||||
}
|
||||
};
|
||||
|
||||
struct TilerShader {
|
||||
VkShaderEXT shader;
|
||||
|
||||
TilerShader(TilerDecriptorSetLayout &setLayout,
|
||||
std::span<const std::uint32_t> spirv) {
|
||||
|
||||
VkShaderCreateInfoEXT shaderInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT,
|
||||
.flags = 0,
|
||||
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.nextStage = 0,
|
||||
.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT,
|
||||
.codeSize = spirv.size_bytes(),
|
||||
.pCode = spirv.data(),
|
||||
.pName = "main",
|
||||
.setLayoutCount = 1,
|
||||
.pSetLayouts = &setLayout.layout,
|
||||
.pushConstantRangeCount = 0,
|
||||
.pPushConstantRanges = 0,
|
||||
.pSpecializationInfo = 0,
|
||||
};
|
||||
|
||||
VK_VERIFY(vk::CreateShadersEXT(vk::context->device, 1, &shaderInfo, nullptr,
|
||||
&shader));
|
||||
}
|
||||
|
||||
~TilerShader() {
|
||||
vk::DestroyShaderEXT(vk::context->device, shader, vk::context->allocator);
|
||||
}
|
||||
};
|
||||
|
||||
// Backing state for GpuTiler: the shared descriptor set layout, a small pool
// of descriptor sets (one per in-flight tiling operation), a host-visible
// buffer of per-operation Config entries, and the six (de)tiler shaders.
struct amdgpu::GpuTiler::Impl {
  TilerDecriptorSetLayout descriptorSetLayout;
  std::mutex descriptorMtx; // guards inUseDescriptorSets
  VkDescriptorSet descriptorSets[4]{};
  VkDescriptorPool descriptorPool;
  std::uint32_t inUseDescriptorSets = 0; // bit i set -> descriptorSets[i] busy

  vk::Buffer configData; // host-visible array of Config, indexed by slot
  TilerShader detilerLinear{descriptorSetLayout, spirv_detilerLinear_comp};
  TilerShader detiler1d{descriptorSetLayout, spirv_detiler1d_comp};
  // FIX: was built from spirv_detilerLinear_comp (wrong SPIR-V blob).
  TilerShader detiler2d{descriptorSetLayout, spirv_detiler2d_comp};
  // FIX: was built from spirv_tiler2d_comp, yet tile() binds tilerLinear for
  // linear array modes — the linear tiling path ran the 2d shader.
  TilerShader tilerLinear{descriptorSetLayout, spirv_tilerLinear_comp};
  TilerShader tiler1d{descriptorSetLayout, spirv_tiler1d_comp};
  TilerShader tiler2d{descriptorSetLayout, spirv_tiler2d_comp};
  VkPipelineLayout pipelineLayout;

  // Layout of the uniform buffer consumed by the tiler shaders. Must match
  // the GLSL-side declaration.
  struct Config {
    uint64_t srcAddress;
    uint64_t dstAddress;
    uint32_t dataWidth;
    uint32_t dataHeight;
    uint32_t tileMode;
    uint32_t numFragments;
    uint32_t bitsPerElement;
    uint32_t tiledSurfaceSize;  // per-slice stride; 0 for non-array dispatch
    uint32_t linearSurfaceSize; // per-slice stride; 0 for non-array dispatch
  };

  Impl() {
    // Buffer holds far more Config entries than descriptor slots; offsets
    // into it are always slot * sizeof(Config).
    std::size_t count = 256;

    configData = vk::Buffer::Allocate(
        vk::getHostVisibleMemory(), sizeof(Config) * count,
        VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
            VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);

    VkPipelineLayoutCreateInfo piplineLayoutInfo{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        .setLayoutCount = 1,
        .pSetLayouts = &descriptorSetLayout.layout,
    };

    // FIX: create with the same allocator the destructor passes to
    // vkDestroyPipelineLayout (was nullptr).
    VK_VERIFY(vkCreatePipelineLayout(vk::context->device, &piplineLayoutInfo,
                                     vk::context->allocator, &pipelineLayout));

    {
      VkDescriptorPoolSize poolSizes[]{{
          .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
          .descriptorCount = 1,
      }};

      VkDescriptorPoolCreateInfo info{
          .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
          .maxSets = static_cast<std::uint32_t>(std::size(descriptorSets)) * 4,
          .poolSizeCount = static_cast<uint32_t>(std::size(poolSizes)),
          .pPoolSizes = poolSizes,
      };

      VK_VERIFY(vkCreateDescriptorPool(
          vk::context->device, &info, vk::context->allocator, &descriptorPool));
    }

    // Allocate one descriptor set per slot up front; they are recycled via
    // allocate/releaseDescriptorSlot.
    VkDescriptorSetAllocateInfo info{
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
        .descriptorPool = descriptorPool,
        .descriptorSetCount = 1,
        .pSetLayouts = &descriptorSetLayout.layout,
    };
    for (std::size_t i = 0; i < std::size(descriptorSets); ++i) {
      VK_VERIFY(vkAllocateDescriptorSets(vk::context->device, &info,
                                         descriptorSets + i));
    }
  }

  ~Impl() {
    vkDestroyDescriptorPool(vk::context->device, descriptorPool,
                            vk::context->allocator);
    vkDestroyPipelineLayout(vk::context->device, pipelineLayout,
                            vk::context->allocator);
  }

  // Returns the index of a free descriptor slot and marks it busy; dies when
  // every slot is in flight.
  std::uint32_t allocateDescriptorSlot() {
    std::lock_guard lock(descriptorMtx);

    // FIX: the original used std::countl_one, which counts *leading* one
    // bits; for masks filled from the LSB it always returned 0, so the same
    // slot (and descriptor set / Config entry) was handed out repeatedly.
    // countr_one finds the first clear bit from the LSB.
    auto result = std::countr_one(inUseDescriptorSets);
    rx::dieIf(static_cast<std::size_t>(result) >= std::size(descriptorSets),
              "out of tiler descriptor sets");
    inUseDescriptorSets |= (1u << result);

    return result;
  }

  // Marks `slot` free again; called after the scheduler submits the work
  // that used it.
  void releaseDescriptorSlot(std::uint32_t slot) {
    std::lock_guard lock(descriptorMtx);
    inUseDescriptorSets &= ~(1u << slot);
  }
};
|
||||
|
||||
// Construct the pimpl eagerly; the destructor must be defined out-of-line
// (here) so ~unique_ptr<Impl> sees the complete Impl type.
amdgpu::GpuTiler::GpuTiler() : mImpl(std::make_unique<Impl>()) {}
amdgpu::GpuTiler::~GpuTiler() = default;
|
||||
|
||||
/// Records a detiling dispatch into the scheduler's command buffer: converts
/// mip level `mipLevel` of the tiled surface at `srcTiledAddress` into linear
/// layout at `dstLinearAddress`, starting at array slice `baseArray`.
/// When arrayCount > 1 the dispatch Z dimension walks array slices and the
/// per-slice strides are forwarded to the shader; otherwise Z is the mip's
/// depth. The borrowed descriptor slot is released after submission.
void amdgpu::GpuTiler::detile(Scheduler &scheduler,
                              const amdgpu::SurfaceInfo &info,
                              amdgpu::TileMode tileMode,
                              std::uint64_t srcTiledAddress,
                              std::uint64_t dstLinearAddress, int mipLevel,
                              int baseArray, int arrayCount) {
  auto commandBuffer = scheduler.getCommandBuffer();
  auto slot = mImpl->allocateDescriptorSlot();

  // Each slot owns a disjoint Config entry inside the shared host-visible
  // buffer; write it directly through the mapped pointer.
  auto configOffset = slot * sizeof(Impl::Config);
  auto config = reinterpret_cast<Impl::Config *>(mImpl->configData.getData() +
                                                 configOffset);

  auto &subresource = info.getSubresourceInfo(mipLevel);
  // Tiled source: skip to the mip's offset, then to the base array slice.
  config->srcAddress = srcTiledAddress + subresource.offset +
                       (subresource.tiledSize * baseArray);
  config->dstAddress = dstLinearAddress + (subresource.linearSize * baseArray);
  config->dataWidth = subresource.dataWidth;
  config->dataHeight = subresource.dataHeight;
  config->tileMode = tileMode.raw;
  config->numFragments = info.numFragments;
  config->bitsPerElement = info.bitsPerElement;
  uint32_t groupCountZ = subresource.dataDepth;

  if (arrayCount > 1) {
    // Array detiling: Z walks slices; the shader advances addresses by
    // these per-slice strides.
    config->tiledSurfaceSize = subresource.tiledSize;
    config->linearSurfaceSize = subresource.linearSize;
    groupCountZ = arrayCount;
  } else {
    // Zero strides signal a single-surface dispatch to the shader.
    config->tiledSurfaceSize = 0;
    config->linearSurfaceSize = 0;
  }

  VkShaderStageFlagBits stages[]{VK_SHADER_STAGE_COMPUTE_BIT};

  // Bind the detiler variant matching the surface's array mode.
  switch (tileMode.arrayMode()) {
  case amdgpu::kArrayModeLinearGeneral:
  case amdgpu::kArrayModeLinearAligned:
    vk::CmdBindShadersEXT(commandBuffer, 1, stages,
                          &mImpl->detilerLinear.shader);
    break;

  case amdgpu::kArrayMode1dTiledThin:
  case amdgpu::kArrayMode1dTiledThick:
    vk::CmdBindShadersEXT(commandBuffer, 1, stages, &mImpl->detiler1d.shader);
    break;

  case amdgpu::kArrayMode2dTiledThin:
  case amdgpu::kArrayModeTiledThinPrt:
  case amdgpu::kArrayMode2dTiledThinPrt:
  case amdgpu::kArrayMode2dTiledThick:
  case amdgpu::kArrayMode2dTiledXThick:
  case amdgpu::kArrayModeTiledThickPrt:
  case amdgpu::kArrayMode2dTiledThickPrt:
  case amdgpu::kArrayMode3dTiledThinPrt:
  case amdgpu::kArrayMode3dTiledThin:
  case amdgpu::kArrayMode3dTiledThick:
  case amdgpu::kArrayMode3dTiledXThick:
  case amdgpu::kArrayMode3dTiledThickPrt:
    // 2D/3D macro-tiled detiling is not implemented yet: the abort fires
    // before the bind below (kept as a placeholder for the real path).
    std::abort();
    vk::CmdBindShadersEXT(commandBuffer, 1, stages, &mImpl->detiler2d.shader);
    break;
  }

  // Point the slot's descriptor set at this operation's Config entry.
  VkDescriptorBufferInfo bufferInfo{
      .buffer = mImpl->configData.getHandle(),
      .offset = configOffset,
      .range = sizeof(Impl::Config),
  };

  VkWriteDescriptorSet writeDescSet{
      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
      .dstSet = mImpl->descriptorSets[slot],
      .dstBinding = 0,
      .descriptorCount = 1,
      .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
      .pBufferInfo = &bufferInfo,
  };

  vkUpdateDescriptorSets(vk::context->device, 1, &writeDescSet, 0, nullptr);

  vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                          mImpl->pipelineLayout, 0, 1,
                          &mImpl->descriptorSets[slot], 0, nullptr);

  // Dispatched as dataWidth x dataHeight x groupCountZ workgroups; the
  // shader derives all addresses from the Config UBO.
  vkCmdDispatch(commandBuffer, subresource.dataWidth, subresource.dataHeight,
                groupCountZ);

  // Return the slot only after submission: the descriptor set and Config
  // entry must stay untouched until the GPU consumes them.
  scheduler.afterSubmit([this, slot] { mImpl->releaseDescriptorSlot(slot); });
}
|
||||
|
||||
/// Records a tiling dispatch: converts mip level `mipLevel` from linear
/// layout at `srcLinearAddress` into the tiled surface at `dstTiledAddress`.
/// Mirrors detile() with source/destination swapped; the borrowed descriptor
/// slot is released after submission.
/// NOTE(review): address math is asymmetric with detile() — there the tiled
/// side gets subresource.offset + tiledSize * baseArray, while here the
/// *linear* source gets offset + linearSize * baseArray and the tiled
/// destination gets no per-subresource adjustment. Confirm callers pass a
/// pre-offset dstTiledAddress.
void amdgpu::GpuTiler::tile(Scheduler &scheduler,
                            const amdgpu::SurfaceInfo &info,
                            amdgpu::TileMode tileMode,
                            std::uint64_t srcLinearAddress,
                            std::uint64_t dstTiledAddress, int mipLevel,
                            int baseArray, int arrayCount) {
  auto commandBuffer = scheduler.getCommandBuffer();
  auto slot = mImpl->allocateDescriptorSlot();

  // Each slot owns a disjoint Config entry inside the shared host-visible
  // buffer; write it directly through the mapped pointer.
  auto configOffset = slot * sizeof(Impl::Config);
  auto config = reinterpret_cast<Impl::Config *>(mImpl->configData.getData() +
                                                 configOffset);

  auto &subresource = info.getSubresourceInfo(mipLevel);
  config->srcAddress = srcLinearAddress + subresource.offset +
                       subresource.linearSize * baseArray;
  config->dstAddress = dstTiledAddress;
  config->dataWidth = subresource.dataWidth;
  config->dataHeight = subresource.dataHeight;
  config->tileMode = tileMode.raw;
  config->numFragments = info.numFragments;
  config->bitsPerElement = info.bitsPerElement;
  uint32_t groupCountZ = subresource.dataDepth;

  if (arrayCount > 1) {
    // Array tiling: Z walks slices; the shader advances addresses by these
    // per-slice strides.
    config->tiledSurfaceSize = subresource.tiledSize;
    config->linearSurfaceSize = subresource.linearSize;
    groupCountZ = arrayCount;
  } else {
    // Zero strides signal a single-surface dispatch to the shader.
    config->tiledSurfaceSize = 0;
    config->linearSurfaceSize = 0;
  }

  VkShaderStageFlagBits stages[]{VK_SHADER_STAGE_COMPUTE_BIT};

  // Bind the tiler variant matching the surface's array mode.
  switch (tileMode.arrayMode()) {
  case amdgpu::kArrayModeLinearGeneral:
  case amdgpu::kArrayModeLinearAligned:
    vk::CmdBindShadersEXT(commandBuffer, 1, stages, &mImpl->tilerLinear.shader);
    break;

  case amdgpu::kArrayMode1dTiledThin:
  case amdgpu::kArrayMode1dTiledThick:
    vk::CmdBindShadersEXT(commandBuffer, 1, stages, &mImpl->tiler1d.shader);
    break;

  case amdgpu::kArrayMode2dTiledThin:
  case amdgpu::kArrayModeTiledThinPrt:
  case amdgpu::kArrayMode2dTiledThinPrt:
  case amdgpu::kArrayMode2dTiledThick:
  case amdgpu::kArrayMode2dTiledXThick:
  case amdgpu::kArrayModeTiledThickPrt:
  case amdgpu::kArrayMode2dTiledThickPrt:
  case amdgpu::kArrayMode3dTiledThinPrt:
  case amdgpu::kArrayMode3dTiledThin:
  case amdgpu::kArrayMode3dTiledThick:
  case amdgpu::kArrayMode3dTiledXThick:
  case amdgpu::kArrayMode3dTiledThickPrt:
    // 2D/3D macro-tiled tiling is not implemented yet: the abort fires
    // before the bind below (kept as a placeholder for the real path).
    std::abort();
    vk::CmdBindShadersEXT(commandBuffer, 1, stages, &mImpl->tiler2d.shader);
    break;
  }

  // Point the slot's descriptor set at this operation's Config entry.
  VkDescriptorBufferInfo bufferInfo{
      .buffer = mImpl->configData.getHandle(),
      .offset = configOffset,
      .range = sizeof(Impl::Config),
  };

  VkWriteDescriptorSet writeDescSet{
      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
      .dstSet = mImpl->descriptorSets[slot],
      .dstBinding = 0,
      .descriptorCount = 1,
      .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
      .pBufferInfo = &bufferInfo,
  };

  vkUpdateDescriptorSets(vk::context->device, 1, &writeDescSet, 0, nullptr);

  vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                          mImpl->pipelineLayout, 0, 1,
                          &mImpl->descriptorSets[slot], 0, nullptr);

  // Dispatched as dataWidth x dataHeight x groupCountZ workgroups; the
  // shader derives all addresses from the Config UBO.
  vkCmdDispatch(commandBuffer, subresource.dataWidth, subresource.dataHeight,
                groupCountZ);

  // Return the slot only after submission: the descriptor set and Config
  // entry must stay untouched until the GPU consumes them.
  scheduler.afterSubmit([this, slot] { mImpl->releaseDescriptorSlot(slot); });
}
|
||||
48
rpcsx-gpu2/lib/gcn-shader/CMakeLists.txt
Normal file
48
rpcsx-gpu2/lib/gcn-shader/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
# Build setup for the gcn-shader static library (GCN -> SPIR-V translator).

file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/dialect/)

# Generate the SPIR-V dialect header with the spv-gen tool, run from the
# SPIRV-Headers grammar directory so it can read the unified1 JSON files.
add_custom_command(
  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include/dialect/spv.hpp
  COMMAND $<TARGET_FILE:spv-gen> ${CMAKE_CURRENT_BINARY_DIR}/include/dialect/spv.hpp
  DEPENDS spv-gen
  WORKING_DIRECTORY $<TARGET_PROPERTY:SPIRV-Headers,INTERFACE_INCLUDE_DIRECTORIES>/spirv/unified1
  COMMENT "Generating ${CMAKE_CURRENT_BINARY_DIR}/include/dialect/spv.hpp..."
)

# Expose the generated header through an interface target so dependents get
# both the include path and a dependency on the generation step.
add_custom_target(shader-spv-dialect-gen DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/include/dialect/spv.hpp)
add_library(shader-spv-dialect INTERFACE)
add_dependencies(shader-spv-dialect shader-spv-dialect-gen)
target_include_directories(shader-spv-dialect INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/include/)

add_library(gcn-shader STATIC
  src/analyze.cpp
  src/eval.cpp
  src/Evaluator.cpp
  src/gcn.cpp
  src/GcnConverter.cpp
  src/GcnInstruction.cpp
  src/glsl.cpp
  src/ModuleInfo.cpp
  src/opt.cpp
  src/SemanticModuleInfo.cpp
  src/spv.cpp
  src/SpvConverter.cpp
  src/SpvTypeInfo.cpp
  src/transform.cpp
)

# Consumers include <shader/...>; the library's own sources include headers
# without the prefix, hence the extra PRIVATE include dir.
target_include_directories(gcn-shader PUBLIC include PRIVATE include/shader)

# SPIR-V tooling stays PRIVATE: only the generated dialect and rx leak into
# the public interface.
target_link_libraries(gcn-shader
  PUBLIC
    shader-spv-dialect
    rx

  PRIVATE
    glslang::glslang
    glslang::SPIRV
    SPIRV-Tools
    SPIRV-Tools-opt
    spirv-cross-c-shared
)

add_subdirectory(shaders)
|
||||
26
rpcsx-gpu2/lib/gcn-shader/include/shader/Access.hpp
Normal file
26
rpcsx-gpu2/lib/gcn-shader/include/shader/Access.hpp
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
#pragma once
|
||||
|
||||
namespace shader {
/// Bit flags describing how a shader resource is accessed.
enum class Access {
  None = 0,
  Read = 1 << 0,
  Write = 1 << 1,
  ReadWrite = Read | Write
};

// Flag-style bitwise operators for Access.
constexpr Access operator|(Access a, Access b) {
  return static_cast<Access>(static_cast<int>(a) | static_cast<int>(b));
}
constexpr Access operator&(Access a, Access b) {
  return static_cast<Access>(static_cast<int>(a) & static_cast<int>(b));
}
constexpr Access operator~(Access a) {
  return static_cast<Access>(~static_cast<int>(a));
}
constexpr Access &operator|=(Access &a, Access b) {
  a = a | b;
  return a;
}
constexpr Access &operator&=(Access &a, Access b) {
  a = a & b;
  return a;
}
} // namespace shader
|
||||
20
rpcsx-gpu2/lib/gcn-shader/include/shader/Evaluator.hpp
Normal file
20
rpcsx-gpu2/lib/gcn-shader/include/shader/Evaluator.hpp
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
#pragma once
|
||||
#include "eval.hpp"
|
||||
#include <map>
|
||||
|
||||
namespace shader::eval {
// Memoizing evaluator over IR values: results of eval() are cached per node
// in `values` and can be invalidated or pinned individually. Subclasses
// override the virtual eval() overloads to supply instruction semantics.
class Evaluator {
  std::map<ir::Value, Value> values; // cache: IR node -> evaluated value

public:
  virtual ~Evaluator() = default;

  // Drops the cached result for `node`, forcing re-evaluation next time.
  void invalidate(ir::Value node) { values.erase(node); }
  // Pins `value` as the result for `node`, overwriting any cached result.
  void setValue(ir::Value node, Value value) { values[node] = value; }

  // Evaluates an operand, optionally coercing to `type` (nullptr = as-is —
  // presumably; semantics live in the out-of-line definition).
  Value eval(const ir::Operand &op, ir::Value type = nullptr);
  virtual Value eval(ir::Value op);
  virtual Value eval(ir::InstructionId instId,
                     std::span<const ir::Operand> operands);
};
} // namespace shader::eval
|
||||
131
rpcsx-gpu2/lib/gcn-shader/include/shader/GcnConverter.hpp
Normal file
131
rpcsx-gpu2/lib/gcn-shader/include/shader/GcnConverter.hpp
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
#pragma once
|
||||
|
||||
#include "gcn.hpp"
|
||||
#include "rx/MemoryTable.hpp"
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
namespace shader::gcn {
|
||||
// VGPR inputs a pixel shader can receive from the hardware, in the order the
// SPI provides them. NOTE(review): names mirror GCN SPI PS input fields
// (barycentric I/J pairs per interpolation mode, position X/Y/Z/W, facing and
// coverage) — confirm the exact ordering against the register reference
// before relying on numeric values.
enum class PsVGprInput {
  // Perspective-correct barycentrics (sample / center / centroid).
  IPerspSample,
  JPerspSample,
  IPerspCenter,
  JPerspCenter,
  IPerspCentroid,
  JPerspCentroid,
  IW,
  JW,
  _1W, // 1/W
  // Linear (non-perspective) barycentrics.
  ILinearSample,
  JLinearSample,
  ILinearCenter,
  JLinearCenter,
  ILinearCentroid,
  JLinearCentroid,
  // Fragment position.
  X,
  Y,
  Z,
  W,
  FrontFace,
  Ancillary,
  SampleCoverage,
  PosFixed,

  Count // number of entries; not a real input
};
|
||||
// Kind of runtime value a shader config slot resolves to; the host fills the
// slot accordingly before dispatch (see ConfigSlot / ShaderInfo below).
enum class ConfigType {
  Imm,          // immediate constant read from memory at `data`
  UserSgpr,     // value of a user SGPR
  ResourceSlot, // index of a bound resource slot
  MemoryTable,
  Gds,
  PsInputVGpr,  // one of the PsVGprInput values
  VsPrimType,
  CbCompSwap,
  // Viewport transform parameters.
  ViewPortOffsetX,
  ViewPortOffsetY,
  ViewPortOffsetZ,
  ViewPortScaleX,
  ViewPortScaleY,
  ViewPortScaleZ,
};
|
||||
|
||||
// One entry of a shader's config table: the kind of value plus a
// type-specific payload. Slots are deduplicated by (type, data) — see
// ShaderInfo::create.
struct ConfigSlot {
  ConfigType type;
  std::uint64_t data; // interpretation depends on `type`
};
|
||||
|
||||
// Resources referenced by a converted shader, grouped by kind. Each entry
// records the config/resource slot it was assigned plus the IR values that
// produced the descriptor words.
struct Resources {
  struct Resource {
    std::uint32_t resourceSlot; // slot index assigned during conversion
  };

  // Raw memory reference: base/offset IR values plus an access size.
  struct Pointer : Resource {
    std::uint32_t size; // pointee size — units (bytes?) not shown here
    ir::Value base;
    ir::Value offset;
  };

  // T# image descriptor (8 dwords as IR values).
  struct Texture : Resource {
    Access access;
    ir::Value words[8];
  };

  // V# buffer descriptor (4 dwords as IR values).
  struct Buffer : Resource {
    Access access;
    ir::Value words[4];
  };

  // S# sampler descriptor (4 dwords as IR values).
  struct Sampler : Resource {
    bool unorm;
    ir::Value words[4];
  };

  spv::Context context;
  bool hasUnknown = false;    // set when a resource could not be classified
  std::uint32_t slots = 0;    // total number of slots handed out
  std::vector<Pointer> pointers;
  std::vector<Texture> textures;
  std::vector<Buffer> buffers;
  std::vector<Sampler> samplers;

  // Debug helpers: dump the resource table (names resolved through `ns`).
  void print(std::ostream &os, ir::NameStorage &ns) const;
  void dump();
};
|
||||
|
||||
struct ShaderInfo {
|
||||
std::vector<ConfigSlot> configSlots;
|
||||
rx::MemoryAreaTable<> memoryMap;
|
||||
std::vector<std::pair<int, std::uint32_t>> requiredSgprs;
|
||||
Resources resources;
|
||||
|
||||
std::uint32_t create(ConfigType type, std::uint64_t data) {
|
||||
for (std::size_t slotIndex = 0; auto &slotInfo : configSlots) {
|
||||
if (slotInfo.type == type && slotInfo.data == data) {
|
||||
return slotIndex;
|
||||
}
|
||||
|
||||
slotIndex++;
|
||||
}
|
||||
|
||||
configSlots.push_back({
|
||||
.type = type,
|
||||
.data = data,
|
||||
});
|
||||
|
||||
return configSlots.size() - 1;
|
||||
}
|
||||
};
|
||||
|
||||
// Result of GCN -> SPIR-V conversion: the SPIR-V word stream plus the
// metadata needed to bind resources and fill config slots at dispatch time.
struct ConvertedShader {
  std::vector<std::uint32_t> spv;
  ShaderInfo info;
};
|
||||
|
||||
// Converts a GCN shader body (`body`) of the given pipeline `stage` to
// SPIR-V, resolving builtins through `semanticModule` and environment state
// through `state`. Returns std::nullopt on conversion failure.
std::optional<ConvertedShader>
convertToSpv(Context &context, ir::Region body,
             const SemanticModuleInfo &semanticModule, Stage stage,
             const Environment &state);
|
||||
|
||||
} // namespace shader::gcn
|
||||
256
rpcsx-gpu2/lib/gcn-shader/include/shader/GcnInstruction.hpp
Normal file
256
rpcsx-gpu2/lib/gcn-shader/include/shader/GcnInstruction.hpp
Normal file
|
|
@ -0,0 +1,256 @@
|
|||
#pragma once
|
||||
|
||||
#include "dialect.hpp"
|
||||
#include "ir/Kind.hpp"
|
||||
|
||||
#include <functional>
|
||||
#include <ostream>
|
||||
#include <span>
|
||||
#include <type_traits>
|
||||
|
||||
namespace shader {
|
||||
struct GcnOperand {
|
||||
enum class Kind : std::uint8_t {
|
||||
Invalid,
|
||||
Constant,
|
||||
Immediate,
|
||||
VccLo,
|
||||
VccHi,
|
||||
M0,
|
||||
ExecLo,
|
||||
ExecHi,
|
||||
Scc,
|
||||
VccZ,
|
||||
ExecZ,
|
||||
LdsDirect,
|
||||
Vgpr,
|
||||
Sgpr,
|
||||
Attr,
|
||||
Buffer,
|
||||
Texture128,
|
||||
Texture256,
|
||||
Sampler,
|
||||
Pointer,
|
||||
};
|
||||
|
||||
static constexpr auto R = 1 << 0;
|
||||
static constexpr auto W = 1 << 1;
|
||||
|
||||
union {
|
||||
std::uint32_t value;
|
||||
std::uint64_t address = 0;
|
||||
|
||||
struct {
|
||||
std::uint16_t attrId;
|
||||
std::uint16_t attrChannel;
|
||||
};
|
||||
|
||||
struct {
|
||||
Kind firstRegisterKind;
|
||||
union {
|
||||
struct {
|
||||
Kind pointerOffsetKind;
|
||||
std::uint16_t pointeeSize;
|
||||
};
|
||||
bool samplerUnorm;
|
||||
};
|
||||
std::uint32_t firstRegisterIndex;
|
||||
|
||||
union {
|
||||
std::uint32_t pointerOffsetValue;
|
||||
std::uint64_t pointerOffsetAddress;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
Kind kind = Kind::Invalid;
|
||||
std::uint8_t access = 0;
|
||||
std::uint8_t omod : 4 = 0;
|
||||
bool abs : 1 = false;
|
||||
bool clamp : 1 = false;
|
||||
bool neg : 1 = false;
|
||||
|
||||
constexpr GcnOperand getUnderlyingOperand(int offset = 0) const {
|
||||
return {
|
||||
.value = firstRegisterIndex + offset,
|
||||
.kind = firstRegisterKind,
|
||||
};
|
||||
}
|
||||
|
||||
constexpr GcnOperand getPointerOffsetOperand() const {
|
||||
return {
|
||||
.address = pointerOffsetAddress,
|
||||
.kind = pointerOffsetKind,
|
||||
};
|
||||
}
|
||||
|
||||
static constexpr GcnOperand createImmediateConstant(std::uint64_t address) {
|
||||
return GcnOperand{
|
||||
.address = address,
|
||||
.kind = Kind::Immediate,
|
||||
.access = R,
|
||||
};
|
||||
}
|
||||
|
||||
static constexpr GcnOperand createConstant(std::uint32_t value) {
|
||||
return GcnOperand{
|
||||
.value = value,
|
||||
.kind = Kind::Constant,
|
||||
.access = R,
|
||||
};
|
||||
}
|
||||
|
||||
// --- GcnOperand members (enclosing struct GcnOperand opens earlier) ---

// Wraps a bool as an inline 32-bit constant operand (1 for true, 0 for false).
static constexpr GcnOperand createConstant(bool value) {
  return createConstant(std::uint32_t(value ? 1 : 0));
}

// Wraps a float as an inline constant operand, preserving the exact bit
// pattern via bit_cast (no numeric conversion).
static constexpr GcnOperand createConstant(float value) {
  return createConstant(std::bit_cast<std::uint32_t>(value));
}

// Operand referring to vector general-purpose register v[index].
static constexpr GcnOperand createVgpr(std::uint32_t index) {
  return {
      .value = index,
      .kind = Kind::Vgpr,
  };
}

// Operand referring to scalar general-purpose register s[index].
static constexpr GcnOperand createSgpr(std::uint32_t index) {
  return {
      .value = index,
      .kind = Kind::Sgpr,
  };
}

// Sampler descriptor operand starting at firstReg.
// NOTE(review): firstReg.value is narrowed to uint8_t here, so register
// indices above 255 would be truncated silently — presumably never happens
// for GCN sampler SGPRs; confirm against the decoder.
static constexpr GcnOperand createSampler(GcnOperand firstReg, bool unorm) {
  return {
      .firstRegisterKind = firstReg.kind,
      .samplerUnorm = unorm,
      .firstRegisterIndex = static_cast<std::uint8_t>(firstReg.value),
      .kind = Kind::Sampler,
  };
}

// Texture descriptor operand; is128 selects a 128-bit resource descriptor,
// otherwise a 256-bit one.
static constexpr GcnOperand createTexture(GcnOperand firstReg, bool is128) {
  return {
      .firstRegisterKind = firstReg.kind,
      .firstRegisterIndex = static_cast<std::uint8_t>(firstReg.value),
      .kind = (is128 ? Kind::Texture128 : Kind::Texture256),
  };
}

// Buffer resource descriptor operand starting at firstReg.
static constexpr GcnOperand createBuffer(GcnOperand firstReg) {
  return {
      .firstRegisterKind = firstReg.kind,
      .firstRegisterIndex = static_cast<std::uint8_t>(firstReg.value),
      .kind = Kind::Buffer,
  };
}

// Pointer operand: base register pair plus an offset operand and the size of
// the pointee in bytes. The offset's kind and address are captured; other
// fields of `offset` are ignored.
static constexpr GcnOperand
createPointer(GcnOperand firstReg, std::uint16_t size, GcnOperand offset) {
  return {
      .firstRegisterKind = firstReg.kind,
      .pointerOffsetKind = offset.kind,
      .pointeeSize = size,
      .firstRegisterIndex = static_cast<std::uint8_t>(firstReg.value),
      .pointerOffsetAddress = offset.address,
      .kind = Kind::Pointer,
  };
}

// Interpolated attribute operand (attribute id + component channel).
static constexpr GcnOperand createAttr(std::uint16_t id,
                                       std::uint16_t channel) {
  return {
      .attrId = id,
      .attrChannel = channel,
      .kind = Kind::Attr,
  };
}

// Convenience copies of this operand with the access mask replaced.
constexpr GcnOperand withRW() const { return withAccess(R | W); }
constexpr GcnOperand withR() const { return withAccess(R); }
constexpr GcnOperand withW() const { return withAccess(W); }

// Returns a copy with the given read/write access mask.
constexpr GcnOperand withAccess(std::uint8_t access) const {
  GcnOperand result = *this;
  result.access = access;
  return result;
}

// Returns a copy with the source-negation modifier set.
constexpr GcnOperand withNeg(bool value) const {
  GcnOperand result = *this;
  result.neg = value;
  return result;
}

// Returns a copy with the absolute-value modifier set.
constexpr GcnOperand withAbs(bool value) const {
  GcnOperand result = *this;
  result.abs = value;
  return result;
}

// Returns a copy with the output-clamp modifier set.
constexpr GcnOperand withClamp(bool value) const {
  GcnOperand result = *this;
  result.clamp = value;
  return result;
}

// Returns a copy with the output modifier (omod) field set.
constexpr GcnOperand withOutputModifier(std::uint8_t value) const {
  GcnOperand result = *this;
  result.omod = value;
  return result;
}

// Factories for the special hardware registers / status bits.
static constexpr GcnOperand createVccLo() { return {.kind = Kind::VccLo}; }
static constexpr GcnOperand createVccHi() { return {.kind = Kind::VccHi}; }
static constexpr GcnOperand createM0() { return {.kind = Kind::M0}; }
static constexpr GcnOperand createExecLo() { return {.kind = Kind::ExecLo}; }
static constexpr GcnOperand createExecHi() { return {.kind = Kind::ExecHi}; }
static constexpr GcnOperand createVccZ() { return {.kind = Kind::VccZ}; }
static constexpr GcnOperand createExecZ() { return {.kind = Kind::ExecZ}; }
static constexpr GcnOperand createScc() { return {.kind = Kind::Scc}; }
static constexpr GcnOperand createLdsDirect() {
  return {.kind = Kind::LdsDirect};
}

// Debug helpers; implemented out of line.
void print(std::ostream &os) const;
void dump() const;
|
||||
};
|
||||
|
||||
// One decoded GCN instruction: dialect kind, opcode, and a fixed-capacity
// inline operand list (no heap allocation during decoding).
struct GcnInstruction {
  ir::Kind kind = ir::Kind::Builtin;
  unsigned op = ir::builtin::INVALID_INSTRUCTION;
  GcnOperand operands[16]; // capacity bound; overflow aborts in addOperand
  std::size_t operandCount{};

  // View over the operands actually populated.
  std::span<const GcnOperand> getOperands() const {
    return {operands, operandCount};
  }

  // Bounds-checked access; out-of-range indices are a programmer error and
  // terminate the process rather than returning garbage.
  const GcnOperand &getOperand(std::size_t index) const {
    if (index >= operandCount) {
      std::abort();
    }
    return operands[index];
  }

  // Appends an operand; aborts if the fixed capacity (16) is exceeded.
  void addOperand(GcnOperand op) {
    if (operandCount >= std::size(operands)) {
      std::abort();
    }

    operands[operandCount++] = op;
  }

  // Compares against a dialect-specific opcode enum (e.g. vop2::Op): true
  // only when both the dialect kind and the opcode match. Constrained to
  // enums that have a registered kOpToKind mapping.
  template <typename T>
  bool operator==(T testOp)
    requires(ir::kOpToKind<std::remove_cvref_t<T>> != ir::Kind::Count)
  {
    return ir::kOpToKind<std::remove_cvref_t<T>> == kind && op == testOp;
  }

  // Debug helpers; implemented out of line.
  void print(std::ostream &os) const;
  void dump() const;
};
|
||||
|
||||
void readGcnInst(GcnInstruction &isaInst, std::uint64_t &address,
|
||||
const std::function<std::uint32_t(std::uint64_t)> &readMemory);
|
||||
} // namespace shader
|
||||
28
rpcsx-gpu2/lib/gcn-shader/include/shader/ModuleInfo.hpp
Normal file
28
rpcsx-gpu2/lib/gcn-shader/include/shader/ModuleInfo.hpp
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
#pragma once
|
||||
|
||||
#include "Access.hpp"
|
||||
#include "ir/Value.hpp"
|
||||
#include "spv.hpp"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace shader {
|
||||
// Per-module summary used by analyses: for each function value, which
// variables it touches (and how), its parameters, and its return type.
struct ModuleInfo {
  struct Param {
    ir::Value type;
    Access access = Access::None; // how the callee uses this parameter
  };

  struct Function {
    std::map<ir::Value, Access> variables; // accessed variables -> access mask
    std::vector<Param> parameters;
    ir::Value returnType;
  };

  std::map<ir::Value, Function> functions; // function value -> its summary
};
|
||||
|
||||
ModuleInfo::Function &collectFunctionInfo(ModuleInfo &moduleInfo,
|
||||
ir::Value function);
|
||||
void collectModuleInfo(ModuleInfo &moduleInfo, const spv::BinaryLayout &layout);
|
||||
} // namespace shader
|
||||
46
rpcsx-gpu2/lib/gcn-shader/include/shader/SemanticInfo.hpp
Normal file
46
rpcsx-gpu2/lib/gcn-shader/include/shader/SemanticInfo.hpp
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
#pragma once
|
||||
|
||||
#include "ModuleInfo.hpp"
|
||||
#include "SpvTypeInfo.hpp"
|
||||
|
||||
namespace shader {
|
||||
struct SemanticModuleInfo : ModuleInfo {
|
||||
std::unordered_map<ir::InstructionId, ir::Value> semantics;
|
||||
|
||||
ir::Value findSemanticOf(ir::InstructionId sem) const {
|
||||
auto semIt = semantics.find(sem);
|
||||
if (semIt == semantics.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return semIt->second;
|
||||
}
|
||||
};
|
||||
|
||||
// Type-level description of instruction semantics (no IR values): per
// instruction id, which GCN registers it reads/writes, its parameter and
// return types, and whether it accesses buffer memory.
struct SemanticInfo {
  struct Param {
    spv::TypeInfo type;
    Access access = Access::None; // how the semantic uses this parameter
  };

  struct Function {
    std::unordered_map<int, Access> registerAccesses; // register id -> access
    std::vector<Param> parameters;
    spv::TypeInfo returnType;
    Access bufferAccess = Access::None; // aggregate buffer-memory access
  };

  std::unordered_map<ir::InstructionId, Function> semantics;

  // Returns the description for `sem`, or nullptr if none is registered.
  const Function *findSemantic(ir::InstructionId sem) const {
    if (auto it = semantics.find(sem); it != semantics.end()) {
      return &it->second;
    }

    return nullptr;
  }
};
|
||||
|
||||
void collectSemanticModuleInfo(SemanticModuleInfo &moduleInfo,
|
||||
const spv::BinaryLayout &layout);
|
||||
} // namespace shader
|
||||
154
rpcsx-gpu2/lib/gcn-shader/include/shader/SpvConverter.hpp
Normal file
154
rpcsx-gpu2/lib/gcn-shader/include/shader/SpvConverter.hpp
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
#pragma once
|
||||
#include "SpvTypeInfo.hpp"
|
||||
#include "dialect/spv.hpp"
|
||||
#include "spv.hpp"
|
||||
|
||||
namespace shader::spv {
|
||||
// CloneMap specialization used when importing nodes from one context into
// another; the override decides how each node/operand is cloned.
struct Import : ir::CloneMap {
  ir::Node getOrCloneImpl(ir::Context &context, ir::Node node,
                          bool isOperand) override;
};
|
||||
|
||||
// SPIR-V building context: owns the module layout plus caches of deduplicated
// global declarations (types, constants) so each is emitted exactly once.
struct Context : ir::Context {
  BinaryLayout layout;
  ir::Location rootLocation;

  ir::NameStorage ns;
  ir::Value perVertex;                // gl_PerVertex block, see createPerVertex
  std::map<int, ir::Value> outputs;   // location -> output variable
  std::map<int, ir::Value> inputs;    // location -> input variable

  ir::RegionLike localVariables; // OpVariable(Function) insertion point
  ir::RegionLike epilogue;       // instructions emitted before function exit
  ir::Value entryPoint;

  // Dedup caches: instruction id -> all global/constant values with that op.
  std::map<ir::InstructionId, std::vector<ir::Value>> globals;
  std::map<ir::InstructionId, std::vector<ir::Value>> constants;

  Context();

  // Creates a region that begins with an OpLabel at the given location.
  ir::Value createRegionWithLabel(ir::Location loc);

  void setName(ir::spv::IdRef inst, std::string name);
  void setConstantName(ir::Value constant);

  // Returns a cached OpConstant of the given type/value, creating it once.
  ir::Value getOrCreateConstant(ir::Value typeValue, const ir::Operand &value);

  ir::Value getType(ir::spv::Op baseType, int width, bool isSigned);
  ir::Value getType(const TypeInfo &info);

  // Immediate-constant helpers (unsigned / signed / float of each width).
  ir::Value imm64(std::uint64_t value) {
    return getOrCreateConstant(getTypeUInt64(), value);
  }
  ir::Value imm32(std::uint32_t value) {
    return getOrCreateConstant(getTypeUInt32(), value);
  }

  ir::Value simm64(std::int64_t value) {
    return getOrCreateConstant(getTypeSInt64(), value);
  }
  ir::Value simm32(std::int32_t value) {
    return getOrCreateConstant(getTypeSInt32(), value);
  }
  ir::Value fimm64(double value) {
    return getOrCreateConstant(getTypeFloat(64), value);
  }
  ir::Value fimm32(float value) {
    return getOrCreateConstant(getTypeFloat(32), value);
  }
  ir::Value getBool(bool value) { return value ? getTrue() : getFalse(); }
  ir::Value getTrue() {
    return getOrCreateGlobal(ir::spv::OpConstantTrue, {{getTypeBool()}});
  }
  ir::Value getFalse() {
    return getOrCreateGlobal(ir::spv::OpConstantFalse, {{getTypeBool()}});
  }

  // Index constants are signed 32-bit, matching SPIR-V access-chain usage.
  ir::Value getIndex(std::int32_t index) { return simm32(index); }

  void setTypeName(ir::Value type);

  // Registers an already-created global in the dedup cache and names it.
  void addGlobal(ir::Value type) {
    globals[type.getInstId()].push_back(type);
    setTypeName(type);
  }

  // find: cache lookup only; create: unconditionally emits; getOrCreate:
  // lookup first, emit on miss.
  ir::Value findGlobal(ir::spv::Op op,
                       std::span<const ir::Operand> operands = {}) const;
  ir::Value createGlobal(ir::spv::Op op, std::span<const ir::Operand> operands);
  ir::Value getOrCreateGlobal(ir::spv::Op op,
                              std::span<const ir::Operand> operands = {});

  // Type factories, all deduplicated through getOrCreateGlobal.
  ir::Value getTypeInt(int width, bool sign) {
    // SPIR-V encodes signedness as 1/0 in OpTypeInt's second operand.
    return getOrCreateGlobal(ir::spv::OpTypeInt, {{width, sign ? 1 : 0}});
  }
  ir::Value getTypeFloat(int width) {
    return getOrCreateGlobal(ir::spv::OpTypeFloat, {{width}});
  }
  ir::Value getTypeVoid() { return getOrCreateGlobal(ir::spv::OpTypeVoid); }
  ir::Value getTypeBool() { return getOrCreateGlobal(ir::spv::OpTypeBool); }
  ir::Value getTypeSampler() {
    return getOrCreateGlobal(ir::spv::OpTypeSampler);
  }
  ir::Value getTypeArray(ir::Value elementType, ir::Value count) {
    return getOrCreateGlobal(ir::spv::OpTypeArray, {{elementType, count}});
  }
  ir::Value getTypeVector(ir::Value elementType, int count) {
    return getOrCreateGlobal(ir::spv::OpTypeVector, {{elementType, count}});
  }

  ir::Value getTypeStruct(auto... elements) {
    return getOrCreateGlobal(ir::spv::OpTypeStruct, {{elements...}});
  }
  ir::Value getTypeSInt8() { return getTypeInt(8, true); }
  ir::Value getTypeUInt8() { return getTypeInt(8, false); }
  ir::Value getTypeSInt16() { return getTypeInt(16, true); }
  ir::Value getTypeUInt16() { return getTypeInt(16, false); }
  ir::Value getTypeSInt32() { return getTypeInt(32, true); }
  ir::Value getTypeUInt32() { return getTypeInt(32, false); }
  ir::Value getTypeSInt64() { return getTypeInt(64, true); }
  ir::Value getTypeUInt64() { return getTypeInt(64, false); }
  ir::Value getTypeFloat16() { return getTypeFloat(16); }
  ir::Value getTypeFloat32() { return getTypeFloat(32); }
  ir::Value getTypeFloat64() { return getTypeFloat(64); }

  // OpTypeFunction operands are: return type, then parameter types in order.
  ir::Value getTypeFunction(ir::Value returnType,
                            std::span<const ir::Value> params) {
    std::vector<ir::Operand> operands;
    operands.reserve(1 + params.size());
    operands.push_back(returnType);
    for (auto param : params) {
      operands.push_back(param);
    }
    return getOrCreateGlobal(ir::spv::OpTypeFunction, operands);
  }

  ir::Value getTypePointer(ir::spv::StorageClass storageClass,
                           ir::spv::IdRef pointeeType) {
    return getOrCreateGlobal(ir::spv::OpTypePointer,
                             {{storageClass, pointeeType}});
  }

  // Mirrors the OpTypeImage operand order from the SPIR-V specification.
  ir::Value getTypeImage(ir::spv::IdRef sampledType, ir::spv::Dim dim,
                         std::int32_t depth, bool arrayed, bool multisampled,
                         std::int32_t sampled, ir::spv::ImageFormat format) {
    return getOrCreateGlobal(
        ir::spv::OpTypeImage,
        {{sampledType, dim, depth, arrayed, multisampled, sampled, format}});
  }

  ir::Value getOperandValue(const ir::Operand &op, ir::Value type = {});

  // Builds the gl_PerVertex output block (stored in `perVertex`).
  void createPerVertex();

  ir::Value createUniformBuffer(int descriptorSet, int binding,
                                ir::Value structType);

  ir::Value createRuntimeArrayUniformBuffer(int descriptorSet, int binding,
                                            ir::Value elementType);

  // Interface-variable factories; results are cached in outputs/inputs maps
  // by the implementation — TODO confirm against SpvConverter.cpp.
  ir::Value createOutput(ir::Location loc, int index);
  ir::Value createInput(ir::Location loc, int index);
  ir::Value createAttr(ir::Location loc, int attrId, bool perVertex, bool flat);
};
|
||||
} // namespace shader::spv
|
||||
18
rpcsx-gpu2/lib/gcn-shader/include/shader/SpvTypeInfo.hpp
Normal file
18
rpcsx-gpu2/lib/gcn-shader/include/shader/SpvTypeInfo.hpp
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
#pragma once
|
||||
|
||||
#include "dialect/spv.hpp"
|
||||
|
||||
namespace shader::spv {
|
||||
// Flattened description of a (possibly vector) SPIR-V scalar type:
// base op (e.g. OpTypeVector or scalar op), component op/width/count, and
// signedness for integers.
struct TypeInfo {
  ir::spv::Op baseType = {};
  ir::spv::Op componentType = {};
  int componentWidth = 0;  // bits per component
  int componentsCount = 1; // 1 for scalars
  bool isSigned = false;

  // Total bit width of the whole value (components * component width).
  int width() const { return componentWidth * componentsCount; }
  bool operator==(const TypeInfo &other) const = default;
};
|
||||
|
||||
TypeInfo getTypeInfo(ir::Value type);
|
||||
} // namespace shader::spv
|
||||
129
rpcsx-gpu2/lib/gcn-shader/include/shader/Vector.hpp
Normal file
129
rpcsx-gpu2/lib/gcn-shader/include/shader/Vector.hpp
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
|
||||
namespace shader {
|
||||
template <typename T, std::size_t N> struct Vector : std::array<T, N> {
|
||||
using std::array<T, N>::array;
|
||||
|
||||
template<typename U>
|
||||
constexpr explicit operator Vector<U, N>() const {
|
||||
Vector<U, N> result;
|
||||
for (std::size_t i = 0; i < N; ++i) {
|
||||
result[i] = static_cast<U>((*this)[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
#define DEFINE_BINOP(OP) \
|
||||
constexpr auto operator OP(const Vector &other) const \
|
||||
requires requires(T lhs, T rhs) { lhs OP rhs; } \
|
||||
{ \
|
||||
using ResultElementT = \
|
||||
std::remove_cvref_t<decltype(std::declval<T>() OP std::declval<T>())>; \
|
||||
Vector<ResultElementT, N> result; \
|
||||
for (std::size_t i = 0; i < N; ++i) { \
|
||||
result[i] = (*this)[i] OP other[i]; \
|
||||
} \
|
||||
return result; \
|
||||
} \
|
||||
constexpr auto operator OP(const T &other) const \
|
||||
requires requires(T lhs, T rhs) { lhs OP rhs; } \
|
||||
{ \
|
||||
using ResultElementT = \
|
||||
std::remove_cvref_t<decltype(std::declval<T>() OP std::declval<T>())>; \
|
||||
Vector<ResultElementT, N> result; \
|
||||
for (std::size_t i = 0; i < N; ++i) { \
|
||||
result[i] = (*this)[i] OP other; \
|
||||
} \
|
||||
return result; \
|
||||
}
|
||||
|
||||
#define DEFINE_UNOP(OP) \
|
||||
constexpr auto operator OP() const \
|
||||
requires requires(T rhs) { OP rhs; } \
|
||||
{ \
|
||||
using ResultElementT = \
|
||||
std::remove_cvref_t<decltype(OP std::declval<T>())>; \
|
||||
Vector<ResultElementT, N> result; \
|
||||
for (std::size_t i = 0; i < N; ++i) { \
|
||||
result[i] = OP(*this)[i]; \
|
||||
} \
|
||||
return result; \
|
||||
}
|
||||
|
||||
DEFINE_BINOP(+)
|
||||
DEFINE_BINOP(-)
|
||||
DEFINE_BINOP(*)
|
||||
DEFINE_BINOP(/)
|
||||
DEFINE_BINOP(%)
|
||||
DEFINE_BINOP(&)
|
||||
DEFINE_BINOP(|)
|
||||
DEFINE_BINOP(^)
|
||||
DEFINE_BINOP(>>)
|
||||
DEFINE_BINOP(<<)
|
||||
DEFINE_BINOP(&&)
|
||||
DEFINE_BINOP(||)
|
||||
DEFINE_BINOP(<)
|
||||
DEFINE_BINOP(>)
|
||||
DEFINE_BINOP(<=)
|
||||
DEFINE_BINOP(>=)
|
||||
DEFINE_BINOP(==)
|
||||
DEFINE_BINOP(!=)
|
||||
|
||||
DEFINE_UNOP(-)
|
||||
DEFINE_UNOP(~)
|
||||
DEFINE_UNOP(!)
|
||||
|
||||
#undef DEFINE_BINOP
|
||||
#undef DEFINE_UNOP
|
||||
};
|
||||
|
||||
// GLSL-like scalar aliases.
// NOTE(review): _Float16 is a GCC/Clang extension, not standard C++ — this
// header presumably targets those compilers only; confirm MSVC is out of scope.
using float16_t = _Float16;
using float32_t = float;
using float64_t = double;

// GLSL-style vector aliases over the element-wise Vector template.
using u8vec2 = Vector<std::uint8_t, 2>;
using u8vec3 = Vector<std::uint8_t, 3>;
using u8vec4 = Vector<std::uint8_t, 4>;
using i8vec2 = Vector<std::int8_t, 2>;
using i8vec3 = Vector<std::int8_t, 3>;
using i8vec4 = Vector<std::int8_t, 4>;

using u16vec2 = Vector<std::uint16_t, 2>;
using u16vec3 = Vector<std::uint16_t, 3>;
using u16vec4 = Vector<std::uint16_t, 4>;
using i16vec2 = Vector<std::int16_t, 2>;
using i16vec3 = Vector<std::int16_t, 3>;
using i16vec4 = Vector<std::int16_t, 4>;

using u32vec2 = Vector<std::uint32_t, 2>;
using u32vec3 = Vector<std::uint32_t, 3>;
using u32vec4 = Vector<std::uint32_t, 4>;
using i32vec2 = Vector<std::int32_t, 2>;
using i32vec3 = Vector<std::int32_t, 3>;
using i32vec4 = Vector<std::int32_t, 4>;

using u64vec2 = Vector<std::uint64_t, 2>;
using u64vec3 = Vector<std::uint64_t, 3>;
using u64vec4 = Vector<std::uint64_t, 4>;
using i64vec2 = Vector<std::int64_t, 2>;
using i64vec3 = Vector<std::int64_t, 3>;
using i64vec4 = Vector<std::int64_t, 4>;

using f32vec2 = Vector<float32_t, 2>;
using f32vec3 = Vector<float32_t, 3>;
using f32vec4 = Vector<float32_t, 4>;
using f64vec2 = Vector<float64_t, 2>;
using f64vec3 = Vector<float64_t, 3>;
using f64vec4 = Vector<float64_t, 4>;

using f16vec2 = Vector<float16_t, 2>;
using f16vec3 = Vector<float16_t, 3>;
using f16vec4 = Vector<float16_t, 4>;

// Boolean masks produced by the comparison operators.
using bvec2 = Vector<bool, 2>;
using bvec3 = Vector<bool, 3>;
using bvec4 = Vector<bool, 4>;
|
||||
} // namespace shader
|
||||
445
rpcsx-gpu2/lib/gcn-shader/include/shader/analyze.hpp
Normal file
445
rpcsx-gpu2/lib/gcn-shader/include/shader/analyze.hpp
Normal file
|
|
@ -0,0 +1,445 @@
|
|||
#pragma once
|
||||
|
||||
#include "ModuleInfo.hpp"
#include "SemanticInfo.hpp"
#include "dialect/memssa.hpp"
#include "graph.hpp"
#include "ir/Instruction.hpp"
#include "ir/Value.hpp"
#include "rx/FunctionRef.hpp"
#include "rx/TypeId.hpp"

#include <forward_list>
#include <functional>
#include <map>
#include <memory>
#include <ostream>
#include <span>
#include <string>
#include <string_view>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
|
||||
|
||||
namespace shader {
|
||||
struct DomTree;
|
||||
struct PostDomTree;
|
||||
// Control-flow graph over IR basic blocks. A Node is identified by its
// OpLabel-like ir::Value and records its terminator plus predecessor and
// successor edge sets; the CFG additionally keeps pre/post-order traversal
// caches filled in by the builder.
class CFG {
public:
  class Node {
    ir::Value mLabel;
    ir::Instruction mTerminator;
    std::unordered_set<Node *> mPredecessors;
    std::unordered_set<Node *> mSuccessors;

  public:
    using Iterator = std::unordered_set<Node *>::iterator;

    Node() = default;
    Node(ir::Value label) : mLabel(label) {}

    ir::Value getLabel() { return mLabel; }

    void setTerminator(ir::Instruction inst) { mTerminator = inst; }
    bool hasTerminator() { return mTerminator != nullptr; }
    ir::Instruction getTerminator() { return mTerminator; }

    // Adds a directed edge this -> to, updating both adjacency sets.
    void addEdge(Node *to) {
      to->mPredecessors.insert(this);
      mSuccessors.insert(to);
    }

    bool hasPredecessor(Node *node) { return mPredecessors.contains(node); }
    bool hasSuccessor(Node *node) { return mSuccessors.contains(node); }
    auto &getPredecessors() { return mPredecessors; }
    auto &getSuccessors() { return mSuccessors; }
    std::size_t getPredecessorCount() { return mPredecessors.size(); }
    std::size_t getSuccessorCount() { return mSuccessors.size(); }
    bool hasPredecessors() { return !mPredecessors.empty(); }
    bool hasSuccessors() { return !mSuccessors.empty(); }

    // Iteration over the block's instructions; the four variants include or
    // exclude the label and/or the terminator.
    template <typename T = ir::Instruction> auto range() {
      return ir::range<T>(mLabel, mTerminator.getNext());
    }

    template <typename T = ir::Instruction> auto rangeWithoutLabel() {
      // Terminator may be absent for an unfinished block; then iterate to end.
      return ir::range<T>(mLabel.getNext(),
                          mTerminator ? mTerminator.getNext() : nullptr);
    }

    template <typename T = ir::Instruction> auto rangeWithoutTerminator() {
      return ir::range<T>(mLabel, mTerminator);
    }

    template <typename T = ir::Instruction>
    auto rangeWithoutLabelAndTerminator() {
      return ir::range<T>(mLabel.getNext(), mTerminator);
    }
  };

private:
  std::map<ir::Value, Node> mNodes; // label -> node (stable addresses)
  std::vector<Node *> mPreorderNodes;
  std::vector<Node *> mPostorderNodes;
  Node *mEntryNode = nullptr;

public:
  bool empty() { return mNodes.empty(); }
  void clear() {
    mNodes.clear();
    mPreorderNodes.clear();
    mPostorderNodes.clear();
    mEntryNode = nullptr;
  }

  void addPreorderNode(Node *node) { mPreorderNodes.push_back(node); }
  void addPostorderNode(Node *node) { mPostorderNodes.push_back(node); }

  Node *getEntryNode() { return mEntryNode; }
  ir::Value getEntryLabel() { return getEntryNode()->getLabel(); }
  void setEntryNode(Node *node) { mEntryNode = node; }

  std::span<Node *> getPreorderNodes() { return mPreorderNodes; }
  std::span<Node *> getPostorderNodes() { return mPostorderNodes; }

  // Inserts a node for `label` if missing; std::map guarantees the returned
  // pointer stays valid across later insertions.
  Node *getOrCreateNode(ir::Value label) {
    return &mNodes.emplace(label, label).first->second;
  }

  Node *getNode(ir::Value label) {
    if (auto it = mNodes.find(label); it != mNodes.end()) {
      return &it->second;
    }

    return nullptr;
  }

  // NOTE(review): these two dereference getNode() without a null check, so
  // the label must already exist in the graph.
  auto &getSuccessors(ir::Value label) {
    return getNode(label)->getSuccessors();
  }

  auto &getPredecessors(ir::Value label) {
    return getNode(label)->getPredecessors();
  }

  // Graphviz-style dump / test-case generator; implemented out of line.
  void print(std::ostream &os, ir::NameStorage &ns, bool subgraph = false,
             std::string_view nameSuffix = "");
  std::string genTest();

  // Builds a sub-CFG reachable from `from`, stopping at `stopLabels` and
  // optionally treating `continueLabel` specially (loop views).
  CFG buildView(CFG::Node *from, PostDomTree *domTree = nullptr,
                const std::unordered_set<ir::Value> &stopLabels = {},
                ir::Value continueLabel = nullptr);

  CFG buildView(ir::Value from, PostDomTree *domTree = nullptr,
                const std::unordered_set<ir::Value> &stopLabels = {},
                ir::Value continueLabel = nullptr) {
    return buildView(getNode(from), domTree, stopLabels, continueLabel);
  }
};
|
||||
|
||||
// Memory SSA form: maps each memory-accessing instruction to the reaching
// definition of each variable it may touch. Nodes live in a private
// ir::Context/Region owned by this object.
class MemorySSA {
public:
  ir::Context context;
  ir::Region region;
  // Source variable -> its memssa variable node.
  std::map<ir::Value, ir::memssa::Var> variableToVar;
  // User instruction -> (variable -> reaching definition at that user).
  std::map<ir::Instruction, std::map<ir::memssa::Var, ir::memssa::Def>>
      userDefs;

  ir::memssa::Var getVar(ir::Value variable, std::span<const ir::Operand> path);
  ir::memssa::Var getVar(ir::Value pointer);

  // Reaching definition of `var` at `user`, or a null Def when unknown.
  ir::memssa::Def getDef(ir::Instruction user, ir::memssa::Var var) {
    auto userIt = userDefs.find(user);
    if (userIt == userDefs.end()) {
      return {};
    }

    if (auto it = userIt->second.find(var); it != userIt->second.end()) {
      return it->second;
    }

    return {};
  }

  // Pointer-based convenience overload: resolves the pointer to a var first.
  ir::memssa::Def getDef(ir::Instruction user, ir::Value pointer) {
    if (auto var = getVar(pointer)) {
      return getDef(user, var);
    }

    return {};
  }

  // Returns the concrete IR instruction producing the reaching definition,
  // or null when there is none.
  ir::Instruction getDefInst(ir::Instruction user, ir::Value pointer) {
    if (auto def = getDef(user, pointer)) {
      return def.getLinkedInst();
    }

    return {};
  }

  void print(std::ostream &os, ir::Region irRegion, ir::NameStorage &ns);
  void print(std::ostream &os, ir::NameStorage &ns);
  void dump();

private:
  ir::memssa::Var getVarImpl(ir::Value variable);
};
|
||||
|
||||
bool isWithoutSideEffects(ir::InstructionId id);
|
||||
bool isTerminator(ir::Instruction inst);
|
||||
bool isBranch(ir::Instruction inst);
|
||||
ir::Value unwrapPointer(ir::Value pointer);
|
||||
graph::DomTree<ir::Value> buildDomTree(CFG &cfg, ir::Value root = nullptr);
|
||||
graph::DomTree<ir::Value> buildPostDomTree(CFG &cfg, ir::Value root);
|
||||
|
||||
CFG buildCFG(ir::Instruction firstInstruction,
|
||||
const std::unordered_set<ir::Value> &exitLabels = {},
|
||||
ir::Value continueLabel = nullptr);
|
||||
MemorySSA buildMemorySSA(CFG &cfg, ModuleInfo *moduleInfo = nullptr);
|
||||
|
||||
MemorySSA buildMemorySSA(CFG &cfg, const SemanticInfo &instructionSemantic,
|
||||
std::function<ir::Value(int)> getRegisterVarCb);
|
||||
|
||||
bool dominates(ir::Instruction a, ir::Instruction b, bool isPostDom,
|
||||
graph::DomTree<ir::Value> &domTree);
|
||||
|
||||
ir::Value findNearestCommonDominator(ir::Instruction a, ir::Instruction b,
|
||||
graph::DomTree<ir::Value> &domTree);
|
||||
|
||||
class BackEdgeStorage {
|
||||
std::unordered_map<ir::Value, std::unordered_set<ir::Value>> backEdges;
|
||||
|
||||
public:
|
||||
BackEdgeStorage() = default;
|
||||
BackEdgeStorage(CFG &cfg);
|
||||
|
||||
const std::unordered_set<ir::Value> *get(ir::Value value) {
|
||||
if (auto it = backEdges.find(value); it != backEdges.end()) {
|
||||
return &it->second;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto &all() { return backEdges; }
|
||||
};
|
||||
|
||||
// Type-erased cache of analysis results keyed by rx::TypeId. Results are
// built lazily by get<T>(...); invalidate<T>() marks an entry stale so the
// next get() rebuilds it in place instead of reallocating.
struct AnalysisStorage {
  // Invalidates each listed analysis type; returns true if any entry went
  // from valid to invalid.
  template <typename... T>
    requires(sizeof...(T) > 0)
  bool invalidate() {
    bool invalidated = false;
    ((invalidated = invalidate(rx::TypeId::get<T>()) || invalidated), ...);
    return invalidated;
  }

  // Returns true only when the entry existed and was previously valid.
  bool invalidate(rx::TypeId id) {
    if (auto it = mStorage.find(id); it != mStorage.end()) {
      return std::exchange(it->second.invalid, true) == false;
    }

    return false;
  }
  void invalidateAll() {
    for (auto &entry : mStorage) {
      entry.second.invalid = true;
    }
  }

  // Returns the cached T, constructing it from args on first use or when the
  // entry is invalid (rebuild happens via assignment into the existing
  // object, so references handed out earlier stay valid).
  template <typename T, typename... ArgsT>
  T &get(ArgsT &&...args)
    requires requires { T(std::forward<ArgsT>(args)...); }
  {
    void *result = getImpl(
        rx::TypeId::get<T>(), getDeleter<T>(),
        [&] {
          return std::make_unique<T>(std::forward<ArgsT>(args)...).release();
        },
        [&](void *object) {
          *reinterpret_cast<T *>(object) = T(std::forward<ArgsT>(args)...);
        });

    return *static_cast<T *>(result);
  }

  // Overload taking a builder callable; the builder runs only when the value
  // must actually be (re)constructed.
  template <typename T, typename BuilderFn>
  T &get(BuilderFn &&builder)
    requires requires { T(std::forward<BuilderFn>(builder)()); }
  {
    void *result = getImpl(
        rx::TypeId::get<T>(), getDeleter<T>(),
        [&] {
          return std::make_unique<T>(std::forward<BuilderFn>(builder)())
              .release();
        },
        [&](void *object) {
          *reinterpret_cast<T *>(object) = std::forward<BuilderFn>(builder)();
        });

    return *static_cast<T *>(result);
  }

private:
  // Type-erased deleter restoring the concrete type before delete.
  template <typename T> static void (*getDeleter())(void *) {
    return +[](void *data) { delete static_cast<T *>(data); };
  }

  // Shared slow path: inserts an empty entry, then either heap-constructs a
  // fresh object (first use) or assigns over the stale one (invalidated).
  void *getImpl(rx::TypeId typeId, void (*deleter)(void *),
                rx::FunctionRef<void *()> constructor,
                rx::FunctionRef<void(void *)> placementConstructor) {
    auto [it, inserted] = mStorage.emplace(typeId, getNullPointer());

    if (inserted) {
      it->second.object =
          std::unique_ptr<void, void (*)(void *)>(constructor(), deleter);
    } else if (it->second.invalid) {
      placementConstructor(it->second.object.get());
      it->second.invalid = false;
    }

    return it->second.object.get();
  }
  // Null unique_ptr with a no-op deleter, used as the placeholder entry.
  static constexpr std::unique_ptr<void, void (*)(void *)> getNullPointer() {
    return {nullptr, [](void *) {}};
  }

  struct Entry {
    std::unique_ptr<void, void (*)(void *)> object;
    bool invalid = false; // stale: rebuild in place on next get()
  };

  std::map<rx::TypeId, Entry> mStorage;
};
|
||||
|
||||
// Post-dominator tree over CFG labels; a distinct type so AnalysisStorage
// can cache it separately from the (forward) DomTree.
struct PostDomTree : graph::DomTree<ir::Value> {
  PostDomTree() = default;
  PostDomTree(graph::DomTree<ir::Value> &&other)
      : graph::DomTree<ir::Value>::DomTree(std::move(other)) {}
  // `root` is the exit/merge label the post-dominance is computed towards.
  PostDomTree(CFG &cfg, ir::Value root)
      : PostDomTree(buildPostDomTree(cfg, root)) {}
};
|
||||
|
||||
// Forward dominator tree over CFG labels; defaults the root to the CFG entry
// when none is given.
struct DomTree : graph::DomTree<ir::Value> {
  DomTree() = default;
  DomTree(graph::DomTree<ir::Value> &&other)
      : graph::DomTree<ir::Value>::DomTree(std::move(other)) {}
  DomTree(CFG &cfg, ir::Value root = nullptr)
      : DomTree(buildDomTree(cfg, root)) {}
};
|
||||
|
||||
// Wrapper that gives T a distinct static type per integer tag, so
// AnalysisStorage can cache several independent instances of the same
// analysis (e.g. CFG with and without the loop-continue edge).
template <typename T, std::size_t> struct Tag : T {
  using T::T;
  using T::operator=;

  // Allow seamless construction/assignment from the untagged type.
  Tag(T &&other) : T(std::move(other)) {}
  Tag(const T &other) : T(other) {}

  Tag &operator=(T &&other) {
    T::operator=(std::move(other));
    return *this;
  }
  Tag &operator=(const T &other) {
    T::operator=(other);
    return *this;
  }
};
|
||||
|
||||
// A node of the structured-control-flow tree (selection/loop construct)
// discovered during structurization. Each construct owns lazily computed,
// cached analyses (CFG view, dominator trees, back edges) scoped to the
// region between `header` and `merge`.
struct Construct {
  Construct *parent;
  std::forward_list<Construct> children; // stable addresses; newest in front
  ir::Value header;
  ir::Value merge;
  ir::Value loopBody;     // set only for loop constructs
  ir::Value loopContinue; // set only for loop constructs
  AnalysisStorage analysis;

  // Root construct covering a whole function region; its CFG is built from
  // the region's first instruction and its header is the CFG entry label.
  static std::unique_ptr<Construct> createRoot(ir::RegionLike region,
                                               ir::Value merge) {
    auto result = std::make_unique<Construct>();
    auto &cfg =
        result->analysis.get<CFG>([&] { return buildCFG(region.getFirst()); });
    result->header = cfg.getEntryLabel();
    result->merge = merge;
    return result;
  }

  // Selection-construct child.
  Construct *createChild(ir::Value header, ir::Value merge) {
    auto &result = children.emplace_front();
    result.parent = this;
    result.header = header;
    result.merge = merge;
    return &result;
  }

  // Loop-construct child (adds continue target and body label).
  Construct *createChild(ir::Value header, ir::Value merge,
                         ir::Value loopContinue, ir::Value loopBody) {
    auto &result = children.emplace_front();
    result.parent = this;
    result.header = header;
    result.merge = merge;
    result.loopContinue = loopContinue;
    result.loopBody = loopBody;
    return &result;
  }

  // Stack-local probe construct that is not linked into `children`.
  Construct createTemporaryChild(ir::Value header, ir::Value merge) {
    Construct result;
    result.parent = this;
    result.header = header;
    result.merge = merge;
    return result;
  }

  // CFG restricted to this construct: a view of the parent's CFG bounded by
  // {header, merge}, or a fresh CFG for the root.
  CFG &getCfg() {
    return analysis.get<CFG>([this] {
      if (parent != nullptr) {
        return parent->getCfg().buildView(
            header,
            &parent->getPostDomTree(),
            {header, merge});
      }

      return buildCFG(header);
    });
  }

  // Same as getCfg() but with the loop-continue edge excluded; cached under
  // a separate Tag so both variants can coexist.
  CFG &getCfgWithoutContinue() {
    if (loopContinue == nullptr) {
      return getCfg();
    }

    return analysis.get<Tag<CFG, kWithoutContinue>>([this] {
      if (parent != nullptr) {
        return parent->getCfg().buildView(
            header,
            &parent->getPostDomTree(),
            {header, merge}, loopContinue);
      }

      return buildCFG(header, {}, loopContinue);
    });
  }

  // Dominance rooted at the header; post-dominance rooted at the merge.
  DomTree &getDomTree() { return analysis.get<DomTree>(getCfg(), header); }
  PostDomTree &getPostDomTree() {
    return analysis.get<PostDomTree>(getCfg(), merge);
  }
  BackEdgeStorage &getBackEdgeStorage() {
    return analysis.get<BackEdgeStorage>(getCfg());
  }
  BackEdgeStorage &getBackEdgeWithoutContinueStorage() {
    if (loopContinue == nullptr) {
      return getBackEdgeStorage();
    }
    return analysis.get<Tag<BackEdgeStorage, kWithoutContinue>>(
        getCfgWithoutContinue());
  }
  auto getBackEdges(ir::Value node) { return getBackEdgeStorage().get(node); }
  auto getBackEdgesWithoutContinue(ir::Value node) {
    return getBackEdgeWithoutContinueStorage().get(node);
  }
  auto getBackEdges() { return getBackEdges(header); }
  // Invalidate cached analyses for this construct / recursively; defined out
  // of line.
  void invalidate();
  void invalidateAll();

  bool isNull() const { return header == nullptr; }

  // Drops the most recently created child (children is front-inserted).
  void removeLastChild() { children.pop_front(); }

private:
  // Tag discriminator for the "-WithoutContinue" analysis variants.
  enum {
    kWithoutContinue,
  };
};
|
||||
} // namespace shader
|
||||
78
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect.hpp
Normal file
78
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect.hpp
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
#pragma once
|
||||
|
||||
#include "dialect/builtin.hpp" // IWYU pragma: export
|
||||
#include "dialect/ds.hpp" // IWYU pragma: export
|
||||
#include "dialect/exp.hpp" // IWYU pragma: export
|
||||
#include "dialect/memssa.hpp" // IWYU pragma: export
|
||||
#include "dialect/mimg.hpp" // IWYU pragma: export
|
||||
#include "dialect/mtbuf.hpp" // IWYU pragma: export
|
||||
#include "dialect/mubuf.hpp" // IWYU pragma: export
|
||||
#include "dialect/smrd.hpp" // IWYU pragma: export
|
||||
#include "dialect/sop1.hpp" // IWYU pragma: export
|
||||
#include "dialect/sop2.hpp" // IWYU pragma: export
|
||||
#include "dialect/sopc.hpp" // IWYU pragma: export
|
||||
#include "dialect/sopk.hpp" // IWYU pragma: export
|
||||
#include "dialect/sopp.hpp" // IWYU pragma: export
|
||||
#include "dialect/vintrp.hpp" // IWYU pragma: export
|
||||
#include "dialect/vop1.hpp" // IWYU pragma: export
|
||||
#include "dialect/vop2.hpp" // IWYU pragma: export
|
||||
#include "dialect/vop3.hpp" // IWYU pragma: export
|
||||
#include "dialect/vopc.hpp" // IWYU pragma: export
|
||||
|
||||
#include "dialect/spv.hpp" // IWYU pragma: export
|
||||
|
||||
#include "dialect/amdgpu.hpp" // IWYU pragma: export
|
||||
#include <concepts>
|
||||
|
||||
namespace shader::ir {
// Registers every dialect's opcode enum with the IR: mapping an Op type to
// its ir::Kind lets generic code (getInstructionId, the operator== helpers
// below) dispatch on the enum type alone. The primary template (declared in
// dialect/builtin.hpp) yields Kind::Count, meaning "not a dialect opcode".
template <> inline constexpr Kind kOpToKind<spv::Op> = Kind::Spv;
template <> inline constexpr Kind kOpToKind<builtin::Op> = Kind::Builtin;
template <> inline constexpr Kind kOpToKind<amdgpu::Op> = Kind::AmdGpu;
template <> inline constexpr Kind kOpToKind<vop2::Op> = Kind::Vop2;
template <> inline constexpr Kind kOpToKind<sop2::Op> = Kind::Sop2;
template <> inline constexpr Kind kOpToKind<sopk::Op> = Kind::Sopk;
template <> inline constexpr Kind kOpToKind<smrd::Op> = Kind::Smrd;
template <> inline constexpr Kind kOpToKind<vop3::Op> = Kind::Vop3;
template <> inline constexpr Kind kOpToKind<mubuf::Op> = Kind::Mubuf;
template <> inline constexpr Kind kOpToKind<mtbuf::Op> = Kind::Mtbuf;
template <> inline constexpr Kind kOpToKind<mimg::Op> = Kind::Mimg;
template <> inline constexpr Kind kOpToKind<ds::Op> = Kind::Ds;
template <> inline constexpr Kind kOpToKind<vintrp::Op> = Kind::Vintrp;
template <> inline constexpr Kind kOpToKind<exp::Op> = Kind::Exp;
template <> inline constexpr Kind kOpToKind<vop1::Op> = Kind::Vop1;
template <> inline constexpr Kind kOpToKind<vopc::Op> = Kind::Vopc;
template <> inline constexpr Kind kOpToKind<sop1::Op> = Kind::Sop1;
template <> inline constexpr Kind kOpToKind<sopc::Op> = Kind::Sopc;
template <> inline constexpr Kind kOpToKind<sopp::Op> = Kind::Sopp;
template <> inline constexpr Kind kOpToKind<memssa::Op> = Kind::MemSSA;

// Builds a full InstructionId from a bare dialect opcode; constrained so it
// only participates for enums that were registered via kOpToKind above.
template <typename T>
  requires(kOpToKind<std::remove_cvref_t<T>> != Kind::Count)
constexpr InstructionId getInstructionId(T op) {
  return getInstructionId(kOpToKind<std::remove_cvref_t<T>>, op);
}

// An instruction matches an id only when the handle is non-null AND the ids
// agree — so `inst == someId` is false for an empty handle, never UB.
constexpr bool operator==(ir::Instruction lhs, InstructionId rhs) {
  return lhs && lhs.getInstId() == rhs;
}

// Generic comparison through getInstructionId: enables e.g.
// `instruction == vop2::Op::...` for any pair of distinct types where the
// converted comparison is well-formed.
// NOTE(review): if a (L, R) pair ever satisfies both this constraint and the
// getTypeId one below, the two overloads would be ambiguous — presumably the
// id/type domains are disjoint; confirm.
template <typename L, typename R>
constexpr bool operator==(L lhs, R rhs)
  requires requires {
    requires(!std::is_same_v<L, R>);
    { getInstructionId(lhs) == rhs } -> std::convertible_to<bool>;
  }
{
  return getInstructionId(lhs) == rhs;
}

// Same idea for type ids: compares through getTypeId for distinct types.
template <typename L, typename R>
constexpr bool operator==(L lhs, R rhs)
  requires requires {
    requires(!std::is_same_v<L, R>);
    { getTypeId(lhs) == rhs } -> std::convertible_to<bool>;
  }
{
  return getTypeId(lhs) == rhs;
}
} // namespace shader::ir
|
||||
57
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/amdgpu.hpp
Normal file
57
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/amdgpu.hpp
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
#pragma once

namespace shader::ir::amdgpu {

// Opcodes of the internal amdgpu helper dialect. Values are contiguous and
// start at zero, so an opcode can index a name table directly.
enum Op {
  EXEC_TEST,
  BRANCH,
  IMM,
  USER_SGPR,
  VBUFFER,
  SAMPLER,
  TBUFFER,
  POINTER,
  OMOD,
  NEG_ABS,
  PS_INPUT_VGPR,
  PS_COMP_SWAP,
  VS_GET_INDEX,
  RESOURCE_PHI,

  OpCount,
};

/// Returns the printable mnemonic for \p op, or nullptr when \p op is not a
/// valid amdgpu::Op value.
inline const char *getInstructionName(unsigned op) {
  static const char *const kNames[OpCount] = {
      "exec_test",    "branch",       "imm",           "user_sgpr",
      "vbuffer",      "sampler",      "tbuffer",       "pointer",
      "omod",         "neg_abs",      "ps_input_vgpr", "ps_comp_swap",
      "vs_get_index", "resource_phi",
  };

  return op < OpCount ? kNames[op] : nullptr;
}
} // namespace shader::ir::amdgpu
|
||||
193
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/builtin.hpp
Normal file
193
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/builtin.hpp
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
#pragma once
|
||||
#include "../ir/Block.hpp"
|
||||
#include "../ir/Builder.hpp"
|
||||
#include "../ir/Value.hpp"
|
||||
|
||||
namespace shader::ir {
// Primary template: Kind::Count means "T is not a registered dialect opcode
// enum". Each dialect specializes this in dialect.hpp to advertise its Kind,
// which gates the generic builders/comparisons that require a real dialect.
template <typename T> inline constexpr Kind kOpToKind = Kind::Count;
} // namespace shader::ir
|
||||
|
||||
namespace shader::ir::builtin {
|
||||
enum Op {
|
||||
INVALID_INSTRUCTION,
|
||||
BLOCK,
|
||||
IF_ELSE,
|
||||
LOOP,
|
||||
};
|
||||
|
||||
inline const char *getInstructionName(unsigned id) {
|
||||
switch (id) {
|
||||
case INVALID_INSTRUCTION:
|
||||
return "<invalid instruction>";
|
||||
|
||||
case BLOCK:
|
||||
return "block";
|
||||
|
||||
case IF_ELSE:
|
||||
return "ifElse";
|
||||
|
||||
case LOOP:
|
||||
return "loop";
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <typename ImplT>
|
||||
struct Builder : BuilderFacade<Builder<ImplT>, ImplT> {
|
||||
/**
|
||||
* Creates an invalid instruction with the given location.
|
||||
*
|
||||
* @param location the location of the instruction
|
||||
*
|
||||
* @return the created invalid instruction
|
||||
*/
|
||||
Instruction createInvalidInstruction(Location location) {
|
||||
return this->template create<Instruction>(location, Kind::Builtin,
|
||||
INVALID_INSTRUCTION);
|
||||
}
|
||||
|
||||
Instruction createIfElse(Location location, Value cond, Block ifTrue,
|
||||
Block ifFalse = {}) {
|
||||
std::vector<Operand> operands = {{cond, ifTrue}};
|
||||
if (ifFalse) {
|
||||
operands.push_back(ifFalse);
|
||||
}
|
||||
return this->template create<Instruction>(location, Kind::Builtin, IF_ELSE,
|
||||
operands);
|
||||
}
|
||||
|
||||
Instruction createLoop(Location location, Block body) {
|
||||
return this->template create<Instruction>(location, Kind::Builtin, IF_ELSE,
|
||||
{{body}});
|
||||
}
|
||||
|
||||
auto createBlock(Location location) {
|
||||
return this->template create<Block>(location);
|
||||
}
|
||||
|
||||
auto createRegion(Location location) {
|
||||
return this->getContext().template create<Region>(location);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an instruction with the given location, kind, op, and operands.
|
||||
*
|
||||
* @param location the location of the instruction
|
||||
* @param kind the kind of the instruction
|
||||
* @param op the opcode of the instruction
|
||||
* @param operands the operands of the instruction
|
||||
*
|
||||
* @return the created instruction
|
||||
*/
|
||||
Instruction createInstruction(Location location, Kind kind, unsigned op,
|
||||
std::span<const Operand> operands = {}) {
|
||||
return this->template create<Instruction>(location, kind, op, operands);
|
||||
}
|
||||
|
||||
template <typename OpT>
|
||||
Instruction createInstruction(Location location, OpT &&op,
|
||||
std::span<const Operand> operands = {})
|
||||
requires requires {
|
||||
this->template create<Instruction>(
|
||||
location, kOpToKind<std::remove_cvref_t<OpT>>, op, operands);
|
||||
}
|
||||
{
|
||||
return this->template create<Instruction>(
|
||||
location, kOpToKind<std::remove_cvref_t<OpT>>, op, operands);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an Instruction object with the given location, kind, opcode, and
|
||||
* operands.
|
||||
*
|
||||
* @param location the location of the instruction
|
||||
* @param kind the kind of the instruction
|
||||
* @param op the opcode of the instruction
|
||||
* @param operands variadic parameter pack of operands for the instruction
|
||||
*
|
||||
* @return the created Instruction object
|
||||
*/
|
||||
template <typename... T>
|
||||
Instruction createInstruction(Location location, Kind kind, unsigned op,
|
||||
T &&...operands)
|
||||
requires requires {
|
||||
createInstruction(location, kind, op,
|
||||
{{Operand(std::forward<T>(operands))...}});
|
||||
}
|
||||
{
|
||||
return createInstruction(location, kind, op,
|
||||
{{Operand(std::forward<T>(operands))...}});
|
||||
}
|
||||
|
||||
template <typename OpT, typename... T>
|
||||
Instruction createInstruction(Location location, OpT &&op, T &&...operands)
|
||||
requires requires {
|
||||
createInstruction(location, std::forward<OpT>(op),
|
||||
{{Operand(std::forward<T>(operands))...}});
|
||||
}
|
||||
{
|
||||
return createInstruction(location, std::forward<OpT>(op),
|
||||
{{Operand(std::forward<T>(operands))...}});
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Value object with the given location, kind, opcode, and operands.
|
||||
*
|
||||
* @param location the location of the Value object
|
||||
* @param kind the kind of the Value object
|
||||
* @param op the opcode of the Value object
|
||||
* @param operands a span of operands for the Value object
|
||||
*
|
||||
* @return the created Value object
|
||||
*/
|
||||
auto createValue(Location location, Kind kind, unsigned op,
|
||||
std::span<const Operand> operands = {}) {
|
||||
return this->template create<Value>(location, kind, op, operands);
|
||||
}
|
||||
|
||||
template <typename OpT>
|
||||
auto createValue(Location location, OpT &&op,
|
||||
std::span<const Operand> operands = {})
|
||||
requires requires {
|
||||
this->template create<Value>(
|
||||
location, kOpToKind<std::remove_cvref_t<OpT>>, op, operands);
|
||||
}
|
||||
{
|
||||
return this->template create<Value>(
|
||||
location, kOpToKind<std::remove_cvref_t<OpT>>, op, operands);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Value object with the given location, kind, opcode, and operands.
|
||||
*
|
||||
* @param location the location of the Value object
|
||||
* @param kind the kind of the Value object
|
||||
* @param op the opcode of the Value object
|
||||
* @param operands variadic parameter pack of operands for the Value object
|
||||
*
|
||||
* @return the created Value object
|
||||
*/
|
||||
template <typename... T>
|
||||
auto createValue(Location location, Kind kind, unsigned op, T &&...operands)
|
||||
requires requires {
|
||||
createValue(location, kind, op,
|
||||
{{Operand(std::forward<T>(operands))...}});
|
||||
}
|
||||
{
|
||||
return createValue(location, kind, op,
|
||||
{{Operand(std::forward<T>(operands))...}});
|
||||
}
|
||||
|
||||
template <typename OpT, typename... T>
|
||||
requires requires { kOpToKind<std::remove_cvref_t<OpT>>; }
|
||||
auto createValue(Location location, OpT &&op, T &&...operands)
|
||||
requires requires {
|
||||
createValue(location, std::forward<OpT>(op),
|
||||
{{Operand(std::forward<T>(operands))...}});
|
||||
}
|
||||
{
|
||||
return createValue(location, std::forward<OpT>(op),
|
||||
{{Operand(std::forward<T>(operands))...}});
|
||||
}
|
||||
};
|
||||
} // namespace shader::ir::builtin
|
||||
294
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/ds.hpp
Normal file
294
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/ds.hpp
Normal file
|
|
@ -0,0 +1,294 @@
|
|||
#pragma once

namespace shader::ir::ds {
// Opcodes of the DS (LDS/GDS data-share) instruction dialect. Explicit
// initializers pin the encoding gaps, so values match the hardware opcodes.
enum Op {
  ADD_U32,
  SUB_U32,
  RSUB_U32,
  INC_U32,
  DEC_U32,
  MIN_I32,
  MAX_I32,
  MIN_U32,
  MAX_U32,
  AND_B32,
  OR_B32,
  XOR_B32,
  MSKOR_B32,
  WRITE_B32,
  WRITE2_B32,
  WRITE2ST64_B32,
  CMPST_B32,
  CMPST_F32,
  MIN_F32,
  MAX_F32,
  NOP,
  GWS_SEMA_RELEASE_ALL = 24,
  GWS_INIT,
  GWS_SEMA_V,
  GWS_SEMA_BR,
  GWS_SEMA_P,
  GWS_BARRIER,
  WRITE_B8,
  WRITE_B16,
  ADD_RTN_U32,
  SUB_RTN_U32,
  RSUB_RTN_U32,
  INC_RTN_U32,
  DEC_RTN_U32,
  MIN_RTN_I32,
  MAX_RTN_I32,
  MIN_RTN_U32,
  MAX_RTN_U32,
  AND_RTN_B32,
  OR_RTN_B32,
  XOR_RTN_B32,
  MSKOR_RTN_B32,
  WRXCHG_RTN_B32,
  WRXCHG2_RTN_B32,
  WRXCHG2ST64_RTN_B32,
  CMPST_RTN_B32,
  CMPST_RTN_F32,
  MIN_RTN_F32,
  MAX_RTN_F32,
  WRAP_RTN_B32,
  SWIZZLE_B32,
  READ_B32,
  READ2_B32,
  READ2ST64_B32,
  READ_I8,
  READ_U8,
  READ_I16,
  READ_U16,
  CONSUME,
  APPEND,
  ORDERED_COUNT,
  ADD_U64,
  SUB_U64,
  RSUB_U64,
  INC_U64,
  DEC_U64,
  MIN_I64,
  MAX_I64,
  MIN_U64,
  MAX_U64,
  AND_B64,
  OR_B64,
  XOR_B64,
  MSKOR_B64,
  WRITE_B64,
  WRITE2_B64,
  WRITE2ST64_B64,
  CMPST_B64,
  CMPST_F64,
  MIN_F64,
  MAX_F64,
  ADD_RTN_U64 = 96,
  SUB_RTN_U64,
  RSUB_RTN_U64,
  INC_RTN_U64,
  DEC_RTN_U64,
  MIN_RTN_I64,
  MAX_RTN_I64,
  MIN_RTN_U64,
  MAX_RTN_U64,
  AND_RTN_B64,
  OR_RTN_B64,
  XOR_RTN_B64,
  MSKOR_RTN_B64,
  WRXCHG_RTN_B64,
  WRXCHG2_RTN_B64,
  WRXCHG2ST64_RTN_B64,
  CMPST_RTN_B64,
  CMPST_RTN_F64,
  MIN_RTN_F64,
  MAX_RTN_F64,
  READ_B64 = 118,
  READ2_B64,
  READ2ST64_B64,
  CONDXCHG32_RTN_B64 = 126,
  ADD_SRC2_U32 = 128,
  SUB_SRC2_U32,
  RSUB_SRC2_U32,
  INC_SRC2_U32,
  DEC_SRC2_U32,
  MIN_SRC2_I32,
  MAX_SRC2_I32,
  MIN_SRC2_U32,
  MAX_SRC2_U32,
  AND_SRC2_B32,
  OR_SRC2_B32,
  XOR_SRC2_B32,
  WRITE_SRC2_B32,
  MIN_SRC2_F32 = 146,
  MAX_SRC2_F32,
  ADD_SRC2_U64 = 192,
  SUB_SRC2_U64,
  RSUB_SRC2_U64,
  INC_SRC2_U64,
  DEC_SRC2_U64,
  MIN_SRC2_I64,
  MAX_SRC2_I64,
  MIN_SRC2_U64,
  MAX_SRC2_U64,
  AND_SRC2_B64,
  OR_SRC2_B64,
  XOR_SRC2_B64,
  WRITE_SRC2_B64,
  MIN_SRC2_F64 = 210,
  MAX_SRC2_F64,
  WRITE_B96 = 222,
  WRITE_B128,
  CONDXCHG32_RTN_B128 = 253,
  READ_B96,
  READ_B128,

  OpCount
};

/// Returns the assembly mnemonic for a DS opcode, or nullptr when the value
/// falls into one of the encoding gaps (or is out of range).
inline const char *getInstructionName(unsigned id) {
  switch (id) {
  case ADD_U32:
    return "ds_add_u32";
  case SUB_U32:
    return "ds_sub_u32";
  case RSUB_U32:
    return "ds_rsub_u32";
  case INC_U32:
    return "ds_inc_u32";
  case DEC_U32:
    return "ds_dec_u32";
  case MIN_I32:
    return "ds_min_i32";
  case MAX_I32:
    return "ds_max_i32";
  case MIN_U32:
    return "ds_min_u32";
  case MAX_U32:
    return "ds_max_u32";
  case AND_B32:
    return "ds_and_b32";
  case OR_B32:
    return "ds_or_b32";
  case XOR_B32:
    return "ds_xor_b32";
  case MSKOR_B32:
    return "ds_mskor_b32";
  case WRITE_B32:
    return "ds_write_b32";
  case WRITE2_B32:
    return "ds_write2_b32";
  case WRITE2ST64_B32:
    return "ds_write2st64_b32";
  case CMPST_B32:
    return "ds_cmpst_b32";
  case CMPST_F32:
    return "ds_cmpst_f32";
  case MIN_F32:
    return "ds_min_f32";
  case MAX_F32:
    return "ds_max_f32";
  case NOP:
    return "ds_nop";
  case GWS_SEMA_RELEASE_ALL:
    return "ds_gws_sema_release_all";
  case GWS_INIT:
    return "ds_gws_init";
  case GWS_SEMA_V:
    return "ds_gws_sema_v";
  case GWS_SEMA_BR:
    return "ds_gws_sema_br";
  case GWS_SEMA_P:
    return "ds_gws_sema_p";
  case GWS_BARRIER:
    return "ds_gws_barrier";
  case WRITE_B8:
    return "ds_write_b8";
  case WRITE_B16:
    return "ds_write_b16";
  case ADD_RTN_U32:
    return "ds_add_rtn_u32";
  case SUB_RTN_U32:
    return "ds_sub_rtn_u32";
  case RSUB_RTN_U32:
    return "ds_rsub_rtn_u32";
  case INC_RTN_U32:
    return "ds_inc_rtn_u32";
  case DEC_RTN_U32:
    return "ds_dec_rtn_u32";
  case MIN_RTN_I32:
    return "ds_min_rtn_i32";
  case MAX_RTN_I32:
    return "ds_max_rtn_i32";
  case MIN_RTN_U32:
    return "ds_min_rtn_u32";
  case MAX_RTN_U32:
    return "ds_max_rtn_u32";
  case AND_RTN_B32:
    return "ds_and_rtn_b32";
  case OR_RTN_B32:
    return "ds_or_rtn_b32";
  case XOR_RTN_B32:
    return "ds_xor_rtn_b32";
  case MSKOR_RTN_B32:
    return "ds_mskor_rtn_b32";
  case WRXCHG_RTN_B32:
    return "ds_wrxchg_rtn_b32";
  case WRXCHG2_RTN_B32:
    return "ds_wrxchg2_rtn_b32";
  case WRXCHG2ST64_RTN_B32:
    return "ds_wrxchg2st64_rtn_b32";
  case CMPST_RTN_B32:
    return "ds_cmpst_rtn_b32";
  case CMPST_RTN_F32:
    return "ds_cmpst_rtn_f32";
  case MIN_RTN_F32:
    return "ds_min_rtn_f32";
  case MAX_RTN_F32:
    return "ds_max_rtn_f32";
  case WRAP_RTN_B32:
    return "ds_wrap_rtn_b32";
  case SWIZZLE_B32:
    return "ds_swizzle_b32";
  case READ_B32:
    return "ds_read_b32";
  case READ2_B32:
    return "ds_read2_b32";
  case READ2ST64_B32:
    return "ds_read2st64_b32";
  case READ_I8:
    return "ds_read_i8";
  case READ_U8:
    return "ds_read_u8";
  case READ_I16:
    return "ds_read_i16";
  case READ_U16:
    return "ds_read_u16";
  case CONSUME:
    return "ds_consume";
  case APPEND:
    return "ds_append";
  case ORDERED_COUNT:
    return "ds_ordered_count";
  case ADD_U64:
    return "ds_add_u64";
  case SUB_U64:
    return "ds_sub_u64";
  case RSUB_U64:
    return "ds_rsub_u64";
  case INC_U64:
    return "ds_inc_u64";
  case DEC_U64:
    return "ds_dec_u64";
  case MIN_I64:
    return "ds_min_i64";
  case MAX_I64:
    return "ds_max_i64";
  case MIN_U64:
    return "ds_min_u64";
  case MAX_U64:
    return "ds_max_u64";
  case AND_B64:
    return "ds_and_b64";
  case OR_B64:
    return "ds_or_b64";
  case XOR_B64:
    return "ds_xor_b64";
  case MSKOR_B64:
    return "ds_mskor_b64";
  case WRITE_B64:
    return "ds_write_b64";
  case WRITE2_B64:
    return "ds_write2_b64";
  case WRITE2ST64_B64:
    return "ds_write2st64_b64";
  case CMPST_B64:
    return "ds_cmpst_b64";
  case CMPST_F64:
    return "ds_cmpst_f64";
  case MIN_F64:
    return "ds_min_f64";
  case MAX_F64:
    return "ds_max_f64";
  case ADD_RTN_U64:
    return "ds_add_rtn_u64";
  case SUB_RTN_U64:
    return "ds_sub_rtn_u64";
  case RSUB_RTN_U64:
    return "ds_rsub_rtn_u64";
  case INC_RTN_U64:
    return "ds_inc_rtn_u64";
  case DEC_RTN_U64:
    return "ds_dec_rtn_u64";
  case MIN_RTN_I64:
    return "ds_min_rtn_i64";
  case MAX_RTN_I64:
    return "ds_max_rtn_i64";
  case MIN_RTN_U64:
    return "ds_min_rtn_u64";
  case MAX_RTN_U64:
    return "ds_max_rtn_u64";
  case AND_RTN_B64:
    return "ds_and_rtn_b64";
  case OR_RTN_B64:
    return "ds_or_rtn_b64";
  case XOR_RTN_B64:
    return "ds_xor_rtn_b64";
  case MSKOR_RTN_B64:
    return "ds_mskor_rtn_b64";
  case WRXCHG_RTN_B64:
    return "ds_wrxchg_rtn_b64";
  case WRXCHG2_RTN_B64:
    return "ds_wrxchg2_rtn_b64";
  case WRXCHG2ST64_RTN_B64:
    return "ds_wrxchg2st64_rtn_b64";
  case CMPST_RTN_B64:
    return "ds_cmpst_rtn_b64";
  case CMPST_RTN_F64:
    return "ds_cmpst_rtn_f64";
  case MIN_RTN_F64:
    return "ds_min_rtn_f64";
  case MAX_RTN_F64:
    return "ds_max_rtn_f64";
  case READ_B64:
    return "ds_read_b64";
  case READ2_B64:
    return "ds_read2_b64";
  case READ2ST64_B64:
    return "ds_read2st64_b64";
  case CONDXCHG32_RTN_B64:
    return "ds_condxchg32_rtn_b64";
  case ADD_SRC2_U32:
    return "ds_add_src2_u32";
  case SUB_SRC2_U32:
    return "ds_sub_src2_u32";
  case RSUB_SRC2_U32:
    return "ds_rsub_src2_u32";
  case INC_SRC2_U32:
    return "ds_inc_src2_u32";
  case DEC_SRC2_U32:
    return "ds_dec_src2_u32";
  case MIN_SRC2_I32:
    return "ds_min_src2_i32";
  case MAX_SRC2_I32:
    return "ds_max_src2_i32";
  case MIN_SRC2_U32:
    return "ds_min_src2_u32";
  case MAX_SRC2_U32:
    return "ds_max_src2_u32";
  case AND_SRC2_B32:
    return "ds_and_src2_b32";
  case OR_SRC2_B32:
    return "ds_or_src2_b32";
  case XOR_SRC2_B32:
    return "ds_xor_src2_b32";
  case WRITE_SRC2_B32:
    return "ds_write_src2_b32";
  case MIN_SRC2_F32:
    return "ds_min_src2_f32";
  case MAX_SRC2_F32:
    return "ds_max_src2_f32";
  case ADD_SRC2_U64:
    return "ds_add_src2_u64";
  case SUB_SRC2_U64:
    return "ds_sub_src2_u64";
  case RSUB_SRC2_U64:
    return "ds_rsub_src2_u64";
  case INC_SRC2_U64:
    return "ds_inc_src2_u64";
  case DEC_SRC2_U64:
    return "ds_dec_src2_u64";
  case MIN_SRC2_I64:
    return "ds_min_src2_i64";
  case MAX_SRC2_I64:
    return "ds_max_src2_i64";
  case MIN_SRC2_U64:
    return "ds_min_src2_u64";
  case MAX_SRC2_U64:
    return "ds_max_src2_u64";
  case AND_SRC2_B64:
    return "ds_and_src2_b64";
  case OR_SRC2_B64:
    return "ds_or_src2_b64";
  case XOR_SRC2_B64:
    return "ds_xor_src2_b64";
  case WRITE_SRC2_B64:
    return "ds_write_src2_b64";
  case MIN_SRC2_F64:
    return "ds_min_src2_f64";
  case MAX_SRC2_F64:
    return "ds_max_src2_f64";
  case WRITE_B96:
    return "ds_write_b96";
  case WRITE_B128:
    return "ds_write_b128";
  case CONDXCHG32_RTN_B128:
    return "ds_condxchg32_rtn_b128";
  case READ_B96:
    return "ds_read_b96";
  case READ_B128:
    return "ds_read_b128";
  }
  return nullptr;
}
} // namespace shader::ir::ds
|
||||
11
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/exp.hpp
Normal file
11
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/exp.hpp
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
#pragma once

namespace shader::ir::exp {
// The EXP dialect consists of a single opcode.
enum Op {
  EXP = 0,

  OpCount
};

/// All exp-dialect instructions share one mnemonic; the opcode argument is
/// deliberately ignored.
inline const char *getInstructionName(unsigned /*op*/) {
  return "exp";
}
} // namespace shader::ir::exp
|
||||
423
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/memssa.hpp
Normal file
423
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/memssa.hpp
Normal file
|
|
@ -0,0 +1,423 @@
|
|||
#pragma once
|
||||
|
||||
#include "../ir/Block.hpp"
|
||||
#include "../ir/Builder.hpp"
|
||||
#include "../ir/Value.hpp"
|
||||
#include "../ir/ValueImpl.hpp"
|
||||
|
||||
namespace shader::ir::memssa {
|
||||
// Opcodes of the memory-SSA dialect used to model memory dependencies on top
// of the main IR.
enum Op {
  OpVar,     // a tracked memory variable (may have component variables)
  OpDef,     // a definition (write) of a variable, linked to an IR instruction
  OpPhi,     // merge point for defs arriving from multiple scopes
  OpUse,     // a read of a variable, linked to an IR instruction
  OpBarrier, // acts as a definition of every variable (see findVarDef)
  OpJump,    // scope terminator listing successor scopes
  OpExit,    // scope terminator with no successors

  OpCount,
};
|
||||
|
||||
// Mixin shared by all memory-SSA node impls: stores the IR instruction this
// node mirrors and appends it when printing.
template <typename BaseT> struct BaseImpl : BaseT {
  // The IR instruction this memory-SSA node was created from; may be null.
  Instruction link;

  using BaseT::BaseT;
  using BaseT::operator=;

  void print(std::ostream &os, NameStorage &ns) const override {
    BaseT::print(os, ns);

    // Show the linked IR instruction after the node itself, e.g. "def : <inst>".
    if (link) {
      os << " : ";
      link.print(os, ns);
    }
  }
};
|
||||
|
||||
// Handle wrapper that exposes the linked IR instruction stored in BaseImpl.
template <typename ImplT, template <typename> typename BaseT>
struct BaseWrapper : BaseT<ImplT> {
  using BaseT<ImplT>::BaseT;
  using BaseT<ImplT>::operator=;

  // Returns the IR instruction this memory-SSA node mirrors (may be null).
  Instruction getLinkedInst() const { return this->impl->link; }
};
|
||||
|
||||
// Impl for OpDef nodes (a value, so uses can reference it).
struct DefImpl : BaseImpl<ValueImpl> {
  using BaseImpl::BaseImpl;
  using BaseImpl::operator=;

  Node clone(Context &context, CloneMap &map) const override;
};
// Impl for OpUse nodes (a plain instruction; produces no value).
struct UseImpl : BaseImpl<InstructionImpl> {
  using BaseImpl::BaseImpl;
  using BaseImpl::operator=;

  Node clone(Context &context, CloneMap &map) const override;
};
// Impl for OpVar nodes; operands are the variable's component variables.
struct VarImpl : BaseImpl<ValueImpl> {
  using BaseImpl::BaseImpl;
  using BaseImpl::operator=;

  Node clone(Context &context, CloneMap &map) const override;
};
// Impl for OpPhi nodes; a phi is a kind of def, so it derives from DefImpl.
struct PhiImpl : DefImpl {
  using DefImpl::DefImpl;
  using DefImpl::operator=;

  Node clone(Context &context, CloneMap &map) const override;
};

using Use = BaseWrapper<UseImpl, InstructionWrapper>;
using Var = BaseWrapper<VarImpl, ValueWrapper>;
|
||||
|
||||
// Wrapper for definition nodes (OpDef / OpPhi / OpBarrier).
template <typename ImplT> struct DefWrapper : BaseWrapper<ImplT, ValueWrapper> {
  using BaseWrapper<ImplT, ValueWrapper>::BaseWrapper;
  using BaseWrapper<ImplT, ValueWrapper>::operator=;

  /// Registers \p variable as the target of this definition.
  ///
  /// Operand 0 becomes the root variable; every component variable that is
  /// transitively reachable from it is appended as an additional operand, so
  /// a def of a composite variable also counts as a def of each of its parts
  /// (mirroring the traversal in ScopeWrapper::findVarDef's buildMatchList).
  void addVariable(Var variable) {
    this->addOperand(variable);

    std::vector<Var> workList;

    // Appends the direct components of `var` as operands and queues
    // composite components for further expansion.
    auto expand = [&](Var var) {
      for (auto &comp : var.getOperands()) {
        auto compVar = comp.getAsValue().staticCast<Var>();
        this->addOperand(compVar);

        if (compVar.getOperandCount() > 1) {
          // Bug fix: previously `var` (the node currently being expanded)
          // was pushed here instead of `compVar`, which skipped nested
          // components and could spin forever re-processing the same node.
          workList.push_back(compVar);
        } else if (compVar.getOperandCount() == 1) {
          this->addOperand(
              compVar.getOperand(0).getAsValue().staticCast<Var>());
        }
      }
    };

    expand(variable);

    while (!workList.empty()) {
      auto next = workList.back();
      workList.pop_back();
      expand(next);
    }
  }

  /// Returns the variable this def directly targets (operand 0).
  Var getRootVar() {
    return this->getOperand(0).getAsValue().template staticCast<Var>();
  }

  /// Returns the variable stored at operand \p index.
  Var getVar(std::size_t index) {
    return this->getOperand(index).getAsValue().template staticCast<Var>();
  }
};
|
||||
|
||||
// Impl for memory-SSA scopes: a block of memory-SSA nodes mirroring one IR
// region, linked to the IR instruction that labels it.
struct ScopeImpl : BaseImpl<ir::BlockImpl> {
  using BaseImpl::BaseImpl;
  using BaseImpl::operator=;

  Node clone(Context &context, CloneMap &map) const override;
};

template <typename ImplT> struct ScopeWrapper;

using Scope = ScopeWrapper<ScopeImpl>;
using Def = DefWrapper<DefImpl>;

// Barrier nodes behave like defs of every variable; they reuse PhiImpl as
// their impl (see Barrier alias below).
template <typename ImplT> struct BarrierWrapper : DefWrapper<ImplT> {
  using DefWrapper<ImplT>::DefWrapper;
  using DefWrapper<ImplT>::operator=;
};

using Barrier = BarrierWrapper<PhiImpl>;
|
||||
|
||||
// Wrapper for memory-SSA scopes: provides CFG-style queries (successors and
// predecessors via OpJump terminators) and definition lookup.
template <typename ImplT>
struct ScopeWrapper : BaseWrapper<ImplT, ir::BlockWrapper> {
  using BaseWrapper<ImplT, ir::BlockWrapper>::BaseWrapper;
  using BaseWrapper<ImplT, ir::BlockWrapper>::operator=;

  // Returns the unique successor scope, or a null handle when the scope is
  // empty, does not end in an OpJump, or jumps to more than one target.
  Scope getSingleSuccessor() {
    if (this->empty()) {
      return {};
    }
    auto terminator = this->getLast();
    if (terminator.getKind() != Kind::MemSSA || terminator.getOp() != OpJump) {
      return {};
    }
    if (terminator.getOperandCount() != 1) {
      return {};
    }

    return terminator.getOperand(0).getAsValue().template cast<Scope>();
  }

  // Returns all successor scopes listed by the terminating OpJump; empty when
  // there is no OpJump terminator.
  std::vector<Scope> getSuccessors() {
    if (this->empty()) {
      return {};
    }
    auto terminator = this->getLast();
    if (terminator.getKind() != Kind::MemSSA || terminator.getOp() != OpJump) {
      return {};
    }

    std::vector<Scope> result;
    result.reserve(terminator.getOperandCount());
    for (auto &successor : terminator.getOperands()) {
      if (auto block = successor.getAsValue().template cast<Scope>()) {
        result.push_back(block);
      }
    }
    return result;
  }

  // Returns the set of scopes whose OpJump terminator targets this scope.
  auto getPredecessors() {
    std::set<Scope> predecessors;
    for (auto &use : this->getUseList()) {
      // Only jumps establish predecessor edges; skip other users.
      if (use.user != OpJump) {
        continue;
      }

      if (auto userParent = use.user.getParent().template cast<Scope>()) {
        predecessors.insert(userParent);
      }
    }
    return predecessors;
  }

  // Returns the unique predecessor scope; a null handle when there are none
  // or more than one distinct predecessor.
  auto getSinglePredecessor() {
    Scope predecessor;

    for (auto &use : this->getUseList()) {
      if (use.user != OpJump) {
        continue;
      }

      if (auto userParent = use.user.getParent().template cast<Scope>()) {
        if (predecessor == nullptr) {
          predecessor = userParent;
        } else if (predecessor != userParent) {
          // Second distinct predecessor found -> not single.
          return Scope(nullptr);
        }
      }
    }

    return predecessor;
  }

  // Walks backwards from `point` (default: end of scope) and returns the
  // nearest def/phi of `var` — or of any variable transitively related to it
  // through component links — within this scope. A barrier matches any
  // variable. Returns a null handle if no definition precedes `point` here.
  Def findVarDef(Var var, Instruction point = nullptr) {
    if (point == nullptr) {
      point = this->getLast();
    }

    // Lazily-built set of variables related to `var` (components, nested
    // components, and single-component parents).
    std::optional<std::set<Var>> compList;

    auto buildMatchList = [&] {
      std::set<Var> result;
      std::vector<Var> workList;

      for (auto comp : var.getOperands()) {
        auto compVar = comp.getAsValue().staticCast<Var>();
        result.insert(compVar);

        if (compVar.getOperandCount() > 1) {
          workList.push_back(compVar);
        } else if (compVar.getOperandCount() == 1) {
          result.insert(compVar.getOperand(0).getAsValue().staticCast<Var>());
        }
      }

      while (!workList.empty()) {
        auto var = workList.back();
        workList.pop_back();

        for (auto comp : var.getOperands()) {
          auto compVar = comp.getAsValue().staticCast<Var>();
          result.insert(compVar);

          if (compVar.getOperandCount() > 1) {
            workList.push_back(compVar);
          } else if (compVar.getOperandCount() == 1) {
            result.insert(compVar.getOperand(0).getAsValue().staticCast<Var>());
          }
        }
      }

      return result;
    };

    for (auto child : revRange(point)) {
      if (child.getKind() != Kind::MemSSA) {
        continue;
      }

      if (child.getOp() == OpDef || child.getOp() == OpPhi) {
        // Exact match on the def's root variable (operand 0).
        if (child.getOperand(0) == var) {
          return child.template staticCast<Def>();
        }

        if (!compList) {
          compList = buildMatchList();
        }

        if (compList->empty()) {
          continue;
        }

        // A def of a related component variable also defines `var`.
        if (compList->contains(
                child.getOperand(0).getAsValue().staticCast<Var>())) {
          return child.template staticCast<Def>();
        }
      }

      if (child.getOp() == OpBarrier) {
        // barrier is definition for everything
        return child.template staticCast<Def>();
      }
    }

    return {};
  }
};
|
||||
|
||||
// Wrapper for OpPhi nodes. Operand layout: operand 0 is the variable being
// merged, followed by (scope, def) pairs — the incoming definition per
// predecessor scope.
template <typename ImplT> struct PhiWrapper : ValueWrapper<ImplT> {
  using ValueWrapper<ImplT>::ValueWrapper;
  using ValueWrapper<ImplT>::operator=;

  // Appends a new (scope, def) incoming pair without checking for duplicates.
  void addValue(Scope scope, Def def) {
    this->addOperand(scope);
    this->addOperand(def);
  }

  // Set value for specified block or add new node
  // Returns true if node was added
  bool setValue(Scope pred, Def def) {
    // Scopes live at odd operand indices; the matching def follows at i + 1.
    for (std::size_t i = 1, end = this->getOperandCount(); i < end; i += 2) {
      if (pred == this->getOperand(i).getAsValue()) {
        this->replaceOperand(i + 1, def);
        return false;
      }
    }

    addValue(pred, def);
    return true;
  }

  // Returns the incoming def for predecessor `pred`, or null if absent.
  Def getDef(Scope pred) {
    for (std::size_t i = 1, end = this->getOperandCount(); i < end; i += 2) {
      if (pred == this->getOperand(i).getAsValue()) {
        return this->getOperand(i + 1).getAsValue().template staticCast<Def>();
      }
    }

    return {};
  }

  // True when the phi has no incoming (scope, def) pairs yet.
  bool empty() { return this->getOperandCount() < 2; }

  // If every incoming edge carries the same def, returns it; otherwise null.
  Def getUniqDef() {
    if (empty()) {
      return {};
    }

    // First incoming def sits at operand 2; subsequent defs at 4, 6, ...
    Def result = this->getOperand(2).getAsValue().template staticCast<Def>();

    for (std::size_t i = 4, end = this->getOperandCount(); i < end; i += 2) {
      if (this->getOperand(i) != result) {
        return {};
      }
    }

    return result;
  }

  // Returns the variable this phi merges (operand 0).
  Var getVar() {
    return this->getOperand(0).getAsValue().template staticCast<Var>();
  }
};

using Phi = PhiWrapper<PhiImpl>;
|
||||
|
||||
template <typename ImplT>
|
||||
struct Builder : BuilderFacade<Builder<ImplT>, ImplT> {
|
||||
Def createDef(Instruction defInst, Var var) {
|
||||
auto result =
|
||||
this->template create<Def>(defInst.getLocation(), Kind::MemSSA, OpDef);
|
||||
result.impl->link = defInst;
|
||||
result.addOperand(var);
|
||||
return result;
|
||||
}
|
||||
|
||||
Scope createScope(ir::Instruction labelInst) {
|
||||
Scope result = this->template create<Scope>(labelInst.getLocation());
|
||||
result.impl->link = labelInst;
|
||||
return result;
|
||||
}
|
||||
|
||||
Phi createPhi(Var var) {
|
||||
auto result =
|
||||
this->template create<Phi>(var.getLocation(), Kind::MemSSA, OpPhi);
|
||||
result.addOperand(var);
|
||||
return result;
|
||||
}
|
||||
|
||||
Use createUse(ir::Instruction useInst) {
|
||||
Use result =
|
||||
this->template create<Use>(useInst.getLocation(), Kind::MemSSA, OpUse);
|
||||
result.impl->link = useInst;
|
||||
return result;
|
||||
}
|
||||
|
||||
Use createUse(ir::Instruction useInst, Def def) {
|
||||
auto result = createUse(useInst);
|
||||
result.addOperand(def);
|
||||
return result;
|
||||
}
|
||||
|
||||
Var createVar(ir::Instruction varInst) {
|
||||
Var result =
|
||||
this->template create<Var>(varInst.getLocation(), Kind::MemSSA, OpVar);
|
||||
result.impl->link = varInst;
|
||||
return result;
|
||||
}
|
||||
|
||||
Barrier createBarrier(ir::Instruction barrierInst) {
|
||||
Barrier result = this->template create<Barrier>(barrierInst.getLocation(),
|
||||
Kind::MemSSA, OpBarrier);
|
||||
result.impl->link = barrierInst;
|
||||
return result;
|
||||
}
|
||||
|
||||
Instruction createJump(Location loc) {
|
||||
return this->template create<Instruction>(loc, Kind::MemSSA, OpJump);
|
||||
}
|
||||
|
||||
Instruction createExit(Location loc) {
|
||||
return this->template create<Instruction>(loc, Kind::MemSSA, OpExit);
|
||||
}
|
||||
};
|
||||
|
||||
inline const char *getInstructionName(unsigned op) {
|
||||
switch (op) {
|
||||
case OpVar:
|
||||
return "var";
|
||||
case OpDef:
|
||||
return "def";
|
||||
case OpPhi:
|
||||
return "phi";
|
||||
case OpUse:
|
||||
return "use";
|
||||
case OpBarrier:
|
||||
return "barrier";
|
||||
case OpJump:
|
||||
return "jump";
|
||||
case OpExit:
|
||||
return "exit";
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
} // namespace shader::ir::memssa
|
||||
199
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/mimg.hpp
Normal file
199
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/mimg.hpp
Normal file
|
|
@ -0,0 +1,199 @@
|
|||
#pragma once
|
||||
|
||||
namespace shader::ir::mimg {
// MIMG (vector image memory) opcodes; explicit initializers preserve gaps
// in the opcode numbering.
enum Op {
  LOAD,
  LOAD_MIP,
  LOAD_PCK,
  LOAD_PCK_SGN,
  LOAD_MIP_PCK,
  LOAD_MIP_PCK_SGN,
  STORE = 8,
  STORE_MIP,
  STORE_PCK,
  STORE_MIP_PCK,
  GET_RESINFO = 14,
  ATOMIC_SWAP,
  ATOMIC_CMPSWAP,
  ATOMIC_ADD,
  ATOMIC_SUB,
  ATOMIC_RSUB,
  ATOMIC_SMIN,
  ATOMIC_UMIN,
  ATOMIC_SMAX,
  ATOMIC_UMAX,
  ATOMIC_AND,
  ATOMIC_OR,
  ATOMIC_XOR,
  ATOMIC_INC,
  ATOMIC_DEC,
  ATOMIC_FCMPSWAP,
  ATOMIC_FMIN,
  ATOMIC_FMAX,
  SAMPLE,
  SAMPLE_CL,
  SAMPLE_D,
  SAMPLE_D_CL,
  SAMPLE_L,
  SAMPLE_B,
  SAMPLE_B_CL,
  SAMPLE_LZ,
  SAMPLE_C,
  SAMPLE_C_CL,
  SAMPLE_C_D,
  SAMPLE_C_D_CL,
  SAMPLE_C_L,
  SAMPLE_C_B,
  SAMPLE_C_B_CL,
  SAMPLE_C_LZ,
  SAMPLE_O,
  SAMPLE_CL_O,
  SAMPLE_D_O,
  SAMPLE_D_CL_O,
  SAMPLE_L_O,
  SAMPLE_B_O,
  SAMPLE_B_CL_O,
  SAMPLE_LZ_O,
  SAMPLE_C_O,
  SAMPLE_C_CL_O,
  SAMPLE_C_D_O,
  SAMPLE_C_D_CL_O,
  SAMPLE_C_L_O,
  SAMPLE_C_B_O,
  SAMPLE_C_B_CL_O,
  SAMPLE_C_LZ_O,
  GATHER4,
  GATHER4_CL,
  GATHER4_L = 68,
  GATHER4_B,
  GATHER4_B_CL,
  GATHER4_LZ,
  GATHER4_C,
  GATHER4_C_CL,
  GATHER4_C_L = 76,
  GATHER4_C_B,
  GATHER4_C_B_CL,
  GATHER4_C_LZ,
  GATHER4_O,
  GATHER4_CL_O,
  GATHER4_L_O = 84,
  GATHER4_B_O,
  GATHER4_B_CL_O,
  GATHER4_LZ_O,
  GATHER4_C_O,
  GATHER4_C_CL_O,
  GATHER4_C_L_O = 92,
  GATHER4_C_B_O,
  GATHER4_C_B_CL_O,
  GATHER4_C_LZ_O,
  GET_LOD,
  SAMPLE_CD = 104,
  SAMPLE_CD_CL,
  SAMPLE_C_CD,
  SAMPLE_C_CD_CL,
  SAMPLE_CD_O,
  SAMPLE_CD_CL_O,
  SAMPLE_C_CD_O,
  SAMPLE_C_CD_CL_O,

  OpCount
};

/// Returns the assembly mnemonic for \p id, or nullptr for an unmapped id.
inline const char *getInstructionName(unsigned id) {
  switch (id) {
  case LOAD:
    return "image_load";
  case LOAD_MIP:
    return "image_load_mip";
  case LOAD_PCK:
    return "image_load_pck";
  case LOAD_PCK_SGN:
    return "image_load_pck_sgn";
  case LOAD_MIP_PCK:
    return "image_load_mip_pck";
  case LOAD_MIP_PCK_SGN:
    return "image_load_mip_pck_sgn";
  case STORE:
    return "image_store";
  case STORE_MIP:
    return "image_store_mip";
  case STORE_PCK:
    return "image_store_pck";
  case STORE_MIP_PCK:
    return "image_store_mip_pck";
  case GET_RESINFO:
    return "image_get_resinfo";
  case ATOMIC_SWAP:
    return "image_atomic_swap";
  case ATOMIC_CMPSWAP:
    return "image_atomic_cmpswap";
  case ATOMIC_ADD:
    return "image_atomic_add";
  case ATOMIC_SUB:
    return "image_atomic_sub";
  case ATOMIC_RSUB:
    return "image_atomic_rsub";
  case ATOMIC_SMIN:
    return "image_atomic_smin";
  case ATOMIC_UMIN:
    return "image_atomic_umin";
  case ATOMIC_SMAX:
    return "image_atomic_smax";
  case ATOMIC_UMAX:
    return "image_atomic_umax";
  case ATOMIC_AND:
    return "image_atomic_and";
  case ATOMIC_OR:
    return "image_atomic_or";
  case ATOMIC_XOR:
    return "image_atomic_xor";
  case ATOMIC_INC:
    return "image_atomic_inc";
  case ATOMIC_DEC:
    return "image_atomic_dec";
  case ATOMIC_FCMPSWAP:
    return "image_atomic_fcmpswap";
  case ATOMIC_FMIN:
    return "image_atomic_fmin";
  case ATOMIC_FMAX:
    return "image_atomic_fmax";
  case SAMPLE:
    return "image_sample";
  case SAMPLE_CL:
    return "image_sample_cl";
  case SAMPLE_D:
    return "image_sample_d";
  case SAMPLE_D_CL:
    return "image_sample_d_cl";
  case SAMPLE_L:
    return "image_sample_l";
  case SAMPLE_B:
    return "image_sample_b";
  case SAMPLE_B_CL:
    return "image_sample_b_cl";
  case SAMPLE_LZ:
    return "image_sample_lz";
  case SAMPLE_C:
    return "image_sample_c";
  case SAMPLE_C_CL:
    return "image_sample_c_cl";
  case SAMPLE_C_D:
    return "image_sample_c_d";
  case SAMPLE_C_D_CL:
    return "image_sample_c_d_cl";
  case SAMPLE_C_L:
    return "image_sample_c_l";
  case SAMPLE_C_B:
    return "image_sample_c_b";
  case SAMPLE_C_B_CL:
    return "image_sample_c_b_cl";
  case SAMPLE_C_LZ:
    return "image_sample_c_lz";
  case SAMPLE_O:
    return "image_sample_o";
  case SAMPLE_CL_O:
    return "image_sample_cl_o";
  case SAMPLE_D_O:
    return "image_sample_d_o";
  case SAMPLE_D_CL_O:
    return "image_sample_d_cl_o";
  case SAMPLE_L_O:
    return "image_sample_l_o";
  case SAMPLE_B_O:
    return "image_sample_b_o";
  case SAMPLE_B_CL_O:
    return "image_sample_b_cl_o";
  case SAMPLE_LZ_O:
    return "image_sample_lz_o";
  case SAMPLE_C_O:
    return "image_sample_c_o";
  case SAMPLE_C_CL_O:
    return "image_sample_c_cl_o";
  case SAMPLE_C_D_O:
    return "image_sample_c_d_o";
  case SAMPLE_C_D_CL_O:
    return "image_sample_c_d_cl_o";
  case SAMPLE_C_L_O:
    return "image_sample_c_l_o";
  case SAMPLE_C_B_O:
    return "image_sample_c_b_o";
  case SAMPLE_C_B_CL_O:
    return "image_sample_c_b_cl_o";
  case SAMPLE_C_LZ_O:
    return "image_sample_c_lz_o";
  case GATHER4:
    return "image_gather4";
  case GATHER4_CL:
    return "image_gather4_cl";
  case GATHER4_L:
    return "image_gather4_l";
  case GATHER4_B:
    return "image_gather4_b";
  case GATHER4_B_CL:
    return "image_gather4_b_cl";
  case GATHER4_LZ:
    return "image_gather4_lz";
  case GATHER4_C:
    return "image_gather4_c";
  case GATHER4_C_CL:
    return "image_gather4_c_cl";
  case GATHER4_C_L:
    return "image_gather4_c_l";
  case GATHER4_C_B:
    return "image_gather4_c_b";
  case GATHER4_C_B_CL:
    return "image_gather4_c_b_cl";
  case GATHER4_C_LZ:
    return "image_gather4_c_lz";
  case GATHER4_O:
    return "image_gather4_o";
  case GATHER4_CL_O:
    return "image_gather4_cl_o";
  case GATHER4_L_O:
    return "image_gather4_l_o";
  case GATHER4_B_O:
    return "image_gather4_b_o";
  case GATHER4_B_CL_O:
    return "image_gather4_b_cl_o";
  case GATHER4_LZ_O:
    return "image_gather4_lz_o";
  case GATHER4_C_O:
    return "image_gather4_c_o";
  case GATHER4_C_CL_O:
    return "image_gather4_c_cl_o";
  case GATHER4_C_L_O:
    return "image_gather4_c_l_o";
  case GATHER4_C_B_O:
    return "image_gather4_c_b_o";
  case GATHER4_C_B_CL_O:
    return "image_gather4_c_b_cl_o";
  case GATHER4_C_LZ_O:
    return "image_gather4_c_lz_o";
  case GET_LOD:
    return "image_get_lod";
  case SAMPLE_CD:
    return "image_sample_cd";
  case SAMPLE_CD_CL:
    return "image_sample_cd_cl";
  case SAMPLE_C_CD:
    return "image_sample_c_cd";
  case SAMPLE_C_CD_CL:
    return "image_sample_c_cd_cl";
  case SAMPLE_CD_O:
    return "image_sample_cd_o";
  case SAMPLE_CD_CL_O:
    return "image_sample_cd_cl_o";
  case SAMPLE_C_CD_O:
    return "image_sample_c_cd_o";
  case SAMPLE_C_CD_CL_O:
    return "image_sample_c_cd_cl_o";
  default:
    return nullptr;
  }
}
} // namespace shader::ir::mimg
|
||||
37
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/mtbuf.hpp
Normal file
37
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/mtbuf.hpp
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
#pragma once
|
||||
|
||||
namespace shader::ir::mtbuf {
// MTBUF (typed buffer memory) opcodes.
enum Op {
  LOAD_FORMAT_X,
  LOAD_FORMAT_XY,
  LOAD_FORMAT_XYZ,
  LOAD_FORMAT_XYZW,
  STORE_FORMAT_X,
  STORE_FORMAT_XY,
  STORE_FORMAT_XYZ,
  STORE_FORMAT_XYZW,

  OpCount
};

/// Returns the assembly mnemonic for \p id, or nullptr for an unmapped id.
inline const char *getInstructionName(unsigned id) {
  switch (id) {
  case LOAD_FORMAT_X: return "tbuffer_load_format_x";
  case LOAD_FORMAT_XY: return "tbuffer_load_format_xy";
  case LOAD_FORMAT_XYZ: return "tbuffer_load_format_xyz";
  case LOAD_FORMAT_XYZW: return "tbuffer_load_format_xyzw";
  case STORE_FORMAT_X: return "tbuffer_store_format_x";
  case STORE_FORMAT_XY: return "tbuffer_store_format_xy";
  case STORE_FORMAT_XYZ: return "tbuffer_store_format_xyz";
  case STORE_FORMAT_XYZW: return "tbuffer_store_format_xyzw";
  default: return nullptr;
  }
}
} // namespace shader::ir::mtbuf
|
||||
129
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/mubuf.hpp
Normal file
129
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/mubuf.hpp
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
#pragma once
|
||||
|
||||
namespace shader::ir::mubuf {
// MUBUF (untyped buffer memory) opcodes; explicit initializers preserve
// gaps in the opcode numbering.
enum Op {
  LOAD_FORMAT_X,
  LOAD_FORMAT_XY,
  LOAD_FORMAT_XYZ,
  LOAD_FORMAT_XYZW,
  STORE_FORMAT_X,
  STORE_FORMAT_XY,
  STORE_FORMAT_XYZ,
  STORE_FORMAT_XYZW,
  LOAD_UBYTE,
  LOAD_SBYTE,
  LOAD_USHORT,
  LOAD_SSHORT,
  LOAD_DWORD,
  LOAD_DWORDX2,
  LOAD_DWORDX4,
  LOAD_DWORDX3,
  STORE_BYTE = 24,
  STORE_SHORT = 26,
  STORE_DWORD = 28,
  STORE_DWORDX2,
  STORE_DWORDX4,
  STORE_DWORDX3,
  ATOMIC_SWAP = 48,
  ATOMIC_CMPSWAP,
  ATOMIC_ADD,
  ATOMIC_SUB,
  ATOMIC_RSUB,
  ATOMIC_SMIN,
  ATOMIC_UMIN,
  ATOMIC_SMAX,
  ATOMIC_UMAX,
  ATOMIC_AND,
  ATOMIC_OR,
  ATOMIC_XOR,
  ATOMIC_INC,
  ATOMIC_DEC,
  ATOMIC_FCMPSWAP,
  ATOMIC_FMIN,
  ATOMIC_FMAX,
  ATOMIC_SWAP_X2 = 80,
  ATOMIC_CMPSWAP_X2,
  ATOMIC_ADD_X2,
  ATOMIC_SUB_X2,
  ATOMIC_RSUB_X2,
  ATOMIC_SMIN_X2,
  ATOMIC_UMIN_X2,
  ATOMIC_SMAX_X2,
  ATOMIC_UMAX_X2,
  ATOMIC_AND_X2,
  ATOMIC_OR_X2,
  ATOMIC_XOR_X2,
  ATOMIC_INC_X2,
  ATOMIC_DEC_X2,
  ATOMIC_FCMPSWAP_X2,
  ATOMIC_FMIN_X2,
  ATOMIC_FMAX_X2,
  WBINVL1_SC_VOL = 112,
  WBINVL1,

  OpCount
};

/// Returns the assembly mnemonic for \p id, or nullptr for an unmapped id.
inline const char *getInstructionName(unsigned id) {
  switch (id) {
  case LOAD_FORMAT_X:
    return "buffer_load_format_x";
  case LOAD_FORMAT_XY:
    return "buffer_load_format_xy";
  case LOAD_FORMAT_XYZ:
    return "buffer_load_format_xyz";
  case LOAD_FORMAT_XYZW:
    return "buffer_load_format_xyzw";
  case STORE_FORMAT_X:
    return "buffer_store_format_x";
  case STORE_FORMAT_XY:
    return "buffer_store_format_xy";
  case STORE_FORMAT_XYZ:
    return "buffer_store_format_xyz";
  case STORE_FORMAT_XYZW:
    return "buffer_store_format_xyzw";
  case LOAD_UBYTE:
    return "buffer_load_ubyte";
  case LOAD_SBYTE:
    return "buffer_load_sbyte";
  case LOAD_USHORT:
    return "buffer_load_ushort";
  case LOAD_SSHORT:
    return "buffer_load_sshort";
  case LOAD_DWORD:
    return "buffer_load_dword";
  case LOAD_DWORDX2:
    return "buffer_load_dwordx2";
  case LOAD_DWORDX4:
    return "buffer_load_dwordx4";
  case LOAD_DWORDX3:
    return "buffer_load_dwordx3";
  case STORE_BYTE:
    return "buffer_store_byte";
  case STORE_SHORT:
    return "buffer_store_short";
  case STORE_DWORD:
    return "buffer_store_dword";
  case STORE_DWORDX2:
    return "buffer_store_dwordx2";
  case STORE_DWORDX4:
    return "buffer_store_dwordx4";
  case STORE_DWORDX3:
    return "buffer_store_dwordx3";
  case ATOMIC_SWAP:
    return "buffer_atomic_swap";
  case ATOMIC_CMPSWAP:
    return "buffer_atomic_cmpswap";
  case ATOMIC_ADD:
    return "buffer_atomic_add";
  case ATOMIC_SUB:
    return "buffer_atomic_sub";
  case ATOMIC_RSUB:
    return "buffer_atomic_rsub";
  case ATOMIC_SMIN:
    return "buffer_atomic_smin";
  case ATOMIC_UMIN:
    return "buffer_atomic_umin";
  case ATOMIC_SMAX:
    return "buffer_atomic_smax";
  case ATOMIC_UMAX:
    return "buffer_atomic_umax";
  case ATOMIC_AND:
    return "buffer_atomic_and";
  case ATOMIC_OR:
    return "buffer_atomic_or";
  case ATOMIC_XOR:
    return "buffer_atomic_xor";
  case ATOMIC_INC:
    return "buffer_atomic_inc";
  case ATOMIC_DEC:
    return "buffer_atomic_dec";
  case ATOMIC_FCMPSWAP:
    return "buffer_atomic_fcmpswap";
  case ATOMIC_FMIN:
    return "buffer_atomic_fmin";
  case ATOMIC_FMAX:
    return "buffer_atomic_fmax";
  case ATOMIC_SWAP_X2:
    return "buffer_atomic_swap_x2";
  case ATOMIC_CMPSWAP_X2:
    return "buffer_atomic_cmpswap_x2";
  case ATOMIC_ADD_X2:
    return "buffer_atomic_add_x2";
  case ATOMIC_SUB_X2:
    return "buffer_atomic_sub_x2";
  case ATOMIC_RSUB_X2:
    return "buffer_atomic_rsub_x2";
  case ATOMIC_SMIN_X2:
    return "buffer_atomic_smin_x2";
  case ATOMIC_UMIN_X2:
    return "buffer_atomic_umin_x2";
  case ATOMIC_SMAX_X2:
    return "buffer_atomic_smax_x2";
  case ATOMIC_UMAX_X2:
    return "buffer_atomic_umax_x2";
  case ATOMIC_AND_X2:
    return "buffer_atomic_and_x2";
  case ATOMIC_OR_X2:
    return "buffer_atomic_or_x2";
  case ATOMIC_XOR_X2:
    return "buffer_atomic_xor_x2";
  case ATOMIC_INC_X2:
    return "buffer_atomic_inc_x2";
  case ATOMIC_DEC_X2:
    return "buffer_atomic_dec_x2";
  case ATOMIC_FCMPSWAP_X2:
    return "buffer_atomic_fcmpswap_x2";
  case ATOMIC_FMIN_X2:
    return "buffer_atomic_fmin_x2";
  case ATOMIC_FMAX_X2:
    return "buffer_atomic_fmax_x2";
  case WBINVL1_SC_VOL:
    return "buffer_wbinvl1_sc_vol";
  case WBINVL1:
    return "buffer_wbinvl1";
  default:
    return nullptr;
  }
}
} // namespace shader::ir::mubuf
|
||||
39
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/smrd.hpp
Normal file
39
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/smrd.hpp
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
#pragma once
|
||||
|
||||
namespace shader::ir::smrd {
// SMRD (scalar memory read) opcodes; explicit initializers preserve gaps
// in the opcode numbering.
enum Op {
  LOAD_DWORD,
  LOAD_DWORDX2,
  LOAD_DWORDX4,
  LOAD_DWORDX8,
  LOAD_DWORDX16,
  BUFFER_LOAD_DWORD = 8,
  BUFFER_LOAD_DWORDX2,
  BUFFER_LOAD_DWORDX4,
  BUFFER_LOAD_DWORDX8,
  BUFFER_LOAD_DWORDX16,
  DCACHE_INV_VOL = 29,
  MEMTIME,
  DCACHE_INV,

  OpCount
};

/// Returns the assembly mnemonic for \p id, or nullptr for an unmapped id.
inline const char *getInstructionName(unsigned id) {
  switch (id) {
  case LOAD_DWORD:
    return "s_load_dword";
  case LOAD_DWORDX2:
    return "s_load_dwordx2";
  case LOAD_DWORDX4:
    return "s_load_dwordx4";
  case LOAD_DWORDX8:
    return "s_load_dwordx8";
  case LOAD_DWORDX16:
    return "s_load_dwordx16";
  case BUFFER_LOAD_DWORD:
    return "s_buffer_load_dword";
  case BUFFER_LOAD_DWORDX2:
    return "s_buffer_load_dwordx2";
  case BUFFER_LOAD_DWORDX4:
    return "s_buffer_load_dwordx4";
  case BUFFER_LOAD_DWORDX8:
    return "s_buffer_load_dwordx8";
  case BUFFER_LOAD_DWORDX16:
    return "s_buffer_load_dwordx16";
  case DCACHE_INV_VOL:
    return "s_dcache_inv_vol";
  case MEMTIME:
    return "s_memtime";
  case DCACHE_INV:
    return "s_dcache_inv";
  default:
    return nullptr;
  }
}
} // namespace shader::ir::smrd
|
||||
109
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/sop1.hpp
Normal file
109
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/sop1.hpp
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
#pragma once
|
||||
|
||||
namespace shader::ir::sop1 {
// SOP1 (scalar one-operand ALU) opcodes; explicit initializers preserve
// gaps in the opcode numbering.
enum Op {
  MOV_B32 = 3,
  MOV_B64,
  CMOV_B32,
  CMOV_B64,
  NOT_B32,
  NOT_B64,
  WQM_B32,
  WQM_B64,
  BREV_B32,
  BREV_B64,
  BCNT0_I32_B32,
  BCNT0_I32_B64,
  BCNT1_I32_B32,
  BCNT1_I32_B64,
  FF0_I32_B32,
  FF0_I32_B64,
  FF1_I32_B32,
  FF1_I32_B64,
  FLBIT_I32_B32,
  FLBIT_I32_B64,
  FLBIT_I32,
  FLBIT_I32_I64,
  SEXT_I32_I8,
  SEXT_I32_I16,
  BITSET0_B32,
  BITSET0_B64,
  BITSET1_B32,
  BITSET1_B64,
  GETPC_B64,
  SETPC_B64,
  SWAPPC_B64,
  AND_SAVEEXEC_B64 = 36,
  OR_SAVEEXEC_B64,
  XOR_SAVEEXEC_B64,
  ANDN2_SAVEEXEC_B64,
  ORN2_SAVEEXEC_B64,
  NAND_SAVEEXEC_B64,
  NOR_SAVEEXEC_B64,
  XNOR_SAVEEXEC_B64,
  QUADMASK_B32,
  QUADMASK_B64,
  MOVRELS_B32,
  MOVRELS_B64,
  MOVRELD_B32,
  MOVRELD_B64,
  CBRANCH_JOIN,
  ABS_I32 = 52,
  MOV_FED_B32,

  OpCount
};

/// Returns the assembly mnemonic for \p id, or nullptr for an unmapped id.
inline const char *getInstructionName(unsigned id) {
  switch (id) {
  case MOV_B32:
    return "s_mov_b32";
  case MOV_B64:
    return "s_mov_b64";
  case CMOV_B32:
    return "s_cmov_b32";
  case CMOV_B64:
    return "s_cmov_b64";
  case NOT_B32:
    return "s_not_b32";
  case NOT_B64:
    return "s_not_b64";
  case WQM_B32:
    return "s_wqm_b32";
  case WQM_B64:
    return "s_wqm_b64";
  case BREV_B32:
    return "s_brev_b32";
  case BREV_B64:
    return "s_brev_b64";
  case BCNT0_I32_B32:
    return "s_bcnt0_i32_b32";
  case BCNT0_I32_B64:
    return "s_bcnt0_i32_b64";
  case BCNT1_I32_B32:
    return "s_bcnt1_i32_b32";
  case BCNT1_I32_B64:
    return "s_bcnt1_i32_b64";
  case FF0_I32_B32:
    return "s_ff0_i32_b32";
  case FF0_I32_B64:
    return "s_ff0_i32_b64";
  case FF1_I32_B32:
    return "s_ff1_i32_b32";
  case FF1_I32_B64:
    return "s_ff1_i32_b64";
  case FLBIT_I32_B32:
    return "s_flbit_i32_b32";
  case FLBIT_I32_B64:
    return "s_flbit_i32_b64";
  case FLBIT_I32:
    return "s_flbit_i32";
  case FLBIT_I32_I64:
    return "s_flbit_i32_i64";
  case SEXT_I32_I8:
    return "s_sext_i32_i8";
  case SEXT_I32_I16:
    return "s_sext_i32_i16";
  case BITSET0_B32:
    return "s_bitset0_b32";
  case BITSET0_B64:
    return "s_bitset0_b64";
  case BITSET1_B32:
    return "s_bitset1_b32";
  case BITSET1_B64:
    return "s_bitset1_b64";
  case GETPC_B64:
    return "s_getpc_b64";
  case SETPC_B64:
    return "s_setpc_b64";
  case SWAPPC_B64:
    return "s_swappc_b64";
  case AND_SAVEEXEC_B64:
    return "s_and_saveexec_b64";
  case OR_SAVEEXEC_B64:
    return "s_or_saveexec_b64";
  case XOR_SAVEEXEC_B64:
    return "s_xor_saveexec_b64";
  case ANDN2_SAVEEXEC_B64:
    return "s_andn2_saveexec_b64";
  case ORN2_SAVEEXEC_B64:
    return "s_orn2_saveexec_b64";
  case NAND_SAVEEXEC_B64:
    return "s_nand_saveexec_b64";
  case NOR_SAVEEXEC_B64:
    return "s_nor_saveexec_b64";
  case XNOR_SAVEEXEC_B64:
    return "s_xnor_saveexec_b64";
  case QUADMASK_B32:
    return "s_quadmask_b32";
  case QUADMASK_B64:
    return "s_quadmask_b64";
  case MOVRELS_B32:
    return "s_movrels_b32";
  case MOVRELS_B64:
    return "s_movrels_b64";
  case MOVRELD_B32:
    return "s_movreld_b32";
  case MOVRELD_B64:
    return "s_movreld_b64";
  case CBRANCH_JOIN:
    return "s_cbranch_join";
  case ABS_I32:
    return "s_abs_i32";
  case MOV_FED_B32:
    return "s_mov_fed_b32";
  default:
    return nullptr;
  }
}
} // namespace shader::ir::sop1
|
||||
171
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/sop2.hpp
Normal file
171
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/sop2.hpp
Normal file
|
|
@ -0,0 +1,171 @@
|
|||
#pragma once
|
||||
#include "../ir.hpp"
|
||||
|
||||
namespace shader::ir::sop2 {
// SOP2 (scalar two-operand ALU) opcodes; explicit initializers preserve
// gaps in the opcode numbering.
enum Op {
  ADD_U32,
  SUB_U32,
  ADD_I32,
  SUB_I32,
  ADDC_U32,
  SUBB_U32,
  MIN_I32,
  MIN_U32,
  MAX_I32,
  MAX_U32,
  CSELECT_B32,
  CSELECT_B64,
  AND_B32 = 14,
  AND_B64,
  OR_B32,
  OR_B64,
  XOR_B32,
  XOR_B64,
  ANDN2_B32,
  ANDN2_B64,
  ORN2_B32,
  ORN2_B64,
  NAND_B32,
  NAND_B64,
  NOR_B32,
  NOR_B64,
  XNOR_B32,
  XNOR_B64,
  LSHL_B32,
  LSHL_B64,
  LSHR_B32,
  LSHR_B64,
  ASHR_I32,
  ASHR_I64,
  BFM_B32,
  BFM_B64,
  MUL_I32,
  BFE_U32,
  BFE_I32,
  BFE_U64,
  BFE_I64,
  CBRANCH_G_FORK,
  ABSDIFF_I32,
  LSHL1_ADD_U32,
  LSHL2_ADD_U32,
  LSHL3_ADD_U32,
  LSHL4_ADD_U32,
  PACK_LL_B32_B16,
  PACK_LH_B32_B16,
  PACK_HH_B32_B16,
  MUL_HI_U32,
  MUL_HI_I32,

  OpCount
};

/// Returns the assembly mnemonic for \p id, or nullptr for an unmapped id.
inline const char *getInstructionName(unsigned id) {
  switch (id) {
  case ADD_U32: return "s_add_u32";
  case SUB_U32: return "s_sub_u32";
  case ADD_I32: return "s_add_i32";
  case SUB_I32: return "s_sub_i32";
  case ADDC_U32: return "s_addc_u32";
  case SUBB_U32: return "s_subb_u32";
  case MIN_I32: return "s_min_i32";
  case MIN_U32: return "s_min_u32";
  case MAX_I32: return "s_max_i32";
  case MAX_U32: return "s_max_u32";
  case CSELECT_B32: return "s_cselect_b32";
  case CSELECT_B64: return "s_cselect_b64";
  case AND_B32: return "s_and_b32";
  case AND_B64: return "s_and_b64";
  case OR_B32: return "s_or_b32";
  case OR_B64: return "s_or_b64";
  case XOR_B32: return "s_xor_b32";
  case XOR_B64: return "s_xor_b64";
  case ANDN2_B32: return "s_andn2_b32";
  case ANDN2_B64: return "s_andn2_b64";
  case ORN2_B32: return "s_orn2_b32";
  case ORN2_B64: return "s_orn2_b64";
  case NAND_B32: return "s_nand_b32";
  case NAND_B64: return "s_nand_b64";
  case NOR_B32: return "s_nor_b32";
  case NOR_B64: return "s_nor_b64";
  case XNOR_B32: return "s_xnor_b32";
  case XNOR_B64: return "s_xnor_b64";
  case LSHL_B32: return "s_lshl_b32";
  case LSHL_B64: return "s_lshl_b64";
  case LSHR_B32: return "s_lshr_b32";
  case LSHR_B64: return "s_lshr_b64";
  case ASHR_I32: return "s_ashr_i32";
  case ASHR_I64: return "s_ashr_i64";
  case BFM_B32: return "s_bfm_b32";
  case BFM_B64: return "s_bfm_b64";
  case MUL_I32: return "s_mul_i32";
  case BFE_U32: return "s_bfe_u32";
  case BFE_I32: return "s_bfe_i32";
  case BFE_U64: return "s_bfe_u64";
  case BFE_I64: return "s_bfe_i64";
  case CBRANCH_G_FORK: return "s_cbranch_g_fork";
  case ABSDIFF_I32: return "s_absdiff_i32";
  case LSHL1_ADD_U32: return "s_lshl1_add_u32";
  case LSHL2_ADD_U32: return "s_lshl2_add_u32";
  case LSHL3_ADD_U32: return "s_lshl3_add_u32";
  case LSHL4_ADD_U32: return "s_lshl4_add_u32";
  case PACK_LL_B32_B16: return "s_pack_ll_b32_b16";
  case PACK_LH_B32_B16: return "s_pack_lh_b32_b16";
  case PACK_HH_B32_B16: return "s_pack_hh_b32_b16";
  case MUL_HI_U32: return "s_mul_hi_u32";
  case MUL_HI_I32: return "s_mul_hi_i32";
  default: return nullptr;
  }
}
} // namespace shader::ir::sop2
|
||||
67
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/sopc.hpp
Normal file
67
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/sopc.hpp
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
#pragma once
|
||||
|
||||
namespace shader::ir::sopc {
// SOPC (scalar compare) opcodes.
enum Op {
  CMP_EQ_I32,
  CMP_LG_I32,
  CMP_GT_I32,
  CMP_GE_I32,
  CMP_LT_I32,
  CMP_LE_I32,
  CMP_EQ_U32,
  CMP_LG_U32,
  CMP_GT_U32,
  CMP_GE_U32,
  CMP_LT_U32,
  CMP_LE_U32,
  BITCMP0_B32,
  BITCMP1_B32,
  BITCMP0_B64,
  BITCMP1_B64,
  SETVSKIP,
  ILLEGALD,

  OpCount
};
/// Returns the assembly mnemonic for \p id, or nullptr for an unknown opcode.
/// All SOPC mnemonics carry the scalar "s_" prefix (S_BITCMP0_B32,
/// S_SETVSKIP, ... per the AMD GCN ISA reference); the bitcmp/setvskip/
/// illegald entries previously lacked it.
inline const char *getInstructionName(unsigned id) {
  switch (id) {
  case CMP_EQ_I32:
    return "s_cmp_eq_i32";
  case CMP_LG_I32:
    return "s_cmp_lg_i32";
  case CMP_GT_I32:
    return "s_cmp_gt_i32";
  case CMP_GE_I32:
    return "s_cmp_ge_i32";
  case CMP_LT_I32:
    return "s_cmp_lt_i32";
  case CMP_LE_I32:
    return "s_cmp_le_i32";
  case CMP_EQ_U32:
    return "s_cmp_eq_u32";
  case CMP_LG_U32:
    return "s_cmp_lg_u32";
  case CMP_GT_U32:
    return "s_cmp_gt_u32";
  case CMP_GE_U32:
    return "s_cmp_ge_u32";
  case CMP_LT_U32:
    return "s_cmp_lt_u32";
  case CMP_LE_U32:
    return "s_cmp_le_u32";
  case BITCMP0_B32:
    return "s_bitcmp0_b32";
  case BITCMP1_B32:
    return "s_bitcmp1_b32";
  case BITCMP0_B64:
    return "s_bitcmp0_b64";
  case BITCMP1_B64:
    return "s_bitcmp1_b64";
  case SETVSKIP:
    return "s_setvskip";
  case ILLEGALD:
    return "s_illegald";
  }
  return nullptr;
}
} // namespace shader::ir::sopc
|
||||
73
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/sopk.hpp
Normal file
73
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/sopk.hpp
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
#pragma once
|
||||
|
||||
namespace shader::ir::sopk {
// SOPK (scalar ALU with 16-bit immediate) opcodes; explicit initializers
// preserve gaps in the opcode numbering.
enum Op {
  MOVK_I32,
  CMOVK_I32 = 2,
  CMPK_EQ_I32,
  CMPK_LG_I32,
  CMPK_GT_I32,
  CMPK_GE_I32,
  CMPK_LT_I32,
  CMPK_LE_I32,
  CMPK_EQ_U32,
  CMPK_LG_U32,
  CMPK_GT_U32,
  CMPK_GE_U32,
  CMPK_LT_U32,
  CMPK_LE_U32,
  ADDK_I32,
  MULK_I32,
  CBRANCH_I_FORK,
  GETREG_B32,
  SETREG_B32,
  SETREG_IMM,

  OpCount
};

/// Returns the assembly mnemonic for \p id, or nullptr for an unmapped id.
inline const char *getInstructionName(unsigned id) {
  switch (id) {
  case MOVK_I32: return "s_movk_i32";
  case CMOVK_I32: return "s_cmovk_i32";
  case CMPK_EQ_I32: return "s_cmpk_eq_i32";
  case CMPK_LG_I32: return "s_cmpk_lg_i32";
  case CMPK_GT_I32: return "s_cmpk_gt_i32";
  case CMPK_GE_I32: return "s_cmpk_ge_i32";
  case CMPK_LT_I32: return "s_cmpk_lt_i32";
  case CMPK_LE_I32: return "s_cmpk_le_i32";
  case CMPK_EQ_U32: return "s_cmpk_eq_u32";
  case CMPK_LG_U32: return "s_cmpk_lg_u32";
  case CMPK_GT_U32: return "s_cmpk_gt_u32";
  case CMPK_GE_U32: return "s_cmpk_ge_u32";
  case CMPK_LT_U32: return "s_cmpk_lt_u32";
  case CMPK_LE_U32: return "s_cmpk_le_u32";
  case ADDK_I32: return "s_addk_i32";
  case MULK_I32: return "s_mulk_i32";
  case CBRANCH_I_FORK: return "s_cbranch_i_fork";
  case GETREG_B32: return "s_getreg_b32";
  case SETREG_B32: return "s_setreg_b32";
  case SETREG_IMM: return "s_setreg_imm";
  default: return nullptr;
  }
}
} // namespace shader::ir::sopk
|
||||
89
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/sopp.hpp
Normal file
89
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/sopp.hpp
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
#pragma once
|
||||
|
||||
namespace shader::ir::sopp {
// SOPP (scalar program control) opcodes; explicit initializers preserve
// gaps in the opcode numbering.
enum Op {
  NOP,
  ENDPGM,
  BRANCH,
  CBRANCH_SCC0 = 4,
  CBRANCH_SCC1,
  CBRANCH_VCCZ,
  CBRANCH_VCCNZ,
  CBRANCH_EXECZ,
  CBRANCH_EXECNZ,
  BARRIER,
  WAITCNT = 12,
  SETHALT,
  SLEEP,
  SETPRIO,
  SENDMSG,
  SENDMSGHALT,
  TRAP,
  ICACHE_INV,
  INCPERFLEVEL,
  DECPERFLEVEL,
  TTRACEDATA,
  CBRANCH_CDBGSYS = 23,
  CBRANCH_CDBGUSER = 24,
  CBRANCH_CDBGSYS_OR_USER = 25,
  CBRANCH_CDBGSYS_AND_USER = 26,

  OpCount
};

/// Returns the assembly mnemonic for \p id, or nullptr for an unmapped id.
inline const char *getInstructionName(unsigned id) {
  switch (id) {
  case NOP: return "s_nop";
  case ENDPGM: return "s_endpgm";
  case BRANCH: return "s_branch";
  case CBRANCH_SCC0: return "s_cbranch_scc0";
  case CBRANCH_SCC1: return "s_cbranch_scc1";
  case CBRANCH_VCCZ: return "s_cbranch_vccz";
  case CBRANCH_VCCNZ: return "s_cbranch_vccnz";
  case CBRANCH_EXECZ: return "s_cbranch_execz";
  case CBRANCH_EXECNZ: return "s_cbranch_execnz";
  case BARRIER: return "s_barrier";
  case WAITCNT: return "s_waitcnt";
  case SETHALT: return "s_sethalt";
  case SLEEP: return "s_sleep";
  case SETPRIO: return "s_setprio";
  case SENDMSG: return "s_sendmsg";
  case SENDMSGHALT: return "s_sendmsghalt";
  case TRAP: return "s_trap";
  case ICACHE_INV: return "s_icache_inv";
  case INCPERFLEVEL: return "s_incperflevel";
  case DECPERFLEVEL: return "s_decperflevel";
  case TTRACEDATA: return "s_ttracedata";
  case CBRANCH_CDBGSYS: return "s_cbranch_cdbgsys";
  case CBRANCH_CDBGUSER: return "s_cbranch_cdbguser";
  case CBRANCH_CDBGSYS_OR_USER: return "s_cbranch_cdbgsys_or_user";
  case CBRANCH_CDBGSYS_AND_USER: return "s_cbranch_cdbgsys_and_user";
  default: return nullptr;
  }
}
} // namespace shader::ir::sopp
|
||||
23
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/vintrp.hpp
Normal file
23
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/vintrp.hpp
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
#pragma once
|
||||
|
||||
namespace shader::ir::vintrp {
// VINTRP (vector parameter interpolation) opcodes.
enum Op {
  P1_F32,
  P2_F32,
  MOV_F32,

  OpCount
};

/// Returns the assembly mnemonic for \p id, or nullptr for an unmapped id.
inline const char *getInstructionName(unsigned id) {
  switch (id) {
  case P1_F32: return "v_interp_p1_f32";
  case P2_F32: return "v_interp_p2_f32";
  case MOV_F32: return "v_interp_mov_f32";
  default: return nullptr;
  }
}
} // namespace shader::ir::vintrp
|
||||
259
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/vop1.hpp
Normal file
259
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/vop1.hpp
Normal file
|
|
@ -0,0 +1,259 @@
|
|||
#pragma once
|
||||
|
||||
namespace shader::ir::vop1 {
|
||||
enum Op {
|
||||
NOP,
|
||||
MOV_B32,
|
||||
READFIRSTLANE_B32,
|
||||
CVT_I32_F64,
|
||||
CVT_F64_I32,
|
||||
CVT_F32_I32,
|
||||
CVT_F32_U32,
|
||||
CVT_U32_F32,
|
||||
CVT_I32_F32,
|
||||
MOV_FED_B32,
|
||||
CVT_F16_F32,
|
||||
CVT_F32_F16,
|
||||
CVT_RPI_I32_F32,
|
||||
CVT_FLR_I32_F32,
|
||||
CVT_OFF_F32_I4,
|
||||
CVT_F32_F64,
|
||||
CVT_F64_F32,
|
||||
CVT_F32_UBYTE0,
|
||||
CVT_F32_UBYTE1,
|
||||
CVT_F32_UBYTE2,
|
||||
CVT_F32_UBYTE3,
|
||||
CVT_U32_F64,
|
||||
CVT_F64_U32,
|
||||
FRACT_F32 = 32,
|
||||
TRUNC_F32,
|
||||
CEIL_F32,
|
||||
RNDNE_F32,
|
||||
FLOOR_F32,
|
||||
EXP_F32,
|
||||
LOG_CLAMP_F32,
|
||||
LOG_F32,
|
||||
RCP_CLAMP_F32,
|
||||
RCP_LEGACY_F32,
|
||||
RCP_F32,
|
||||
RCP_IFLAG_F32,
|
||||
RSQ_CLAMP_F32,
|
||||
RSQ_LEGACY_F32,
|
||||
RSQ_F32,
|
||||
RCP_F64,
|
||||
RCP_CLAMP_F64,
|
||||
RSQ_F64,
|
||||
RSQ_CLAMP_F64,
|
||||
SQRT_F32,
|
||||
SQRT_F64,
|
||||
SIN_F32,
|
||||
COS_F32,
|
||||
NOT_B32,
|
||||
BFREV_B32,
|
||||
FFBH_U32,
|
||||
FFBL_B32,
|
||||
FFBH_I32,
|
||||
FREXP_EXP_I32_F64,
|
||||
FREXP_MANT_F64,
|
||||
FRACT_F64,
|
||||
FREXP_EXP_I32_F32,
|
||||
FREXP_MANT_F32,
|
||||
CLREXCP,
|
||||
MOVRELD_B32,
|
||||
MOVRELS_B32,
|
||||
MOVRELSD_B32,
|
||||
CVT_F16_U16 = 80,
|
||||
CVT_F16_I16,
|
||||
CVT_U16_F16,
|
||||
CVT_I16_F16,
|
||||
RCP_F16,
|
||||
SQRT_F16,
|
||||
RSQ_F16,
|
||||
LOG_F16,
|
||||
EXP_F16,
|
||||
FREXP_MANT_F16,
|
||||
FREXP_EXP_I16_F16,
|
||||
FLOOR_F16,
|
||||
CEIL_F16,
|
||||
TRUNC_F16,
|
||||
RNDNE_F16,
|
||||
FRACT_F16,
|
||||
SIN_F16,
|
||||
COS_F16,
|
||||
SAT_PK_U8_I16,
|
||||
CVT_NORM_I16_F16,
|
||||
CVT_NORM_U16_F16,
|
||||
SWAP_B32,
|
||||
|
||||
OpCount
|
||||
};
|
||||
inline const char *getInstructionName(unsigned id) {
|
||||
switch (id) {
|
||||
case NOP:
|
||||
return "v_nop";
|
||||
case MOV_B32:
|
||||
return "v_mov_b32";
|
||||
case READFIRSTLANE_B32:
|
||||
return "v_readfirstlane_b32";
|
||||
case CVT_I32_F64:
|
||||
return "v_cvt_i32_f64";
|
||||
case CVT_F64_I32:
|
||||
return "v_cvt_f64_i32";
|
||||
case CVT_F32_I32:
|
||||
return "v_cvt_f32_i32";
|
||||
case CVT_F32_U32:
|
||||
return "v_cvt_f32_u32";
|
||||
case CVT_U32_F32:
|
||||
return "v_cvt_u32_f32";
|
||||
case CVT_I32_F32:
|
||||
return "v_cvt_i32_f32";
|
||||
case MOV_FED_B32:
|
||||
return "v_mov_fed_b32";
|
||||
case CVT_F16_F32:
|
||||
return "v_cvt_f16_f32";
|
||||
case CVT_F32_F16:
|
||||
return "v_cvt_f32_f16";
|
||||
case CVT_RPI_I32_F32:
|
||||
return "v_cvt_rpi_i32_f32";
|
||||
case CVT_FLR_I32_F32:
|
||||
return "v_cvt_flr_i32_f32";
|
||||
case CVT_OFF_F32_I4:
|
||||
return "v_cvt_off_f32_i4";
|
||||
case CVT_F32_F64:
|
||||
return "v_cvt_f32_f64";
|
||||
case CVT_F64_F32:
|
||||
return "v_cvt_f64_f32";
|
||||
case CVT_F32_UBYTE0:
|
||||
return "v_cvt_f32_ubyte0";
|
||||
case CVT_F32_UBYTE1:
|
||||
return "v_cvt_f32_ubyte1";
|
||||
case CVT_F32_UBYTE2:
|
||||
return "v_cvt_f32_ubyte2";
|
||||
case CVT_F32_UBYTE3:
|
||||
return "v_cvt_f32_ubyte3";
|
||||
case CVT_U32_F64:
|
||||
return "v_cvt_u32_f64";
|
||||
case CVT_F64_U32:
|
||||
return "v_cvt_f64_u32";
|
||||
case FRACT_F32:
|
||||
return "v_fract_f32";
|
||||
case TRUNC_F32:
|
||||
return "v_trunc_f32";
|
||||
case CEIL_F32:
|
||||
return "v_ceil_f32";
|
||||
case RNDNE_F32:
|
||||
return "v_rndne_f32";
|
||||
case FLOOR_F32:
|
||||
return "v_floor_f32";
|
||||
case EXP_F32:
|
||||
return "v_exp_f32";
|
||||
case LOG_CLAMP_F32:
|
||||
return "v_log_clamp_f32";
|
||||
case LOG_F32:
|
||||
return "v_log_f32";
|
||||
case RCP_CLAMP_F32:
|
||||
return "v_rcp_clamp_f32";
|
||||
case RCP_LEGACY_F32:
|
||||
return "v_rcp_legacy_f32";
|
||||
case RCP_F32:
|
||||
return "v_rcp_f32";
|
||||
case RCP_IFLAG_F32:
|
||||
return "v_rcp_iflag_f32";
|
||||
case RSQ_CLAMP_F32:
|
||||
return "v_rsq_clamp_f32";
|
||||
case RSQ_LEGACY_F32:
|
||||
return "v_rsq_legacy_f32";
|
||||
case RSQ_F32:
|
||||
return "v_rsq_f32";
|
||||
case RCP_F64:
|
||||
return "v_rcp_f64";
|
||||
case RCP_CLAMP_F64:
|
||||
return "v_rcp_clamp_f64";
|
||||
case RSQ_F64:
|
||||
return "v_rsq_f64";
|
||||
case RSQ_CLAMP_F64:
|
||||
return "v_rsq_clamp_f64";
|
||||
case SQRT_F32:
|
||||
return "v_sqrt_f32";
|
||||
case SQRT_F64:
|
||||
return "v_sqrt_f64";
|
||||
case SIN_F32:
|
||||
return "v_sin_f32";
|
||||
case COS_F32:
|
||||
return "v_cos_f32";
|
||||
case NOT_B32:
|
||||
return "v_not_b32";
|
||||
case BFREV_B32:
|
||||
return "v_bfrev_b32";
|
||||
case FFBH_U32:
|
||||
return "v_ffbh_u32";
|
||||
case FFBL_B32:
|
||||
return "v_ffbl_b32";
|
||||
case FFBH_I32:
|
||||
return "v_ffbh_i32";
|
||||
case FREXP_EXP_I32_F64:
|
||||
return "v_frexp_exp_i32_f64";
|
||||
case FREXP_MANT_F64:
|
||||
return "v_frexp_mant_f64";
|
||||
case FRACT_F64:
|
||||
return "v_fract_f64";
|
||||
case FREXP_EXP_I32_F32:
|
||||
return "v_frexp_exp_i32_f32";
|
||||
case FREXP_MANT_F32:
|
||||
return "v_frexp_mant_f32";
|
||||
case CLREXCP:
|
||||
return "v_clrexcp";
|
||||
case MOVRELD_B32:
|
||||
return "v_movreld_b32";
|
||||
case MOVRELS_B32:
|
||||
return "v_movrels_b32";
|
||||
case MOVRELSD_B32:
|
||||
return "v_movrelsd_b32";
|
||||
case CVT_F16_U16:
|
||||
return "v_cvt_f16_u16";
|
||||
case CVT_F16_I16:
|
||||
return "v_cvt_f16_i16";
|
||||
case CVT_U16_F16:
|
||||
return "v_cvt_u16_f16";
|
||||
case CVT_I16_F16:
|
||||
return "v_cvt_i16_f16";
|
||||
case RCP_F16:
|
||||
return "v_rcp_f16";
|
||||
case SQRT_F16:
|
||||
return "v_sqrt_f16";
|
||||
case RSQ_F16:
|
||||
return "v_rsq_f16";
|
||||
case LOG_F16:
|
||||
return "v_log_f16";
|
||||
case EXP_F16:
|
||||
return "v_exp_f16";
|
||||
case FREXP_MANT_F16:
|
||||
return "v_frexp_mant_f16";
|
||||
case FREXP_EXP_I16_F16:
|
||||
return "v_frexp_exp_i16_f16";
|
||||
case FLOOR_F16:
|
||||
return "v_floor_f16";
|
||||
case CEIL_F16:
|
||||
return "v_ceil_f16";
|
||||
case TRUNC_F16:
|
||||
return "v_trunc_f16";
|
||||
case RNDNE_F16:
|
||||
return "v_rndne_f16";
|
||||
case FRACT_F16:
|
||||
return "v_fract_f16";
|
||||
case SIN_F16:
|
||||
return "v_sin_f16";
|
||||
case COS_F16:
|
||||
return "v_cos_f16";
|
||||
case SAT_PK_U8_I16:
|
||||
return "v_sat_pk_u8_i16";
|
||||
case CVT_NORM_I16_F16:
|
||||
return "v_cvt_norm_i16_f16";
|
||||
case CVT_NORM_U16_F16:
|
||||
return "v_cvt_norm_u16_f16";
|
||||
case SWAP_B32:
|
||||
return "v_swap_b32";
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
} // namespace shader::ir::vop1
|
||||
164
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/vop2.hpp
Normal file
164
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/vop2.hpp
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
#pragma once
|
||||
|
||||
namespace shader::ir::vop2 {
|
||||
enum Op {
|
||||
CNDMASK_B32,
|
||||
READLANE_B32,
|
||||
WRITELANE_B32,
|
||||
ADD_F32,
|
||||
SUB_F32,
|
||||
SUBREV_F32,
|
||||
MAC_LEGACY_F32,
|
||||
MUL_LEGACY_F32,
|
||||
MUL_F32,
|
||||
MUL_I32_I24,
|
||||
MUL_HI_I32_I24,
|
||||
MUL_U32_U24,
|
||||
MUL_HI_U32_U24,
|
||||
MIN_LEGACY_F32,
|
||||
MAX_LEGACY_F32,
|
||||
MIN_F32,
|
||||
MAX_F32,
|
||||
MIN_I32,
|
||||
MAX_I32,
|
||||
MIN_U32,
|
||||
MAX_U32,
|
||||
LSHR_B32,
|
||||
LSHRREV_B32,
|
||||
ASHR_I32,
|
||||
ASHRREV_I32,
|
||||
LSHL_B32,
|
||||
LSHLREV_B32,
|
||||
AND_B32,
|
||||
OR_B32,
|
||||
XOR_B32,
|
||||
BFM_B32,
|
||||
MAC_F32,
|
||||
MADMK_F32,
|
||||
MADAK_F32,
|
||||
BCNT_U32_B32,
|
||||
MBCNT_LO_U32_B32,
|
||||
MBCNT_HI_U32_B32,
|
||||
ADD_I32,
|
||||
SUB_I32,
|
||||
SUBREV_I32,
|
||||
ADDC_U32,
|
||||
SUBB_U32,
|
||||
SUBBREV_U32,
|
||||
LDEXP_F32,
|
||||
CVT_PKACCUM_U8_F32,
|
||||
CVT_PKNORM_I16_F32,
|
||||
CVT_PKNORM_U16_F32,
|
||||
CVT_PKRTZ_F16_F32,
|
||||
CVT_PK_U16_U32,
|
||||
CVT_PK_I16_I32,
|
||||
|
||||
OpCount
|
||||
};
|
||||
inline const char *getInstructionName(unsigned id) {
|
||||
switch (id) {
|
||||
case CNDMASK_B32:
|
||||
return "v_cndmask_b32";
|
||||
case READLANE_B32:
|
||||
return "v_readlane_b32";
|
||||
case WRITELANE_B32:
|
||||
return "v_writelane_b32";
|
||||
case ADD_F32:
|
||||
return "v_add_f32";
|
||||
case SUB_F32:
|
||||
return "v_sub_f32";
|
||||
case SUBREV_F32:
|
||||
return "v_subrev_f32";
|
||||
case MAC_LEGACY_F32:
|
||||
return "v_mac_legacy_f32";
|
||||
case MUL_LEGACY_F32:
|
||||
return "v_mul_legacy_f32";
|
||||
case MUL_F32:
|
||||
return "v_mul_f32";
|
||||
case MUL_I32_I24:
|
||||
return "v_mul_i32_i24";
|
||||
case MUL_HI_I32_I24:
|
||||
return "v_mul_hi_i32_i24";
|
||||
case MUL_U32_U24:
|
||||
return "v_mul_u32_u24";
|
||||
case MUL_HI_U32_U24:
|
||||
return "v_mul_hi_u32_u24";
|
||||
case MIN_LEGACY_F32:
|
||||
return "v_min_legacy_f32";
|
||||
case MAX_LEGACY_F32:
|
||||
return "v_max_legacy_f32";
|
||||
case MIN_F32:
|
||||
return "v_min_f32";
|
||||
case MAX_F32:
|
||||
return "v_max_f32";
|
||||
case MIN_I32:
|
||||
return "v_min_i32";
|
||||
case MAX_I32:
|
||||
return "v_max_i32";
|
||||
case MIN_U32:
|
||||
return "v_min_u32";
|
||||
case MAX_U32:
|
||||
return "v_max_u32";
|
||||
case LSHR_B32:
|
||||
return "v_lshr_b32";
|
||||
case LSHRREV_B32:
|
||||
return "v_lshrrev_b32";
|
||||
case ASHR_I32:
|
||||
return "v_ashr_i32";
|
||||
case ASHRREV_I32:
|
||||
return "v_ashrrev_i32";
|
||||
case LSHL_B32:
|
||||
return "v_lshl_b32";
|
||||
case LSHLREV_B32:
|
||||
return "v_lshlrev_b32";
|
||||
case AND_B32:
|
||||
return "v_and_b32";
|
||||
case OR_B32:
|
||||
return "v_or_b32";
|
||||
case XOR_B32:
|
||||
return "v_xor_b32";
|
||||
case BFM_B32:
|
||||
return "v_bfm_b32";
|
||||
case MAC_F32:
|
||||
return "v_mac_f32";
|
||||
case MADMK_F32:
|
||||
return "v_madmk_f32";
|
||||
case MADAK_F32:
|
||||
return "v_madak_f32";
|
||||
case BCNT_U32_B32:
|
||||
return "v_bcnt_u32_b32";
|
||||
case MBCNT_LO_U32_B32:
|
||||
return "v_mbcnt_lo_u32_b32";
|
||||
case MBCNT_HI_U32_B32:
|
||||
return "v_mbcnt_hi_u32_b32";
|
||||
case ADD_I32:
|
||||
return "v_add_i32";
|
||||
case SUB_I32:
|
||||
return "v_sub_i32";
|
||||
case SUBREV_I32:
|
||||
return "v_subrev_i32";
|
||||
case ADDC_U32:
|
||||
return "v_addc_u32";
|
||||
case SUBB_U32:
|
||||
return "v_subb_u32";
|
||||
case SUBBREV_U32:
|
||||
return "v_subbrev_u32";
|
||||
case LDEXP_F32:
|
||||
return "v_ldexp_f32";
|
||||
case CVT_PKACCUM_U8_F32:
|
||||
return "v_cvt_pkaccum_u8_f32";
|
||||
case CVT_PKNORM_I16_F32:
|
||||
return "v_cvt_pknorm_i16_f32";
|
||||
case CVT_PKNORM_U16_F32:
|
||||
return "v_cvt_pknorm_u16_f32";
|
||||
case CVT_PKRTZ_F16_F32:
|
||||
return "v_cvt_pkrtz_f16_f32";
|
||||
case CVT_PK_U16_U32:
|
||||
return "v_cvt_pk_u16_u32";
|
||||
case CVT_PK_I16_I32:
|
||||
return "v_cvt_pk_i16_i32";
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
} // namespace shader::ir::vop2
|
||||
1249
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/vop3.hpp
Normal file
1249
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/vop3.hpp
Normal file
File diff suppressed because it is too large
Load diff
522
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/vopc.hpp
Normal file
522
rpcsx-gpu2/lib/gcn-shader/include/shader/dialect/vopc.hpp
Normal file
|
|
@ -0,0 +1,522 @@
|
|||
#pragma once
|
||||
|
||||
namespace shader::ir::vopc {
|
||||
enum Op {
|
||||
CMP_F_F32,
|
||||
CMP_LT_F32,
|
||||
CMP_EQ_F32,
|
||||
CMP_LE_F32,
|
||||
CMP_GT_F32,
|
||||
CMP_LG_F32,
|
||||
CMP_GE_F32,
|
||||
CMP_O_F32,
|
||||
CMP_U_F32,
|
||||
CMP_NGE_F32,
|
||||
CMP_NLG_F32,
|
||||
CMP_NGT_F32,
|
||||
CMP_NLE_F32,
|
||||
CMP_NEQ_F32,
|
||||
CMP_NLT_F32,
|
||||
CMP_TRU_F32,
|
||||
CMPX_F_F32,
|
||||
CMPX_LT_F32,
|
||||
CMPX_EQ_F32,
|
||||
CMPX_LE_F32,
|
||||
CMPX_GT_F32,
|
||||
CMPX_LG_F32,
|
||||
CMPX_GE_F32,
|
||||
CMPX_O_F32,
|
||||
CMPX_U_F32,
|
||||
CMPX_NGE_F32,
|
||||
CMPX_NLG_F32,
|
||||
CMPX_NGT_F32,
|
||||
CMPX_NLE_F32,
|
||||
CMPX_NEQ_F32,
|
||||
CMPX_NLT_F32,
|
||||
CMPX_TRU_F32,
|
||||
CMP_F_F64,
|
||||
CMP_LT_F64,
|
||||
CMP_EQ_F64,
|
||||
CMP_LE_F64,
|
||||
CMP_GT_F64,
|
||||
CMP_LG_F64,
|
||||
CMP_GE_F64,
|
||||
CMP_O_F64,
|
||||
CMP_U_F64,
|
||||
CMP_NGE_F64,
|
||||
CMP_NLG_F64,
|
||||
CMP_NGT_F64,
|
||||
CMP_NLE_F64,
|
||||
CMP_NEQ_F64,
|
||||
CMP_NLT_F64,
|
||||
CMP_TRU_F64,
|
||||
CMPX_F_F64,
|
||||
CMPX_LT_F64,
|
||||
CMPX_EQ_F64,
|
||||
CMPX_LE_F64,
|
||||
CMPX_GT_F64,
|
||||
CMPX_LG_F64,
|
||||
CMPX_GE_F64,
|
||||
CMPX_O_F64,
|
||||
CMPX_U_F64,
|
||||
CMPX_NGE_F64,
|
||||
CMPX_NLG_F64,
|
||||
CMPX_NGT_F64,
|
||||
CMPX_NLE_F64,
|
||||
CMPX_NEQ_F64,
|
||||
CMPX_NLT_F64,
|
||||
CMPX_TRU_F64,
|
||||
CMPS_F_F32,
|
||||
CMPS_LT_F32,
|
||||
CMPS_EQ_F32,
|
||||
CMPS_LE_F32,
|
||||
CMPS_GT_F32,
|
||||
CMPS_LG_F32,
|
||||
CMPS_GE_F32,
|
||||
CMPS_O_F32,
|
||||
CMPS_U_F32,
|
||||
CMPS_NGE_F32,
|
||||
CMPS_NLG_F32,
|
||||
CMPS_NGT_F32,
|
||||
CMPS_NLE_F32,
|
||||
CMPS_NEQ_F32,
|
||||
CMPS_NLT_F32,
|
||||
CMPS_TRU_F32,
|
||||
CMPSX_F_F32,
|
||||
CMPSX_LT_F32,
|
||||
CMPSX_EQ_F32,
|
||||
CMPSX_LE_F32,
|
||||
CMPSX_GT_F32,
|
||||
CMPSX_LG_F32,
|
||||
CMPSX_GE_F32,
|
||||
CMPSX_O_F32,
|
||||
CMPSX_U_F32,
|
||||
CMPSX_NGE_F32,
|
||||
CMPSX_NLG_F32,
|
||||
CMPSX_NGT_F32,
|
||||
CMPSX_NLE_F32,
|
||||
CMPSX_NEQ_F32,
|
||||
CMPSX_NLT_F32,
|
||||
CMPSX_TRU_F32,
|
||||
CMPS_F_F64,
|
||||
CMPS_LT_F64,
|
||||
CMPS_EQ_F64,
|
||||
CMPS_LE_F64,
|
||||
CMPS_GT_F64,
|
||||
CMPS_LG_F64,
|
||||
CMPS_GE_F64,
|
||||
CMPS_O_F64,
|
||||
CMPS_U_F64,
|
||||
CMPS_NGE_F64,
|
||||
CMPS_NLG_F64,
|
||||
CMPS_NGT_F64,
|
||||
CMPS_NLE_F64,
|
||||
CMPS_NEQ_F64,
|
||||
CMPS_NLT_F64,
|
||||
CMPS_TRU_F64,
|
||||
CMPSX_F_F64,
|
||||
CMPSX_LT_F64,
|
||||
CMPSX_EQ_F64,
|
||||
CMPSX_LE_F64,
|
||||
CMPSX_GT_F64,
|
||||
CMPSX_LG_F64,
|
||||
CMPSX_GE_F64,
|
||||
CMPSX_O_F64,
|
||||
CMPSX_U_F64,
|
||||
CMPSX_NGE_F64,
|
||||
CMPSX_NLG_F64,
|
||||
CMPSX_NGT_F64,
|
||||
CMPSX_NLE_F64,
|
||||
CMPSX_NEQ_F64,
|
||||
CMPSX_NLT_F64,
|
||||
CMPSX_TRU_F64,
|
||||
CMP_F_I32,
|
||||
CMP_LT_I32,
|
||||
CMP_EQ_I32,
|
||||
CMP_LE_I32,
|
||||
CMP_GT_I32,
|
||||
CMP_NE_I32,
|
||||
CMP_GE_I32,
|
||||
CMP_T_I32,
|
||||
CMP_CLASS_F32,
|
||||
CMP_LT_I16,
|
||||
CMP_EQ_I16,
|
||||
CMP_LE_I16,
|
||||
CMP_GT_I16,
|
||||
CMP_NE_I16,
|
||||
CMP_GE_I16,
|
||||
CMP_CLASS_F16,
|
||||
CMPX_F_I32,
|
||||
CMPX_LT_I32,
|
||||
CMPX_EQ_I32,
|
||||
CMPX_LE_I32,
|
||||
CMPX_GT_I32,
|
||||
CMPX_NE_I32,
|
||||
CMPX_GE_I32,
|
||||
CMPX_T_I32,
|
||||
CMPX_CLASS_F32,
|
||||
CMPX_LT_I16,
|
||||
CMPX_EQ_I16,
|
||||
CMPX_LE_I16,
|
||||
CMPX_GT_I16,
|
||||
CMPX_NE_I16,
|
||||
CMPX_GE_I16,
|
||||
CMPX_CLASS_F16,
|
||||
CMP_F_I64,
|
||||
CMP_LT_I64,
|
||||
CMP_EQ_I64,
|
||||
CMP_LE_I64,
|
||||
CMP_GT_I64,
|
||||
CMP_NE_I64,
|
||||
CMP_GE_I64,
|
||||
CMP_T_I64,
|
||||
CMP_CLASS_F64,
|
||||
CMP_LT_U16,
|
||||
CMP_EQ_U16,
|
||||
CMP_LE_U16,
|
||||
CMP_GT_U16,
|
||||
CMP_NE_U16,
|
||||
CMP_GE_U16,
|
||||
CMPX_F_I64 = 176,
|
||||
CMPX_LT_I64,
|
||||
CMPX_EQ_I64,
|
||||
CMPX_LE_I64,
|
||||
CMPX_GT_I64,
|
||||
CMPX_NE_I64,
|
||||
CMPX_GE_I64,
|
||||
CMPX_T_I64,
|
||||
CMPX_CLASS_F64,
|
||||
CMPX_LT_U16,
|
||||
CMPX_EQ_U16,
|
||||
CMPX_LE_U16,
|
||||
CMPX_GT_U16,
|
||||
CMPX_NE_U16,
|
||||
CMPX_GE_U16,
|
||||
CMP_F_U32 = 192,
|
||||
CMP_LT_U32,
|
||||
CMP_EQ_U32,
|
||||
CMP_LE_U32,
|
||||
CMP_GT_U32,
|
||||
CMP_NE_U32,
|
||||
CMP_GE_U32,
|
||||
CMP_T_U32,
|
||||
CMP_F_F16,
|
||||
CMP_LT_F16,
|
||||
CMP_EQ_F16,
|
||||
CMP_LE_F16,
|
||||
CMP_GT_F16,
|
||||
CMP_LG_F16,
|
||||
CMP_GE_F16,
|
||||
CMP_O_F16,
|
||||
CMPX_F_U32,
|
||||
CMPX_LT_U32,
|
||||
CMPX_EQ_U32,
|
||||
CMPX_LE_U32,
|
||||
CMPX_GT_U32,
|
||||
CMPX_NE_U32,
|
||||
CMPX_GE_U32,
|
||||
CMPX_T_U32,
|
||||
CMPX_F_F16,
|
||||
CMPX_LT_F16,
|
||||
CMPX_EQ_F16,
|
||||
CMPX_LE_F16,
|
||||
CMPX_GT_F16,
|
||||
CMPX_LG_F16,
|
||||
CMPX_GE_F16,
|
||||
CMPX_O_F16,
|
||||
CMP_F_U64,
|
||||
CMP_LT_U64,
|
||||
CMP_EQ_U64,
|
||||
CMP_LE_U64,
|
||||
CMP_GT_U64,
|
||||
CMP_NE_U64,
|
||||
CMP_GE_U64,
|
||||
CMP_T_U64,
|
||||
CMP_U_F16,
|
||||
CMP_NGE_F16,
|
||||
CMP_NLG_F16,
|
||||
CMP_NGT_F16,
|
||||
CMP_NLE_F16,
|
||||
CMP_NEQ_F16,
|
||||
CMP_NLT_F16,
|
||||
CMP_TRU_F16,
|
||||
CMPX_F_U64,
|
||||
CMPX_LT_U64,
|
||||
CMPX_EQ_U64,
|
||||
CMPX_LE_U64,
|
||||
CMPX_GT_U64,
|
||||
CMPX_NE_U64,
|
||||
CMPX_GE_U64,
|
||||
CMPX_T_U64,
|
||||
CMPX_U_F16,
|
||||
CMPX_NGE_F16,
|
||||
CMPX_NLG_F16,
|
||||
CMPX_NGT_F16,
|
||||
CMPX_NLE_F16,
|
||||
CMPX_NEQ_F16,
|
||||
CMPX_NLT_F16,
|
||||
CMPX_TRU_F16,
|
||||
|
||||
OpCount
|
||||
};
|
||||
|
||||
inline const char *getInstructionName(unsigned id) {
|
||||
switch (id) {
|
||||
case CMP_F_F32: return "v_cmp_f_f32";
|
||||
case CMP_LT_F32: return "v_cmp_lt_f32";
|
||||
case CMP_EQ_F32: return "v_cmp_eq_f32";
|
||||
case CMP_LE_F32: return "v_cmp_le_f32";
|
||||
case CMP_GT_F32: return "v_cmp_gt_f32";
|
||||
case CMP_LG_F32: return "v_cmp_lg_f32";
|
||||
case CMP_GE_F32: return "v_cmp_ge_f32";
|
||||
case CMP_O_F32: return "v_cmp_o_f32";
|
||||
case CMP_U_F32: return "v_cmp_u_f32";
|
||||
case CMP_NGE_F32: return "v_cmp_nge_f32";
|
||||
case CMP_NLG_F32: return "v_cmp_nlg_f32";
|
||||
case CMP_NGT_F32: return "v_cmp_ngt_f32";
|
||||
case CMP_NLE_F32: return "v_cmp_nle_f32";
|
||||
case CMP_NEQ_F32: return "v_cmp_neq_f32";
|
||||
case CMP_NLT_F32: return "v_cmp_nlt_f32";
|
||||
case CMP_TRU_F32: return "v_cmp_tru_f32";
|
||||
case CMPX_F_F32: return "v_cmpx_f_f32";
|
||||
case CMPX_LT_F32: return "v_cmpx_lt_f32";
|
||||
case CMPX_EQ_F32: return "v_cmpx_eq_f32";
|
||||
case CMPX_LE_F32: return "v_cmpx_le_f32";
|
||||
case CMPX_GT_F32: return "v_cmpx_gt_f32";
|
||||
case CMPX_LG_F32: return "v_cmpx_lg_f32";
|
||||
case CMPX_GE_F32: return "v_cmpx_ge_f32";
|
||||
case CMPX_O_F32: return "v_cmpx_o_f32";
|
||||
case CMPX_U_F32: return "v_cmpx_u_f32";
|
||||
case CMPX_NGE_F32: return "v_cmpx_nge_f32";
|
||||
case CMPX_NLG_F32: return "v_cmpx_nlg_f32";
|
||||
case CMPX_NGT_F32: return "v_cmpx_ngt_f32";
|
||||
case CMPX_NLE_F32: return "v_cmpx_nle_f32";
|
||||
case CMPX_NEQ_F32: return "v_cmpx_neq_f32";
|
||||
case CMPX_NLT_F32: return "v_cmpx_nlt_f32";
|
||||
case CMPX_TRU_F32: return "v_cmpx_tru_f32";
|
||||
case CMP_F_F64: return "v_cmp_f_f64";
|
||||
case CMP_LT_F64: return "v_cmp_lt_f64";
|
||||
case CMP_EQ_F64: return "v_cmp_eq_f64";
|
||||
case CMP_LE_F64: return "v_cmp_le_f64";
|
||||
case CMP_GT_F64: return "v_cmp_gt_f64";
|
||||
case CMP_LG_F64: return "v_cmp_lg_f64";
|
||||
case CMP_GE_F64: return "v_cmp_ge_f64";
|
||||
case CMP_O_F64: return "v_cmp_o_f64";
|
||||
case CMP_U_F64: return "v_cmp_u_f64";
|
||||
case CMP_NGE_F64: return "v_cmp_nge_f64";
|
||||
case CMP_NLG_F64: return "v_cmp_nlg_f64";
|
||||
case CMP_NGT_F64: return "v_cmp_ngt_f64";
|
||||
case CMP_NLE_F64: return "v_cmp_nle_f64";
|
||||
case CMP_NEQ_F64: return "v_cmp_neq_f64";
|
||||
case CMP_NLT_F64: return "v_cmp_nlt_f64";
|
||||
case CMP_TRU_F64: return "v_cmp_tru_f64";
|
||||
case CMPX_F_F64: return "v_cmpx_f_f64";
|
||||
case CMPX_LT_F64: return "v_cmpx_lt_f64";
|
||||
case CMPX_EQ_F64: return "v_cmpx_eq_f64";
|
||||
case CMPX_LE_F64: return "v_cmpx_le_f64";
|
||||
case CMPX_GT_F64: return "v_cmpx_gt_f64";
|
||||
case CMPX_LG_F64: return "v_cmpx_lg_f64";
|
||||
case CMPX_GE_F64: return "v_cmpx_ge_f64";
|
||||
case CMPX_O_F64: return "v_cmpx_o_f64";
|
||||
case CMPX_U_F64: return "v_cmpx_u_f64";
|
||||
case CMPX_NGE_F64: return "v_cmpx_nge_f64";
|
||||
case CMPX_NLG_F64: return "v_cmpx_nlg_f64";
|
||||
case CMPX_NGT_F64: return "v_cmpx_ngt_f64";
|
||||
case CMPX_NLE_F64: return "v_cmpx_nle_f64";
|
||||
case CMPX_NEQ_F64: return "v_cmpx_neq_f64";
|
||||
case CMPX_NLT_F64: return "v_cmpx_nlt_f64";
|
||||
case CMPX_TRU_F64: return "v_cmpx_tru_f64";
|
||||
case CMPS_F_F32: return "v_cmps_f_f32";
|
||||
case CMPS_LT_F32: return "v_cmps_lt_f32";
|
||||
case CMPS_EQ_F32: return "v_cmps_eq_f32";
|
||||
case CMPS_LE_F32: return "v_cmps_le_f32";
|
||||
case CMPS_GT_F32: return "v_cmps_gt_f32";
|
||||
case CMPS_LG_F32: return "v_cmps_lg_f32";
|
||||
case CMPS_GE_F32: return "v_cmps_ge_f32";
|
||||
case CMPS_O_F32: return "v_cmps_o_f32";
|
||||
case CMPS_U_F32: return "v_cmps_u_f32";
|
||||
case CMPS_NGE_F32: return "v_cmps_nge_f32";
|
||||
case CMPS_NLG_F32: return "v_cmps_nlg_f32";
|
||||
case CMPS_NGT_F32: return "v_cmps_ngt_f32";
|
||||
case CMPS_NLE_F32: return "v_cmps_nle_f32";
|
||||
case CMPS_NEQ_F32: return "v_cmps_neq_f32";
|
||||
case CMPS_NLT_F32: return "v_cmps_nlt_f32";
|
||||
case CMPS_TRU_F32: return "v_cmps_tru_f32";
|
||||
case CMPSX_F_F32: return "v_cmpsx_f_f32";
|
||||
case CMPSX_LT_F32: return "v_cmpsx_lt_f32";
|
||||
case CMPSX_EQ_F32: return "v_cmpsx_eq_f32";
|
||||
case CMPSX_LE_F32: return "v_cmpsx_le_f32";
|
||||
case CMPSX_GT_F32: return "v_cmpsx_gt_f32";
|
||||
case CMPSX_LG_F32: return "v_cmpsx_lg_f32";
|
||||
case CMPSX_GE_F32: return "v_cmpsx_ge_f32";
|
||||
case CMPSX_O_F32: return "v_cmpsx_o_f32";
|
||||
case CMPSX_U_F32: return "v_cmpsx_u_f32";
|
||||
case CMPSX_NGE_F32: return "v_cmpsx_nge_f32";
|
||||
case CMPSX_NLG_F32: return "v_cmpsx_nlg_f32";
|
||||
case CMPSX_NGT_F32: return "v_cmpsx_ngt_f32";
|
||||
case CMPSX_NLE_F32: return "v_cmpsx_nle_f32";
|
||||
case CMPSX_NEQ_F32: return "v_cmpsx_neq_f32";
|
||||
case CMPSX_NLT_F32: return "v_cmpsx_nlt_f32";
|
||||
case CMPSX_TRU_F32: return "v_cmpsx_tru_f32";
|
||||
case CMPS_F_F64: return "v_cmps_f_f64";
|
||||
case CMPS_LT_F64: return "v_cmps_lt_f64";
|
||||
case CMPS_EQ_F64: return "v_cmps_eq_f64";
|
||||
case CMPS_LE_F64: return "v_cmps_le_f64";
|
||||
case CMPS_GT_F64: return "v_cmps_gt_f64";
|
||||
case CMPS_LG_F64: return "v_cmps_lg_f64";
|
||||
case CMPS_GE_F64: return "v_cmps_ge_f64";
|
||||
case CMPS_O_F64: return "v_cmps_o_f64";
|
||||
case CMPS_U_F64: return "v_cmps_u_f64";
|
||||
case CMPS_NGE_F64: return "v_cmps_nge_f64";
|
||||
case CMPS_NLG_F64: return "v_cmps_nlg_f64";
|
||||
case CMPS_NGT_F64: return "v_cmps_ngt_f64";
|
||||
case CMPS_NLE_F64: return "v_cmps_nle_f64";
|
||||
case CMPS_NEQ_F64: return "v_cmps_neq_f64";
|
||||
case CMPS_NLT_F64: return "v_cmps_nlt_f64";
|
||||
case CMPS_TRU_F64: return "v_cmps_tru_f64";
|
||||
case CMPSX_F_F64: return "v_cmpsx_f_f64";
|
||||
case CMPSX_LT_F64: return "v_cmpsx_lt_f64";
|
||||
case CMPSX_EQ_F64: return "v_cmpsx_eq_f64";
|
||||
case CMPSX_LE_F64: return "v_cmpsx_le_f64";
|
||||
case CMPSX_GT_F64: return "v_cmpsx_gt_f64";
|
||||
case CMPSX_LG_F64: return "v_cmpsx_lg_f64";
|
||||
case CMPSX_GE_F64: return "v_cmpsx_ge_f64";
|
||||
case CMPSX_O_F64: return "v_cmpsx_o_f64";
|
||||
case CMPSX_U_F64: return "v_cmpsx_u_f64";
|
||||
case CMPSX_NGE_F64: return "v_cmpsx_nge_f64";
|
||||
case CMPSX_NLG_F64: return "v_cmpsx_nlg_f64";
|
||||
case CMPSX_NGT_F64: return "v_cmpsx_ngt_f64";
|
||||
case CMPSX_NLE_F64: return "v_cmpsx_nle_f64";
|
||||
case CMPSX_NEQ_F64: return "v_cmpsx_neq_f64";
|
||||
case CMPSX_NLT_F64: return "v_cmpsx_nlt_f64";
|
||||
case CMPSX_TRU_F64: return "v_cmpsx_tru_f64";
|
||||
case CMP_F_I32: return "v_cmp_f_i32";
|
||||
case CMP_LT_I32: return "v_cmp_lt_i32";
|
||||
case CMP_EQ_I32: return "v_cmp_eq_i32";
|
||||
case CMP_LE_I32: return "v_cmp_le_i32";
|
||||
case CMP_GT_I32: return "v_cmp_gt_i32";
|
||||
case CMP_NE_I32: return "v_cmp_ne_i32";
|
||||
case CMP_GE_I32: return "v_cmp_ge_i32";
|
||||
case CMP_T_I32: return "v_cmp_t_i32";
|
||||
case CMP_CLASS_F32: return "v_cmp_class_f32";
|
||||
case CMP_LT_I16: return "v_cmp_lt_i16";
|
||||
case CMP_EQ_I16: return "v_cmp_eq_i16";
|
||||
case CMP_LE_I16: return "v_cmp_le_i16";
|
||||
case CMP_GT_I16: return "v_cmp_gt_i16";
|
||||
case CMP_NE_I16: return "v_cmp_ne_i16";
|
||||
case CMP_GE_I16: return "v_cmp_ge_i16";
|
||||
case CMP_CLASS_F16: return "v_cmp_class_f16";
|
||||
case CMPX_F_I32: return "v_cmpx_f_i32";
|
||||
case CMPX_LT_I32: return "v_cmpx_lt_i32";
|
||||
case CMPX_EQ_I32: return "v_cmpx_eq_i32";
|
||||
case CMPX_LE_I32: return "v_cmpx_le_i32";
|
||||
case CMPX_GT_I32: return "v_cmpx_gt_i32";
|
||||
case CMPX_NE_I32: return "v_cmpx_ne_i32";
|
||||
case CMPX_GE_I32: return "v_cmpx_ge_i32";
|
||||
case CMPX_T_I32: return "v_cmpx_t_i32";
|
||||
case CMPX_CLASS_F32: return "v_cmpx_class_f32";
|
||||
case CMPX_LT_I16: return "v_cmpx_lt_i16";
|
||||
case CMPX_EQ_I16: return "v_cmpx_eq_i16";
|
||||
case CMPX_LE_I16: return "v_cmpx_le_i16";
|
||||
case CMPX_GT_I16: return "v_cmpx_gt_i16";
|
||||
case CMPX_NE_I16: return "v_cmpx_ne_i16";
|
||||
case CMPX_GE_I16: return "v_cmpx_ge_i16";
|
||||
case CMPX_CLASS_F16: return "v_cmpx_class_f16";
|
||||
case CMP_F_I64: return "v_cmp_f_i64";
|
||||
case CMP_LT_I64: return "v_cmp_lt_i64";
|
||||
case CMP_EQ_I64: return "v_cmp_eq_i64";
|
||||
case CMP_LE_I64: return "v_cmp_le_i64";
|
||||
case CMP_GT_I64: return "v_cmp_gt_i64";
|
||||
case CMP_NE_I64: return "v_cmp_ne_i64";
|
||||
case CMP_GE_I64: return "v_cmp_ge_i64";
|
||||
case CMP_T_I64: return "v_cmp_t_i64";
|
||||
case CMP_CLASS_F64: return "v_cmp_class_f64";
|
||||
case CMP_LT_U16: return "v_cmp_lt_u16";
|
||||
case CMP_EQ_U16: return "v_cmp_eq_u16";
|
||||
case CMP_LE_U16: return "v_cmp_le_u16";
|
||||
case CMP_GT_U16: return "v_cmp_gt_u16";
|
||||
case CMP_NE_U16: return "v_cmp_ne_u16";
|
||||
case CMP_GE_U16: return "v_cmp_ge_u16";
|
||||
case CMPX_F_I64: return "v_cmpx_f_i64";
|
||||
case CMPX_LT_I64: return "v_cmpx_lt_i64";
|
||||
case CMPX_EQ_I64: return "v_cmpx_eq_i64";
|
||||
case CMPX_LE_I64: return "v_cmpx_le_i64";
|
||||
case CMPX_GT_I64: return "v_cmpx_gt_i64";
|
||||
case CMPX_NE_I64: return "v_cmpx_ne_i64";
|
||||
case CMPX_GE_I64: return "v_cmpx_ge_i64";
|
||||
case CMPX_T_I64: return "v_cmpx_t_i64";
|
||||
case CMPX_CLASS_F64: return "v_cmpx_class_f64";
|
||||
case CMPX_LT_U16: return "v_cmpx_lt_u16";
|
||||
case CMPX_EQ_U16: return "v_cmpx_eq_u16";
|
||||
case CMPX_LE_U16: return "v_cmpx_le_u16";
|
||||
case CMPX_GT_U16: return "v_cmpx_gt_u16";
|
||||
case CMPX_NE_U16: return "v_cmpx_ne_u16";
|
||||
case CMPX_GE_U16: return "v_cmpx_ge_u16";
|
||||
case CMP_F_U32: return "v_cmp_f_u32";
|
||||
case CMP_LT_U32: return "v_cmp_lt_u32";
|
||||
case CMP_EQ_U32: return "v_cmp_eq_u32";
|
||||
case CMP_LE_U32: return "v_cmp_le_u32";
|
||||
case CMP_GT_U32: return "v_cmp_gt_u32";
|
||||
case CMP_NE_U32: return "v_cmp_ne_u32";
|
||||
case CMP_GE_U32: return "v_cmp_ge_u32";
|
||||
case CMP_T_U32: return "v_cmp_t_u32";
|
||||
case CMP_F_F16: return "v_cmp_f_f16";
|
||||
case CMP_LT_F16: return "v_cmp_lt_f16";
|
||||
case CMP_EQ_F16: return "v_cmp_eq_f16";
|
||||
case CMP_LE_F16: return "v_cmp_le_f16";
|
||||
case CMP_GT_F16: return "v_cmp_gt_f16";
|
||||
case CMP_LG_F16: return "v_cmp_lg_f16";
|
||||
case CMP_GE_F16: return "v_cmp_ge_f16";
|
||||
case CMP_O_F16: return "v_cmp_o_f16";
|
||||
case CMPX_F_U32: return "v_cmpx_f_u32";
|
||||
case CMPX_LT_U32: return "v_cmpx_lt_u32";
|
||||
case CMPX_EQ_U32: return "v_cmpx_eq_u32";
|
||||
case CMPX_LE_U32: return "v_cmpx_le_u32";
|
||||
case CMPX_GT_U32: return "v_cmpx_gt_u32";
|
||||
case CMPX_NE_U32: return "v_cmpx_ne_u32";
|
||||
case CMPX_GE_U32: return "v_cmpx_ge_u32";
|
||||
case CMPX_T_U32: return "v_cmpx_t_u32";
|
||||
case CMPX_F_F16: return "v_cmpx_f_f16";
|
||||
case CMPX_LT_F16: return "v_cmpx_lt_f16";
|
||||
case CMPX_EQ_F16: return "v_cmpx_eq_f16";
|
||||
case CMPX_LE_F16: return "v_cmpx_le_f16";
|
||||
case CMPX_GT_F16: return "v_cmpx_gt_f16";
|
||||
case CMPX_LG_F16: return "v_cmpx_lg_f16";
|
||||
case CMPX_GE_F16: return "v_cmpx_ge_f16";
|
||||
case CMPX_O_F16: return "v_cmpx_o_f16";
|
||||
case CMP_F_U64: return "v_cmp_f_u64";
|
||||
case CMP_LT_U64: return "v_cmp_lt_u64";
|
||||
case CMP_EQ_U64: return "v_cmp_eq_u64";
|
||||
case CMP_LE_U64: return "v_cmp_le_u64";
|
||||
case CMP_GT_U64: return "v_cmp_gt_u64";
|
||||
case CMP_NE_U64: return "v_cmp_ne_u64";
|
||||
case CMP_GE_U64: return "v_cmp_ge_u64";
|
||||
case CMP_T_U64: return "v_cmp_t_u64";
|
||||
case CMP_U_F16: return "v_cmp_u_f16";
|
||||
case CMP_NGE_F16: return "v_cmp_nge_f16";
|
||||
case CMP_NLG_F16: return "v_cmp_nlg_f16";
|
||||
case CMP_NGT_F16: return "v_cmp_ngt_f16";
|
||||
case CMP_NLE_F16: return "v_cmp_nle_f16";
|
||||
case CMP_NEQ_F16: return "v_cmp_neq_f16";
|
||||
case CMP_NLT_F16: return "v_cmp_nlt_f16";
|
||||
case CMP_TRU_F16: return "v_cmp_tru_f16";
|
||||
case CMPX_F_U64: return "v_cmpx_f_u64";
|
||||
case CMPX_LT_U64: return "v_cmpx_lt_u64";
|
||||
case CMPX_EQ_U64: return "v_cmpx_eq_u64";
|
||||
case CMPX_LE_U64: return "v_cmpx_le_u64";
|
||||
case CMPX_GT_U64: return "v_cmpx_gt_u64";
|
||||
case CMPX_NE_U64: return "v_cmpx_ne_u64";
|
||||
case CMPX_GE_U64: return "v_cmpx_ge_u64";
|
||||
case CMPX_T_U64: return "v_cmpx_t_u64";
|
||||
case CMPX_U_F16: return "v_cmpx_u_f16";
|
||||
case CMPX_NGE_F16: return "v_cmpx_nge_f16";
|
||||
case CMPX_NLG_F16: return "v_cmpx_nlg_f16";
|
||||
case CMPX_NGT_F16: return "v_cmpx_ngt_f16";
|
||||
case CMPX_NLE_F16: return "v_cmpx_nle_f16";
|
||||
case CMPX_NEQ_F16: return "v_cmpx_neq_f16";
|
||||
case CMPX_NLT_F16: return "v_cmpx_nlt_f16";
|
||||
case CMPX_TRU_F16: return "v_cmpx_tru_f16";
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
92
rpcsx-gpu2/lib/gcn-shader/include/shader/eval.hpp
Normal file
92
rpcsx-gpu2/lib/gcn-shader/include/shader/eval.hpp
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
#pragma once
|
||||
|
||||
#include "Vector.hpp"
|
||||
#include "ir/Value.hpp"
|
||||
#include <cstdint>
|
||||
#include <variant>
|
||||
#include <array>
|
||||
|
||||
namespace shader::eval {
|
||||
struct Value {
|
||||
using Storage = std::variant<
|
||||
std::nullptr_t, std::int8_t, std::int16_t, std::int32_t, std::int64_t,
|
||||
std::uint8_t, std::uint16_t, std::uint32_t, std::uint64_t, float16_t,
|
||||
float32_t, float64_t, u8vec2, u8vec3, u8vec4, i8vec2, i8vec3, i8vec4,
|
||||
u16vec2, u16vec3, u16vec4, i16vec2, i16vec3, i16vec4, u32vec2, u32vec3,
|
||||
u32vec4, i32vec2, i32vec3, i32vec4, u64vec2, u64vec3, u64vec4, i64vec2,
|
||||
i64vec3, i64vec4, f32vec2, f32vec3, f32vec4, f64vec2, f64vec3, f64vec4,
|
||||
f16vec2, f16vec3, f16vec4, bool, bvec2, bvec3, bvec4, std::array<uint32_t, 8>>;
|
||||
static constexpr auto StorageSize = std::variant_size_v<Storage>;
|
||||
Storage storage;
|
||||
|
||||
explicit operator bool() const { return !empty(); }
|
||||
bool empty() const { return storage.index() == 0; }
|
||||
|
||||
Value() : storage(nullptr) {}
|
||||
|
||||
template <typename T>
|
||||
Value(T &&value)
|
||||
requires requires { Storage(std::forward<T>(value)); }
|
||||
: storage(std::forward<T>(value)) {}
|
||||
|
||||
static Value compositeConstruct(ir::Value type,
|
||||
std::span<const Value> constituents);
|
||||
Value compositeExtract(const Value &index) const;
|
||||
// Value compositeInsert(const Value &object, std::size_t index) const;
|
||||
|
||||
Value isNan() const;
|
||||
Value isInf() const;
|
||||
Value isFinite() const;
|
||||
Value makeUnsigned() const;
|
||||
Value makeSigned() const;
|
||||
Value all() const;
|
||||
Value any() const;
|
||||
Value select(const Value &trueValue, const Value &falseValue) const;
|
||||
Value iConvert(ir::Value type, bool isSigned) const;
|
||||
Value sConvert(ir::Value type) const { return iConvert(type, true); }
|
||||
Value uConvert(ir::Value type) const { return iConvert(type, false); }
|
||||
Value fConvert(ir::Value type) const;
|
||||
Value bitcast(ir::Value type) const;
|
||||
std::optional<std::uint64_t> zExtScalar() const;
|
||||
std::optional<std::int64_t> sExtScalar() const;
|
||||
|
||||
template <typename T>
|
||||
requires requires { std::get<T>(storage); }
|
||||
T get() const {
|
||||
return std::get<T>(storage);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires requires { std::get<T>(storage); }
|
||||
std::optional<T> as() const {
|
||||
if (auto result = std::get_if<T>(&storage)) {
|
||||
return *result;
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
Value operator+(const Value &rhs) const;
|
||||
Value operator-(const Value &rhs) const;
|
||||
Value operator*(const Value &rhs) const;
|
||||
Value operator/(const Value &rhs) const;
|
||||
Value operator%(const Value &rhs) const;
|
||||
Value operator&(const Value &rhs) const;
|
||||
Value operator|(const Value &rhs) const;
|
||||
Value operator^(const Value &rhs) const;
|
||||
Value operator>>(const Value &rhs) const;
|
||||
Value operator<<(const Value &rhs) const;
|
||||
Value operator&&(const Value &rhs) const;
|
||||
Value operator||(const Value &rhs) const;
|
||||
Value operator<(const Value &rhs) const;
|
||||
Value operator>(const Value &rhs) const;
|
||||
Value operator<=(const Value &rhs) const;
|
||||
Value operator>=(const Value &rhs) const;
|
||||
Value operator==(const Value &rhs) const;
|
||||
Value operator!=(const Value &rhs) const;
|
||||
|
||||
Value operator-() const;
|
||||
Value operator~() const;
|
||||
Value operator!() const;
|
||||
};
|
||||
} // namespace shader::eval
|
||||
125
rpcsx-gpu2/lib/gcn-shader/include/shader/gcn.hpp
Normal file
125
rpcsx-gpu2/lib/gcn-shader/include/shader/gcn.hpp
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
#pragma once
|
||||
|
||||
#include "SemanticInfo.hpp"
|
||||
#include "SpvConverter.hpp"
|
||||
#include "analyze.hpp"
|
||||
#include "rx/MemoryTable.hpp"
|
||||
#include "spv.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
|
||||
namespace shader::gcn {
|
||||
using Builder = ir::Builder<ir::spv::Builder, ir::builtin::Builder>;
|
||||
|
||||
// Hardware shader stage a GCN shader binary was authored for. The compound
// names encode the pipeline configuration the stage runs in (e.g. a vertex
// shader may run as the final VS, as an export shader (Es) feeding a GS, or
// as a local shader (Ls) feeding tessellation).
// NOTE(review): pairing semantics inferred from GCN naming conventions —
// confirm against the deserializer that consumes this enum.
enum class Stage {
  Ps,   // pixel (fragment) shader
  VsVs, // vertex shader running as the final vertex stage
  VsEs, // vertex shader running as export shader (before GS)
  VsLs, // vertex shader running as local shader (before tessellation)
  Cs,   // compute shader
  Gs,   // geometry shader
  GsVs, // GS copy shader producing vertex output
  Hs,   // hull (tessellation control) shader
  DsVs, // domain shader running as the final vertex stage
  DsEs, // domain shader running as export shader (before GS)

  Invalid, // sentinel for an unrecognized/unsupported stage
};
|
||||
|
||||
// spv::Import specialization used when pulling nodes from the semantic module
// into the module being translated; overrides the clone hook.
struct Import : spv::Import {
  ir::Node getOrCloneImpl(ir::Context &context, ir::Node node,
                          bool isOperand) override;
};

// Semantic-module description extended with the register variables the module
// declares (register id -> variable value).
struct SemanticModuleInfo : shader::SemanticModuleInfo {
  std::map<int, ir::Value> registerVariables;
};

// Rewrites the parsed semantic module into the canonical form the translator
// expects.
void canonicalizeSemantic(ir::Context &context,
                          const spv::BinaryLayout &semantic);
// Populates `moduleInfo` from a parsed semantic-module binary layout.
void collectSemanticModuleInfo(SemanticModuleInfo &moduleInfo,
                               const spv::BinaryLayout &layout);
// Derives the lookup structure used during translation from the module info.
SemanticInfo collectSemanticInfo(const SemanticModuleInfo &moduleInfo);
|
||||
|
||||
// RegionLike adaptor that forwards insertions to `base` while latching the
// first instruction ever inserted through it into `*firstInstruction`.
struct InstructionRegion : ir::RegionLikeImpl {
  ir::RegionLike base; // region that actually stores the instructions
  // Out-slot for the first inserted instruction. The caller must point this
  // at a valid (initially null) Instruction handle before use — insertAfter
  // dereferences it unconditionally.
  ir::Instruction *firstInstruction;

  // Inserts `node` after `point` in `base`; records `node` as the first
  // instruction if none has been recorded yet.
  void insertAfter(ir::Instruction point, ir::Instruction node) {
    // `!*firstInstruction` tests the wrapped handle for null, not the pointer.
    if (!*firstInstruction) {
      *firstInstruction = node;
    }

    base.insertAfter(point, node);
  }
};
|
||||
|
||||
// Identifies a GCN register file or pseudo-register; used as the key for
// register storage in gcn::Context::registerVariables.
enum RegId {
  Sgpr,        // scalar general-purpose register file
  Vgpr,        // vector general-purpose register file
  M0,          // the m0 register (LDS/message configuration)
  Scc,         // scalar condition code
  Vcc,         // vector condition code mask
  Exec,        // execution mask
  VccZ,        // "vcc is zero" flag
  ExecZ,       // "exec is zero" flag
  LdsDirect,   // LDS direct-read register
  SgprCount,   // pseudo-register: number of allocated SGPRs
  VgprCount,   // pseudo-register: number of allocated VGPRs
  ThreadId,    // pseudo-register: current thread/lane id
  MemoryTable, // pseudo-register: memory table handle — TODO confirm semantics
  Gds,         // global data share
};
|
||||
|
||||
// Per-shader translation context: extends the SPIR-V conversion context with
// GCN-specific state (translated body, register variables, memory map).
struct Context : spv::Context {
  ir::Region body;                 // IR region receiving the translated shader
  rx::MemoryAreaTable<> memoryMap; // guest memory areas referenced by the shader
  std::uint32_t requiredUserSgprs = 0; // user SGPRs the shader expects to be loaded
  std::map<RegId, ir::Value> registerVariables; // lazily created register storage
  std::map<std::uint64_t, ir::Instruction> instructions; // guest address -> translated instruction
  AnalysisStorage analysis; // cached analysis results for this shader

  // Returns the label value for `address` within `body`; the bool reports
  // whether the label was newly created by this call.
  std::pair<ir::Value, bool> getOrCreateLabel(ir::Location loc, ir::Region body,
                                              std::uint64_t address);
  // Creates a builder positioned for the instruction at `address`.
  Builder createBuilder(InstructionRegion &region, ir::Region bodyRegion,
                        std::uint64_t address);

  // Emits a conversion of `value` to `targetType` at `loc`.
  ir::Value createCast(ir::Location loc, Builder &builder, ir::Value targetType,
                       ir::Value value);

  // Overrides the storage value backing register `id`.
  void setRegisterVariable(RegId id, ir::Value value) {
    registerVariables[id] = value;
  }

  // Returns the storage for register `id`, creating it on first use.
  ir::Value getOrCreateRegisterVariable(RegId id);

  // Returns an addressable reference to register `id` at `index`, optionally
  // for a specific `lane` (null = current lane).
  ir::Value getRegisterRef(ir::Location loc, Builder &builder, RegId id,
                           const ir::Operand &index, ir::Value lane = nullptr);

  // Reads register `id` at `index`, interpreting the bits as `typeValue`.
  ir::Value readReg(ir::Location loc, Builder &builder, ir::Value typeValue,
                    RegId id, const ir::Operand &index,
                    ir::Value lane = nullptr);

  // Writes `value` into register `id` at `index`.
  void writeReg(ir::Location loc, Builder &builder, RegId id,
                const ir::Operand &index, ir::Value value,
                ir::Value lane = nullptr);

  // Emits the access chain for element `index` of register storage `reg`.
  ir::Value createRegisterAccess(Builder &builder, ir::Location loc,
                                 ir::Value reg, const ir::Operand &index,
                                 ir::Value lane = nullptr);
};
|
||||
|
||||
// Description of the execution environment a shader is deserialized for:
// register budget, initial user SGPRs, and host feature capabilities.
struct Environment {
  std::uint8_t vgprCount; // number of VGPRs allocated to the shader
  std::uint8_t sgprCount; // number of SGPRs allocated to the shader
  std::span<const std::uint32_t> userSgprs; // initial user SGPR values (non-owning view)
  bool supportsBarycentric = true;   // host exposes barycentric fragment interpolation
  bool supportsInt8 = false;         // host exposes 8-bit integer operations
  bool supportsInt64Atomics = false; // host exposes 64-bit integer atomics
};
|
||||
|
||||
ir::Region deserialize(Context &context, const Environment &environment,
|
||||
const SemanticInfo &semanticInfo, std::uint64_t base,
|
||||
std::function<std::uint32_t(std::uint64_t)> readMemory);
|
||||
} // namespace shader::gcn
|
||||
31
rpcsx-gpu2/lib/gcn-shader/include/shader/glsl.hpp
Normal file
31
rpcsx-gpu2/lib/gcn-shader/include/shader/glsl.hpp
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
#pragma once
|
||||
#include "ir/Location.hpp"
|
||||
#include "spv.hpp"
|
||||
#include <filesystem>
|
||||
|
||||
namespace shader::glsl {
|
||||
// Pipeline stage a GLSL source is compiled as; mirrors the glslang stage set,
// plus Library for stage-less utility code.
enum class Stage {
  Library,
  Vertex,
  TessControl,
  TessEvaluation,
  Geometry,
  Fragment,
  Compute,
  RayGen,
  Intersect,
  AnyHit,
  ClosestHit,
  Miss,
  Callable,
  Task,
  Mesh,
};

// Compiles the GLSL file at `path` for `stage` into a SPIR-V binary layout;
// returns std::nullopt on failure.
std::optional<spv::BinaryLayout> parseFile(ir::Context &context, Stage stage,
                                           const std::filesystem::path &path);
// Same as parseFile but compiles from an in-memory source string; `loc` is
// attached to produced nodes for diagnostics.
std::optional<spv::BinaryLayout> parseSource(ir::Context &context, Stage stage,
                                             std::string_view source,
                                             ir::Location loc = nullptr);
// Produces a human-readable (GLSL-like) rendering of a SPIR-V binary.
std::string decompile(std::span<const std::uint32_t> spv);
|
||||
} // namespace shader::glsl
|
||||
320
rpcsx-gpu2/lib/gcn-shader/include/shader/graph.hpp
Normal file
320
rpcsx-gpu2/lib/gcn-shader/include/shader/graph.hpp
Normal file
|
|
@ -0,0 +1,320 @@
|
|||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace graph {
|
||||
// Dominator tree over basic blocks identified by pointer-like handles.
// Nodes are created externally (createRoot/createChild). The query helpers
// dominates()/findNearestCommonDominator() rely on the DFS interval numbering,
// so updateDFSNumbers() must be called once the tree is fully built.
template <typename BasicBlockPtrT> class DomTree {
public:
  struct Node {
    BasicBlockPtrT block = nullptr;
    Node *immDom = nullptr;  // immediate dominator; null for the root
    unsigned dfsNumIn = ~0;  // DFS entry number (valid after updateDFSNumbers)
    unsigned dfsNumOut = ~0; // DFS exit number
    unsigned level = 0;      // depth in the dominator tree (root = 0)
    std::vector<Node *> children;

    bool isLeaf() const { return children.empty(); }

    // Interval-containment dominance test; valid only after
    // updateDFSNumbers() has assigned dfsNumIn/dfsNumOut.
    bool dominatedBy(const Node *other) const {
      return this->dfsNumIn >= other->dfsNumIn &&
             this->dfsNumOut <= other->dfsNumOut;
    }
  };

private:
  std::map<BasicBlockPtrT, Node> bbToNodes;
  Node *rootNode = nullptr;

public:
  // Returns the node for `bb`, or null if the block is not in the tree.
  Node *getNode(BasicBlockPtrT bb) {
    auto it = bbToNodes.find(bb);
    if (it != bbToNodes.end()) {
      return &it->second;
    }

    return nullptr;
  }

  // Creates the node for `bb` immediately dominated by `parent`.
  Node *createChild(BasicBlockPtrT bb, Node *parent) {
    auto &child = bbToNodes[bb];
    child.block = bb;
    child.immDom = parent;
    child.level = parent->level + 1;
    parent->children.push_back(&child);
    return &child;
  }

  // Creates (and remembers) the root node for `bb`.
  Node *createRoot(BasicBlockPtrT bb) {
    auto &root = bbToNodes[bb];
    rootNode = &root;
    root.block = bb;
    return rootNode;
  }

  Node *getRootNode() { return rootNode; }

  // Assigns preorder/postorder DFS numbers to all nodes (iteratively, to
  // avoid stack overflow on deep trees). Must be rerun after tree changes.
  void updateDFSNumbers() {
    std::vector<std::pair<Node *, typename std::vector<Node *>::iterator>>
        workStack;

    auto root = getRootNode();
    if (!root)
      return;

    workStack.push_back({root, root->children.begin()});

    unsigned dfsNum = 0;
    root->dfsNumIn = dfsNum++;

    while (!workStack.empty()) {
      auto node = workStack.back().first;
      const auto childIt = workStack.back().second;

      // All children visited: close this node's interval.
      if (childIt == node->children.end()) {
        node->dfsNumOut = dfsNum++;
        workStack.pop_back();
      } else {
        auto child = *childIt;
        ++workStack.back().second;

        workStack.push_back({child, child->children.begin()});
        child->dfsNumIn = dfsNum++;
      }
    }
  }

  // True if `a` dominates `b` (reflexive). Falls back to the O(1) interval
  // test, so updateDFSNumbers() must have been called.
  bool dominates(Node *a, Node *b) {
    if (a == b || b->immDom == a) {
      return true;
    }

    // A node never dominates one at the same or a shallower level.
    if (a->immDom == b || a->level >= b->level) {
      return false;
    }

    return b->dominatedBy(a);
  }

  bool dominates(BasicBlockPtrT a, BasicBlockPtrT b) {
    return dominates(getNode(a), getNode(b));
  }

  // Returns the immediate dominator block of `a`, or a null handle if `a` is
  // the root or unknown to this tree (previously an unknown block caused a
  // null-pointer dereference).
  BasicBlockPtrT getImmediateDominator(BasicBlockPtrT a) {
    auto node = getNode(a);
    if (node == nullptr) {
      return {};
    }
    if (auto immDom = node->immDom) {
      return immDom->block;
    }
    return {};
  }

  // True if `block` is the immediate dominator of `immDomBlock`.
  bool isImmediateDominator(BasicBlockPtrT block, BasicBlockPtrT immDomBlock) {
    if (immDomBlock == nullptr) {
      return false;
    }

    return getImmediateDominator(immDomBlock) == block;
  }

  // Returns the deepest block dominating both `a` and `b`, by walking the
  // deeper node up until the two paths meet.
  BasicBlockPtrT findNearestCommonDominator(BasicBlockPtrT a,
                                            BasicBlockPtrT b) {
    auto aNode = getNode(a);
    auto bNode = getNode(b);

    // The root dominates everything.
    if (aNode == rootNode || bNode == rootNode) {
      return rootNode->block;
    }

    while (aNode != bNode) {
      if (aNode->level < bNode->level) {
        std::swap(aNode, bNode);
      }

      aNode = aNode->immDom;
    }

    return aNode->block;
  }
};
|
||||
|
||||
// Builds a DomTree from a CFG using the Semi-NCA algorithm (semidominators
// followed by a nearest-common-ancestor correction), the same scheme LLVM's
// dominator-tree construction uses.
template <typename BasicBlockPtrT> class DomTreeBuilder {
  using DomTreeNode = typename DomTree<BasicBlockPtrT>::Node;

  // Per-block bookkeeping for the DFS and Semi-NCA passes.
  struct NodeInfo {
    unsigned dfsNum = 0; // preorder DFS number; 0 means "not visited yet"
    unsigned parent = 0; // DFS number of the DFS-tree parent
    unsigned semi = 0;   // DFS number of the current semidominator candidate
    BasicBlockPtrT label = nullptr;  // path-compression label used by eval()
    BasicBlockPtrT immDom = nullptr; // computed immediate dominator
    std::vector<BasicBlockPtrT> revChildren; // CFG predecessors found by the DFS
  };

  // Maps DFS number -> block; slot 0 is a sentinel so numbering starts at 1.
  std::vector<BasicBlockPtrT> indexToNode = {nullptr};
  std::map<BasicBlockPtrT, NodeInfo> nodeToInfo;

  // Iterative preorder DFS over the CFG. `walk(bb, cb)` must invoke `cb` for
  // each successor of `bb`. Assigns DFS numbers and records, for every block,
  // the predecessors through which it was reached.
  template <typename WalkFn>
  void runDFS(BasicBlockPtrT root, const WalkFn &walk) {
    std::vector<BasicBlockPtrT> workList;
    workList.reserve(10);
    workList.push_back(root);
    unsigned index = 0;

    while (!workList.empty()) {
      auto bb = workList.back();
      workList.pop_back();

      auto &bbInfo = nodeToInfo[bb];

      // Already numbered: the block was reached through another path.
      if (bbInfo.dfsNum != 0) {
        continue;
      }

      bbInfo.dfsNum = bbInfo.semi = ++index;
      bbInfo.label = bb;
      indexToNode.push_back(bb);

      walk(bb, [&](BasicBlockPtrT successor) {
        auto it = nodeToInfo.find(successor);
        if (it != nodeToInfo.end() && it->second.dfsNum != 0) {
          // Back/cross edge: just record the predecessor (self-loops skipped).
          if (successor != bb) {
            it->second.revChildren.push_back(bb);
          }

          return;
        }

        auto &succInfo = nodeToInfo[successor];
        workList.push_back(successor);
        succInfo.parent = index;
        succInfo.revChildren.push_back(bb);
      });
    }
  }

  // Semi-NCA core: compute semidominators in reverse DFS order, then adjust
  // each tentative immediate dominator upward to the nearest common ancestor.
  void runSemiNCA() {
    const unsigned nextDFS = indexToNode.size();

    // Initialize each node's immDom to its DFS-tree parent.
    for (unsigned i = 1; i < nextDFS; ++i) {
      const BasicBlockPtrT node = indexToNode[i];
      auto &NodeInfo = nodeToInfo[node];
      NodeInfo.immDom = indexToNode[NodeInfo.parent];
    }

    std::vector<NodeInfo *> evalStack;
    evalStack.reserve(10);

    // Semidominator pass (reverse preorder, root excluded).
    for (unsigned i = nextDFS - 1; i >= 2; --i) {
      BasicBlockPtrT node = indexToNode[i];
      auto &nodeInfo = nodeToInfo[node];

      // semi = min over predecessors of the best label on the eval path.
      nodeInfo.semi = nodeInfo.parent;
      for (const auto &child : nodeInfo.revChildren) {
        if (!nodeToInfo.contains(child)) {
          continue;
        }

        unsigned childSemi = nodeToInfo[eval(child, i + 1, evalStack)].semi;
        if (childSemi < nodeInfo.semi) {
          nodeInfo.semi = childSemi;
        }
      }
    }

    // NCA pass: walk each tentative immDom up until it is at or above the
    // semidominator; the result is the true immediate dominator.
    for (unsigned i = 2; i < nextDFS; ++i) {
      const BasicBlockPtrT node = indexToNode[i];
      auto &nodeInfo = nodeToInfo[node];
      const unsigned sDomNum = nodeToInfo[indexToNode[nodeInfo.semi]].dfsNum;
      BasicBlockPtrT immDom = nodeInfo.immDom;

      while (nodeToInfo[immDom].dfsNum > sDomNum) {
        immDom = nodeToInfo[immDom].immDom;
      }

      nodeInfo.immDom = immDom;
    }
  }

  // Returns the block with the minimal-semi label on the DFS-tree path from
  // `block` to the last linked ancestor, compressing the path as it goes
  // (classic Lengauer-Tarjan eval/link with an explicit stack).
  BasicBlockPtrT eval(BasicBlockPtrT block, unsigned LastLinked,
                      std::vector<NodeInfo *> &stack) {
    NodeInfo *blockInfo = &nodeToInfo[block];
    if (blockInfo->parent < LastLinked)
      return blockInfo->label;

    // Collect the uncompressed ancestor chain.
    do {
      stack.push_back(blockInfo);
      blockInfo = &nodeToInfo[indexToNode[blockInfo->parent]];
    } while (blockInfo->parent >= LastLinked);

    // Unwind, propagating the best label and re-parenting (path compression).
    const NodeInfo *pInfo = blockInfo;
    const NodeInfo *pLabelInfo = &nodeToInfo[pInfo->label];
    do {
      blockInfo = stack.back();
      stack.pop_back();

      blockInfo->parent = pInfo->parent;
      const NodeInfo *labelInfo = &nodeToInfo[blockInfo->label];
      if (pLabelInfo->semi < labelInfo->semi) {
        blockInfo->label = pInfo->label;
      } else {
        pLabelInfo = labelInfo;
      }

      pInfo = blockInfo;
    } while (!stack.empty());
    return blockInfo->label;
  }

  // Materializes the DomTree node for BB, recursively creating its
  // (already computed) immediate-dominator chain first.
  DomTreeNode *getNodeForBlock(BasicBlockPtrT BB, DomTree<BasicBlockPtrT> &DT) {
    if (auto Node = DT.getNode(BB))
      return Node;

    BasicBlockPtrT IDom = getIDom(BB);
    auto IDomNode = getNodeForBlock(IDom, DT);

    return DT.createChild(BB, IDomNode);
  }

  // Computed immediate dominator of BB, or null if BB was never visited.
  BasicBlockPtrT getIDom(BasicBlockPtrT BB) const {
    auto InfoIt = nodeToInfo.find(BB);
    if (InfoIt == nodeToInfo.end())
      return nullptr;

    return InfoIt->second.immDom;
  }

public:
  // Runs DFS + Semi-NCA over the CFG reachable from `root` and materializes
  // the dominator tree (with DFS numbers ready for interval queries).
  template <typename WalkFn>
  DomTree<BasicBlockPtrT> build(BasicBlockPtrT root,
                                const WalkFn &walkSuccessors) {
    runDFS(root, walkSuccessors);
    runSemiNCA();

    DomTree<BasicBlockPtrT> domTree;
    domTree.createRoot(root);

    // indexToNode[1] is the root itself; anchor its immDom to itself so
    // getNodeForBlock terminates.
    nodeToInfo[indexToNode[1]].immDom = root;

    for (size_t i = 1, e = indexToNode.size(); i != e; ++i) {
      BasicBlockPtrT node = indexToNode[i];

      if (domTree.getNode(node))
        continue;

      BasicBlockPtrT immDom = getIDom(node);

      auto immDomNode = getNodeForBlock(immDom, domTree);
      domTree.createChild(node, immDomNode);
    }

    domTree.updateDFSNumbers();
    return domTree;
  }
};
|
||||
|
||||
template <typename BasicBlockPtrT>
|
||||
DomTree<BasicBlockPtrT> buildDomTree(BasicBlockPtrT root, auto &&walkSuccessors)
|
||||
requires requires(void (*cb)(BasicBlockPtrT)) { walkSuccessors(root, cb); }
|
||||
{
|
||||
return DomTreeBuilder<BasicBlockPtrT>().build(root, walkSuccessors);
|
||||
}
|
||||
} // namespace graph
|
||||
14
rpcsx-gpu2/lib/gcn-shader/include/shader/ir.hpp
Normal file
14
rpcsx-gpu2/lib/gcn-shader/include/shader/ir.hpp
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
#pragma once
|
||||
|
||||
#include "ir/Context.hpp" // IWYU pragma: export
|
||||
#include "ir/Instruction.hpp" // IWYU pragma: export
|
||||
#include "ir/Location.hpp" // IWYU pragma: export
|
||||
#include "ir/Node.hpp" // IWYU pragma: export
|
||||
#include "ir/Operand.hpp" // IWYU pragma: export
|
||||
#include "ir/PointerWrapper.hpp" // IWYU pragma: export
|
||||
#include "ir/PrintableWrapper.hpp" // IWYU pragma: export
|
||||
#include "ir/Value.hpp" // IWYU pragma: export
|
||||
#include "ir/Builder.hpp" // IWYU pragma: export
|
||||
#include "ir/Region.hpp" // IWYU pragma: export
|
||||
#include "ir/OperandPrint.hpp" // IWYU pragma: export
|
||||
#include "ir/Impl.hpp" // IWYU pragma: export
|
||||
52
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Block.hpp
Normal file
52
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Block.hpp
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
#pragma once
|
||||
|
||||
#include "RegionLike.hpp"
|
||||
#include "RegionLikeImpl.hpp"
|
||||
#include "ValueImpl.hpp"
|
||||
|
||||
namespace shader::ir {
|
||||
// Wrapper combining region-like child access with value semantics; a block is
// a value that also contains instructions.
template <typename ImplT>
struct BlockWrapper : RegionLikeWrapper<ImplT, ValueWrapper> {
  using RegionLikeWrapper<ImplT, ValueWrapper>::RegionLikeWrapper;
  using RegionLikeWrapper<ImplT, ValueWrapper>::operator=;
};

struct BlockImpl;

// Handle type for basic blocks (non-owning, pointer-like).
struct Block : BlockWrapper<BlockImpl> {
  using BlockWrapper<BlockImpl>::BlockWrapper;
  using BlockWrapper<BlockImpl>::operator=;
};
|
||||
|
||||
// Implementation of a basic block: a value (so it can be referenced as an
// operand) that also owns a list of child instructions.
struct BlockImpl : ValueImpl, RegionLikeImpl {
  BlockImpl(Location loc);
  Node clone(Context &context, CloneMap &map) const override;

  // Prints "%name = [operands] { children }"; the operand list is omitted
  // when empty.
  void print(std::ostream &os, NameStorage &ns) const override {
    os << '%' << ns.getNameOf(const_cast<BlockImpl *>(this));
    os << " = ";

    if (!getOperands().empty()) {
      os << '[';
      for (bool first = true; auto &operand : getOperands()) {
        if (first) {
          first = false;
        } else {
          os << ", ";
        }

        operand.print(os, ns);
      }
      os << "] ";
    }

    os << "{\n";
    for (auto child : children()) {
      os << "  ";
      child.print(os, ns);
      os << "\n";
    }
    os << "}";
  }
};
|
||||
} // namespace ir
|
||||
84
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Builder.hpp
Normal file
84
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Builder.hpp
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
#pragma once
|
||||
#include "Context.hpp"
|
||||
#include "Node.hpp"
|
||||
#include "RegionLikeImpl.hpp"
|
||||
|
||||
namespace shader::ir {
|
||||
// CRTP facade mixed into dialect builder interfaces: forwards context access,
// insertion storage and node creation to the concrete builder (`BuilderT`).
// NOTE(review): the double static_cast routes through BuilderT before ImplT —
// presumably because interfaces derive from this facade; confirm with the
// dialect builder definitions.
template <typename BuilderT, typename ImplT> struct BuilderFacade {
  ImplT &instance() {
    return *static_cast<ImplT *>(static_cast<BuilderT *>(this));
  }
  Context &getContext() { return instance().getContext(); }

  Node getInsertionStorage() { return instance().getInsertionStorage(); }
  // Creates a node of handle type T via the underlying builder; constrained
  // so T must wrap a NodeImpl constructible from the given arguments.
  template <typename T, typename... ArgsT>
    requires requires {
      typename T::underlying_type;
      requires std::is_constructible_v<typename T::underlying_type, ArgsT...>;
      requires std::is_base_of_v<NodeImpl, typename T::underlying_type>;
    }
  T create(ArgsT &&...args) {
    return instance().template create<T>(std::forward<ArgsT>(args)...);
  }
};
|
||||
|
||||
template <template <typename> typename... InterfaceTs>
|
||||
class Builder : public InterfaceTs<Builder<InterfaceTs...>>... {
|
||||
Context *mContext{};
|
||||
RegionLike mInsertionStorage;
|
||||
Instruction mInsertionPoint;
|
||||
|
||||
public:
|
||||
Builder() = default;
|
||||
Builder(Context &context) : mContext(&context) {}
|
||||
|
||||
static Builder createInsertAfter(Context &context, Instruction point) {
|
||||
auto result = Builder(context);
|
||||
result.mInsertionStorage = point.getParent();
|
||||
result.mInsertionPoint = point;
|
||||
return result;
|
||||
}
|
||||
|
||||
static Builder createInsertBefore(Context &context, Instruction point) {
|
||||
auto result = Builder(context);
|
||||
result.mInsertionStorage = point.getParent();
|
||||
result.mInsertionPoint = point.getPrev().cast<Instruction>();
|
||||
return result;
|
||||
}
|
||||
|
||||
static Builder createAppend(Context &context, RegionLike storage) {
|
||||
auto result = Builder(context);
|
||||
result.mInsertionStorage = storage;
|
||||
result.mInsertionPoint = storage.getLast().cast<Instruction>();
|
||||
return result;
|
||||
}
|
||||
|
||||
static Builder createPrepend(Context &context, RegionLike storage) {
|
||||
auto result = Builder(context);
|
||||
result.mInsertionStorage = storage;
|
||||
result.mInsertionPoint = nullptr;
|
||||
return result;
|
||||
}
|
||||
|
||||
Context &getContext() { return *mContext; }
|
||||
RegionLike getInsertionStorage() { return mInsertionStorage; }
|
||||
Instruction getInsertionPoint() { return mInsertionPoint; }
|
||||
void setInsertionPoint(Instruction inst) { mInsertionPoint = inst; }
|
||||
|
||||
template <typename T, typename... ArgsT>
|
||||
requires requires {
|
||||
typename T::underlying_type;
|
||||
requires std::is_constructible_v<typename T::underlying_type, ArgsT...>;
|
||||
requires std::is_base_of_v<NodeImpl, typename T::underlying_type>;
|
||||
}
|
||||
T create(ArgsT &&...args) {
|
||||
auto result = getContext().template create<T>(std::forward<ArgsT>(args)...);
|
||||
using InstanceType = typename T::underlying_type;
|
||||
getInsertionStorage().insertAfter(getInsertionPoint(), result);
|
||||
if constexpr (requires { mInsertionPoint = Instruction(result); }) {
|
||||
mInsertionPoint = Instruction(result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
} // namespace ir
|
||||
84
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Context.hpp
Normal file
84
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Context.hpp
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
#pragma once
|
||||
|
||||
#include "Location.hpp"
|
||||
#include "NodeImpl.hpp"
|
||||
#include "Operand.hpp"
|
||||
|
||||
#include <forward_list>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
|
||||
namespace shader::ir {
|
||||
// Orders unique_ptr keys by their *pointees'* three-way comparison rather
// than by address, so distinct allocations with equal contents compare
// equivalent — this is what enables location interning in Context.
// NOTE: the static call operator requires C++23.
struct UniqPtrCompare {
  static bool operator()(const auto &lhs, const auto &rhs)
    requires requires { *lhs <=> *rhs; }
  {
    return (*lhs <=> *rhs) == std::strong_ordering::less;
  }
};
|
||||
|
||||
// Ownership hub for the IR: owns every created node for its whole lifetime
// and interns source locations so equal locations share a single object.
// Move-only (copying would duplicate ownership).
class Context {
  std::forward_list<std::unique_ptr<NodeImpl>> mNodes; // all nodes, owned; freed only with the context
  std::set<std::unique_ptr<LocationImpl>, UniqPtrCompare> mLocations; // interned locations (content-compared)
  std::unique_ptr<UnknownLocationImpl> mUnknownLocation; // lazily created shared "unknown" location

public:
  Context() = default;
  Context(const Context &) = delete;
  Context(Context &&) = default;
  Context& operator=(Context &&) = default;

  // Allocates a node of handle type T; ownership stays with the context and
  // the returned handle is non-owning.
  template <typename T, typename... ArgsT>
    requires requires {
      typename T::underlying_type;
      requires std::is_constructible_v<typename T::underlying_type, ArgsT...>;
      requires std::is_base_of_v<NodeImpl, typename T::underlying_type>;
    }
  T create(ArgsT &&...args) {
    auto result = new typename T::underlying_type(std::forward<ArgsT>(args)...);
    mNodes.emplace_front(std::unique_ptr<NodeImpl>{result});
    return T(result);
  }

  // Returns the interned location equal to the one described by `args`:
  // builds a candidate and inserts it; if an equivalent location already
  // exists the candidate is discarded and the existing one is returned.
  template <typename T, typename... ArgsT>
    requires requires {
      typename T::underlying_type;
      requires std::is_constructible_v<typename T::underlying_type, ArgsT...>;
      requires std::is_base_of_v<LocationImpl, typename T::underlying_type>;
    }
  T getLocation(ArgsT &&...args) {
    auto result = std::make_unique<typename T::underlying_type>(
        std::forward<ArgsT>(args)...);
    auto ptr = mLocations.insert(std::move(result)).first->get();
    return T(static_cast<typename T::underlying_type *>(ptr));
  }

  // Convenience constructors for the concrete location kinds.
  PathLocation getPathLocation(std::string path) {
    return getLocation<PathLocation>(std::move(path));
  }
  TextFileLocation getTextFileLocation(PathLocation location,
                                       std::uint64_t line,
                                       std::uint64_t column = 0) {
    return getLocation<TextFileLocation>(location, line, column);
  }
  TextFileLocation getTextFileLocation(std::string path, std::uint64_t line,
                                       std::uint64_t column = 0) {
    return getLocation<TextFileLocation>(getPathLocation(path), line, column);
  }
  OffsetLocation getOffsetLocation(Location baseLocation,
                                   std::uint64_t offset) {
    return getLocation<OffsetLocation>(baseLocation, offset);
  }
  MemoryLocation getMemoryLocation(std::uint64_t address, std::uint64_t size) {
    return getLocation<MemoryLocation>(address, size);
  }
  // Shared sentinel location for nodes without source information.
  UnknownLocation getUnknownLocation() {
    if (mUnknownLocation == nullptr) {
      mUnknownLocation = std::make_unique<UnknownLocationImpl>();
    }
    return mUnknownLocation.get();
  }
};
|
||||
} // namespace shader::ir
|
||||
361
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Impl.hpp
Normal file
361
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Impl.hpp
Normal file
|
|
@ -0,0 +1,361 @@
|
|||
#pragma once
|
||||
#include "../dialect/builtin.hpp"
|
||||
#include "../dialect/memssa.hpp"
|
||||
#include "Block.hpp"
|
||||
#include "Context.hpp"
|
||||
#include "InstructionImpl.hpp"
|
||||
#include "NodeImpl.hpp"
|
||||
#include "RegionImpl.hpp"
|
||||
#include "ValueImpl.hpp"
|
||||
|
||||
namespace shader::ir {
|
||||
// Appends `operand` to this instruction. If the operand is a value, a use of
// that value is registered at the operand's index before the append (the
// index equals the pre-append size).
inline void InstructionImpl::addOperand(Operand operand) {
  if (operand != nullptr) {
    if (auto value = operand.getAsValue()) {
      value.get()->addUse(this, operands.size());
    }
  }

  operands.addOperand(std::move(operand));
}
|
||||
|
||||
// Replaces the operand at `index` with `operand`, keeping value use-lists
// consistent (old value's use removed, new value's use added). Aborts on an
// out-of-range index. Returns the previous operand.
inline Operand InstructionImpl::replaceOperand(int index, Operand operand) {
  if (operands.size() <= unsigned(index)) {
    std::abort();
  }

  if (!operands[index].isNull()) {
    if (auto value = operands[index].getAsValue()) {
      value.get()->removeUse(this, index);
    }
  }

  if (auto value = operand.getAsValue()) {
    value.get()->addUse(this, index);
  }

  return std::exchange(operands[index], std::move(operand));
}
|
||||
|
||||
// Erases `count` operands starting at `index`, keeping value use-lists
// consistent, and returns the first erased operand.
//
// Fix: the previous middle-erase path removed the wrong elements for
// count > 1 — it moved operands[index+1] into slot `index` (keeping an
// element that should have been erased) and dropped operands[index+count]
// instead. Now all of [index, index+count) are cleared and the tail is
// shifted down by `count`; count == 1 behavior is unchanged.
inline Operand InstructionImpl::eraseOperand(int index, int count) {
  // Fast path: erasing a suffix needs no shifting.
  if (index + count == operands.size()) {
    auto result = replaceOperand(index, nullptr);

    for (int i = 1; i < count; ++i) {
      replaceOperand(i + index, nullptr);
    }

    operands.resize(operands.size() - count);
    return result;
  }

  // Clear the erased range (removing uses), remembering the first operand.
  auto result = replaceOperand(index, nullptr);

  for (int i = 1; i < count; ++i) {
    replaceOperand(index + i, nullptr);
  }

  // Shift the surviving tail down by `count` slots; replaceOperand keeps the
  // use-list indices of the moved values up to date.
  for (int i = index; i < int(operands.size()) - count; ++i) {
    replaceOperand(i, replaceOperand(i + count, nullptr));
  }

  operands.resize(operands.size() - count);
  return result;
}
|
||||
|
||||
// Fully removes this instruction: aborts if it is a value that still has
// uses, detaches all of its own operand uses, and unlinks it from its parent
// region (if any). The node itself stays owned by the Context.
inline void InstructionImpl::remove() {
  if (auto value = Instruction(this).cast<Value>()) {
    // Removing a still-referenced value would leave dangling uses.
    if (!value.isUnused()) {
      std::abort();
    }
  }

  for (int index = 0; auto &operand : operands) {
    if (auto value = operand.getAsValue()) {
      value.get()->removeUse(this, index);
    }
    index++;
  }

  operands.clear();

  if (parent != nullptr) {
    erase();
  }
}
|
||||
|
||||
// Unlinks this instruction from its parent region's doubly linked list,
// patching the neighbors (or the region's first/last when at an end) and
// clearing this node's links. Requires a parent.
inline void InstructionImpl::erase() {
  assert(parent != nullptr);

  if (prev != nullptr) {
    prev.get()->next = next;
  } else {
    // No predecessor: this was the region's first instruction.
    parent.get()->first = next;
  }
  if (next != nullptr) {
    next.get()->prev = prev;
  } else {
    // No successor: this was the region's last instruction.
    parent.get()->last = prev;
  }

  prev = nullptr;
  next = nullptr;
  parent = nullptr;
}
|
||||
|
||||
// Moves every child of `other` to the end of this region, preserving order.
// `other` is left empty; each child is detached before re-insertion.
template <typename ImplT, template <typename> typename BaseWrapper>
void RegionLikeWrapper<ImplT, BaseWrapper>::appendRegion(RegionLike other) {
  for (auto child = other.getFirst(); child != nullptr;) {
    // Advance before erasing — erase() clears the child's next link.
    auto node = child;
    child = child.getNext();
    node.erase();
    this->addChild(node);
  }
}
|
||||
|
||||
// Inserts the detached instruction `node` immediately after `point` in this
// region; a null `point` means "insert at the front". `point` must belong to
// this region and `node` must not be linked anywhere.
inline void RegionLikeImpl::insertAfter(Instruction point, Instruction node) {
  assert(point == nullptr || point.getParent() == this);
  assert(node.getParent() == nullptr);
  assert(node.getPrev() == nullptr);
  assert(node.getNext() == nullptr);

  if (point == nullptr) {
    prependChild(node);
    return;
  }

  // A non-null insertion point implies a non-empty region.
  assert(first != nullptr);
  assert(last != nullptr);

  node.get()->parent = this;
  node.get()->prev = point.get();

  if (auto pointNext = point.getNext()) {
    pointNext.get()->prev = node.get();
    node.get()->next = pointNext.get();
  } else {
    // Inserting after the last instruction: update the tail pointer.
    assert(last == point);
    last = node.get();
  }

  point.get()->next = node.get();
}
|
||||
|
||||
// Links the detached instruction `node` at the front of this region.
inline void RegionLikeImpl::prependChild(Instruction node) {
  assert(node.getParent() == nullptr);
  assert(node.getPrev() == nullptr);
  assert(node.getNext() == nullptr);

  node.get()->parent = this;
  if (last == nullptr) {
    // Empty region: node becomes both first and last.
    last = node;
  } else {
    first.get()->prev = node;
    node.get()->next = first;
  }
  first = node;
}

// Links the detached instruction `node` at the back of this region.
inline void RegionLikeImpl::addChild(Instruction node) {
  assert(node.getParent() == nullptr);
  assert(node.getPrev() == nullptr);
  assert(node.getNext() == nullptr);

  node.get()->parent = this;
  if (first == nullptr) {
    // Empty region: node becomes both first and last.
    first = node;
  } else {
    last.get()->next = node;
    node.get()->prev = last;
  }
  last = node;
}
|
||||
|
||||
// Prints the region as a brace-enclosed, indented list of its children.
inline void RegionImpl::print(std::ostream &os, NameStorage &ns) const {
  os << "{\n";
  for (auto child : children()) {
    os << "  ";
    child.print(os, ns);
    os << "\n";
  }
  os << "}";
}

// Returns the operand as a Value handle if it holds one, or a null handle
// otherwise.
inline Value Operand::getAsValue() const {
  if (auto node = std::get_if<ValueImpl *>(&value)) {
    return Value(const_cast<ValueImpl *>(*node));
  }

  return {};
}
|
||||
|
||||
// Clones a node handle through an existing clone map, so already-cloned nodes
// are reused. `isOperand` is forwarded to the map's cloning policy.
template <typename T>
T clone(T object, Context &context, CloneMap &map, bool isOperand = false)
  requires requires {
    map.getOrClone(context, object, isOperand).template staticCast<T>();
  }
{
  return map.getOrClone(context, object, isOperand).template staticCast<T>();
}

// Clones a node handle with a fresh (empty) clone map.
template <typename T>
T clone(T object, Context &context)
  requires requires(CloneMap map) { clone(object, context, map); }
{
  CloneMap map;
  return clone(object, context, map);
}

// Clones a location into `context` (locations are interned, no map needed);
// null locations stay null.
template <typename T>
T clone(T location, Context &context)
  requires requires { Location(location).get()->clone(context); }
{
  if (location == nullptr) {
    return nullptr;
  }
  return Location(location).get()->clone(context).staticCast<T>();
}
|
||||
|
||||
namespace detail {
// Shared clone body for instruction-like nodes: creates a node of handle
// type T with a cloned location plus extra constructor args, then clones
// every operand through the map.
template <typename T, typename U, typename... ArgsT>
  requires(std::is_same_v<typename T::underlying_type, U>)
T cloneInstructionImpl(const U *object, Context &context, CloneMap &map,
                       ArgsT &&...args) {
  auto result = context.create<T>(clone(object->getLocation(), context),
                                  std::forward<ArgsT>(args)...);

  for (auto &&operand : object->getOperands()) {
    result.addOperand(operand.clone(context, map));
  }

  return result;
}
} // namespace detail
|
||||
|
||||
// Clone overrides for the core node kinds; each forwards to the shared
// helper (or mirrors its shape) while carrying the node-specific state.

inline Node InstructionImpl::clone(Context &context, CloneMap &map) const {
  return detail::cloneInstructionImpl<Instruction>(this, context, map, kind,
                                                   op);
}

inline Node ValueImpl::clone(Context &context, CloneMap &map) const {
  return detail::cloneInstructionImpl<Value>(this, context, map, kind, op);
}

// Regions clone their children (they have no operands).
inline Node RegionImpl::clone(Context &context, CloneMap &map) const {
  auto result = context.create<Region>(ir::clone(getLocation(), context));
  for (auto &&child : children()) {
    result.addChild(ir::clone(child, context, map));
  }

  return result;
}

// A block is a builtin-dialect value.
inline BlockImpl::BlockImpl(Location loc)
    : ValueImpl(loc, ir::Kind::Builtin, builtin::BLOCK) {}

// Blocks clone both their operands and their child instructions.
inline Node BlockImpl::clone(Context &context, CloneMap &map) const {
  auto result = context.create<Block>(ir::clone(getLocation(), context));
  for (auto &&operand : getOperands()) {
    result.addOperand(operand.clone(context, map));
  }

  for (auto &&child : children()) {
    result.addChild(ir::clone(child, context, map));
  }

  return result;
}

// Operands deep-clone only when they hold a value; plain payloads are copied.
inline Operand Operand::clone(Context &context, CloneMap &map) const {
  if (auto value = getAsValue()) {
    return ir::clone(value, context, map, true);
  }

  return *this;
}
|
||||
|
||||
// Clone overrides for the memory-SSA dialect nodes. They are intentionally
// boilerplate: rebuild the node with a cloned location, preserved kind/op,
// and per-operand clones (Scope additionally clones its children).

inline Node memssa::PhiImpl::clone(Context &context, CloneMap &map) const {
  auto self = Phi(const_cast<PhiImpl *>(this));
  auto result = context.create<Phi>(ir::clone(self.getLocation(), context),
                                    self.getKind(), self.getOp());

  for (auto &&operand : self.getOperands()) {
    result.addOperand(operand.clone(context, map));
  }

  return result;
}

inline Node memssa::VarImpl::clone(Context &context, CloneMap &map) const {
  auto self = Var(const_cast<VarImpl *>(this));
  auto result = context.create<Var>(ir::clone(self.getLocation(), context),
                                    self.getKind(), self.getOp());

  for (auto &&operand : self.getOperands()) {
    result.addOperand(operand.clone(context, map));
  }

  return result;
}

inline Node memssa::UseImpl::clone(Context &context, CloneMap &map) const {
  auto self = Use(const_cast<UseImpl *>(this));
  auto result = context.create<Use>(ir::clone(self.getLocation(), context),
                                    self.getKind(), self.getOp());

  for (auto &&operand : self.getOperands()) {
    result.addOperand(operand.clone(context, map));
  }

  return result;
}

inline Node memssa::DefImpl::clone(Context &context, CloneMap &map) const {
  auto self = Def(const_cast<DefImpl *>(this));
  auto result = context.create<Def>(ir::clone(self.getLocation(), context),
                                    self.getKind(), self.getOp());

  for (auto &&operand : self.getOperands()) {
    result.addOperand(operand.clone(context, map));
  }

  return result;
}

// Scopes also own children, which are cloned after the operands.
inline Node memssa::ScopeImpl::clone(Context &context, CloneMap &map) const {
  auto self = Scope(const_cast<ScopeImpl *>(this));
  auto result = context.create<Scope>(ir::clone(self.getLocation(), context));

  for (auto &&operand : self.getOperands()) {
    result.addOperand(operand.clone(context, map));
  }

  for (auto child : self.children()) {
    result.addChild(ir::clone(child, context, map));
  }

  return result;
}
|
||||
|
||||
// Location cloning: each flavour re-interns itself through the target
// Context's get*Location factory, so per-context deduplication applies.
inline Location PathLocationImpl::clone(Context &context) const {
  return context.getPathLocation(data.path);
}
inline Location TextFileLocationImpl::clone(Context &context) const {
  return context.getTextFileLocation(data.file, data.line, data.column);
}
inline Location OffsetLocationImpl::clone(Context &context) const {
  // NOTE(review): baseLocation is passed through as-is rather than re-cloned
  // into `context` — presumably fine when cloning within one context; TODO
  // confirm for cross-context cloning.
  return context.getOffsetLocation(baseLocation, offset);
}
inline Location MemoryLocationImpl::clone(Context &context) const {
  return context.getMemoryLocation(data.address, data.size);
}
inline Location UnknownLocationImpl::clone(Context &context) const {
  return context.getUnknownLocation();
}
|
||||
|
||||
// Default clone step used by CloneMap::getOrClone when no override exists:
// clone the node and record the result so later requests for the same
// original resolve to the same copy. The bool (isOperand) is unused here.
inline Node CloneMap::getOrCloneImpl(Context &context, Node node, bool) {
  Node result = node.get()->clone(context, *this);
  overrides[node] = result;
  return result;
}
} // namespace shader::ir
|
||||
72
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Instruction.hpp
Normal file
72
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Instruction.hpp
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
#pragma once

#include "Kind.hpp"
#include "Node.hpp"

namespace shader::ir {
// Packed (kind, op) pair identifying an instruction: kind occupies the low
// 5 bits, op the remaining high bits.
enum class InstructionId : std::uint32_t {};

// Kind must fit the 5-bit field below, otherwise getInstructionId would
// silently corrupt the op bits.
static_assert(static_cast<std::uint32_t>(Kind::Count) <= (1u << 5),
              "Kind no longer fits in the 5-bit field of InstructionId");

constexpr InstructionId getInstructionId(ir::Kind kind, unsigned op) {
  return static_cast<InstructionId>(static_cast<std::uint32_t>(kind) |
                                    static_cast<std::uint32_t>(op) << 5);
}

constexpr ir::Kind getInstructionKind(InstructionId id) {
  return static_cast<ir::Kind>(static_cast<std::uint32_t>(id) & 0x1f);
}
constexpr unsigned getInstructionOp(InstructionId id) {
  return static_cast<unsigned>(static_cast<std::uint32_t>(id) >> 5);
}

struct Region;
struct InstructionImpl;
struct Instruction;

// Non-owning handle over an InstructionImpl-derived node. Adds accessors for
// kind/op, intrusive list navigation (parent/prev/next) and operand editing;
// all mutators forward to the impl, so they are const on the wrapper.
template <typename ImplT> struct InstructionWrapper : NodeWrapper<ImplT> {
  using NodeWrapper<ImplT>::NodeWrapper;
  using NodeWrapper<ImplT>::operator=;

  Kind getKind() const { return this->impl->kind; }
  unsigned getOp() const { return this->impl->op; }
  InstructionId getInstId() const {
    return getInstructionId(getKind(), getOp());
  }

  auto getParent() const { return this->impl->parent; }
  bool hasParent() const { return this->impl->parent != nullptr; }
  auto getNext() const { return Instruction(this->impl->next); }
  auto getPrev() const { return Instruction(this->impl->prev); }

  void addOperand(Operand operand) const { this->impl->addOperand(operand); }

  decltype(auto) replaceOperand(int index, Operand operand) const {
    return this->impl->replaceOperand(index, operand);
  }
  decltype(auto) eraseOperand(int index, int count = 1) const {
    return this->impl->eraseOperand(index, count);
  }
  void insertAfter(Node point, Node node) const {
    this->impl->insertAfter(point, node);
  }
  // erase() destroys the node, remove() only unlinks it from its parent.
  void erase() const { this->impl->erase(); }
  void remove() const { this->impl->remove(); }

  template <typename T = Node> auto children() const {
    return this->impl->template children<T>();
  }
  decltype(auto) getOperand(std::size_t i) const {
    return this->impl->getOperand(i);
  }
  decltype(auto) getOperands() const { return this->impl->getOperands(); }
  std::size_t getOperandCount() const { return getOperands().size(); }

  // Convenience: enum operands are stored as their underlying integer.
  template <typename T>
    requires std::is_enum_v<T>
  void addOperand(T enumValue) {
    addOperand(std::to_underlying(enumValue));
  }
};

struct Instruction : InstructionWrapper<InstructionImpl> {
  using InstructionWrapper<InstructionImpl>::InstructionWrapper;
  using InstructionWrapper<InstructionImpl>::operator=;
};
} // namespace shader::ir
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
#pragma once

#include "Instruction.hpp"
#include "Kind.hpp"
#include "Location.hpp"
#include "NodeImpl.hpp"
#include "PrintableWrapper.hpp"
#include "RegionLike.hpp"
#include <ostream>
#include <span>

namespace shader::ir {
// Storage for an instruction node: its (kind, op) identity, its position in
// the parent's intrusive doubly-linked list, and its operand list.
struct InstructionImpl : NodeImpl {
  Kind kind;
  unsigned op;

  RegionLike parent;
  Instruction prev;
  Instruction next;
  OperandList operands;

  InstructionImpl(Location location, Kind kind, unsigned op,
                  std::span<const Operand> operands = {})
      : kind(kind), op(op) {
    setLocation(location);

    // FIX: the loop variable previously shadowed the `op` member and applied
    // std::move to a const reference (which copies anyway); copy explicitly.
    for (const auto &operand : operands) {
      addOperand(operand);
    }
  }

  // Convenience: enum operands are stored as their underlying integer.
  template <typename T>
    requires std::is_enum_v<T>
  void addOperand(T enumValue) {
    addOperand(std::to_underlying(enumValue));
  }

  void addOperand(Operand operand);
  Operand replaceOperand(int index, Operand operand);
  Operand eraseOperand(int index, int count);
  void remove(); // unlink from parent, keep the node alive
  void erase();  // unlink and destroy

  decltype(auto) getOperand(std::size_t i) const {
    return operands.getOperand(i);
  }

  decltype(auto) getOperands() const { return std::span(operands); }

  // Renders "kind.opname(op0, op1, ...)"; operand names come from `ns`.
  void print(std::ostream &os, NameStorage &ns) const override {
    os << getInstructionName(kind, op);

    if (!operands.empty()) {
      os << "(";
      for (bool first = true; auto operand : operands) {
        if (first) {
          first = false;
        } else {
          os << ", ";
        }
        operand.print(os, ns);
      }
      os << ")";
    }
  }

  Node clone(Context &context, CloneMap &map) const override;
};
} // namespace shader::ir
|
||||
205
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Kind.hpp
Normal file
205
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Kind.hpp
Normal file
|
|
@ -0,0 +1,205 @@
|
|||
#pragma once

#include <string>
namespace shader::ir {
// Top-level instruction family. Values are contiguous from 0 so they can be
// used as table indices; Count is the number of real kinds.
enum class Kind {
  Spv,
  Builtin,
  AmdGpu,
  Vop2,
  Sop2,
  Sopk,
  Smrd,
  Vop3,
  Mubuf,
  Mtbuf,
  Mimg,
  Ds,
  Vintrp,
  Exp,
  Vop1,
  Vopc,
  Sop1,
  Sopc,
  Sopp,
  MemSSA,

  Count,
};

// Per-family opcode-name lookups; each is defined next to its opcode table.
namespace spv {
const char *getInstructionName(unsigned id);
}
namespace builtin {
const char *getInstructionName(unsigned id);
}
namespace amdgpu {
const char *getInstructionName(unsigned id);
}
namespace vop2 {
const char *getInstructionName(unsigned id);
}
namespace sop2 {
const char *getInstructionName(unsigned id);
}
namespace sopk {
const char *getInstructionName(unsigned id);
}
namespace smrd {
const char *getInstructionName(unsigned id);
}
namespace vop3 {
const char *getInstructionName(unsigned id);
}
namespace mubuf {
const char *getInstructionName(unsigned id);
}
namespace mtbuf {
const char *getInstructionName(unsigned id);
}
namespace mimg {
const char *getInstructionName(unsigned id);
}
namespace ds {
const char *getInstructionName(unsigned id);
}
namespace vintrp {
const char *getInstructionName(unsigned id);
}
namespace exp {
const char *getInstructionName(unsigned id);
}
namespace vop1 {
const char *getInstructionName(unsigned id);
}
namespace vopc {
const char *getInstructionName(unsigned id);
}
namespace sop1 {
const char *getInstructionName(unsigned id);
}
namespace sopc {
const char *getInstructionName(unsigned id);
}
namespace sopp {
const char *getInstructionName(unsigned id);
}

namespace memssa {
const char *getInstructionName(unsigned id);
}

// Short lowercase name for a kind; "<invalid>" for Count or out-of-range
// values. The table index mirrors the enumerator order above.
inline const char *getKindName(Kind kind) {
  static constexpr const char *kKindNames[] = {
      "spv",  "builtin", "amdgpu", "vop2",   "sop2", "sopk", "smrd",
      "vop3", "mubuf",   "mtbuf",  "mimg",   "ds",   "vintrp",
      "exp",  "vop1",    "vopc",   "sop1",   "sopc", "sopp", "memssa",
  };
  constexpr auto kCount = sizeof(kKindNames) / sizeof(kKindNames[0]);

  auto index = static_cast<unsigned>(kind);
  if (index < kCount) {
    return kKindNames[index];
  }
  return "<invalid>";
}

// Opcode name without the kind prefix; nullptr when the kind is out of range
// (the per-family lookups may themselves return nullptr for unknown ops).
inline const char *getInstructionShortName(Kind kind, unsigned op) {
  using NameFn = const char *(*)(unsigned);
  static constexpr NameFn kLookup[] = {
      spv::getInstructionName,    builtin::getInstructionName,
      amdgpu::getInstructionName, vop2::getInstructionName,
      sop2::getInstructionName,   sopk::getInstructionName,
      smrd::getInstructionName,   vop3::getInstructionName,
      mubuf::getInstructionName,  mtbuf::getInstructionName,
      mimg::getInstructionName,   ds::getInstructionName,
      vintrp::getInstructionName, exp::getInstructionName,
      vop1::getInstructionName,   vopc::getInstructionName,
      sop1::getInstructionName,   sopc::getInstructionName,
      sopp::getInstructionName,   memssa::getInstructionName,
  };
  constexpr auto kCount = sizeof(kLookup) / sizeof(kLookup[0]);

  auto index = static_cast<unsigned>(kind);
  if (index >= kCount) {
    return nullptr;
  }
  return kLookup[index](op);
}

// Fully-qualified "kind.opname" string; unknown ops render as
// "kind.<invalid N>".
inline std::string getInstructionName(Kind kind, unsigned op) {
  std::string fullName = getKindName(kind);
  fullName += '.';

  if (auto shortName = getInstructionShortName(kind, op)) {
    fullName += shortName;
  } else {
    fullName += "<invalid ";
    fullName += std::to_string(op);
    fullName += ">";
  }

  return fullName;
}
} // namespace shader::ir
|
||||
200
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Location.hpp
Normal file
200
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Location.hpp
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
#pragma once
#include "PrintableWrapper.hpp"
#include <cstdint>
#include <string>

namespace shader::ir {
struct LocationImpl;
struct CloneMap;
class Context;

// Non-owning handle over a LocationImpl-derived object.
template <typename ImplT> struct LocationWrapper : PrintableWrapper<ImplT> {
  using PrintableWrapper<ImplT>::PrintableWrapper;
  using PrintableWrapper<ImplT>::operator=;
};

using Location = LocationWrapper<LocationImpl>;

// Base class for all source-location flavours. Subclasses implement
// printing, a total ordering across flavours (see compare), and cloning
// into another Context.
struct LocationImpl {
  virtual ~LocationImpl() {}
  virtual void print(std::ostream &os) = 0;
  virtual std::strong_ordering compare(const LocationImpl &other) const = 0;

  virtual Location clone(Context &context) const = 0;
  // Ordering delegates to the virtual compare so mixed flavours work.
  auto operator<=>(const LocationImpl &other) const { return compare(other); }
};
|
||||
|
||||
// Location identified by a file path only (no line/column).
struct PathLocationImpl final : LocationImpl {
  struct Data {
    std::string path;
    auto operator<=>(const Data &other) const = default;
  } data;

  PathLocationImpl(std::string path) : data{.path = std::move(path)} {}

  void print(std::ostream &os) override { os << data.path; }

  std::strong_ordering compare(const LocationImpl &other) const override {
    if (this == &other) {
      return std::strong_ordering::equal;
    }

    // Same flavour: compare by path.
    if (auto p = dynamic_cast<const PathLocationImpl *>(&other)) {
      return this->data <=> p->data;
    }

    // Different flavour: fall back to pointer identity ordering.
    return this <=> &other;
  }

  Location clone(Context &context) const override;
};

struct PathLocation : LocationWrapper<PathLocationImpl> {
  using LocationWrapper::LocationWrapper;
  using LocationWrapper::operator=;
  const std::string &getPath() const { return impl->data.path; }
};
|
||||
|
||||
// Location inside a text file: path location plus 1-based(?) line/column —
// NOTE(review): line/column base is not established here; confirm at callers.
struct TextFileLocationImpl final : LocationImpl {
  struct Data {
    PathLocation file;
    std::uint64_t line;
    std::uint64_t column;
    auto operator<=>(const Data &other) const = default;

  } data;

  TextFileLocationImpl(PathLocation file, std::uint64_t line,
                       std::uint64_t column)
      : data{.file = file, .line = line, .column = column} {}

  void print(std::ostream &os) override {
    data.file.print(os);
    os << ':' << data.line << ':' << data.column;
  }

  auto operator<=>(const TextFileLocationImpl &other) const = default;
  std::strong_ordering compare(const LocationImpl &other) const override {
    if (this == &other) {
      return std::strong_ordering::equal;
    }

    // Same flavour: memberwise comparison via the defaulted operator above.
    if (auto p = dynamic_cast<const TextFileLocationImpl *>(&other)) {
      return *this <=> *p;
    }

    // Different flavour: fall back to pointer identity ordering.
    return this <=> &other;
  }

  Location clone(Context &context) const override;
};

struct TextFileLocation : LocationWrapper<TextFileLocationImpl> {
  using LocationWrapper::LocationWrapper;
  using LocationWrapper::operator=;
  PathLocation getFile() const { return impl->data.file; }
  std::uint64_t getLine() const { return impl->data.line; }
  std::uint64_t getColumn() const { return impl->data.column; }
};
||||
|
||||
// Payload of an offset location, split out so compare() can match against
// the data portion without requiring the exact impl type.
struct OffsetLocationData {
  Location baseLocation;
  std::uint64_t offset;

  OffsetLocationData(Location baseLocation, std::uint64_t offset)
      : baseLocation(baseLocation), offset(offset) {}

  auto operator<=>(const OffsetLocationData &other) const = default;
};

// Location expressed as a byte offset relative to another location.
struct OffsetLocationImpl final : OffsetLocationData, LocationImpl {
  OffsetLocationImpl(Location file, std::uint64_t offset)
      : OffsetLocationData(file, offset) {}

  void print(std::ostream &os) override {
    baseLocation.print(os);
    os << '+' << offset;
  }

  std::strong_ordering compare(const LocationImpl &other) const override {
    if (this == &other) {
      return std::strong_ordering::equal;
    }

    // Cross-cast to the data base: any impl carrying OffsetLocationData
    // compares by (baseLocation, offset).
    if (auto p = dynamic_cast<const OffsetLocationData *>(&other)) {
      return static_cast<const OffsetLocationData &>(*this) <=> *p;
    }

    // Different flavour: fall back to pointer identity ordering.
    return this <=> &other;
  }

  Location clone(Context &context) const override;
};

struct OffsetLocation : LocationWrapper<OffsetLocationImpl> {
  using LocationWrapper::LocationWrapper;
  using LocationWrapper::operator=;
  Location getBaseLocation() const { return impl->baseLocation; }
  std::uint64_t getOffset() const { return impl->offset; }
};
|
||||
|
||||
// Location identified by a guest memory range (address, size).
struct MemoryLocationImpl final : LocationImpl {
  struct Data {
    std::uint64_t address;
    std::uint64_t size;

    auto operator<=>(const Data &other) const = default;
  } data;

  MemoryLocationImpl(std::uint64_t address, std::uint64_t size)
      : data{.address = address, .size = size} {}

  void print(std::ostream &os) override {
    os << '(' << data.address << " - " << data.size << ')';
  }

  std::strong_ordering compare(const LocationImpl &other) const override {
    if (this == &other) {
      return std::strong_ordering::equal;
    }

    // Same flavour: compare by (address, size).
    if (auto p = dynamic_cast<const MemoryLocationImpl *>(&other)) {
      return data <=> p->data;
    }

    // Different flavour: fall back to pointer identity ordering.
    return this <=> &other;
  }

  Location clone(Context &context) const override;
};

struct MemoryLocation : LocationWrapper<MemoryLocationImpl> {
  using LocationWrapper::LocationWrapper;
  using LocationWrapper::operator=;
  std::uint64_t getAddress() const { return impl->data.address; }
  std::uint64_t getSize() const { return impl->data.size; }
};
|
||||
|
||||
struct UnknownLocationImpl final : LocationImpl {
|
||||
void print(std::ostream &os) override { os << "unknown"; }
|
||||
|
||||
std::strong_ordering compare(const LocationImpl &other) const override {
|
||||
if (this == &other) {
|
||||
return std::strong_ordering::equal;
|
||||
}
|
||||
|
||||
if (dynamic_cast<const MemoryLocationImpl *>(&other)) {
|
||||
return std::strong_ordering::equal;
|
||||
}
|
||||
|
||||
return this <=> &other;
|
||||
}
|
||||
|
||||
Location clone(Context &context) const override;
|
||||
};
|
||||
|
||||
struct UnknownLocation : LocationWrapper<UnknownLocationImpl> {
|
||||
using LocationWrapper::LocationWrapper;
|
||||
using LocationWrapper::operator=;
|
||||
};
|
||||
} // namespace ir
|
||||
90
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/NameStorage.hpp
Normal file
90
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/NameStorage.hpp
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
#pragma once

#include "Node.hpp"
#include <set>
#include <string>
#include <unordered_map>

namespace shader::ir {
// Assigns printable names to IR nodes. Names are interned in a std::set
// (std::set node stability keeps the stored string pointers valid across
// inserts), and each node maps to a pointer into that set.
class NameStorage {
  std::set<std::string> mNames;
  std::unordered_map<const NodeImpl *, const std::string *> mNodeToName;

public:
  // Names the node, disambiguating with "_1", "_2", ... suffixes if the
  // requested name is already taken. A node's previous name (if any) stays
  // interned in mNames but no longer maps to the node.
  void setUniqueNameOf(Node node, std::string name) {
    auto [nodeIt, nodeInserted] = mNodeToName.try_emplace(node.impl, nullptr);

    // Already named exactly this: nothing to do.
    if (!nodeInserted && *nodeIt->second == name) {
      return;
    }

    auto [nameIt, nameInserted] = mNames.insert(name);

    if (!nameInserted) {
      // Name collision: probe name_1, name_2, ... until one is free.
      std::size_t i = 1;

      while (true) {
        auto newName = name + "_" + std::to_string(i);
        auto [newNameIt, newNameInserted] = mNames.insert(std::move(newName));

        if (!newNameInserted) {
          ++i;
          continue;
        }

        nameIt = newNameIt;
        break;
      }
    }

    nodeIt->second = &*nameIt;
  }

  // Names the node without disambiguation: several nodes may intentionally
  // share the same name through this path.
  void setNameOf(Node node, std::string name) {
    auto [nodeIt, nodeInserted] = mNodeToName.try_emplace(node.impl, nullptr);

    if (!nodeInserted && *nodeIt->second == name) {
      return;
    }

    auto [nameIt, nameInserted] = mNames.insert(name);
    nodeIt->second = &*nameIt;
  }

  // Returns the node's name, or an empty view if it has none. The view
  // points into mNames and is invalidated by clear().
  std::string_view tryGetNameOf(Node node) const {
    auto it = mNodeToName.find(node.impl);
    if (it == mNodeToName.end()) {
      return {};
    }
    return *it->second;
  }

  // Returns the node's name, generating a fresh numeric one ("1", "2", ...)
  // on first request. The counter starts past the current name count and
  // probes upward to skip numbers already used as explicit names.
  const std::string &getNameOf(Node node) {
    auto [it, inserted] = mNodeToName.emplace(node.impl, nullptr);

    if (inserted) {
      std::size_t i = mNames.size() + 1;

      while (true) {
        auto newName = std::to_string(i);
        auto [newNameIt, newNameInserted] = mNames.insert(std::move(newName));

        if (!newNameInserted) {
          ++i;
          continue;
        }

        it->second = &*newNameIt;
        break;
      }
    }

    return *it->second;
  }

  // Drops all names and node associations.
  void clear() {
    mNames.clear();
    mNodeToName.clear();
  }
};
} // namespace shader::ir
|
||||
17
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Node.hpp
Normal file
17
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Node.hpp
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
#pragma once

#include "Operand.hpp"
#include "PrintableWrapper.hpp"

namespace shader::ir {
template <typename ImplT> struct NodeWrapper;

// The generic, type-erased node handle.
using Node = NodeWrapper<NodeImpl>;

// Non-owning handle over a NodeImpl-derived object; adds location access on
// top of the printable/pointer wrapper behaviour.
template <typename ImplT> struct NodeWrapper : PrintableWrapper<ImplT> {
  using PrintableWrapper<ImplT>::PrintableWrapper;
  using PrintableWrapper<ImplT>::operator=;

  auto getLocation() const { return this->impl->getLocation(); }
};
} // namespace shader::ir
|
||||
65
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/NodeImpl.hpp
Normal file
65
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/NodeImpl.hpp
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
#pragma once

#include "Location.hpp"
#include "Node.hpp"
#include "Operand.hpp"
#include <cassert>
#include <map>

namespace shader::ir {
struct NodeImpl;
struct CloneMap;
class NameStorage;
class Context;

// namespace debug {
// [[gnu::used, gnu::noinline]] void dump(Node object);
// [[gnu::used, gnu::noinline]] void dump(NodeImpl *object);
// } // namespace debug

// Tracks original -> clone mappings during a deep-clone so shared nodes are
// cloned exactly once and cycles terminate. Subclasses may override
// getOrClone/getOrCloneImpl to customize the cloning of specific nodes.
struct CloneMap {
  virtual ~CloneMap() = default;

  std::map<Node, Node> overrides;
  void setOverride(Node from, Node to) { overrides[from] = to; }
  Node getOverride(Node from) {
    if (auto it = overrides.find(from); it != overrides.end()) {
      return it->second;
    }
    return {};
  }
  // Returns the existing mapping for `node`, cloning it on first request.
  virtual Node getOrClone(Context &context, Node node, bool isOperand) {
    // if (auto it = overrides.find(node); it != overrides.end()) {
    //   return it->second;
    // }

    // return getOrCloneImpl(context, node, isOperand);

    if (node == nullptr) {
      return node;
    }

    // Reserve the slot first so recursive clones of the same node resolve
    // through the map; std::map iterators stay valid across the recursion.
    auto [it, inserted] = overrides.insert({node, nullptr});

    if (inserted) {
      it->second = getOrCloneImpl(context, node, isOperand);
      // Map the clone to itself so cloning a clone is a no-op.
      overrides[it->second] = it->second;
    }

    return it->second;
  }

  virtual Node getOrCloneImpl(Context &context, Node node, bool isOperand);
};

// Root of every IR node: carries a source location and the print/clone
// virtual interface.
struct NodeImpl {
  Location location;
  virtual ~NodeImpl() = default;

  void setLocation(Location newLocation) { location = newLocation; }
  Location getLocation() const { return location; }

  virtual void print(std::ostream &os, NameStorage &ns) const = 0;
  virtual Node clone(Context &context, CloneMap &map) const = 0;
};
} // namespace shader::ir
|
||||
152
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Operand.hpp
Normal file
152
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Operand.hpp
Normal file
|
|
@ -0,0 +1,152 @@
|
|||
#pragma once

#include "../Vector.hpp"
#include <bit>
#include <compare>
#include <cstddef>
#include <cstdint>
#include <span>
#include <string>
#include <type_traits>
#include <utility>
#include <variant>
#include <vector>

namespace shader::ir {
class NameStorage;
class Context;
struct ValueImpl;
struct Value;
struct NodeImpl;
struct CloneMap;
template <typename ImplT> struct NodeWrapper;
using Node = NodeWrapper<NodeImpl>;

// An instruction operand: either a reference to a Value node (non-owning
// ValueImpl*), an immediate (int32/int64/float/double/bool), a string, or
// null. The constructor overload set normalizes integer widths so all
// small integers are stored as int32 and 64-bit ones as int64.
struct Operand {
  using UnderlyingT =
      std::variant<std::nullptr_t, ValueImpl *, std::int64_t, std::int32_t,
                   double, float, bool, std::string>;

  UnderlyingT value{nullptr};

  // Non-integral (or bool) payloads that the variant accepts directly.
  template <typename T>
    requires(!std::is_integral_v<std::remove_cvref_t<T>> ||
             std::is_same_v<bool, std::remove_cvref_t<T>>)
  Operand(T &&value)
    requires requires { UnderlyingT{std::forward<T>(value)}; }
      : value(std::forward<T>(value)) {}

  // Integers up to 32 bits (excluding bool) are widened/stored as int32.
  template <typename T>
  Operand(T value)
    requires requires {
      requires(std::is_integral_v<std::remove_cvref_t<T>> &&
               !std::is_same_v<bool, T> && sizeof(T) <= sizeof(std::int32_t));
      UnderlyingT{static_cast<std::int32_t>(value)};
    }
      : value(static_cast<std::int32_t>(value)) {}

  // 64-bit integers are stored as int64 (signedness is dropped).
  template <typename T>
  Operand(T value)
    requires requires {
      requires(std::is_integral_v<std::remove_cvref_t<T>> &&
               sizeof(T) == sizeof(std::int64_t));
      UnderlyingT{static_cast<std::int64_t>(value)};
    }
      : value(static_cast<std::int64_t>(value)) {}

  // Enums are stored as their underlying integer.
  template <typename T>
    requires(std::is_enum_v<std::remove_cvref_t<T>>)
  Operand(T value) : Operand(std::to_underlying(value)) {}

  // Node wrappers (anything exposing a compatible `.impl` pointer). A null
  // wrapper is a hard error: operands must reference a live value.
  template <typename T>
  Operand(T &&value)
    requires requires { Operand(value.impl); }
      : Operand(value.impl) {
    if (value.impl == nullptr) {
      std::abort();
    }
  }

  Operand() = default;
  Operand(const Operand &) = default;
  Operand(Operand &&) = default;
  Operand &operator=(const Operand &) = default;
  Operand &operator=(Operand &&) = default;

  // Assignment of anything the variant can hold.
  template <typename T>
  Operand &operator=(T &&other)
    requires requires { value = std::forward<T>(other); }
  {
    value = std::forward<T>(other);
    return *this;
  }

  // Typed access: returns nullptr when the operand holds another alternative.
  template <typename T> const T *getAs() const {
    if (auto node = std::get_if<T>(&value)) {
      return node;
    }

    return {};
  }

  Value getAsValue() const;

  const std::string *getAsString() const { return getAs<std::string>(); }
  const std::int32_t *getAsInt32() const { return getAs<std::int32_t>(); }
  const std::int64_t *getAsInt64() const { return getAs<std::int64_t>(); }
  const double *getAsDouble() const { return getAs<double>(); }
  const float *getAsFloat() const { return getAs<float>(); }
  const bool *getAsBool() const { return getAs<bool>(); }
  bool isNull() const { return std::get_if<std::nullptr_t>(&value) != nullptr; }
  explicit operator bool() const { return !isNull(); }

  void print(std::ostream &os, NameStorage &ns) const;
  Operand clone(Context &context, CloneMap &map) const;

  // Total-ish ordering: first by alternative index, then by payload. Floats
  // are compared by bit pattern so NaNs order deterministically (hence only
  // partial_ordering in the signature, though every path returns a strong
  // result). The trailing `throw;` is unreachable: both sides are known to
  // hold the same alternative by the index check above.
  std::partial_ordering operator<=>(const Operand &other) const {
    auto result = value.index() <=> other.value.index();
    if (result != 0) {
      return result;
    }

    return std::visit(
        [](auto &&lhs, auto &&rhs) -> std::partial_ordering {
          using lhs_type = std::remove_cvref_t<decltype(lhs)>;
          using rhs_type = std::remove_cvref_t<decltype(rhs)>;
          if constexpr (std::is_same_v<lhs_type, rhs_type>) {
            if constexpr (std::is_same_v<lhs_type, std::nullptr_t>) {
              return std::strong_ordering::equal;
            } else if constexpr (std::is_same_v<lhs_type, float>) {
              return std::bit_cast<std::uint32_t>(lhs) <=>
                     std::bit_cast<std::uint32_t>(rhs);
            } else if constexpr (std::is_same_v<lhs_type, double>) {
              return std::bit_cast<std::uint64_t>(lhs) <=>
                     std::bit_cast<std::uint64_t>(rhs);
            } else {
              return lhs <=> rhs;
            }
          }

          throw;
        },
        value, other.value);
  }

  bool operator==(const Operand &) const = default;
};

// A vector of operands with enum-friendly append helpers.
struct OperandList : std::vector<Operand> {
  using std::vector<Operand>::vector;
  using std::vector<Operand>::operator=;

  template <typename T>
    requires std::is_enum_v<T>
  void addOperand(T enumValue) {
    addOperand(std::to_underlying(enumValue));
  }

  void addOperand(Operand operand) { push_back(std::move(operand)); }

  const Operand &getOperand(std::size_t i) const { return at(i); }
};
} // namespace shader::ir
|
||||
43
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/OperandPrint.hpp
Normal file
43
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/OperandPrint.hpp
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
#pragma once

#include "NameStorage.hpp"
#include "Operand.hpp"
#include "ValueImpl.hpp" // IWYU pragma: keep

namespace shader::ir {
// Renders the operand in IR syntax: %name for values, quoted strings,
// suffixed numeric immediates (i32/i64/f/d), true/false, or null. The
// alternatives are mutually exclusive, so the chain order is cosmetic.
inline void Operand::print(std::ostream &os, NameStorage &ns) const {
  if (auto valueNode = getAsValue()) {
    os << '%' << ns.getNameOf(valueNode);
  } else if (auto text = getAsString()) {
    os << '"' << *text << '"';
  } else if (auto i32 = getAsInt32()) {
    os << *i32 << "i32";
  } else if (auto i64 = getAsInt64()) {
    os << *i64 << "i64";
  } else if (auto f32 = getAsFloat()) {
    os << *f32 << 'f';
  } else if (auto f64 = getAsDouble()) {
    os << *f64 << 'd';
  } else if (auto flag = getAsBool()) {
    os << (*flag ? "true" : "false");
  } else if (isNull()) {
    os << "null";
  } else {
    os << "<invalid operand " << value.index() << ">";
  }
}
} // namespace shader::ir
|
||||
|
|
@ -0,0 +1,88 @@
|
|||
#pragma once

#include <cassert>
#include <functional>
#include <type_traits>

namespace shader::ir {
// Non-owning typed pointer wrapper used as the base of all IR handle types.
// Supports implicit upcasting between wrappers, checked/unchecked downcasts,
// and type queries. Uses C++23 deducing-this so assignment returns the
// derived wrapper type.
template <typename ImplT> struct PointerWrapper {
  using underlying_type = ImplT;
  ImplT *impl = nullptr;
  PointerWrapper() = default;
  PointerWrapper(ImplT *impl) : impl(impl) {}

  // Implicit conversion from a wrapper over a derived impl (upcast).
  template <typename OtherT>
    requires std::is_base_of_v<ImplT, OtherT>
  PointerWrapper(PointerWrapper<OtherT> node) : impl(node.impl) {}

  explicit operator bool() const { return impl != nullptr; }
  bool operator==(std::nullptr_t) const { return impl == nullptr; }
  bool operator==(ImplT *other) const { return impl == other; }

  template <typename Self> Self &operator=(this Self &self, ImplT *other) {
    self.impl = other;
    return self;
  }

  template <typename Self, typename OtherT>
    requires std::is_base_of_v<ImplT, OtherT>
  Self &operator=(this Self &self, PointerWrapper<OtherT> other) {
    self.impl = other.get();
    return self;
  }

  // ImplT *operator->() const { return impl; }

  ImplT *get() const { return impl; }

  // Comparison is by pointer identity.
  auto operator<=>(const PointerWrapper &) const = default;
  bool operator==(const PointerWrapper &) const = default;

  // Checked downcast: yields a null wrapper when the runtime type does not
  // match (despite the static_cast in the constraint, the cast itself is a
  // dynamic_cast).
  template <typename T>
  T cast() const
    requires requires { static_cast<typename T::underlying_type *>(impl); }
  {
    return T(dynamic_cast<typename T::underlying_type *>(impl));
  }

  // Unchecked downcast; asserts (debug builds only) that the checked cast
  // would have succeeded.
  template <typename T>
  T staticCast() const
    requires requires { static_cast<typename T::underlying_type *>(impl); }
  {
    assert(impl == nullptr || cast<T>() != nullptr);
    return T(static_cast<typename T::underlying_type *>(impl));
  }

  // Runtime type query; null wrappers are never "a" anything, and wrappers
  // over unrelated hierarchies statically answer false.
  template <typename T> bool isa() const {
    if (impl == nullptr) {
      return false;
    }

    if constexpr (std::is_same_v<std::remove_cvref_t<T>,
                                 std::remove_cvref_t<ImplT>>) {
      return true;
    } else if constexpr (!requires { cast<T>() != nullptr; }) {
      return false;
    } else {
      return cast<T>() != nullptr;
    }
  }

  // Variadic form: true if the wrapper is any of the listed types.
  template <typename... T>
    requires(sizeof...(T) > 1)
  bool isa() const {
    return (isa<T>() || ...);
  }
};
} // namespace shader::ir
|
||||
|
||||
namespace std {
|
||||
template <typename T>
|
||||
requires std::is_base_of_v<
|
||||
shader::ir::PointerWrapper<typename T::underlying_type>, T>
|
||||
struct hash<T> {
|
||||
constexpr std::size_t operator()(const T &pointer) const noexcept {
|
||||
return hash<typename T::underlying_type *>{}(pointer.impl);
|
||||
}
|
||||
};
|
||||
} // namespace std
|
||||
|
|
@ -0,0 +1,136 @@
|
|||
#pragma once

#include "InstructionImpl.hpp" // IWYU pragma: keep

namespace shader::ir {
/// Forward iteration over a linked list of instructions, filtered to T.
///
/// The iterator caches the next sibling before yielding the current one.
/// NOTE(review): presumably this is so the yielded instruction can be
/// unlinked/erased during iteration without invalidating the walk — confirm.
/// When T is not plain Instruction, elements that are not isa<T> are skipped.
template <typename T> struct PreincNodeIterable {
  struct EndIterator {};

  struct Iterator {
    Instruction nextElem;    // cached successor of currentElem
    Instruction currentElem; // element yielded by operator*
    Instruction endElem;     // exclusive end of the range (may be null)

    Iterator() = default;

    Iterator(Instruction elem, Instruction end)
        : currentElem(elem), endElem(end) {
      nextElem = currentElem ? currentElem.getNext() : nullptr;

      // Skip leading elements that do not match T.
      if constexpr (!std::is_same_v<Instruction, T>) {
        while (currentElem != endElem && !currentElem.isa<T>()) {
          advance();
        }
      }
    }

    T operator*() const { return currentElem.staticCast<T>(); }

    Iterator &operator++() {
      advance();

      // Skip elements that do not match the requested type.
      if constexpr (!std::is_same_v<Instruction, T>) {
        while (currentElem != endElem && !currentElem.isa<T>()) {
          advance();
        }
      }

      return *this;
    }

    bool operator==(const Iterator &) const = default;

    bool operator==(const EndIterator &) const {
      return currentElem == endElem;
    }

    // Step to the cached successor and refresh the cache.
    void advance() {
      currentElem = nextElem;
      if (nextElem) {
        nextElem = nextElem.getNext();
      }
    }
  };

  PreincNodeIterable(Instruction beginIt, Instruction endIt)
      : mBeginIt(beginIt), mEndIt(endIt) {}

  Iterator begin() const { return Iterator(mBeginIt, mEndIt); }
  EndIterator end() const { return EndIterator{}; }

private:
  Instruction mBeginIt;
  Instruction mEndIt;
};

/// Reverse counterpart of PreincNodeIterable: identical behavior, but walks
/// getPrev() links instead of getNext().
template <typename T> struct RevPreincNodeIterable {
  struct EndIterator {};

  struct Iterator {
    Instruction nextElem;    // cached predecessor of currentElem
    Instruction currentElem; // element yielded by operator*
    Instruction endElem;     // exclusive end of the range (may be null)

    Iterator() = default;

    Iterator(Instruction elem, Instruction end)
        : currentElem(elem), endElem(end) {
      nextElem = currentElem ? currentElem.getPrev() : nullptr;

      // Skip leading elements that do not match T.
      if constexpr (!std::is_same_v<Instruction, T>) {
        while (currentElem != endElem && !currentElem.isa<T>()) {
          advance();
        }
      }
    }

    T operator*() const { return currentElem.staticCast<T>(); }

    Iterator &operator++() {
      advance();

      // Skip elements that do not match the requested type.
      if constexpr (!std::is_same_v<Instruction, T>) {
        while (currentElem != endElem && !currentElem.isa<T>()) {
          advance();
        }
      }

      return *this;
    }

    bool operator==(const Iterator &) const = default;

    bool operator==(const EndIterator &) const {
      return currentElem == endElem;
    }

    // Step to the cached predecessor and refresh the cache.
    void advance() {
      currentElem = nextElem;
      if (nextElem) {
        nextElem = nextElem.getPrev();
      }
    }
  };

  RevPreincNodeIterable(Instruction beginIt, Instruction endIt)
      : mBeginIt(beginIt), mEndIt(endIt) {}

  Iterator begin() const { return Iterator(mBeginIt, mEndIt); }
  EndIterator end() const { return EndIterator{}; }

private:
  Instruction mBeginIt;
  Instruction mEndIt;
};

/// Iterates [begin, end) following getNext(); end == nullptr walks to the
/// tail of the list.
template <typename T = Instruction>
inline PreincNodeIterable<T> range(Instruction begin,
                                   Instruction end = nullptr) {
  return {begin, end};
}
/// Iterates [begin, end) following getPrev(); end == nullptr walks to the
/// head of the list.
template <typename T = Instruction>
inline RevPreincNodeIterable<T> revRange(Instruction begin,
                                         Instruction end = nullptr) {
  return {begin, end};
}
} // namespace shader::ir
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
#pragma once

#include "PointerWrapper.hpp"
#include <ostream>

namespace shader::ir {
class NameStorage;
/// Pointer wrapper that forwards print() calls to its implementation.
///
/// The two-argument form prefers the impl's `print(os, ns)` overload (which
/// can resolve value names via NameStorage) and falls back to the plain
/// `print(os)` overload when that is all the impl provides.
template <typename T> struct PrintableWrapper : PointerWrapper<T> {
  using PointerWrapper<T>::PointerWrapper;
  using PointerWrapper<T>::operator=;

  void print(std::ostream &os, NameStorage &ns) const {
    if constexpr (requires { this->impl->print(os, ns); }) {
      this->impl->print(os, ns);
    } else {
      this->impl->print(os);
    }
  }

  // Only available when the impl itself has a name-free print overload.
  void print(std::ostream &os) const
    requires requires { this->impl->print(os); }
  {
    this->impl->print(os);
  }
};
} // namespace shader::ir
|
||||
19
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Region.hpp
Normal file
19
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Region.hpp
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
#pragma once

#include "Node.hpp"
#include "RegionLike.hpp"

namespace shader::ir {
/// Node-based wrapper for region implementations: a node that owns an
/// ordered list of child instructions (see RegionLikeWrapper).
template <typename ImplT>
struct RegionWrapper : RegionLikeWrapper<ImplT, NodeWrapper> {
  using RegionLikeWrapper<ImplT, NodeWrapper>::RegionLikeWrapper;
  using RegionLikeWrapper<ImplT, NodeWrapper>::operator=;
};

struct RegionImpl;

/// Handle to a concrete region node.
struct Region : RegionWrapper<RegionImpl> {
  using RegionWrapper<RegionImpl>::RegionWrapper;
  using RegionWrapper<RegionImpl>::operator=;
};
} // namespace shader::ir
|
||||
15
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/RegionImpl.hpp
Normal file
15
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/RegionImpl.hpp
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
#pragma once
#include "NameStorage.hpp"
#include "NodeImpl.hpp"
#include "Region.hpp"
#include "RegionLikeImpl.hpp"
#include <ostream>

namespace shader::ir {
/// Concrete region node: an IR node that owns a child instruction list.
/// print/clone are implemented out of line.
struct RegionImpl : NodeImpl, RegionLikeImpl {
  RegionImpl(Location loc) { setLocation(loc); }

  void print(std::ostream &os, NameStorage &ns) const override;
  Node clone(Context &context, CloneMap &map) const override;
};
} // namespace shader::ir
|
||||
38
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/RegionLike.hpp
Normal file
38
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/RegionLike.hpp
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
#pragma once

#include "Instruction.hpp"

namespace shader::ir {
struct RegionLike;

/// Mixin wrapper exposing the child-list operations of a region-like node.
///
/// BaseWrapper selects the handle base (e.g. PointerWrapper or NodeWrapper);
/// every member simply forwards to the underlying RegionLikeImpl.
template <typename ImplT, template <typename> typename BaseWrapper>
struct RegionLikeWrapper : BaseWrapper<ImplT> {
  using BaseWrapper<ImplT>::BaseWrapper;
  using BaseWrapper<ImplT>::operator=;

  // Appends the contents of `other` to this region; defined where
  // RegionLikeImpl is complete. NOTE(review): presumably splices/moves the
  // children of `other` into this region — confirm in the implementation.
  void appendRegion(RegionLike other);

  auto getFirst() { return this->impl->first; }
  auto getLast() { return this->impl->last; }
  bool empty() { return this->impl->first == nullptr; }

  void insertAfter(Instruction point, Instruction node) {
    this->impl->insertAfter(point, node);
  }
  void prependChild(Instruction node) { this->impl->prependChild(node); }

  void addChild(Instruction node) { this->impl->addChild(node); }
  /// Forward iteration over children, filtered to T.
  template <typename T = Instruction> auto children() {
    return this->impl->template children<T>();
  }
  /// Reverse iteration over children, filtered to T.
  template <typename T = Instruction> auto revChildren() {
    return this->impl->template revChildren<T>();
  }
};

struct RegionLikeImpl;
/// Type-erased handle to any node that owns a child instruction list.
struct RegionLike : RegionLikeWrapper<RegionLikeImpl, PointerWrapper> {
  using RegionLikeWrapper::RegionLikeWrapper;
  using RegionLikeWrapper::operator=;
};
} // namespace shader::ir
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
#pragma once

#include "PreincNodeIterable.hpp"
#include "RegionLike.hpp"

namespace shader::ir {
/// Implementation side of a region: an intrusive list of child instructions
/// delimited by `first` and `last`.
struct RegionLikeImpl {
  Instruction first = nullptr; // head of the child list (null when empty)
  Instruction last = nullptr;  // tail of the child list (null when empty)

  virtual ~RegionLikeImpl() = default;

  /// Forward iteration over children, filtered to T (see PreincNodeIterable
  /// for the iteration-safety guarantees).
  template <typename T = Instruction> auto children() const {
    return PreincNodeIterable<T>{first, nullptr};
  }

  /// Reverse iteration over children, filtered to T.
  template <typename T = Instruction> auto revChildren() const {
    return RevPreincNodeIterable<T>{last, nullptr};
  }

  // Mutators are virtual so derived region kinds can hook list updates.
  virtual void insertAfter(Instruction point, Instruction node);
  virtual void prependChild(Instruction node);
  virtual void addChild(Instruction node);
};
} // namespace shader::ir
|
||||
36
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Value.hpp
Normal file
36
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/Value.hpp
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
#pragma once

#include "Instruction.hpp"
#include "Operand.hpp"

namespace shader::ir {
struct Value;
/// Wrapper for instructions that produce a result and track who uses it.
template <typename T> struct ValueWrapper : InstructionWrapper<T> {
  using InstructionWrapper<T>::InstructionWrapper;
  using InstructionWrapper<T>::operator=;

  /// Deduplicated set of instructions that reference this value.
  decltype(auto) getUserList() const { return this->impl->getUserList(); }
  /// Raw use records (user, operand index) of this value.
  auto & getUseList() const { return this->impl->uses; }
  // Rewrites every use of this value to reference `other` instead.
  // Defined out of line below, after Value is a complete type.
  void replaceAllUsesWith(Value other) const;

  bool isUnused() const { return this->impl->uses.empty(); }
};

struct ValueImpl;
struct Value : ValueWrapper<ValueImpl> {
  using ValueWrapper::ValueWrapper;
  using ValueWrapper::operator=;
};

template <typename T>
void ValueWrapper<T>::replaceAllUsesWith(Value other) const {
  this->impl->replaceAllUsesWith(other);
}

/// One use of a value: `user`'s operand at `operandIndex` references `node`.
struct ValueUse {
  Instruction user;
  Value node;
  int operandIndex;
  auto operator<=>(const ValueUse &) const = default;
};
} // namespace shader::ir
|
||||
55
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/ValueImpl.hpp
Normal file
55
rpcsx-gpu2/lib/gcn-shader/include/shader/ir/ValueImpl.hpp
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
#pragma once

#include "InstructionImpl.hpp"
#include "NameStorage.hpp"
#include "Node.hpp"
#include "Value.hpp"

namespace shader::ir {
/// Instruction that produces a result and tracks every use of that result.
struct ValueImpl : InstructionImpl {
  // Use records, kept in sync via addUse/removeUse. NOTE(review): presumably
  // called from operand bookkeeping in InstructionImpl — confirm.
  std::set<ValueUse> uses;

  ValueImpl(Location location, Kind kind, unsigned op,
            std::span<const Operand> operands = {})
      : InstructionImpl(location, kind, op, operands) {}

  void addUse(Instruction user, int operandIndex) {
    uses.insert({user, this, operandIndex});
  }

  void removeUse(Instruction user, int operandIndex) {
    uses.erase({user, this, operandIndex});
  }

  /// Deduplicated set of instructions that reference this value.
  std::set<Node> getUserList() const {
    std::set<Node> list;
    for (auto use : uses) {
      list.insert(use.user);
    }
    return list;
  }

  /// Rewrites all uses of this value to reference `other` (or a null
  /// operand when `other` is null).
  /// NOTE(review): the loop terminates only if replaceOperand removes the
  /// use from `uses` (via removeUse) on each iteration — confirm.
  void replaceAllUsesWith(Value other) {
    // Replacing a value with itself would loop forever; treat as a bug.
    if (other == this) {
      std::abort();
    }

    while (!uses.empty()) {
      auto use = *uses.begin();
      if (other == nullptr) {
        // Pass a literal nullptr so the operand becomes null; presumably
        // this resolves to a different replaceOperand overload than a null
        // Value handle would — confirm before merging these branches.
        use.user.replaceOperand(use.operandIndex, nullptr);
      } else {
        use.user.replaceOperand(use.operandIndex, other);
      }
    }
  }

  // Values print as "%name = <instruction>".
  void print(std::ostream &os, NameStorage &ns) const override {
    os << '%' << ns.getNameOf(const_cast<ValueImpl *>(this));
    os << " = ";
    InstructionImpl::print(os, ns);
  }

  Node clone(Context &context, CloneMap &map) const override;
};
} // namespace shader::ir
|
||||
7
rpcsx-gpu2/lib/gcn-shader/include/shader/opt.hpp
Normal file
7
rpcsx-gpu2/lib/gcn-shader/include/shader/opt.hpp
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
#pragma once
#include "ir/Context.hpp"
#include "ir/Region.hpp"

namespace shader {
/// Runs the IR optimization pipeline over `region`.
/// \returns a bool flag; NOTE(review): presumably true when any
///          transformation changed the IR — confirm at the definition.
bool optimize(ir::Context &context, ir::Region region);
}
|
||||
173
rpcsx-gpu2/lib/gcn-shader/include/shader/spv.hpp
Normal file
173
rpcsx-gpu2/lib/gcn-shader/include/shader/spv.hpp
Normal file
|
|
@ -0,0 +1,173 @@
|
|||
#pragma once

#include "ir/Context.hpp"
#include "ir/Region.hpp"
#include "ir/RegionImpl.hpp"
#include <optional>
#include <span>
#include <spirv-tools/optimizer.hpp>

namespace shader::spv {

/// Holds a deserialized SPIR-V module split into its logical sections,
/// in the order mandated by the SPIR-V module layout.
struct BinaryLayout {
  enum {
    kCapabilities,
    kExtensions,
    kExtInstImports,
    kMemoryModels,
    kEntryPoints,
    kExecutionModes,
    kDebugs,
    kAnnotations,
    kGlobals,
    kFunctionDeclarations,
    kFunctions,

    kRegionCount
  };

  // One region per section; entries stay null until first requested.
  ir::Region regions[kRegionCount];

  /// Returns the region for `index`, creating it lazily on first access.
  ir::Region getOrCreateRegion(ir::Context &context, int index) {
    if (regions[index] == nullptr) {
      regions[index] = context.create<ir::Region>(context.getUnknownLocation());
    }

    return regions[index];
  }

  // Per-section accessors; all forward to getOrCreateRegion.
  ir::Region getOrCreateCapabilities(ir::Context &context) {
    return getOrCreateRegion(context, kCapabilities);
  }
  ir::Region getOrCreateExtensions(ir::Context &context) {
    return getOrCreateRegion(context, kExtensions);
  }
  ir::Region getOrCreateExtInstImports(ir::Context &context) {
    return getOrCreateRegion(context, kExtInstImports);
  }
  ir::Region getOrCreateMemoryModels(ir::Context &context) {
    return getOrCreateRegion(context, kMemoryModels);
  }
  ir::Region getOrCreateEntryPoints(ir::Context &context) {
    return getOrCreateRegion(context, kEntryPoints);
  }
  ir::Region getOrCreateExecutionModes(ir::Context &context) {
    return getOrCreateRegion(context, kExecutionModes);
  }
  ir::Region getOrCreateDebugs(ir::Context &context) {
    return getOrCreateRegion(context, kDebugs);
  }
  ir::Region getOrCreateAnnotations(ir::Context &context) {
    return getOrCreateRegion(context, kAnnotations);
  }
  ir::Region getOrCreateGlobals(ir::Context &context) {
    return getOrCreateRegion(context, kGlobals);
  }
  ir::Region getOrCreateFunctionDeclarations(ir::Context &context) {
    return getOrCreateRegion(context, kFunctionDeclarations);
  }
  ir::Region getOrCreateFunctions(ir::Context &context) {
    return getOrCreateRegion(context, kFunctions);
  }

  ///
  /// \brief Merge all regions into a single one.
  ///
  /// After calling this function, all regions in the object
  /// become empty.
  ///
  ir::Region merge(ir::Context &context) {
    auto result = context.create<ir::Region>(context.getUnknownLocation());
    for (auto &region : regions) {
      if (region == nullptr) {
        continue;
      }

      // ir::Region is a non-owning pointer handle, so a plain copy is
      // sufficient (the previous std::move had no effect on a pointer
      // wrapper); the slot is cleared right after so this layout no longer
      // references the region.
      result.appendRegion(region);
      region = {};
    }

    return result;
  }
};
|
||||
|
||||
///
/// Deserialize a SPIR-V binary into an intermediate representation.
///
/// \param context context to attach the IR to
/// \param spv SPIR-V binary
/// \param loc location to use for error reporting
/// \returns the deserialized IR, or std::nullopt if deserialization failed
///
std::optional<BinaryLayout> deserialize(ir::Context &context,
                                        std::span<const std::uint32_t> spv,
                                        ir::Location loc);
///
/// \brief Serialize SPIR-V from an IR region.
///
/// This function generates a SPIR-V binary from an IR region.
/// The SPIR-V binary is stored in the returned vector.
///
/// \returns A vector of u32 values representing the SPIR-V binary.
///
std::vector<std::uint32_t> serialize(ir::Region body);

/// Convenience overload: merges the layout's sections into one region and
/// serializes it. The layout's regions are emptied by the merge.
inline std::vector<std::uint32_t> serialize(ir::Context &context,
                                            BinaryLayout &&layout) {
  return serialize(layout.merge(context));
}

///
/// \brief Returns true if the instruction is a terminator.
///
bool isTerminatorInst(ir::InstructionId inst);

///
/// \brief Disassemble a SPIR-V binary into text and print result to stderr.
///
/// \param spv The SPIR-V binary to disassemble.
/// \param pretty If true, emit friendly names for functions, variables, and
/// other values. If false, emit the SPIR-V ID for each value.
///
/// \note The SPIR-V binary is not validated or checked for errors. If the
/// input is invalid, the output is undefined.
void dump(std::span<const std::uint32_t> spv, bool pretty = false);

///
/// \brief Disassemble a SPIR-V binary into text.
///
/// \param spv The SPIR-V binary to disassemble.
/// \param pretty If true, emit friendly names for functions, variables, and
/// other values. If false, emit the SPIR-V ID for each value.
/// \return the assembly text
///
/// \note The SPIR-V binary is not validated or checked for errors. If the
/// input is invalid, the output is undefined.
std::string disassembly(std::span<const std::uint32_t> spv, bool pretty = false);

///
/// \brief Validates a given SPIR-V binary against the SPIR-V spec
///
/// \param spv the SPIR-V binary to validate
/// \return whether the SPIR-V binary is valid
///
/// This function uses the SPIR-V Tools validator to check the given SPIR-V
/// binary against the SPIR-V spec. If the SPIR-V is invalid, the function
/// will print out the validation error messages and return false. If the
/// SPIR-V is valid, the function simply returns true.
bool validate(std::span<const std::uint32_t> spv);

///
/// \brief Optimize a SPIR-V module.
///
/// \param spv the SPIR-V binary to optimize
/// \return the optimized SPIR-V binary or an empty optional if binary is
/// invalid
///
/// This function takes a SPIR-V module and runs a series of optimization passes
/// on it using SPIR-V Tools opt. If the optimization is successful, the
/// optimized module is returned. Otherwise, an empty optional is returned.
///
std::optional<std::vector<std::uint32_t>>
optimize(std::span<const std::uint32_t> spv);
} // namespace shader::spv
|
||||
8
rpcsx-gpu2/lib/gcn-shader/include/shader/transform.hpp
Normal file
8
rpcsx-gpu2/lib/gcn-shader/include/shader/transform.hpp
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
#pragma once
|
||||
#include "SpvConverter.hpp"
|
||||
#include "ir.hpp"
|
||||
|
||||
namespace shader {
|
||||
void structurizeCfg(spv::Context &context, ir::RegionLike region,
|
||||
ir::Value exitLabel);
|
||||
}
|
||||
19
rpcsx-gpu2/lib/gcn-shader/shaders/CMakeLists.txt
Normal file
19
rpcsx-gpu2/lib/gcn-shader/shaders/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
# Compiles rdna.glsl with shader-tool into a generated C++ header embedding
# the SPIR-V binary (variable g_rdna_semantic_spirv), and exposes it through
# the rdna-semantic-spirv INTERFACE library.
set(OUTPUT_FILENAME rdna-semantic-spirv.hpp)
set(INCLUDE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include)
set(OUTPUT_DIRECTORY ${INCLUDE_DIRECTORY}/shaders)
set(OUTPUT_FILE ${OUTPUT_DIRECTORY}/${OUTPUT_FILENAME})
set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/rdna.glsl)
file(MAKE_DIRECTORY ${OUTPUT_DIRECTORY})

# Regenerates the header whenever the shader source or the tool changes.
add_custom_command(
  OUTPUT ${OUTPUT_FILE}
  COMMAND $<TARGET_FILE:shader-tool> --output-type spirv-header --output-var-name g_rdna_semantic_spirv -i ${INPUT_FILE} -o ${OUTPUT_FILE}
  DEPENDS shader-tool ${INPUT_FILE}
  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
  COMMENT "Generating ${OUTPUT_FILE}..."
)

# Consumers link against rdna-semantic-spirv to pick up the include path and
# the generation dependency.
add_custom_target(rdna-semantic-spirv-gen DEPENDS ${OUTPUT_FILE})
add_library(rdna-semantic-spirv INTERFACE)
add_dependencies(rdna-semantic-spirv rdna-semantic-spirv-gen)
target_include_directories(rdna-semantic-spirv INTERFACE ${INCLUDE_DIRECTORY})
|
||||
2708
rpcsx-gpu2/lib/gcn-shader/shaders/rdna.glsl
Normal file
2708
rpcsx-gpu2/lib/gcn-shader/shaders/rdna.glsl
Normal file
File diff suppressed because it is too large
Load diff
274
rpcsx-gpu2/lib/gcn-shader/src/Evaluator.cpp
Normal file
274
rpcsx-gpu2/lib/gcn-shader/src/Evaluator.cpp
Normal file
|
|
@ -0,0 +1,274 @@
|
|||
#include "Evaluator.hpp"
|
||||
#include "dialect.hpp"
|
||||
#include "ir.hpp"
|
||||
|
||||
using namespace shader;
|
||||
|
||||
// Evaluates a single IR operand to a constant value.
//
// Literal operands are converted according to the optional `type` value.
// NOTE(review): assumes `type` is an integer/float type instruction whose
// operand 0 is the bit width and operand 1 the signedness — confirm against
// the type builder.
// Value operands are evaluated recursively and memoized in `values`.
// Returns an empty Value when the operand cannot be evaluated.
eval::Value eval::Evaluator::eval(const ir::Operand &op, ir::Value type) {
  if (auto val = op.getAsValue()) {
    // Memoize per-value results; recurse only on first sight.
    auto [it, inserted] = values.try_emplace(val, Value{});
    if (inserted) {
      it->second = eval(val);
    }
    return it->second;
  }

  if (auto result = op.getAsInt32()) {
    if (type != nullptr) {
      bool isSigned = *type.getOperand(1).getAsInt32() != 0;
      switch (*type.getOperand(0).getAsInt32()) {
      case 8:
        if (isSigned) {
          return static_cast<std::int8_t>(*result);
        }

        return static_cast<std::uint8_t>(*result);

      case 16:
        if (isSigned) {
          return static_cast<std::int16_t>(*result);
        }

        return static_cast<std::uint16_t>(*result);

      case 32:
        if (isSigned) {
          return static_cast<std::int32_t>(*result);
        }

        return static_cast<std::uint32_t>(*result);
      }

      // Unsupported integer width: not evaluable.
      return {};
    }

    return *result;
  }

  if (auto result = op.getAsInt64()) {
    if (type != nullptr) {
      bool isSigned = *type.getOperand(1).getAsInt32() != 0;

      if (isSigned) {
        return static_cast<std::int64_t>(*result);
      }

      return static_cast<std::uint64_t>(*result);
    }

    return *result;
  }

  if (auto result = op.getAsBool()) {
    return *result;
  }

  if (auto result = op.getAsFloat()) {
    if (type != nullptr) {
      if (*type.getOperand(0).getAsInt32() == 16) {
        return static_cast<float16_t>(*result);
      }

      // Fix: this previously cast the float constant to std::uint64_t,
      // truncating its value. A non-16-bit float type keeps the literal
      // as-is (64-bit float constants arrive via getAsDouble below).
      return *result;
    }

    return *result;
  }

  if (auto result = op.getAsDouble()) {
    return *result;
  }

  return {};
}
|
||||
// Constant-folds one instruction, given its full operand list.
//
// For value-producing SPIR-V instructions operand 0 is the result type and
// the data inputs start at operand 1 (which is why every unary handler
// below reads operands[1]). Returns an empty Value when the instruction
// cannot be folded.
eval::Value eval::Evaluator::eval(ir::InstructionId instId,
                                  std::span<const ir::Operand> operands) {
  if (instId == ir::spv::OpConstant) {
    return eval(operands[1], operands[0].getAsValue());
  }

  if (instId == ir::spv::OpBitcast) {
    return eval(operands[1]).bitcast(operands[0].getAsValue());
  }

  if (instId == ir::spv::OpSConvert || instId == ir::spv::OpUConvert) {
    if (auto rhs = eval(operands[1])) {
      return rhs.iConvert(operands[0].getAsValue(),
                          instId == ir::spv::OpSConvert);
    }

    return {};
  }

  if (instId == ir::spv::OpSelect) {
    return eval(operands[1]).select(eval(operands[2]), eval(operands[3]));
  }

  if (instId == ir::spv::OpIAdd || instId == ir::spv::OpFAdd) {
    return eval(operands[1]) + eval(operands[2]);
  }
  if (instId == ir::spv::OpISub || instId == ir::spv::OpFSub) {
    return eval(operands[1]) - eval(operands[2]);
  }
  if (instId == ir::spv::OpSDiv || instId == ir::spv::OpUDiv ||
      instId == ir::spv::OpFDiv) {
    return eval(operands[1]) / eval(operands[2]);
  }
  if (instId == ir::spv::OpSMod || instId == ir::spv::OpUMod ||
      instId == ir::spv::OpFMod) {
    return eval(operands[1]) % eval(operands[2]);
  }
  // NOTE(review): SRem/FRem differ from SMod/FMod for negative operands,
  // yet all four map to operator% here — confirm Value implements the
  // intended rounding for each case.
  if (instId == ir::spv::OpSRem) {
    return eval(operands[1]) % eval(operands[2]);
  }
  if (instId == ir::spv::OpFRem) {
    return eval(operands[1]) % eval(operands[2]);
  }
  if (instId == ir::spv::OpSNegate || instId == ir::spv::OpFNegate) {
    // Fix: negate the input (operand 1), not the result type (operand 0);
    // every other unary handler in this function reads operands[1].
    return -eval(operands[1]);
  }

  if (instId == ir::spv::OpNot) {
    return ~eval(operands[1]);
  }
  if (instId == ir::spv::OpLogicalNot) {
    return !eval(operands[1]);
  }

  if (instId == ir::spv::OpLogicalEqual || instId == ir::spv::OpIEqual) {
    return eval(operands[1]) == eval(operands[2]);
  }
  if (instId == ir::spv::OpLogicalNotEqual || instId == ir::spv::OpINotEqual) {
    return eval(operands[1]) != eval(operands[2]);
  }
  if (instId == ir::spv::OpLogicalOr) {
    return eval(operands[1]) || eval(operands[2]);
  }
  if (instId == ir::spv::OpLogicalAnd) {
    return eval(operands[1]) && eval(operands[2]);
  }
  if (instId == ir::spv::OpUGreaterThan || instId == ir::spv::OpSGreaterThan) {
    return eval(operands[1]) > eval(operands[2]);
  }
  if (instId == ir::spv::OpUGreaterThanEqual ||
      instId == ir::spv::OpSGreaterThanEqual) {
    return eval(operands[1]) >= eval(operands[2]);
  }
  if (instId == ir::spv::OpULessThan || instId == ir::spv::OpSLessThan) {
    return eval(operands[1]) < eval(operands[2]);
  }
  if (instId == ir::spv::OpULessThanEqual ||
      instId == ir::spv::OpSLessThanEqual) {
    return eval(operands[1]) <= eval(operands[2]);
  }
  // Ordered float comparisons are false when either side is NaN; the
  // unordered variants are true in that case.
  if (instId == ir::spv::OpFOrdEqual) {
    return !eval(operands[1]).isNan() && !eval(operands[2]).isNan() &&
           eval(operands[1]) == eval(operands[2]);
  }
  if (instId == ir::spv::OpFUnordEqual) {
    return eval(operands[1]).isNan() || eval(operands[2]).isNan() ||
           eval(operands[1]) == eval(operands[2]);
  }
  if (instId == ir::spv::OpFOrdNotEqual) {
    return !eval(operands[1]).isNan() && !eval(operands[2]).isNan() &&
           eval(operands[1]) != eval(operands[2]);
  }
  if (instId == ir::spv::OpFUnordNotEqual) {
    return eval(operands[1]).isNan() || eval(operands[2]).isNan() ||
           eval(operands[1]) != eval(operands[2]);
  }
  if (instId == ir::spv::OpFOrdLessThan) {
    return !eval(operands[1]).isNan() && !eval(operands[2]).isNan() &&
           eval(operands[1]) < eval(operands[2]);
  }
  if (instId == ir::spv::OpFUnordLessThan) {
    return eval(operands[1]).isNan() || eval(operands[2]).isNan() ||
           eval(operands[1]) < eval(operands[2]);
  }
  if (instId == ir::spv::OpFOrdGreaterThan) {
    return !eval(operands[1]).isNan() && !eval(operands[2]).isNan() &&
           eval(operands[1]) > eval(operands[2]);
  }
  if (instId == ir::spv::OpFUnordGreaterThan) {
    return eval(operands[1]).isNan() || eval(operands[2]).isNan() ||
           eval(operands[1]) > eval(operands[2]);
  }
  if (instId == ir::spv::OpFOrdLessThanEqual) {
    return !eval(operands[1]).isNan() && !eval(operands[2]).isNan() &&
           eval(operands[1]) <= eval(operands[2]);
  }
  if (instId == ir::spv::OpFUnordLessThanEqual) {
    return eval(operands[1]).isNan() || eval(operands[2]).isNan() ||
           eval(operands[1]) <= eval(operands[2]);
  }
  if (instId == ir::spv::OpFOrdGreaterThanEqual) {
    return !eval(operands[1]).isNan() && !eval(operands[2]).isNan() &&
           eval(operands[1]) >= eval(operands[2]);
  }
  if (instId == ir::spv::OpFUnordGreaterThanEqual) {
    return eval(operands[1]).isNan() || eval(operands[2]).isNan() ||
           eval(operands[1]) >= eval(operands[2]);
  }
  if (instId == ir::spv::OpShiftRightLogical) {
    return eval(operands[1]) >> eval(operands[2]);
  }
  // NOTE(review): logical and arithmetic right shifts share operator>>;
  // assumes Value shifts according to its own signedness — confirm.
  if (instId == ir::spv::OpShiftRightArithmetic) {
    return eval(operands[1]) >> eval(operands[2]);
  }
  if (instId == ir::spv::OpShiftLeftLogical) {
    return eval(operands[1]) << eval(operands[2]);
  }
  if (instId == ir::spv::OpBitwiseOr) {
    return eval(operands[1]) | eval(operands[2]);
  }
  if (instId == ir::spv::OpBitwiseXor) {
    return eval(operands[1]) ^ eval(operands[2]);
  }
  if (instId == ir::spv::OpBitwiseAnd) {
    return eval(operands[1]) & eval(operands[2]);
  }

  if (instId == ir::spv::OpIsNan) {
    return eval(operands[1]).isNan();
  }
  if (instId == ir::spv::OpIsInf) {
    return eval(operands[1]).isInf();
  }
  if (instId == ir::spv::OpIsFinite) {
    return eval(operands[1]).isFinite();
  }

  if (instId == ir::spv::OpCompositeConstruct) {
    std::vector<Value> constituents;
    constituents.reserve(operands.size() - 1);
    for (auto &op : operands.subspan(1)) {
      constituents.push_back(eval(op));
    }
    return Value::compositeConstruct(operands[0].getAsValue(), constituents);
  }

  if (instId == ir::spv::OpCompositeExtract) {
    auto composite = eval(operands[1].getAsValue());
    if (composite.empty()) {
      return {};
    }

    std::vector<Value> indexes;
    indexes.reserve(operands.size() - 2);
    for (auto &op : operands.subspan(2)) {
      indexes.push_back(eval(op));
    }

    // Only single-level extraction is supported for now.
    if (indexes.size() != 1) {
      return {};
    }

    return composite.compositeExtract(indexes[0]);
  }

  // Unknown or unsupported instruction: not foldable.
  return {};
}
|
||||
|
||||
/// Evaluates a value instruction by dispatching on its instruction id and
/// full operand list (see the InstructionId overload).
eval::Value eval::Evaluator::eval(ir::Value op) {
  auto instId = op.getInstId();
  auto operandSpan = op.getOperands();
  return eval(instId, operandSpan);
}
|
||||
1626
rpcsx-gpu2/lib/gcn-shader/src/GcnConverter.cpp
Normal file
1626
rpcsx-gpu2/lib/gcn-shader/src/GcnConverter.cpp
Normal file
File diff suppressed because it is too large
Load diff
1153
rpcsx-gpu2/lib/gcn-shader/src/GcnInstruction.cpp
Normal file
1153
rpcsx-gpu2/lib/gcn-shader/src/GcnInstruction.cpp
Normal file
File diff suppressed because it is too large
Load diff
106
rpcsx-gpu2/lib/gcn-shader/src/ModuleInfo.cpp
Normal file
106
rpcsx-gpu2/lib/gcn-shader/src/ModuleInfo.cpp
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
#include "ModuleInfo.hpp"
|
||||
#include "analyze.hpp"
|
||||
#include "dialect.hpp"
|
||||
#include "ir.hpp"
|
||||
|
||||
// Collect per-function access information (parameter/global read-write sets
// and return type) for `function`, memoizing the result in
// moduleInfo.functions. Called recursively for callees via OpFunctionCall.
shader::ModuleInfo::Function &
shader::collectFunctionInfo(ModuleInfo &moduleInfo, ir::Value function) {
  // try_emplace both inserts the cache slot and detects re-entry/memo hits:
  // if the entry already existed, return it as-is (for recursive/self calls
  // this returns a partially filled record — presumably intentional to break
  // cycles; TODO confirm recursion is not expected in this IR).
  auto [fnIt, fnInserted] =
      moduleInfo.functions.try_emplace(function, ModuleInfo::Function{});
  if (!fnInserted) {
    return fnIt->second;
  }

  auto &result = fnIt->second;
  // Maps each OpFunctionParameter value to its index in result.parameters.
  std::map<ir::Value, int> params;

  // Operand 0 of OpFunction is the result (return) type.
  result.returnType = function.getOperand(0).getAsValue();

  // Record that `pointer` is accessed with `access`: either widen the access
  // mask of the matching parameter, or record the access on a non-Function
  // storage-class OpVariable (i.e. a module-level global).
  auto trackAccess = [&](ir::Value pointer, Access access) {
    pointer = unwrapPointer(pointer);

    if (auto it = params.find(pointer); it != params.end()) {
      result.parameters[it->second].access |= access;
      return;
    }

    if (pointer == ir::spv::OpVariable) {
      // Operand 1 of OpVariable is the storage class.
      auto storagePtr = pointer.getOperand(1).getAsInt32();
      if (!storagePtr) {
        return;
      }

      auto storage = ir::spv::StorageClass(*storagePtr);

      // Function-local variables are not interesting at module scope.
      // NOTE(review): repeated accesses overwrite rather than OR the mask
      // here, unlike the parameter path above — TODO confirm intended.
      if (storage != ir::spv::StorageClass::Function) {
        result.variables[pointer] = access;
      }
    }
  };

  // Walk the instructions of the function body until OpFunctionEnd.
  for (auto inst : ir::range(function.getNext())) {
    if (inst == ir::spv::OpFunctionEnd) {
      break;
    }

    if (inst == ir::spv::OpFunctionParameter) {
      auto type = inst.getOperand(0).getAsValue();
      params[inst.staticCast<ir::Value>()] = result.parameters.size();
      result.parameters.push_back({.type = type, .access = Access::None});
      continue;
    }

    if (inst == ir::spv::OpFunctionCall) {
      // Propagate the callee's parameter and global accesses onto the
      // arguments/globals visible at this call site.
      auto callee = inst.getOperand(1).getAsValue();
      auto &calleeInfo = collectFunctionInfo(moduleInfo, callee);
      auto args = inst.getOperands().subspan(2); // skip result type + callee

      for (std::size_t index = 0; auto &[_, access] : calleeInfo.parameters) {
        trackAccess(args[index++].getAsValue(), access);
      }
      for (auto &[global, access] : calleeInfo.variables) {
        trackAccess(global, access);
      }
      continue;
    }

    // Loads read through operand 1 (the pointer).
    if (inst == ir::spv::OpLoad || inst == ir::spv::OpAtomicLoad) {
      trackAccess(inst.getOperand(1).getAsValue(), Access::Read);
      continue;
    }

    // Stores write through operand 0 (the pointer).
    if (inst == ir::spv::OpStore || inst == ir::spv::OpAtomicStore) {
      trackAccess(inst.getOperand(0).getAsValue(), Access::Write);
      continue;
    }

    // Read-modify-write atomics: pointer is operand 1.
    if (inst == ir::spv::OpAtomicExchange ||
        inst == ir::spv::OpAtomicCompareExchange ||
        inst == ir::spv::OpAtomicCompareExchangeWeak ||
        inst == ir::spv::OpAtomicIIncrement ||
        inst == ir::spv::OpAtomicIDecrement || inst == ir::spv::OpAtomicIAdd ||
        inst == ir::spv::OpAtomicISub || inst == ir::spv::OpAtomicSMin ||
        inst == ir::spv::OpAtomicUMin || inst == ir::spv::OpAtomicSMax ||
        inst == ir::spv::OpAtomicUMax || inst == ir::spv::OpAtomicAnd ||
        inst == ir::spv::OpAtomicOr || inst == ir::spv::OpAtomicXor) {
      trackAccess(inst.getOperand(1).getAsValue(), Access::ReadWrite);
    }
  }

  return result;
}
|
||||
|
||||
// Populate moduleInfo with access information for every OpFunction found in
// the binary layout's functions region; no-op when the region is absent.
void shader::collectModuleInfo(ModuleInfo &moduleInfo,
                               const spv::BinaryLayout &layout) {
  if (auto region = layout.regions[spv::BinaryLayout::kFunctions]) {
    for (auto node : region.children<ir::Value>()) {
      if (node != ir::spv::OpFunction) {
        continue;
      }

      collectFunctionInfo(moduleInfo, node);
    }
  }
}
|
||||
149
rpcsx-gpu2/lib/gcn-shader/src/SemanticModuleInfo.cpp
Normal file
149
rpcsx-gpu2/lib/gcn-shader/src/SemanticModuleInfo.cpp
Normal file
|
|
@ -0,0 +1,149 @@
|
|||
#include "SemanticInfo.hpp"
|
||||
#include "dialect.hpp"
|
||||
|
||||
using namespace shader;
|
||||
|
||||
// Number of opcodes defined by an instruction kind (dialect), used to iterate
// every opcode of every kind. Returns 0 for kinds that have no fixed opcode
// table here (Spv, Builtin, MemSSA) and for out-of-range values.
static std::size_t getOpCount(ir::Kind kind) {
  switch (kind) {
  case ir::Kind::Spv:
  case ir::Kind::Builtin:
  case ir::Kind::MemSSA:
    break;

  case ir::Kind::AmdGpu:
    return ir::amdgpu::OpCount;
  case ir::Kind::Vop2:
    return ir::vop2::OpCount;
  case ir::Kind::Sop2:
    return ir::sop2::OpCount;
  case ir::Kind::Sopk:
    return ir::sopk::OpCount;
  case ir::Kind::Smrd:
    return ir::smrd::OpCount;
  case ir::Kind::Vop3:
    return ir::vop3::OpCount;
  case ir::Kind::Mubuf:
    return ir::mubuf::OpCount;
  case ir::Kind::Mtbuf:
    return ir::mtbuf::OpCount;
  case ir::Kind::Mimg:
    return ir::mimg::OpCount;
  case ir::Kind::Ds:
    return ir::ds::OpCount;
  case ir::Kind::Vintrp:
    return ir::vintrp::OpCount;
  case ir::Kind::Exp:
    // Export has a single opcode, so no generated OpCount constant exists.
    return 1;
  case ir::Kind::Vop1:
    return ir::vop1::OpCount;
  case ir::Kind::Vopc:
    return ir::vopc::OpCount;
  case ir::Kind::Sop1:
    return ir::sop1::OpCount;
  case ir::Kind::Sopc:
    return ir::sopc::OpCount;
  case ir::Kind::Sopp:
    return ir::sopp::OpCount;
  case ir::Kind::Count:
    break;
  }

  return 0;
}
|
||||
|
||||
// Build the semantics table of a "semantic" SPIR-V module: match each
// function's OpName (demangled up to the first '(') against known GCN
// instruction names and register the function as the semantic implementation
// of every matching instruction id.
void shader::collectSemanticModuleInfo(SemanticModuleInfo &moduleInfo,
                                       const spv::BinaryLayout &layout) {
  // Short-name index: "name" -> all instruction ids (across kinds) that use
  // that short name. Built once and cached for the process lifetime.
  static auto instNameToIds = [] {
    std::map<std::string, std::vector<ir::InstructionId>, std::less<>> result;
    for (std::size_t kind = 0; kind < std::size_t(ir::Kind::Count); ++kind) {
      auto opCount = getOpCount(ir::Kind(kind));

      for (unsigned op = 0; op < opCount; ++op) {
        auto name = getInstructionShortName(ir::Kind(kind), op);
        if (name == nullptr) {
          continue;
        }

        result[name].push_back(ir::getInstructionId(ir::Kind(kind), op));
      }
    }
    return result;
  }();

  collectModuleInfo(moduleInfo, layout);

  // Kind-qualified index: "<kind>_<name>" -> instruction ids. A wide name
  // pins the instruction kind, so it takes priority over the short-name map.
  static auto wideInstNameToIds = [] {
    std::map<std::string, std::vector<ir::InstructionId>, std::less<>> result;
    for (std::size_t kind = 0; kind < std::size_t(ir::Kind::Count); ++kind) {
      auto opCount = getOpCount(ir::Kind(kind));
      if (opCount == 0) {
        continue;
      }

      for (unsigned op = 0; op < opCount; ++op) {
        auto name = getInstructionShortName(ir::Kind(kind), op);
        if (name == nullptr) {
          continue;
        }

        std::string wideName = getKindName(ir::Kind(kind));
        wideName += '_';
        wideName += name;

        result[std::move(wideName)].push_back(
            ir::getInstructionId(ir::Kind(kind), op));
      }
    }
    return result;
  }();

  for (auto &[fn, info] : moduleInfo.functions) {
    // Only the first OpName use of each function is considered; every exit
    // path below breaks out of the use-list loop.
    for (auto &use : fn.getUseList()) {
      if (use.user != ir::spv::OpName) {
        continue;
      }

      auto mangledNameString = use.user.getOperand(1).getAsString();

      if (mangledNameString == nullptr) {
        break;
      }

      // Strip the parameter list of the mangled/pretty name; names without
      // '(' are rejected entirely.
      auto mangledName = std::string_view(*mangledNameString);
      std::string_view name;
      if (auto pos = mangledName.find('('); pos != std::string_view::npos) {
        name = mangledName.substr(0, pos);
      } else {
        break;
      }

      std::vector<ir::InstructionId> *ids = nullptr;
      std::vector<ir::InstructionId> *wideIds = nullptr;

      if (auto it = wideInstNameToIds.find(name);
          it != wideInstNameToIds.end()) {
        wideIds = &it->second;
      }

      if (auto it = instNameToIds.find(name); it != instNameToIds.end()) {
        ids = &it->second;
      }

      if (ids == nullptr && wideIds == nullptr) {
        break;
      }

      if (wideIds != nullptr) {
        // Kind-qualified match: overwrite any existing binding.
        for (auto id : *wideIds) {
          moduleInfo.semantics[id] = fn;
        }
      } else {
        // Short-name match: emplace keeps an earlier (e.g. wide) binding.
        for (auto id : *ids) {
          moduleInfo.semantics.emplace(id, fn);
        }
      }

      break;
    }
  }
}
|
||||
641
rpcsx-gpu2/lib/gcn-shader/src/SpvConverter.cpp
Normal file
641
rpcsx-gpu2/lib/gcn-shader/src/SpvConverter.cpp
Normal file
|
|
@ -0,0 +1,641 @@
|
|||
#include "SpvConverter.hpp"
|
||||
#include "dialect.hpp"
|
||||
#include "dialect/spv.hpp"
|
||||
#include <string>
|
||||
|
||||
using namespace shader;
|
||||
|
||||
using Builder = ir::Builder<ir::spv::Builder, ir::builtin::Builder>;
|
||||
|
||||
static std::string getTypeName(ir::Value type);
|
||||
|
||||
static std::string getConstantName(ir::Value constant) {
|
||||
if (constant == ir::spv::OpConstant) {
|
||||
auto typeValue = constant.getOperand(0).getAsValue();
|
||||
auto value = constant.getOperand(1);
|
||||
|
||||
if (typeValue == ir::spv::OpTypeInt) {
|
||||
auto width = *typeValue.getOperand(0).getAsInt32();
|
||||
|
||||
if (width <= 32) {
|
||||
if (value.getAsInt32() == nullptr) {
|
||||
std::abort();
|
||||
}
|
||||
return "_" + std::to_string(*value.getAsInt32());
|
||||
}
|
||||
if (value.getAsInt64() == nullptr) {
|
||||
std::abort();
|
||||
}
|
||||
return "c_" + std::to_string(*value.getAsInt64());
|
||||
}
|
||||
|
||||
if (typeValue == ir::spv::OpTypeFloat) {
|
||||
auto width = *typeValue.getOperand(0).getAsInt32();
|
||||
|
||||
if (width == 32) {
|
||||
if (value.getAsFloat() == nullptr) {
|
||||
std::abort();
|
||||
}
|
||||
return "c_" + std::to_string(*value.getAsFloat());
|
||||
}
|
||||
if (value.getAsDouble() == nullptr) {
|
||||
std::abort();
|
||||
}
|
||||
return "c_" + std::to_string(*value.getAsDouble());
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
if (constant == ir::spv::OpConstantTrue) {
|
||||
return "true";
|
||||
}
|
||||
|
||||
if (constant == ir::spv::OpConstantFalse) {
|
||||
return "false";
|
||||
}
|
||||
|
||||
if (constant == ir::spv::OpConstantNull) {
|
||||
return "null_" + getTypeName(constant.getOperand(0).getAsValue());
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
// Derive a compact human-readable debug name for a SPIR-V type (e.g. "u32",
// "f32x4", "f32[4]", "struct{u32, f32}*"). Returns empty for types with no
// naming scheme.
static std::string getTypeName(ir::Value type) {
  if (type == ir::spv::OpTypeInt) {
    // Operand 0 is the bit width, operand 1 the signedness flag.
    if (type.getOperand(1) != 0) {
      return "s" + std::to_string(*type.getOperand(0).getAsInt32());
    }
    return "u" + std::to_string(*type.getOperand(0).getAsInt32());
  }

  if (type == ir::spv::OpTypeFloat) {
    return "f" + std::to_string(*type.getOperand(0).getAsInt32());
  }

  if (type == ir::spv::OpTypeBool) {
    return "bool";
  }

  if (type == ir::spv::OpTypeVoid) {
    return "void";
  }

  if (type == ir::spv::OpTypeSampler) {
    return "sampler";
  }

  if (type == ir::spv::OpTypeVector) {
    // "<component>x<count>", e.g. "f32x4".
    return getTypeName(type.getOperand(0).getAsValue()) + 'x' +
           std::to_string(*type.getOperand(1).getAsInt32());
  }

  if (type == ir::spv::OpTypeArray) {
    // Operand 1 is the length, itself a constant value; only plain
    // OpConstant lengths can be rendered numerically.
    auto count = type.getOperand(1).getAsValue();
    if (count == ir::spv::OpConstant) {
      if (auto n = count.getOperand(1).getAsInt32()) {
        return getTypeName(type.getOperand(0).getAsValue()) + '[' +
               std::to_string(*n) + ']';
      }
    }

    // Unknown/spec-constant length.
    return getTypeName(type.getOperand(0).getAsValue()) + "[N]";
  }

  if (type == ir::spv::OpTypeRuntimeArray) {
    return getTypeName(type.getOperand(0).getAsValue()) + "[]";
  }

  if (type == ir::spv::OpTypeStruct) {
    std::string result = "struct{";
    for (bool first = true; auto &op : type.getOperands()) {
      if (!first) {
        result += ", ";
      } else {
        first = false;
      }
      result += getTypeName(op.getAsValue());
    }

    result += "}";
    return result;
  }

  if (type == ir::spv::OpTypePointer) {
    // Operand 1 is the pointee type; the storage class is not encoded in
    // the name.
    return getTypeName(type.getOperand(1).getAsValue()) + "*";
  }

  return {};
}
|
||||
|
||||
// Create the context's always-present regions: a region collecting
// function-local variables and a labeled epilogue region.
spv::Context::Context() {
  localVariables = create<ir::Region>(getUnknownLocation());
  epilogue = createRegionWithLabel(getUnknownLocation()).getParent();
}
|
||||
|
||||
// Clone-with-dedup hook used when importing SPIR-V nodes into a spv::Context.
// Types and constants are unified against the target context's global pool;
// ext-inst imports, non-local variables and functions are cloned and placed
// into the appropriate layout regions; everything else falls through to the
// base CloneMap behavior. Decorations and debug names attached to a cloned
// node are copied along with it.
ir::Node spv::Import::getOrCloneImpl(ir::Context &context, ir::Node node,
                                     bool isOperand) {
  auto inst = node.cast<ir::Instruction>();

  // Non-instruction nodes (operands, regions, ...) have no special handling.
  if (inst == nullptr) {
    return CloneMap::getOrCloneImpl(context, node, isOperand);
  }

  auto &spvContext = static_cast<spv::Context &>(context);

  // Record the mapping original -> clone so later references resolve to the
  // already-imported node.
  auto redefine = [&](ir::Node newNode) {
    setOverride(node, newNode);
    return newNode;
  };

  // Copy all decoration and OpName/OpMemberName users of `inst` (defaulting
  // to the node being imported) into the target module's annotation/debug
  // regions. For OpName, also register the demangled name (text up to the
  // first '(') with the context's name table.
  auto cloneDecorationsAndDebugs = [&](ir::Node inst = nullptr) {
    if (inst == nullptr) {
      inst = node;
    }

    auto annotations = spvContext.layout.getOrCreateAnnotations(context);
    auto debugs = spvContext.layout.getOrCreateDebugs(context);
    auto value = inst.cast<ir::Value>();
    if (value == nullptr) {
      return;
    }

    for (auto &use : value.getUseList()) {
      if (use.user == ir::spv::OpDecorate ||
          use.user == ir::spv::OpMemberDecorate ||
          use.user == ir::spv::OpDecorationGroup ||
          use.user == ir::spv::OpGroupDecorate ||
          use.user == ir::spv::OpGroupMemberDecorate ||
          use.user == ir::spv::OpDecorateId) {

        annotations.addChild(ir::clone(use.user, context, *this));
      }

      if (use.user == ir::spv::OpName || use.user == ir::spv::OpMemberName) {
        auto cloned = ir::clone(use.user, context, *this);
        debugs.addChild(cloned);
        if (use.user == ir::spv::OpName) {
          auto demangled =
              std::string_view(*cloned.getOperand(1).getAsString());
          if (auto pos = demangled.find('('); pos != std::string::npos) {
            demangled = demangled.substr(0, pos);
          }
          spvContext.setName(cloned.getOperand(0).getAsValue(),
                             std::string(demangled));
        }
      }
    }
  };

  // True when the original node carries OpDecorate/OpMemberDecorate — such
  // types must not be unified with undecorated structurally-equal ones.
  auto hasDecoration = [&] {
    for (auto use : node.staticCast<ir::Value>().getUseList()) {
      if (use.user == ir::spv::OpDecorate ||
          use.user == ir::spv::OpMemberDecorate) {
        return true;
      }
    }

    return false;
  };

  if (inst.getKind() == ir::Kind::Spv) {
    if (inst.getOp() == ir::spv::OpExtInstImport) {
      auto extensions = spvContext.layout.getOrCreateExtInstImports(context);
      auto result = CloneMap::getOrCloneImpl(context, node, isOperand);
      extensions.addChild(result.staticCast<ir::Value>());

      return redefine(result);
    }

    if (ir::spv::isTypeOp(inst.getOp())) {
      // Clone operands first so nested types are imported before lookup.
      std::vector<ir::Operand> operands;

      for (auto &op : inst.getOperands()) {
        operands.push_back(op.clone(context, *this));
      }

      auto typeOp = static_cast<ir::spv::Op>(inst.getOp());

      // Deduplicate structurally where SPIR-V allows it: decorated arrays,
      // runtime arrays and structs must stay distinct; pointers are only
      // unified in Function storage class.
      if ((inst != ir::spv::OpTypeArray || !hasDecoration()) &&
          inst != ir::spv::OpTypeRuntimeArray &&
          inst != ir::spv::OpTypeStruct) {
        if (inst != ir::spv::OpTypePointer ||
            inst.getOperand(0) == ir::spv::StorageClass::Function) {
          if (auto result = spvContext.findGlobal(typeOp, operands)) {
            return redefine(result);
          }
        }
      }

      auto result = spvContext.createGlobal(
          static_cast<ir::spv::Op>(inst.getOp()), operands);
      redefine(result);
      cloneDecorationsAndDebugs();
      return result;
    }
  }

  // Constants (including spec constants) are unified in the global pool.
  if (inst == ir::spv::OpConstant || inst == ir::spv::OpConstantComposite ||
      inst == ir::spv::OpConstantTrue || inst == ir::spv::OpConstantFalse ||
      inst == ir::spv::OpConstantNull || inst == ir::spv::OpConstantSampler ||
      inst == ir::spv::OpSpecConstantTrue ||
      inst == ir::spv::OpSpecConstantFalse || inst == ir::spv::OpSpecConstant ||
      inst == ir::spv::OpSpecConstantComposite) {
    std::vector<ir::Operand> operands;

    for (auto &op : inst.getOperands()) {
      operands.push_back(op.clone(context, *this));
    }

    auto result = spvContext.getOrCreateGlobal(
        static_cast<ir::spv::Op>(inst.getOp()), operands);
    return redefine(result);
  }

  if (isOperand && inst == ir::spv::OpVariable) {
    // NOTE(review): this inner check is redundant — the enclosing condition
    // already established inst == OpVariable.
    if (inst == ir::spv::OpVariable) {
      auto storage = inst.getOperand(1).getAsInt32();
      // Function-local variables are cloned in place, not hoisted.
      if (*storage == int(ir::spv::StorageClass::Function)) {
        return CloneMap::getOrCloneImpl(context, node, isOperand);
      }
    }

    // Module-scope variables are hoisted into the globals region.
    auto globals = spvContext.layout.getOrCreateGlobals(context);
    auto result = CloneMap::getOrCloneImpl(context, node, isOperand);
    globals.addChild(result.staticCast<ir::Instruction>());
    cloneDecorationsAndDebugs();
    return result;
  }

  // NOTE(review): unreachable — OpConstant already returned in the constant
  // branch above; TODO confirm and remove.
  if (inst == ir::spv::OpConstant) {
    auto type = inst.getOperand(0).clone(context, *this);
    return redefine(
        spvContext.getOrCreateConstant(type.getAsValue(), inst.getOperand(1)));
  }

  if (inst == ir::spv::OpFunction) {
    auto functions = spvContext.layout.getOrCreateFunctions(context);

    // Clone the OpFunction header itself and prepend it to the functions
    // region; register the mapping before cloning the body so recursive
    // references resolve.
    auto result = CloneMap::getOrCloneImpl(context, node, isOperand)
                      .staticCast<ir::Value>();
    functions.insertAfter(nullptr, result);
    redefine(result);
    cloneDecorationsAndDebugs();

    ir::Instruction insertPoint = result;

    // Clone the body instruction by instruction, up to and including
    // OpFunctionEnd, preserving order.
    for (auto child : ir::range(inst.getNext())) {
      auto cloned = ir::clone(child, context, *this);
      functions.insertAfter(insertPoint, cloned);
      insertPoint = cloned;
      cloneDecorationsAndDebugs(child);

      if (child == ir::spv::OpFunctionEnd) {
        break;
      }
    }

    return result;
  }

  return CloneMap::getOrCloneImpl(context, node, isOperand);
}
|
||||
|
||||
// Create a fresh region and append an OpLabel to it; returns the label value
// (the region itself is reachable via getParent()).
ir::Value spv::Context::createRegionWithLabel(ir::Location loc) {
  return Builder::createAppend(*this, create<ir::Region>(loc))
      .createSpvLabel(loc);
}
|
||||
|
||||
// Register a debug name for `inst` in the context's name table and emit a
// matching OpName into the debugs region. `name` is copied into the table
// first, then moved into the OpName instruction.
void spv::Context::setName(ir::spv::IdRef inst, std::string name) {
  ns.setNameOf(inst, name);
  auto debugs = Builder::createAppend(*this, layout.getOrCreateDebugs(*this));
  debugs.createSpvName(getUnknownLocation(), inst, std::move(name));
}
|
||||
|
||||
void spv::Context::setConstantName(ir::Value constant) {
|
||||
auto name = getConstantName(constant);
|
||||
if (!name.empty()) {
|
||||
ns.setNameOf(constant, std::move(name));
|
||||
}
|
||||
}
|
||||
|
||||
// Return the unified constant of type `typeValue` holding `value`. Booleans
// map onto the canonical OpConstantTrue/False singletons; all other types go
// through the deduplicated global pool as OpConstant.
ir::Value spv::Context::getOrCreateConstant(ir::Value typeValue,
                                            const ir::Operand &value) {
  if (typeValue == getTypeBool()) {
    // Dereferences unconditionally: passing a non-bool operand with a bool
    // type is a programmer error.
    return *value.getAsBool() ? getTrue() : getFalse();
  }
  return getOrCreateGlobal(ir::spv::OpConstant, {{typeValue, value}});
}
|
||||
|
||||
// Return the cached scalar type for a base type opcode. `width`/`isSigned`
// are only meaningful for int (both) and float (width); any opcode other
// than the four scalar types aborts.
ir::Value spv::Context::getType(ir::spv::Op baseType, int width,
                                bool isSigned) {
  switch (baseType) {
  case ir::spv::OpTypeInt:
    return getTypeInt(width, isSigned);
  case ir::spv::OpTypeFloat:
    return getTypeFloat(width);
  case ir::spv::OpTypeBool:
    return getTypeBool();
  case ir::spv::OpTypeVoid:
    return getTypeVoid();

  default:
    std::abort();
  }
}
|
||||
|
||||
// Materialize the type described by a TypeInfo: scalars directly, vectors and
// arrays by composing their component type with the component count. Any
// other base type aborts.
ir::Value spv::Context::getType(const TypeInfo &info) {
  switch (info.baseType) {
  case ir::spv::OpTypeInt:
  case ir::spv::OpTypeFloat:
  case ir::spv::OpTypeBool:
  case ir::spv::OpTypeVoid:
    return getType(info.baseType, info.componentWidth, info.isSigned);

  case ir::spv::OpTypeVector:
    return getTypeVector(
        getType(info.componentType, info.componentWidth, info.isSigned),
        info.componentsCount);

  case ir::spv::OpTypeArray:
    // Array length must be a constant value, so wrap the count in an
    // immediate 32-bit constant.
    return getTypeArray(
        getType(info.componentType, info.componentWidth, info.isSigned),
        imm32(info.componentsCount));

  default:
    std::abort();
  }
}
|
||||
|
||||
// Give `type` an auto-generated debug name when one can be derived from its
// structure; types with no naming scheme are left unnamed.
void spv::Context::setTypeName(ir::Value type) {
  auto generated = getTypeName(type);
  if (generated.empty()) {
    return;
  }

  ns.setNameOf(type, std::move(generated));
}
|
||||
|
||||
// Look up an existing global (type/constant) instruction with the given
// opcode whose operand list matches `operands` exactly; returns nullptr when
// no structurally equal global has been created yet.
ir::Value
spv::Context::findGlobal(ir::spv::Op op,
                         std::span<const ir::Operand> operands) const {
  auto bucket = globals.find(ir::getInstructionId(ir::Kind::Spv, op));
  if (bucket == globals.end()) {
    return nullptr;
  }

  for (auto candidate : bucket->second) {
    if (candidate.getOperandCount() != operands.size()) {
      continue;
    }

    // Element-wise operand comparison against the requested list.
    std::size_t index = 0;
    bool identical = true;
    for (auto &candidateOperand : candidate.getOperands()) {
      if (candidateOperand != operands[index]) {
        identical = false;
        break;
      }
      ++index;
    }

    if (identical) {
      return candidate;
    }
  }

  return nullptr;
}
|
||||
|
||||
// Append a new global (type or constant) instruction to the globals region,
// register it in the dedup pool, and assign it an auto-generated debug name.
ir::Value
spv::Context::createGlobal(ir::spv::Op op,
                           std::span<const ir::Operand> operands) {
  auto builder = Builder::createAppend(*this, layout.getOrCreateGlobals(*this));
  auto result =
      builder.createValue(getUnknownLocation(), ir::Kind::Spv, op, operands);

  // NOTE(review): findGlobal keys the pool with
  // getInstructionId(ir::Kind::Spv, op) while this uses the single-argument
  // overload — presumably equivalent for spv ops; TODO confirm.
  globals[ir::getInstructionId(op)].push_back(result);
  if (ir::spv::isTypeOp(op)) {
    setTypeName(result);
  } else {
    setConstantName(result);
  }
  return result;
}
|
||||
|
||||
// Return the deduplicated global with this opcode and operand list, creating
// and registering it on first use.
ir::Value spv::Context::getOrCreateGlobal(
    ir::spv::Op op, std::span<const ir::Operand> operands) {
  auto existing = findGlobal(op, operands);
  return existing ? existing : createGlobal(op, operands);
}
|
||||
|
||||
// Convert an operand into an ir::Value: value operands pass through, literal
// operands are wrapped in a constant of `type` (or a default scalar type when
// `type` is null: s32, s64, f32, f64, bool). Aborts on an operand that is
// neither a value nor one of the supported literals.
ir::Value spv::Context::getOperandValue(const ir::Operand &op,
                                        ir::Value type) {
  if (auto result = op.getAsValue()) {
    return result;
  }

  // Build a constant with the caller's type when provided, otherwise the
  // literal's natural type.
  auto createConstant = [&](auto value, ir::Value expType) {
    return getOrCreateConstant(type ? type : expType, value);
  };

  if (auto result = op.getAsInt32()) {
    return createConstant(*result, getTypeSInt32());
  }

  if (auto result = op.getAsInt64()) {
    return createConstant(*result, getTypeSInt64());
  }

  if (auto result = op.getAsFloat()) {
    return createConstant(*result, getTypeFloat32());
  }

  if (auto result = op.getAsDouble()) {
    return createConstant(*result, getTypeFloat64());
  }

  if (auto result = op.getAsBool()) {
    return createConstant(*result, getTypeBool());
  }

  std::abort();
}
|
||||
|
||||
// Lazily create the gl_PerVertex output block (Position, PointSize,
// ClipDistance[1], CullDistance[1]) with the standard Block/BuiltIn
// decorations; idempotent once `perVertex` is set.
void spv::Context::createPerVertex() {
  if (perVertex != nullptr) {
    return;
  }

  auto loc = rootLocation;

  auto float32 = getTypeFloat32();
  // Single-element float arrays back ClipDistance/CullDistance.
  auto arr1Float = getTypeArray(float32, getIndex(1));
  auto float32x4 = getTypeVector(float32, 4);

  auto gl_PerVertexStructT =
      getTypeStruct(float32x4, float32, arr1Float, arr1Float);
  auto gl_PerVertexPtrT =
      getTypePointer(ir::spv::StorageClass::Output, gl_PerVertexStructT);
  auto annotations =
      Builder::createAppend(*this, layout.getOrCreateAnnotations(*this));

  // Decorate the struct as a Block and each member with its builtin role,
  // matching the implicit GLSL gl_PerVertex layout.
  annotations.createSpvDecorate(loc, gl_PerVertexStructT,
                                ir::spv::Decoration::Block());
  annotations.createSpvMemberDecorate(
      loc, gl_PerVertexStructT, 0,
      ir::spv::Decoration::BuiltIn(ir::spv::BuiltIn::Position));
  annotations.createSpvMemberDecorate(
      loc, gl_PerVertexStructT, 1,
      ir::spv::Decoration::BuiltIn(ir::spv::BuiltIn::PointSize));
  annotations.createSpvMemberDecorate(
      loc, gl_PerVertexStructT, 2,
      ir::spv::Decoration::BuiltIn(ir::spv::BuiltIn::ClipDistance));
  annotations.createSpvMemberDecorate(
      loc, gl_PerVertexStructT, 3,
      ir::spv::Decoration::BuiltIn(ir::spv::BuiltIn::CullDistance));

  auto globals = Builder::createAppend(*this, layout.getOrCreateGlobals(*this));

  perVertex = globals.createSpvVariable(loc, gl_PerVertexPtrT,
                                        ir::spv::StorageClass::Output);
}
|
||||
|
||||
// Create a buffer variable wrapping `structType`, bound at the given
// descriptor set and binding, decorated DescriptorSet/Binding/Uniform.
// NOTE(review): despite the name, the variable uses the StorageBuffer
// storage class (SSBO-style) rather than Uniform — TODO confirm intended.
ir::Value spv::Context::createUniformBuffer(int descriptorSet,
                                            int binding,
                                            ir::Value structType) {
  auto globals = Builder::createAppend(*this, layout.getOrCreateGlobals(*this));
  auto annotations =
      Builder::createAppend(*this, layout.getOrCreateAnnotations(*this));
  auto loc = getUnknownLocation();

  auto storageClass = ir::spv::StorageClass::StorageBuffer;
  auto blockType = globals.createSpvTypePointer(loc, storageClass, structType);

  auto blockVariable = globals.createSpvVariable(loc, blockType, storageClass);

  annotations.createSpvDecorate(
      loc, blockVariable, ir::spv::Decoration::DescriptorSet(descriptorSet));
  annotations.createSpvDecorate(loc, blockVariable,
                                ir::spv::Decoration::Binding(binding));
  annotations.createSpvDecorate(loc, blockVariable,
                                ir::spv::Decoration::Uniform());
  return blockVariable;
}
|
||||
|
||||
// Create a buffer whose payload is a single runtime array of `elementType`:
// builds struct { elementType data[]; } with Block/ArrayStride/Offset
// decorations, then binds it via createUniformBuffer.
ir::Value spv::Context::createRuntimeArrayUniformBuffer(
    int descriptorSet, int binding, ir::Value elementType) {
  auto globals = Builder::createAppend(*this, layout.getOrCreateGlobals(*this));
  auto annotations =
      Builder::createAppend(*this, layout.getOrCreateAnnotations(*this));
  auto loc = getUnknownLocation();

  auto element = globals.createSpvTypeRuntimeArray(loc, elementType);
  // Stride is derived from the element's bit width (width() is in bits).
  annotations.createSpvDecorate(
      loc, element,
      ir::spv::Decoration::ArrayStride(
          shader::spv::getTypeInfo(elementType).width() / 8));

  auto blockStruct = globals.createSpvTypeStruct(loc, {{element}});
  annotations.createSpvDecorate(loc, blockStruct, ir::spv::Decoration::Block());
  annotations.createSpvMemberDecorate(loc, blockStruct, 0,
                                      ir::spv::Decoration::Offset(0));
  return createUniformBuffer(descriptorSet, binding, blockStruct);
}
|
||||
|
||||
// Return the cached float32x4 Output-storage variable for location `index`,
// creating and decorating it (Location + debug name "outputN") on first use.
ir::Value spv::Context::createOutput(ir::Location loc, int index) {
  auto &result = outputs[index];

  if (result == nullptr) {
    auto floatType = getTypeFloat32();
    auto float32x4Type = getTypeVector(floatType, 4);
    auto variableType =
        getTypePointer(ir::spv::StorageClass::Output, float32x4Type);

    auto globals =
        Builder::createAppend(*this, layout.getOrCreateGlobals(*this));
    auto annotations =
        Builder::createAppend(*this, layout.getOrCreateAnnotations(*this));
    // NOTE(review): `debugs` is never used below; its only effect here is
    // forcing the debugs region to exist via getOrCreateDebugs — TODO
    // confirm intended.
    auto debugs = Builder::createAppend(*this, layout.getOrCreateDebugs(*this));

    auto variable = globals.createSpvVariable(loc, variableType,
                                              ir::spv::StorageClass::Output);

    annotations.createSpvDecorate(loc, variable,
                                  ir::spv::Decoration::Location(index));

    setName(variable, "output" + std::to_string(index));
    result = variable;
  }

  return result;
}
|
||||
|
||||
// Return the cached float32x4 Input-storage variable for location `index`,
// creating and decorating it (Location + debug name "inputN") on first use.
ir::Value spv::Context::createInput(ir::Location loc, int index) {
  auto &result = inputs[index];

  if (result == nullptr) {
    auto floatType = getTypeFloat32();
    auto float32x4Type = getTypeVector(floatType, 4);
    auto variableType =
        getTypePointer(ir::spv::StorageClass::Input, float32x4Type);

    auto globals =
        Builder::createAppend(*this, layout.getOrCreateGlobals(*this));
    auto annotations =
        Builder::createAppend(*this, layout.getOrCreateAnnotations(*this));
    // NOTE(review): `debugs` is unused other than forcing the debugs region
    // to exist — TODO confirm intended (same pattern as createOutput).
    auto debugs = Builder::createAppend(*this, layout.getOrCreateDebugs(*this));

    auto variable = globals.createSpvVariable(loc, variableType,
                                              ir::spv::StorageClass::Input);

    annotations.createSpvDecorate(loc, variable,
                                  ir::spv::Decoration::Location(index));

    setName(variable, "input" + std::to_string(index));
    result = variable;
  }

  return result;
}
|
||||
|
||||
// Return the cached fragment-attribute input variable for `attrId`, creating
// it on first use. With `perVertex`, the variable is a 3-element array of
// float32x4 decorated PerVertexKHR (one value per provoking vertex);
// otherwise a single float32x4, optionally decorated Flat.
// NOTE(review): the cache is the same `inputs` map used by createInput, so an
// attrId colliding with an input location returns the other variable — TODO
// confirm the two are never used in the same shader stage.
ir::Value spv::Context::createAttr(ir::Location loc, int attrId,
                                   bool perVertex, bool flat) {
  auto &result = inputs[attrId];

  if (result == nullptr) {
    auto floatType = getTypeFloat32();
    auto float32x4Type = getTypeVector(floatType, 4);

    auto attrArrayType = getTypeArray(float32x4Type, imm32(3));
    auto variableType =
        getTypePointer(ir::spv::StorageClass::Input,
                       perVertex ? attrArrayType : float32x4Type);

    auto globals =
        Builder::createAppend(*this, layout.getOrCreateGlobals(*this));
    auto annotations =
        Builder::createAppend(*this, layout.getOrCreateAnnotations(*this));
    // NOTE(review): `debugs` is unused other than forcing the debugs region
    // to exist — same pattern as createInput/createOutput.
    auto debugs = Builder::createAppend(*this, layout.getOrCreateDebugs(*this));

    auto variable = globals.createSpvVariable(loc, variableType,
                                              ir::spv::StorageClass::Input);

    annotations.createSpvDecorate(loc, variable,
                                  ir::spv::Decoration::Location(attrId));

    if (perVertex) {
      annotations.createSpvDecorate(loc, variable,
                                    ir::spv::Decoration::PerVertexKHR());
    } else if (flat) {
      annotations.createSpvDecorate(loc, variable, ir::spv::Decoration::Flat());
    }
    setName(variable, "attr" + std::to_string(attrId));
    result = variable;
  }

  return result;
}
|
||||
71
rpcsx-gpu2/lib/gcn-shader/src/SpvTypeInfo.cpp
Normal file
71
rpcsx-gpu2/lib/gcn-shader/src/SpvTypeInfo.cpp
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
#include "SpvTypeInfo.hpp"
|
||||
#include "dialect.hpp"
|
||||
|
||||
using namespace shader;
|
||||
|
||||
// Summarize a SPIR-V type value into a flat TypeInfo (base opcode, component
// type/width in bits, component count, signedness). Composite types recurse
// one level into their element type; unhandled types yield a zero-sized info
// carrying only the opcode.
shader::spv::TypeInfo shader::spv::getTypeInfo(ir::Value type) {
  if (type == ir::spv::OpTypeBool) {
    return {
        .baseType = ir::spv::OpTypeBool,
        .componentWidth = 1,
        .componentsCount = 1,
    };
  }

  if (type == ir::spv::OpTypeInt) {
    return {
        .baseType = ir::spv::OpTypeInt,
        .componentWidth = *type.getOperand(0).getAsInt32(),
        .componentsCount = 1,
        .isSigned = *type.getOperand(1).getAsInt32() ? true : false,
    };
  }

  if (type == ir::spv::OpTypeFloat) {
    return {
        .baseType = ir::spv::OpTypeFloat,
        .componentWidth = *type.getOperand(0).getAsInt32(),
        .componentsCount = 1,
    };
  }

  if (type == ir::spv::OpTypeVector) {
    auto componentInfo = getTypeInfo(type.getOperand(0).getAsValue());

    return {
        .baseType = ir::spv::OpTypeVector,
        .componentType = componentInfo.baseType,
        .componentWidth = componentInfo.width(),
        .componentsCount = *type.getOperand(1).getAsInt32(),
    };
  }

  if (type == ir::spv::OpTypeArray) {
    auto elementInfo = getTypeInfo(type.getOperand(0).getAsValue());
    auto countOfElements = type.getOperand(1).getAsValue();

    return {
        .baseType = ir::spv::OpTypeArray,
        .componentType = elementInfo.baseType,
        .componentWidth = elementInfo.width(),
        // Assumes the length is an OpConstant with an int32 payload
        // (operand 1) — a spec-constant length would dereference null here;
        // TODO confirm callers never pass such arrays.
        .componentsCount = *countOfElements.getOperand(1).getAsInt32(),
    };
  }

  if (type == ir::spv::OpTypeRuntimeArray) {
    auto elementInfo = getTypeInfo(type.getOperand(0).getAsValue());

    return {
        .baseType = ir::spv::OpTypeRuntimeArray,
        .componentType = elementInfo.baseType,
        .componentWidth = elementInfo.width(),
        .componentsCount = 1,
    };
  }

  // Fallback: record the opcode only; width()/size computations on this
  // result will be zero.
  return {
      .baseType = static_cast<ir::spv::Op>(type.getOp()),
      .componentWidth = 0,
      .componentsCount = 0,
  };
}
|
||||
1230
rpcsx-gpu2/lib/gcn-shader/src/analyze.cpp
Normal file
1230
rpcsx-gpu2/lib/gcn-shader/src/analyze.cpp
Normal file
File diff suppressed because it is too large
Load diff
688
rpcsx-gpu2/lib/gcn-shader/src/eval.cpp
Normal file
688
rpcsx-gpu2/lib/gcn-shader/src/eval.cpp
Normal file
|
|
@ -0,0 +1,688 @@
|
|||
#include "eval.hpp"
|
||||
#include "dialect.hpp"
|
||||
#include "ir.hpp"
|
||||
#include <cmath>
|
||||
#include <concepts>
|
||||
|
||||
using namespace shader;
|
||||
|
||||
// Evaluates the visit-condition functor `Cond` against default-constructed
// values of `Args...` at compile time. A `void` condition means
// "visit unconditionally".
template <typename Cond, typename... Args> consteval bool testVisitCond() {
  if constexpr (std::is_same_v<Cond, void>) {
    return true;
  } else {
    // Cond is a stateless (typically lambda-derived) predicate type.
    return Cond{}(std::remove_cvref_t<Args>{}...);
  }
};
|
||||
|
||||
// Overload keyed by a variant-alternative index `U`: indices outside the
// storage of eval::Value never match; otherwise the condition is tested
// against a default-constructed value of the U-th storage alternative.
template <typename Cond, std::size_t U> consteval bool testVisitCond() {
  if constexpr (U >= eval::Value::StorageSize) {
    return false;
  } else if constexpr (std::is_same_v<Cond, void>) {
    return true;
  } else {
    return Cond{}(std::variant_alternative_t<U, eval::Value::Storage>{});
  }
};
|
||||
|
||||
template <typename Cond = void, size_t I = 0>
|
||||
constexpr eval::Value visitImpl(const eval::Value &variant, auto &&fn) {
|
||||
|
||||
#define DEFINE_CASE(N) \
|
||||
case I + N: \
|
||||
if constexpr (testVisitCond<Cond, I + N>()) { \
|
||||
return std::forward<decltype(fn)>(fn)(std::get<I + N>(variant.storage)); \
|
||||
} else { \
|
||||
return {}; \
|
||||
}
|
||||
|
||||
switch (variant.storage.index()) {
|
||||
DEFINE_CASE(0);
|
||||
DEFINE_CASE(1);
|
||||
DEFINE_CASE(2);
|
||||
DEFINE_CASE(3);
|
||||
DEFINE_CASE(4);
|
||||
DEFINE_CASE(5);
|
||||
DEFINE_CASE(6);
|
||||
DEFINE_CASE(7);
|
||||
DEFINE_CASE(8);
|
||||
DEFINE_CASE(9);
|
||||
DEFINE_CASE(10);
|
||||
DEFINE_CASE(11);
|
||||
DEFINE_CASE(12);
|
||||
DEFINE_CASE(13);
|
||||
DEFINE_CASE(14);
|
||||
DEFINE_CASE(15);
|
||||
DEFINE_CASE(16);
|
||||
DEFINE_CASE(17);
|
||||
DEFINE_CASE(18);
|
||||
DEFINE_CASE(19);
|
||||
DEFINE_CASE(20);
|
||||
DEFINE_CASE(21);
|
||||
DEFINE_CASE(22);
|
||||
DEFINE_CASE(23);
|
||||
DEFINE_CASE(24);
|
||||
DEFINE_CASE(25);
|
||||
DEFINE_CASE(26);
|
||||
DEFINE_CASE(27);
|
||||
DEFINE_CASE(28);
|
||||
DEFINE_CASE(29);
|
||||
DEFINE_CASE(30);
|
||||
DEFINE_CASE(31);
|
||||
DEFINE_CASE(32);
|
||||
DEFINE_CASE(33);
|
||||
DEFINE_CASE(34);
|
||||
DEFINE_CASE(35);
|
||||
DEFINE_CASE(36);
|
||||
DEFINE_CASE(37);
|
||||
DEFINE_CASE(38);
|
||||
DEFINE_CASE(39);
|
||||
DEFINE_CASE(40);
|
||||
DEFINE_CASE(41);
|
||||
DEFINE_CASE(42);
|
||||
DEFINE_CASE(43);
|
||||
DEFINE_CASE(44);
|
||||
DEFINE_CASE(45);
|
||||
DEFINE_CASE(46);
|
||||
DEFINE_CASE(47);
|
||||
DEFINE_CASE(48);
|
||||
DEFINE_CASE(49);
|
||||
DEFINE_CASE(50);
|
||||
DEFINE_CASE(51);
|
||||
DEFINE_CASE(52);
|
||||
DEFINE_CASE(53);
|
||||
DEFINE_CASE(54);
|
||||
DEFINE_CASE(55);
|
||||
DEFINE_CASE(56);
|
||||
DEFINE_CASE(57);
|
||||
DEFINE_CASE(58);
|
||||
DEFINE_CASE(59);
|
||||
DEFINE_CASE(60);
|
||||
DEFINE_CASE(61);
|
||||
DEFINE_CASE(62);
|
||||
DEFINE_CASE(63);
|
||||
}
|
||||
#undef DEFINE_CASE
|
||||
|
||||
constexpr auto NextIndex = I + 64;
|
||||
|
||||
if constexpr (NextIndex < eval::Value::StorageSize) {
|
||||
return visitImpl<Cond, NextIndex>(std::forward<decltype(fn)>(fn),
|
||||
std::forward<decltype(variant)>(variant));
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
// Invokes `cb` with a default-constructed C++ value whose type corresponds to
// the SPIR-V scalar type instruction `type` (OpTypeBool / OpTypeInt /
// OpTypeFloat). Unsupported widths and non-scalar types yield an empty Value.
// `Cond` optionally filters which C++ types are actually invoked.
template <typename Cond = void, typename Cb>
constexpr eval::Value visitScalarType(ir::Value type, Cb &&cb)
  requires requires {
    { std::forward<Cb>(cb)(int{}) } -> std::same_as<eval::Value>;
  }
{
  // Gate every invocation through the compile-time condition.
  auto invoke = [&](auto type) -> eval::Value {
    if constexpr (testVisitCond<Cond, std::remove_cvref_t<decltype(type)>>()) {
      return std::forward<Cb>(cb)(type);
    }
    return {};
  };

  if (type == ir::spv::OpTypeBool) {
    return invoke(bool{});
  }

  if (type == ir::spv::OpTypeInt) {
    // OpTypeInt operands: [0] = bit width, [1] = signedness flag.
    auto isSigned = *type.getOperand(1).getAsInt32();

    switch (*type.getOperand(0).getAsInt32()) {
    case 8:
      if (isSigned) {
        return invoke(std::int8_t{});
      }
      return invoke(std::uint8_t{});

    case 16:
      if (isSigned) {
        return invoke(std::int16_t{});
      }
      return invoke(std::uint16_t{});

    case 32:
      if (isSigned) {
        return invoke(std::int32_t{});
      }
      return invoke(std::uint32_t{});

    case 64:
      if (isSigned) {
        return invoke(std::int64_t{});
      }
      return invoke(std::uint64_t{});
    }

    // Unsupported integer width.
    return {};
  }

  if (type == ir::spv::OpTypeFloat) {
    // OpTypeFloat operand [0] = bit width.
    switch (*type.getOperand(0).getAsInt32()) {
    case 16:
      return invoke(shader::float16_t{});

    case 32:
      return invoke(shader::float32_t{});

    case 64:
      return invoke(shader::float64_t{});
    }

    // Unsupported float width.
    return {};
  }

  // Not a scalar type instruction.
  return {};
}
|
||||
|
||||
// Extends visitScalarType to OpTypeVector: invokes `cb` with a
// default-constructed shader::Vector<T, N> for component counts 2..4.
// Other composite types (arrays, structs, ...) are not handled and yield an
// empty Value.
template <typename Cond = void, typename Cb>
constexpr eval::Value visitType(ir::Value type, Cb &&cb)
  requires requires {
    { std::forward<Cb>(cb)(int{}) } -> std::same_as<eval::Value>;
  }
{
  if (type == ir::spv::OpTypeInt || type == ir::spv::OpTypeFloat ||
      type == ir::spv::OpTypeBool) {
    return visitScalarType<Cond>(type, cb);
  }

  // Gate every invocation through the compile-time condition.
  auto invoke = [&](auto type) -> eval::Value {
    if constexpr (testVisitCond<Cond, std::remove_cvref_t<decltype(type)>>()) {
      return std::forward<Cb>(cb)(type);
    } else {
      return {};
    }
  };

  if (type == ir::spv::OpTypeVector) {
    // OpTypeVector operands: [0] = component type, [1] = component count.
    // The inner visitScalarType is unconditioned; Cond is applied to the
    // resulting vector type via `invoke`.
    switch (*type.getOperand(1).getAsInt32()) {
    case 2:
      return visitScalarType(
          type.getOperand(0).getAsValue(),
          [&]<typename T>(T) { return invoke(shader::Vector<T, 2>{}); });

    case 3:
      return visitScalarType(
          type.getOperand(0).getAsValue(),
          [&]<typename T>(T) { return invoke(shader::Vector<T, 3>{}); });

    case 4:
      return visitScalarType(
          type.getOperand(0).getAsValue(),
          [&]<typename T>(T) { return invoke(shader::Vector<T, 4>{}); });
    }

    // Unsupported component count.
    return {};
  }

  // Unsupported type instruction.
  return {};
}
|
||||
|
||||
// Visits the active alternative of `value`, never invoking `cb` for the
// empty (nullptr_t) alternative. `Cond` optionally narrows the set of
// alternatives that are dispatched; rejected ones produce an empty Value.
template <typename Cond = void, typename Cb>
eval::Value visit(const eval::Value &value, Cb &&cb) {
  using VisitCond = decltype([](auto &&storage) {
    using T = std::remove_cvref_t<decltype(storage)>;
    if constexpr (std::is_same_v<T, std::nullptr_t>) {
      // Empty values are never visited.
      return false;
    } else {
      return testVisitCond<Cond, T>();
    }
  });

  return visitImpl<VisitCond>(value, std::forward<Cb>(cb));
}
|
||||
|
||||
// Conditionally visits `value`: dispatches only when `cond()` is true.
// NOTE(review): `cond()` is evaluated inside `if constexpr`, so `cond` must
// be constexpr-invocable despite being a runtime parameter; this helper
// appears unused in this translation unit — confirm before relying on it.
template <typename Cb>
eval::Value visit2(auto &&cond, const eval::Value &value, Cb &&cb) {
  if constexpr (cond()) {
    return visitImpl(value, std::forward<Cb>(cb));
  } else {
    return {};
  }
}
|
||||
|
||||
// Joint visitation of a runtime value and a SPIR-V type instruction: `cb`
// receives (default-constructed target type, active storage value). Three
// independent filters apply — ValueCond on the storage alternative,
// TypeVisitCond on the target type, and TypeValueVisitCond on the pair.
// Any rejected combination produces an empty Value.
template <typename ValueCond = void, typename TypeVisitCond = void,
          typename TypeValueVisitCond = void, typename Cb>
eval::Value visitWithType(const eval::Value &value, ir::Value type, Cb &&cb) {
  using ValueVisitCond = decltype([](auto storage) {
    if constexpr (std::is_same_v<decltype(storage), std::nullptr_t>) {
      // Empty values are never visited.
      return false;
    } else {
      return testVisitCond<ValueCond, decltype(storage)>();
    }
  });

  return visitImpl<ValueVisitCond>(value, [&](auto &&value) -> eval::Value {
    return visitType<TypeVisitCond>(type, [&](auto type) -> eval::Value {
      if constexpr (testVisitCond<TypeValueVisitCond, decltype(type),
                                  decltype(value)>()) {
        return std::forward<Cb>(cb)(type, value);
      } else {
        return {};
      }
    });
  });
}
|
||||
|
||||
namespace {
// Maps a (possibly composite) storage type to its scalar component type;
// the primary template is the identity for scalars.
template <typename T> struct ComponentTypeImpl {
  using type = T;
};

template <typename T, std::size_t N> struct ComponentTypeImpl<Vector<T, N>> {
  using type = T;
};

template <typename T, std::size_t N>
struct ComponentTypeImpl<std::array<T, N>> {
  using type = T;
};

// std::make_signed lifted component-wise over Vector.
template <typename T> struct MakeSignedImpl {
  using type = std::make_signed_t<T>;
};

template <typename T, std::size_t N> struct MakeSignedImpl<Vector<T, N>> {
  using type = Vector<std::make_signed_t<T>, N>;
};

// std::make_unsigned lifted component-wise over Vector.
template <typename T> struct MakeUnsignedImpl {
  using type = std::make_unsigned_t<T>;
};

template <typename T, std::size_t N> struct MakeUnsignedImpl<Vector<T, N>> {
  using type = Vector<std::make_unsigned_t<T>, N>;
};
} // namespace
|
||||
|
||||
// Convenience aliases over the trait implementations above.
template <typename T> using ComponentType = typename ComponentTypeImpl<T>::type;
template <typename T> using MakeSigned = typename MakeSignedImpl<T>::type;
template <typename T> using MakeUnsigned = typename MakeUnsignedImpl<T>::type;

// Number of components of a storage type: 1 for scalars, N for Vector/array.
template <typename> constexpr std::size_t Components = 1;
template <typename T, std::size_t N>
constexpr std::size_t Components<Vector<T, N>> = N;
template <typename T, std::size_t N>
constexpr std::size_t Components<std::array<T, N>> = N;

// True only for std::array storage (used to exclude arrays from
// component-wise math that is defined for scalars/vectors).
template <typename> constexpr bool IsArray = false;
template <typename T, std::size_t N>
constexpr bool IsArray<std::array<T, N>> = true;
|
||||
|
||||
// Builds a composite value of the SPIR-V type `type` out of `constituents`.
// Returns an empty Value when the type is not composite, the constituent
// count does not match, or any constituent has the wrong component type.
eval::Value
eval::Value::compositeConstruct(ir::Value type,
                                std::span<const eval::Value> constituents) {
  // Only multi-component result types are eligible.
  using Cond =
      decltype([](auto type) { return Components<decltype(type)> > 1; });

  return visitType<Cond>(type, [&](auto resultProto) -> Value {
    using ResultType = decltype(resultProto);
    constexpr std::size_t componentCount = Components<ResultType>;

    // The number of supplied constituents must match the type exactly.
    if (constituents.size() != componentCount) {
      return {};
    }

    ResultType composite;
    std::size_t slot = 0;

    for (const auto &constituent : constituents) {
      auto element = constituent.as<ComponentType<ResultType>>();
      if (!element) {
        // A constituent of the wrong component type poisons the result.
        return {};
      }
      composite[slot++] = *element;
    }

    return composite;
  });
}
|
||||
|
||||
// Extracts the component selected by `index` from a composite value.
// Returns an empty Value for non-composites, non-integer indices, or
// out-of-bounds access.
eval::Value eval::Value::compositeExtract(const Value &index) const {
  // Extraction is only meaningful on multi-component values.
  using Cond =
      decltype([](auto type) { return Components<decltype(type)> > 1; });

  // The index must be a scalar integer; widen it to 64 bits.
  auto widenedIndex = index.zExtScalar();
  if (!widenedIndex) {
    return {};
  }

  return visit<Cond>(
      *this, [element = *widenedIndex](auto &&composite) -> Value {
        using Composite = std::remove_cvref_t<decltype(composite)>;

        if (element >= Components<Composite>) {
          // Out-of-bounds extraction yields an empty value.
          return {};
        }

        return composite[element];
      });
}
|
||||
|
||||
// Component-wise NaN test for floating-point scalars and vectors; produces
// a bool (scalar input) or a bool vector of the same width.
eval::Value eval::Value::isNan() const {
  // Floating-point component type required; arrays are excluded.
  using Cond = decltype([](auto type) {
    return std::is_floating_point_v<ComponentType<decltype(type)>> &&
           !IsArray<decltype(type)>;
  });

  return visit<Cond>(*this, [](auto &&input) -> Value {
    using Input = std::remove_cvref_t<decltype(input)>;
    constexpr std::size_t laneCount = Components<Input>;

    if constexpr (laneCount == 1) {
      return std::isnan(input);
    } else {
      Vector<bool, laneCount> flags;
      for (std::size_t lane = 0; lane < laneCount; ++lane) {
        flags[lane] = std::isnan(input[lane]);
      }
      return flags;
    }
  });
}
|
||||
|
||||
// Component-wise infinity test for floating-point scalars and vectors;
// produces a bool (scalar input) or a bool vector of the same width.
eval::Value eval::Value::isInf() const {
  // Floating-point component type required; arrays are excluded.
  using Cond = decltype([](auto type) {
    return std::is_floating_point_v<ComponentType<decltype(type)>> &&
           !IsArray<decltype(type)>;
  });

  return visit<Cond>(*this, [](auto &&input) -> Value {
    using Input = std::remove_cvref_t<decltype(input)>;
    constexpr std::size_t laneCount = Components<Input>;

    if constexpr (laneCount == 1) {
      return std::isinf(input);
    } else {
      Vector<bool, laneCount> flags;
      for (std::size_t lane = 0; lane < laneCount; ++lane) {
        flags[lane] = std::isinf(input[lane]);
      }
      return flags;
    }
  });
}
|
||||
|
||||
// Component-wise finiteness test for floating-point scalars and vectors;
// produces a bool (scalar input) or a bool vector of the same width.
eval::Value eval::Value::isFinite() const {
  // CONSISTENCY FIX: exclude std::array storage like isNan()/isInf() do;
  // previously this was the only float classifier that accepted arrays.
  using Cond = decltype([](auto type) {
    return std::is_floating_point_v<ComponentType<decltype(type)>> &&
           !IsArray<decltype(type)>;
  });

  return visit<Cond>(*this, [](auto &&value) -> Value {
    constexpr std::size_t N = Components<std::remove_cvref_t<decltype(value)>>;

    if constexpr (N == 1) {
      return std::isfinite(value);
    } else {
      Vector<bool, N> result;
      for (std::size_t i = 0; i < N; ++i) {
        result[i] = std::isfinite(value[i]);
      }
      return result;
    }
  });
}
|
||||
|
||||
// Reinterprets an integral scalar/vector as its unsigned counterpart,
// component-wise. Bools and arrays are not eligible.
eval::Value eval::Value::makeUnsigned() const {
  using Cond = decltype([](auto type) {
    return std::is_integral_v<ComponentType<decltype(type)>> &&
           !std::is_same_v<ComponentType<decltype(type)>, bool> &&
           !IsArray<decltype(type)>;
  });

  return visit<Cond>(*this, [](auto &&input) -> Value {
    using Input = std::remove_cvref_t<decltype(input)>;
    constexpr std::size_t laneCount = Components<Input>;
    using Unsigned = std::make_unsigned_t<ComponentType<Input>>;

    if constexpr (laneCount == 1) {
      return static_cast<Unsigned>(input);
    } else {
      Vector<Unsigned, laneCount> converted;
      for (std::size_t lane = 0; lane < laneCount; ++lane) {
        converted[lane] = static_cast<Unsigned>(input[lane]);
      }
      return converted;
    }
  });
}
|
||||
// Reinterprets an integral scalar/vector as its signed counterpart,
// component-wise. Bools and arrays are not eligible.
eval::Value eval::Value::makeSigned() const {
  using Cond = decltype([](auto type) {
    return std::is_integral_v<ComponentType<decltype(type)>> &&
           !std::is_same_v<ComponentType<decltype(type)>, bool> &&
           !IsArray<decltype(type)>;
  });

  return visit<Cond>(*this, [](auto &&input) -> Value {
    using Input = std::remove_cvref_t<decltype(input)>;
    constexpr std::size_t laneCount = Components<Input>;
    using Signed = std::make_signed_t<ComponentType<Input>>;

    if constexpr (laneCount == 1) {
      return static_cast<Signed>(input);
    } else {
      Vector<Signed, laneCount> converted;
      for (std::size_t lane = 0; lane < laneCount; ++lane) {
        converted[lane] = static_cast<Signed>(input[lane]);
      }
      return converted;
    }
  });
}
|
||||
|
||||
// Logical AND-reduction of a boolean vector: true iff every lane is true.
// Only defined for bool vectors with more than one component.
eval::Value eval::Value::all() const {
  using Cond = decltype([](auto type) {
    return std::is_same_v<ComponentType<decltype(type)>, bool> &&
           (Components<decltype(type)> > 1) && !IsArray<decltype(type)>;
  });

  return visit<Cond>(*this, [](auto &&lanes) {
    constexpr std::size_t laneCount =
        Components<std::remove_cvref_t<decltype(lanes)>>;
    bool conjunction = true;
    for (std::size_t lane = 0; lane < laneCount; ++lane) {
      conjunction = conjunction && lanes[lane];
    }
    return conjunction;
  });
}
|
||||
|
||||
// Logical OR-reduction of a boolean vector: true iff at least one lane is
// true. Only defined for bool vectors with more than one component.
eval::Value eval::Value::any() const {
  using Cond = decltype([](auto type) {
    return std::is_same_v<ComponentType<decltype(type)>, bool> &&
           (Components<decltype(type)> > 1) && !IsArray<decltype(type)>;
  });

  return visit<Cond>(*this, [](auto &&lanes) {
    constexpr std::size_t laneCount =
        Components<std::remove_cvref_t<decltype(lanes)>>;
    bool disjunction = false;
    for (std::size_t lane = 0; lane < laneCount; ++lane) {
      disjunction = disjunction || lanes[lane];
    }
    return disjunction;
  });
}
|
||||
|
||||
// SPIR-V OpSelect semantics: `*this` is a bool condition (scalar or vector,
// not array); picks component-wise between `trueValue` and `falseValue`,
// which must have identical types whose component count matches the
// condition's. Any mismatch yields an empty Value.
eval::Value eval::Value::select(const Value &trueValue,
                                const Value &falseValue) const {
  using Cond = decltype([](auto type) consteval {
    return std::is_same_v<ComponentType<decltype(type)>, bool> &&
           !IsArray<decltype(type)>;
  });

  return visit<Cond>(*this, [&](auto &&cond) -> Value {
    using CondType = std::remove_cvref_t<decltype(cond)>;
    // Operands must have the condition's component count.
    using TrueCond = decltype([](auto type) consteval {
      return Components<decltype(type)> == Components<CondType>;
    });

    return visit<TrueCond>(trueValue, [&](auto &&trueValue) {
      using TrueValue = std::remove_cvref_t<decltype(trueValue)>;
      using FalseCond = decltype([](auto type) {
        return std::is_same_v<TrueValue, std::remove_cvref_t<decltype(type)>>;
      });

      // BUG FIX: FalseCond was previously defined but never passed to
      // visit(); it now prunes mismatched alternatives up front (the inner
      // if constexpr remains as the definitive check).
      return visit<FalseCond>(falseValue, [&](auto &&falseValue) -> Value {
        if constexpr (std::is_same_v<TrueValue, std::remove_cvref_t<
                                                    decltype(falseValue)>>) {
          constexpr std::size_t N = Components<CondType>;

          if constexpr (N == 1) {
            return cond ? trueValue : falseValue;
          } else {
            // BUG FIX: the result was declared Vector<bool, N> regardless of
            // the operand type, truncating every non-bool selection to
            // booleans. The result now has the operands' own type.
            TrueValue result;
            for (std::size_t i = 0; i < N; ++i) {
              result[i] = cond[i] ? trueValue[i] : falseValue[i];
            }
            return result;
          }
        } else {
          return {};
        }
      });
    });
  });
}
|
||||
|
||||
// Integer width conversion (SPIR-V SConvert/UConvert): converts the stored
// non-bool integral scalar/vector to the integral type described by `type`.
// `isSigned` selects sign- vs zero-extension by first casting the source to
// its signed/unsigned counterpart. Mismatched component counts or identical
// types yield an empty Value.
eval::Value eval::Value::iConvert(ir::Value type, bool isSigned) const {
  // Both the source value and the target type must be non-bool integers
  // (scalars or vectors, not arrays).
  using Cond = decltype([](auto type) {
    using Type = std::remove_cvref_t<decltype(type)>;

    return std::is_integral_v<ComponentType<Type>> &&
           !std::is_same_v<bool, ComponentType<Type>> &&
           !IsArray<decltype(type)>;
  });

  // A conversion must actually change the type and preserve the component
  // count.
  using PairCond = decltype([](auto lhs, auto rhs) {
    using Lhs = decltype(lhs);
    using Rhs = decltype(rhs);

    return !std::is_same_v<Lhs, Rhs> && Components<Lhs> == Components<Rhs>;
  });

  return visitWithType<Cond, Cond, PairCond>(
      *this, type, [&](auto type, auto &&value) -> Value {
        using Type = std::remove_cvref_t<decltype(type)>;
        using ValueType = std::remove_cvref_t<decltype(value)>;
        if (isSigned) {
          // Sign-extend: widen through the signed counterpart.
          return static_cast<Type>(static_cast<MakeSigned<ValueType>>(value));
        } else {
          // Zero-extend: widen through the unsigned counterpart.
          return static_cast<Type>(static_cast<MakeUnsigned<ValueType>>(value));
        }
      });
}
|
||||
// Floating-point width conversion (SPIR-V FConvert): converts the stored
// float scalar/vector to the float type described by `type`. Mismatched
// component counts or identical types yield an empty Value.
eval::Value eval::Value::fConvert(ir::Value type) const {
  // Both the source value and the target type must be floating point
  // (scalars or vectors, not arrays).
  using Cond = decltype([](auto type) {
    return std::is_floating_point_v<ComponentType<decltype(type)>> &&
           !IsArray<decltype(type)>;
  });

  // A conversion must actually change the type and preserve the component
  // count.
  using PairCond = decltype([](auto lhs, auto rhs) {
    using Lhs = decltype(lhs);
    using Rhs = decltype(rhs);

    return !std::is_same_v<Lhs, Rhs> && Components<Lhs> == Components<Rhs>;
  });

  // BUG FIX: `Cond` was defined but never passed to visitWithType (it used
  // <void, void, PairCond>), so non-float values/types also satisfied the
  // filter. Mirrors iConvert's use of its own Cond.
  return visitWithType<Cond, Cond, PairCond>(
      *this, type, [&](auto type, auto &&value) -> Value {
        using Type = std::remove_cvref_t<decltype(type)>;
        return static_cast<Type>(value);
      });
}
|
||||
|
||||
// Bit-level reinterpretation (SPIR-V OpBitcast): reinterprets the stored
// object as the type described by `type`. Only size-matching pairs are
// dispatched; anything else yields an empty Value.
eval::Value eval::Value::bitcast(ir::Value type) const {
  // A bitcast is only valid between objects of identical size.
  using SameSizeCond = decltype([](auto dst, auto src) {
    return sizeof(dst) == sizeof(src);
  });

  return visitWithType<void, void, SameSizeCond>(
      *this, type, [](auto dst, auto &&src) -> Value {
        return std::bit_cast<decltype(dst)>(src);
      });
}
|
||||
|
||||
std::optional<std::uint64_t> eval::Value::zExtScalar() const {
|
||||
using Cond = decltype([](auto type) {
|
||||
return std::is_integral_v<ComponentType<decltype(type)>> &&
|
||||
!std::is_same_v<ComponentType<decltype(type)>, bool> &&
|
||||
Components<decltype(type)> == 1 && !IsArray<decltype(type)>;
|
||||
});
|
||||
|
||||
auto result = visit<Cond>(*this, [&](auto value) -> Value {
|
||||
return static_cast<std::uint64_t>(
|
||||
static_cast<MakeUnsigned<decltype(value)>>(value));
|
||||
});
|
||||
|
||||
if (result) {
|
||||
return result.as<std::uint64_t>();
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
std::optional<std::int64_t> eval::Value::sExtScalar() const {
|
||||
using Cond = decltype([](auto type) {
|
||||
return std::is_integral_v<ComponentType<decltype(type)>> &&
|
||||
!std::is_same_v<ComponentType<decltype(type)>, bool> &&
|
||||
Components<decltype(type)> == 1 && !IsArray<decltype(type)>;
|
||||
});
|
||||
|
||||
auto result = visit<Cond>(*this, [&](auto value) -> Value {
|
||||
return static_cast<std::int64_t>(
|
||||
static_cast<MakeSigned<decltype(value)>>(value));
|
||||
});
|
||||
|
||||
if (result) {
|
||||
return result.as<std::int64_t>();
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
// Generates a binary operator on eval::Value. Both operands are visited;
// the requires-expression conditions restrict dispatch to alternative pairs
// for which `lhs OP rhs` is well-formed and convertible to Value, so any
// unsupported combination yields an empty Value instead of a compile error.
#define DEFINE_BINARY_OP(OP) \
  eval::Value eval::Value::operator OP(const Value & rhs) const { \
    using LhsCond = decltype([](auto &&lhs) { \
      return requires { static_cast<Value>(lhs OP rhs); }; \
    }); \
    return visit<LhsCond>(*this, [&]<typename Lhs>(Lhs &&lhs) -> Value { \
      using RhsCond = decltype([](auto &&rhs) { \
        return requires(Lhs lhs) { static_cast<Value>(lhs OP rhs); }; \
      }); \
      return visit<RhsCond>(rhs, [&](auto &&rhs) -> Value { \
        return static_cast<Value>(lhs OP rhs); \
      }); \
    }); \
  }

// Generates a unary operator on eval::Value with the same
// "dispatch only where well-formed" strategy.
#define DEFINE_UNARY_OP(OP) \
  eval::Value eval::Value::operator OP() const { \
    using Cond = decltype([](auto rhs) { \
      return requires { static_cast<Value>(OP rhs); }; \
    }); \
    return visit<Cond>(*this, [&](auto &&rhs) -> Value { \
      return static_cast<Value>(OP rhs); \
    }); \
  }

// Arithmetic operators.
DEFINE_BINARY_OP(+);
DEFINE_BINARY_OP(-);
DEFINE_BINARY_OP(*);
DEFINE_BINARY_OP(/);
DEFINE_BINARY_OP(%);
// Bitwise and shift operators.
DEFINE_BINARY_OP(&);
DEFINE_BINARY_OP(|);
DEFINE_BINARY_OP(^);
DEFINE_BINARY_OP(>>);
DEFINE_BINARY_OP(<<);
// Logical operators.
DEFINE_BINARY_OP(&&);
DEFINE_BINARY_OP(||);
// Comparison operators.
DEFINE_BINARY_OP(<);
DEFINE_BINARY_OP(>);
DEFINE_BINARY_OP(<=);
DEFINE_BINARY_OP(>=);
DEFINE_BINARY_OP(==);
DEFINE_BINARY_OP(!=);

// Unary operators.
DEFINE_UNARY_OP(-);
DEFINE_UNARY_OP(~);
DEFINE_UNARY_OP(!);
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue