gpu rewrite initial commit

This commit is contained in:
DH 2024-09-25 16:00:55 +03:00
parent 0d4ed51cd9
commit 4cf808facd
133 changed files with 35491 additions and 4 deletions

View file

@ -3,7 +3,8 @@ project(rpcsx)
set(CMAKE_CXX_EXTENSIONS off)
set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD_REQUIRED on)
set(CMAKE_BUILD_RPATH_USE_ORIGIN on)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
@ -41,7 +42,7 @@ function(add_precompiled_vulkan_spirv target)
add_custom_command(
OUTPUT ${outputpath}
COMMAND $<TARGET_FILE:glslang::glslang-standalone> -V --target-env vulkan1.3 --vn "${varname}" -o "${outputpath}" "${CMAKE_CURRENT_SOURCE_DIR}/${input}"
COMMAND $<TARGET_FILE:glslang::glslang-standalone> -V --target-env vulkan1.2 --vn "${varname}" -o "${outputpath}" "${CMAKE_CURRENT_SOURCE_DIR}/${input}"
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${input}" glslang::glslang-standalone
COMMENT "Generating ${outputname}..."
)
@ -65,6 +66,7 @@ add_subdirectory(tools)
add_subdirectory(orbis-kernel)
add_subdirectory(rpcsx-os)
add_subdirectory(rpcsx-gpu)
add_subdirectory(rpcsx-gpu2)
add_subdirectory(hw/amdgpu)
add_subdirectory(rx)

View file

@ -1,6 +1,6 @@
#version 450
layout (triangles) in;
layout (triangles, invocations = 1) in;
layout (triangle_strip, max_vertices = 4) out;
void main(void)

36
rpcsx-gpu2/CMakeLists.txt Normal file
View file

@ -0,0 +1,36 @@
# Build configuration for the rewritten GPU emulator (rpcsx-gpu2).

# GLFW provides the presentation window / Vulkan surface.
find_package(glfw3 3.3 REQUIRED)

# Compile the GLSL sources to embedded SPIR-V headers at build time.
# NOTE(review): the target is named rpcsx-gpu-shaders (without the "2") —
# confirm this does not collide with a same-named target in rpcsx-gpu.
add_precompiled_vulkan_spirv(rpcsx-gpu-shaders
  shaders/fill_red.frag.glsl
  shaders/flip.frag.glsl
  shaders/flip.vert.glsl
  shaders/rect_list.geom.glsl
)

add_executable(rpcsx-gpu2
  Cache.cpp
  main.cpp
  Device.cpp
  Pipe.cpp
  Registers.cpp
  Renderer.cpp
)

# PUBLIC so that anything linking rpcsx-gpu2 inherits the GPU stack
# (bridge, tiler, GCN shader translator, gnm helpers).
target_link_libraries(rpcsx-gpu2
  PUBLIC
  rpcsx-gpu-shaders
  amdgpu::bridge
  rx
  gcn-shader
  glfw
  amdgpu::tiler::cpu
  amdgpu::tiler::vulkan
  rdna-semantic-spirv
  gnm::vulkan
  gnm
)

install(TARGETS rpcsx-gpu2 RUNTIME DESTINATION bin)
# Keep the binary in a common bin/ directory for local runs.
set_target_properties(rpcsx-gpu2 PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
add_subdirectory(lib)

1109
rpcsx-gpu2/Cache.cpp Normal file

File diff suppressed because it is too large Load diff

333
rpcsx-gpu2/Cache.hpp Normal file
View file

@ -0,0 +1,333 @@
#pragma once

#include "Pipe.hpp"
#include "amdgpu/tiler.hpp"
#include "gnm/constants.hpp"
#include "rx/die.hpp"
#include "shader/Access.hpp"
#include "shader/GcnConverter.hpp"

#include <algorithm>
#include <array>
#include <cstdint>
#include <map>
#include <memory>
#include <mutex>
#include <rx/MemoryTable.hpp>
#include <shader/gcn.hpp>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace amdgpu {
using Access = shader::Access;
// Cache key for a translated shader: guest code address plus the GCN stage
// and the conversion environment it was translated under.
struct ShaderKey {
  std::uint64_t address;            // guest address of the shader code
  shader::gcn::Stage stage;         // vertex/pixel/compute/... stage
  shader::gcn::Environment env;     // translation-time environment (user SGPRs etc.)
};
// Cache key describing a guest image resource and the sub-range of it that
// is being accessed. createFrom() builds a key from a gnm::TBuffer
// (texture descriptor).
struct ImageKey {
  std::uint64_t address;               // guest base address of the image data
  gnm::TextureType type;               // 1D/2D/3D/cube/array dimensionality
  gnm::DataFormat dfmt;                // data format (channel layout/bits)
  gnm::NumericFormat nfmt;             // numeric interpretation (unorm/float/...)
  TileMode tileMode = {};              // GCN tiling mode of the surface
  VkOffset3D offset = {};              // origin of the accessed region
  VkExtent3D extent = {1, 1, 1};       // size of the accessed region
  std::uint32_t pitch = 1;             // row pitch in texels
  unsigned baseMipLevel = 0;
  unsigned mipCount = 1;
  unsigned baseArrayLayer = 0;
  unsigned arrayLayerCount = 1;
  bool pow2pad = false;                // surface padded to power-of-two dims

  static ImageKey createFrom(const gnm::TBuffer &tbuffer);
};
// Cache key for an image view: the underlying image key plus the component
// swizzle applied when sampling.
struct ImageViewKey : ImageKey {
  gnm::Swizzle R = gnm::Swizzle::R;
  gnm::Swizzle G = gnm::Swizzle::G;
  gnm::Swizzle B = gnm::Swizzle::B;
  gnm::Swizzle A = gnm::Swizzle::A;

  static ImageViewKey createFrom(const gnm::TBuffer &tbuffer);
};
// Cache key for a Vulkan sampler, mirroring the VkSamplerCreateInfo fields
// derived from a gnm S# sampler descriptor. The defaulted three-way
// comparison makes the key usable in the ordered std::map sampler cache.
struct SamplerKey {
  VkFilter magFilter;
  VkFilter minFilter;
  VkSamplerMipmapMode mipmapMode;
  VkSamplerAddressMode addressModeU;
  VkSamplerAddressMode addressModeV;
  VkSamplerAddressMode addressModeW;
  float mipLodBias;
  float maxAnisotropy;
  VkCompareOp compareOp;
  float minLod;
  float maxLod;
  VkBorderColor borderColor;
  bool anisotropyEnable;
  bool compareEnable;
  bool unnormalizedCoordinates;

  static SamplerKey createFrom(const gnm::SSampler &sampler);

  auto operator<=>(const SamplerKey &other) const = default;
};
// Per-VM GPU resource cache. Owns the Vulkan descriptor-set layouts,
// pipeline layouts and descriptor pools, and maps guest memory ranges to
// cached buffers, images, shaders and samplers. All access to cached
// resources goes through short-lived Tag objects, which record which
// entries a submission acquired so their lifetime spans GPU execution.
struct Cache {
  // Graphics stages in descriptor-set order: the index of a stage in this
  // array is also its descriptor-set index (see getStageIndex).
  static constexpr std::array kGraphicsStages = {
      VK_SHADER_STAGE_VERTEX_BIT,
      VK_SHADER_STAGE_GEOMETRY_BIT,
      VK_SHADER_STAGE_FRAGMENT_BIT,
      VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
      VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
  };

  // Binding slots within each set. Sampled images are encoded as
  // VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE + dim * 1000 so 1D/2D/3D images get
  // distinct bindings (decoded by getDescriptorBinding).
  static constexpr std::array kDescriptorBindings = {
      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
      VK_DESCRIPTOR_TYPE_SAMPLER,
      VkDescriptorType(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE + 1 * 1000),
      VkDescriptorType(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE + 2 * 1000),
      VkDescriptorType(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE + 3 * 1000),
      VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
  };

  // Descriptor-set index for a graphics stage, or -1 if the stage is not
  // part of the graphics pipeline.
  static constexpr int getStageIndex(VkShaderStageFlagBits stage) {
    auto it = std::find(kGraphicsStages.begin(), kGraphicsStages.end(), stage);
    if (it == kGraphicsStages.end()) {
      return -1;
    }
    return it - kGraphicsStages.begin();
  }

  // Binding index for a descriptor type (with the dim * 1000 encoding for
  // sampled images), or -1 if the type has no binding.
  static constexpr int getDescriptorBinding(VkDescriptorType type, int dim = 0) {
    auto it =
        std::find(kDescriptorBindings.begin(), kDescriptorBindings.end(), type + dim * 1000);
    if (it == kDescriptorBindings.end()) {
      return -1;
    }
    return it - kDescriptorBindings.begin();
  }

  // Monotonically increasing synchronization token; see createTag.
  enum class TagId : std::uint64_t {};
  struct Entry; // cached resource entry, defined in Cache.cpp

  // NOTE(review): there is also a private mVmIm below that Tag::getVmId
  // reads; Device assigns this public vmId after construction. Confirm the
  // two fields are kept in sync (likely a typo for mVmId).
  int vmId = -1;

  // Lightweight handles returned to the renderer. They do not own the
  // underlying Vulkan objects; ownership stays with the cache Entry.
  struct Shader {
    VkShaderEXT handle;
    shader::gcn::ShaderInfo *info;
    VkShaderStageFlagBits stage;
  };

  struct Sampler {
    VkSampler handle;
  };

  struct Buffer {
    VkBuffer handle;
    std::uint64_t offset;        // offset of the range inside handle
    std::uint64_t deviceAddress; // GPU device address of the range
    TagId tagId;
    std::byte *data;             // host-visible pointer to the range
  };

  struct IndexBuffer {
    VkBuffer handle;
    std::uint64_t offset;
    std::uint32_t indexCount;
    gnm::PrimitiveType primType;
    gnm::IndexType indexType;
  };

  struct Image {
    VkImage handle;
  };

  struct ImageView {
    VkImageView handle;
    VkImage imageHandle;
  };

  // Move-only accessor for one unit of work against the cache. Keeps the
  // acquired entries and any descriptor sets it allocated alive until
  // release()/destruction.
  class Tag {
    Cache *mParent = nullptr;
    Scheduler *mScheduler = nullptr;
    TagId mTagId{};
    std::vector<std::shared_ptr<Entry>> mAcquiredResources;
    std::vector<std::array<VkDescriptorSet, kGraphicsStages.size()>>
        mGraphicsDescriptorSets;
    std::vector<VkDescriptorSet> mComputeDescriptorSets;

  public:
    Tag() = default;
    Tag(Cache *parent, Scheduler &scheduler, TagId id)
        : mParent(parent), mScheduler(&scheduler), mTagId(id) {}
    Tag(const Tag &) = delete;
    // Moves are implemented via swap; the moved-from Tag releases our old
    // state when it is destroyed.
    Tag(Tag &&other) { other.swap(*this); }
    Tag &operator=(Tag &&other) {
      other.swap(*this);
      return *this;
    }

    // Submits the scheduler's pending work and blocks until it completes.
    // Precondition: this Tag was created via Cache::createTag (mScheduler
    // is non-null); a default-constructed Tag must not call this.
    void submitAndWait() {
      mScheduler->submit();
      mScheduler->wait();
    }

    ~Tag() { release(); }

    // Read operations synchronize against the previous tag; writes use the
    // tag itself (tags are allocated two apart, see mNextTagId).
    TagId getReadId() const { return TagId{std::uint64_t(mTagId) - 1}; }
    TagId getWriteId() const { return mTagId; }

    void swap(Tag &other) {
      std::swap(mParent, other.mParent);
      std::swap(mScheduler, other.mScheduler);
      std::swap(mTagId, other.mTagId);
      std::swap(mAcquiredResources, other.mAcquiredResources);
      std::swap(mGraphicsDescriptorSets, other.mGraphicsDescriptorSets);
      std::swap(mComputeDescriptorSets, other.mComputeDescriptorSets);
    }

    Cache *getCache() const { return mParent; }
    Device *getDevice() const { return mParent->mDevice; }
    int getVmId() const { return mParent->mVmIm; }

    // Resource lookup/creation; results stay valid for the Tag's lifetime.
    Shader getShader(const ShaderKey &key,
                     const ShaderKey *dependedKey = nullptr);
    Sampler getSampler(const SamplerKey &key);
    Buffer getBuffer(std::uint64_t address, std::uint64_t size, Access access);
    Buffer getInternalBuffer(std::uint64_t size);
    IndexBuffer getIndexBuffer(std::uint64_t address, std::uint32_t indexCount,
                               gnm::PrimitiveType primType,
                               gnm::IndexType indexType);
    Image getImage(const ImageKey &key, Access access);
    ImageView getImageView(const ImageViewKey &key, Access access);

    // Guest-memory access through the cache (keeps caches coherent).
    void readMemory(void *target, std::uint64_t address, std::uint64_t size);
    void writeMemory(const void *source, std::uint64_t address,
                     std::uint64_t size);
    int compareMemory(const void *source, std::uint64_t address,
                      std::uint64_t size);

    // Returns acquired entries and descriptor sets to the cache.
    void release();

    VkPipelineLayout getGraphicsPipelineLayout() const {
      return getCache()->getGraphicsPipelineLayout();
    }

    VkPipelineLayout getComputePipelineLayout() const {
      return getCache()->getComputePipelineLayout();
    }

    // Allocates one descriptor set per graphics stage; recycled on release.
    std::array<VkDescriptorSet, kGraphicsStages.size()>
    createGraphicsDescriptorSets() {
      auto result = getCache()->createGraphicsDescriptorSets();
      mGraphicsDescriptorSets.push_back(result);
      return result;
    }

    VkDescriptorSet createComputeDescriptorSet() {
      auto result = getCache()->createComputeDescriptorSet();
      mComputeDescriptorSets.push_back(result);
      return result;
    }

    std::shared_ptr<Entry> findShader(const ShaderKey &key,
                                      const ShaderKey *dependedKey = nullptr);
  };

  Cache(Device *device, int vmId);
  ~Cache();

  // Creates a new Tag bound to the given scheduler with a fresh TagId.
  Tag createTag(Scheduler &scheduler);

  vk::Buffer &getMemoryTableBuffer() { return mMemoryTableBuffer; }
  vk::Buffer &getGdsBuffer() { return mGdsBuffer; }

  // Display frame-buffer registration (up to 10 slots, see mFrameBuffers).
  void addFrameBuffer(Scheduler &scheduler, int index, std::uint64_t address,
                      std::uint32_t width, std::uint32_t height, int format,
                      TileMode tileMode);
  void removeFrameBuffer(Scheduler &scheduler, int index);
  VkImage getFrameBuffer(Scheduler &scheduler, int index);

  // Drops cached data for a guest range (or everything).
  void invalidate(Scheduler &scheduler, std::uint64_t address,
                  std::uint64_t size);
  void invalidate(Scheduler &scheduler) {
    invalidate(scheduler, 0, ~static_cast<std::uint64_t>(0));
  }

  // Writes cached data back to guest memory for a range (or everything).
  void flush(Scheduler &scheduler, std::uint64_t address, std::uint64_t size);
  void flush(Scheduler &scheduler) {
    flush(scheduler, 0, ~static_cast<std::uint64_t>(0));
  }

  const std::array<VkDescriptorSetLayout, kGraphicsStages.size()> &
  getGraphicsDescriptorSetLayouts() const {
    return mGraphicsDescriptorSetLayouts;
  }

  VkDescriptorSetLayout
  getGraphicsDescriptorSetLayout(VkShaderStageFlagBits stage) const {
    int index = getStageIndex(stage);
    rx::dieIf(index < 0, "getGraphicsDescriptorSetLayout: unexpected stage");
    return mGraphicsDescriptorSetLayouts[index];
  }

  VkDescriptorSetLayout getComputeDescriptorSetLayout() const {
    return mComputeDescriptorSetLayout;
  }

  VkPipelineLayout getGraphicsPipelineLayout() const {
    return mGraphicsPipelineLayout;
  }

  VkPipelineLayout getComputePipelineLayout() const {
    return mComputePipelineLayout;
  }

  std::array<VkDescriptorSet, kGraphicsStages.size()>
  createGraphicsDescriptorSets();
  VkDescriptorSet createComputeDescriptorSet();

  // Despite the name, sets are recycled onto a free list, not destroyed.
  void destroyGraphicsDescriptorSets(
      const std::array<VkDescriptorSet, kGraphicsStages.size()> &set) {
    std::lock_guard lock(mDescriptorMtx);
    mGraphicsDescriptorSets.push_back(set);
  }

  void destroyComputeDescriptorSet(VkDescriptorSet set) {
    std::lock_guard lock(mDescriptorMtx);
    mComputeDescriptorSets.push_back(set);
  }

private:
  TagId getSyncTag(std::uint64_t address, std::uint64_t size, TagId currentTag);

  Device *mDevice;
  int mVmIm; // NOTE(review): read by Tag::getVmId; likely a typo for mVmId
  // Starts at 2 so that getReadId() of the first tag is a valid id.
  TagId mNextTagId{2};
  vk::Buffer mMemoryTableBuffer;
  vk::Buffer mGdsBuffer;

  std::mutex mDescriptorMtx; // guards the descriptor free lists below
  std::array<VkDescriptorSetLayout, kGraphicsStages.size()>
      mGraphicsDescriptorSetLayouts{};
  VkDescriptorSetLayout mComputeDescriptorSetLayout{};
  VkPipelineLayout mGraphicsPipelineLayout{};
  VkPipelineLayout mComputePipelineLayout{};
  VkDescriptorPool mGraphicsDescriptorPool{};
  VkDescriptorPool mComputeDescriptorPool{};

  // Free lists of recycled descriptor sets.
  std::vector<std::array<VkDescriptorSet, kGraphicsStages.size()>>
      mGraphicsDescriptorSets;
  std::vector<VkDescriptorSet> mComputeDescriptorSets;

  std::map<SamplerKey, VkSampler> mSamplers;
  std::shared_ptr<Entry> mFrameBuffers[10];

  // Guest-address-range indexed caches.
  rx::MemoryTableWithPayload<std::shared_ptr<Entry>> mBuffers;
  rx::MemoryTableWithPayload<std::shared_ptr<Entry>> mIndexBuffers;
  rx::MemoryTableWithPayload<std::shared_ptr<Entry>> mImages;
  rx::MemoryTableWithPayload<std::shared_ptr<Entry>> mShaders;
  rx::MemoryTableWithPayload<std::shared_ptr<Entry>> mSyncTable;
};
} // namespace amdgpu

508
rpcsx-gpu2/Device.cpp Normal file
View file

@ -0,0 +1,508 @@
#include "Device.hpp"
#include "Renderer.hpp"
#include "amdgpu/tiler.hpp"
#include "gnm/constants.hpp"
#include "gnm/pm4.hpp"
#include "rx/bits.hpp"
#include "rx/die.hpp"
#include "rx/mem.hpp"
#include "shader/spv.hpp"
#include "shaders/rdna-semantic-spirv.hpp"
#include "vk.hpp"
#include <fcntl.h>
#include <sys/mman.h>
using namespace amdgpu;
// Constructs the GPU device: validates and loads the built-in RDNA semantic
// SPIR-V module used by the GCN shader translator, then wires up the
// per-VM caches and graphics pipes.
Device::Device() {
  // The semantics module is compiled into the binary; a validation failure
  // means the build itself is broken, so dump and die.
  if (!shader::spv::validate(g_rdna_semantic_spirv)) {
    shader::spv::dump(g_rdna_semantic_spirv, true);
    rx::die("builtin semantic validation failed");
  }

  if (auto sem = shader::spv::deserialize(
          shaderSemanticContext, g_rdna_semantic_spirv,
          shaderSemanticContext.getUnknownLocation())) {
    auto shaderSemantic = *sem;
    shader::gcn::canonicalizeSemantic(shaderSemanticContext, shaderSemantic);
    shader::gcn::collectSemanticModuleInfo(gcnSemanticModuleInfo,
                                           shaderSemantic);
    gcnSemantic = shader::gcn::collectSemanticInfo(gcnSemanticModuleInfo);
  } else {
    rx::die("failed to deserialize builtin semantics\n");
  }

  // Each cache is statically bound to one VM slot.
  for (int index = 0; auto &cache : caches) {
    cache.vmId = index++;
  }

  for (auto &pipe : graphicsPipes) {
    pipe.device = this;
  }

  // Compute pipes are not wired up yet.
  // for (auto &pipe : computePipes) {
  //   pipe.device = this;
  // }
}
// Tears down the device: releases the direct-memory descriptors and every
// per-process VM shared-memory descriptor that is still open.
Device::~Device() {
  for (int fd : dmemFd) {
    if (fd < 0) {
      continue; // slot was never opened
    }
    ::close(fd);
  }

  for (auto &entry : processInfo) {
    auto &info = entry.second;
    if (info.vmFd < 0) {
      continue;
    }
    ::close(info.vmFd);
  }
}
void Device::mapProcess(std::int64_t pid, int vmId, const char *shmName) {
auto &process = processInfo[pid];
process.vmId = vmId;
auto memory = amdgpu::RemoteMemory{vmId};
std::string pidVmName = shmName;
pidVmName += '-';
pidVmName += std::to_string(pid);
int memoryFd = ::shm_open(pidVmName.c_str(), O_RDWR, S_IRUSR | S_IWUSR);
process.vmFd = memoryFd;
if (memoryFd < 0) {
std::printf("failed to process %x shared memory\n", (int)pid);
std::abort();
}
for (auto [startAddress, endAddress, slot] : process.vmTable) {
auto gpuProt = slot.prot >> 4;
if (gpuProt == 0) {
continue;
}
auto devOffset = slot.offset + startAddress - slot.baseAddress;
int mapFd = memoryFd;
if (slot.memoryType >= 0) {
mapFd = dmemFd[slot.memoryType];
}
auto mmapResult =
::mmap(memory.getPointer(startAddress), endAddress - startAddress,
gpuProt, MAP_FIXED | MAP_SHARED, mapFd, devOffset);
if (mmapResult == MAP_FAILED) {
std::printf("failed to map process %x memory, address %lx-%lx, type %x\n",
(int)pid, startAddress, endAddress, slot.memoryType);
std::abort();
}
handleProtectChange(vmId, startAddress, endAddress - startAddress,
slot.prot);
}
}
// Detaches a guest process from its GPU VM slot: re-reserves the VM's
// 1 TiB address window and closes the shared-memory descriptor.
//
// Fixes: previously this unconditionally closed process.vmFd (EBADF on -1)
// and, for a process that was never mapped (vmId == -1), computed a bogus
// reserve window from the sign-extended vmId. Both are now guarded.
void Device::unmapProcess(std::int64_t pid) {
  auto &process = processInfo[pid];

  if (process.vmId >= 0) {
    auto startAddress = static_cast<std::uint64_t>(process.vmId) << 40;
    auto size = static_cast<std::uint64_t>(1) << 40;
    rx::mem::reserve(reinterpret_cast<void *>(startAddress), size);
  }

  if (process.vmFd >= 0) {
    ::close(process.vmFd);
  }

  process.vmFd = -1;
  process.vmId = -1;
}
// Changes the protection of a guest memory range. Splits the containing
// VM-table entry at the range boundaries (keeping its backing info) and,
// if the process is currently attached to a VM, applies the new GPU
// protection to the host mapping as well.
//
// Fixes: a bare std::abort() on an unmapped address gave no diagnostic;
// use rx::die with context, consistent with the rest of this file.
void Device::protectMemory(int pid, std::uint64_t address, std::uint64_t size,
                           int prot) {
  auto &process = processInfo[pid];

  auto vmSlotIt = process.vmTable.queryArea(address);
  if (vmSlotIt == process.vmTable.end()) {
    rx::die("protectMemory: no mapping at address %lx for process %x", address,
            pid);
  }

  auto vmSlot = (*vmSlotIt).payload;

  // Re-map the sub-range with the new protection, preserving backing.
  process.vmTable.map(address, address + size,
                      VmMapSlot{
                          .memoryType = vmSlot.memoryType,
                          .prot = static_cast<int>(prot),
                          .offset = vmSlot.offset,
                          .baseAddress = vmSlot.baseAddress,
                      });

  if (process.vmId >= 0) {
    auto memory = amdgpu::RemoteMemory{process.vmId};
    // Upper bits of prot carry the GPU protection.
    rx::mem::protect(memory.getPointer(address), size, prot >> 4);
    handleProtectChange(process.vmId, address, size, prot);
  }
}
// Entry point for guest command-buffer submissions. Decodes the PM4 opcode
// from the header and feeds the buffer to graphics pipe 0 as either the
// constant-engine (CE) queue or a draw-engine (DE) indirect ring.
void Device::onCommandBuffer(std::int64_t pid, int cmdHeader,
                             std::uint64_t address, std::uint64_t size) {
  auto &process = processInfo[pid];
  if (process.vmId < 0) {
    // Process is not attached to a VM; nothing to execute against.
    return;
  }

  auto memory = RemoteMemory{process.vmId};

  // PM4 opcode lives in bits 15:8 of the header.
  auto op = rx::getBits(cmdHeader, 15, 8);

  if (op == gnm::IT_INDIRECT_BUFFER_CNST) {
    graphicsPipes[0].setCeQueue(Queue::createFromRange(
        process.vmId, memory.getPointer<std::uint32_t>(address),
        size / sizeof(std::uint32_t)));
  } else if (op == gnm::IT_INDIRECT_BUFFER) {
    graphicsPipes[0].setDeQueue(
        Queue::createFromRange(process.vmId,
                               memory.getPointer<std::uint32_t>(address),
                               size / sizeof(std::uint32_t)),
        1);
  } else {
    rx::die("unimplemented command buffer %x", cmdHeader);
  }
}
// Runs one processing pass over all pipes. Returns true when every ring on
// every pipe has been fully drained, false if any work remains.
bool Device::processPipes() {
  bool everythingDrained = true;

  // Compute pipes are not implemented yet.
  // for (auto &pipe : computePipes) {
  //   if (!pipe.processAllRings()) {
  //     everythingDrained = false;
  //   }
  // }

  for (auto &gfxPipe : graphicsPipes) {
    // Note: &= still invokes processAllRings() for every pipe.
    everythingDrained &= gfxPipe.processAllRings();
  }

  return everythingDrained;
}
// Records an image layout transition into the command buffer, deriving the
// pipeline stage and access masks from the source and destination layouts.
// Only the layouts used by this file are handled; anything else aborts.
static void
transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,
                      VkImageLayout oldLayout, VkImageLayout newLayout,
                      const VkImageSubresourceRange &subresourceRange) {
  VkImageMemoryBarrier barrier{};
  barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
  barrier.oldLayout = oldLayout;
  barrier.newLayout = newLayout;
  // No queue-family ownership transfer.
  barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  barrier.image = image;
  barrier.subresourceRange = subresourceRange;

  // Maps a layout to the (stage, access) pair to synchronize against.
  auto layoutToStageAccess = [](VkImageLayout layout)
      -> std::pair<VkPipelineStageFlags, VkAccessFlags> {
    switch (layout) {
    case VK_IMAGE_LAYOUT_UNDEFINED:
    case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
    case VK_IMAGE_LAYOUT_GENERAL:
      // Nothing to wait for / no prior access to make visible.
      return {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0};

    case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
      return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT};

    case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
      return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT};

    case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
      return {VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT};

    case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
      return {VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
              VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT};

    case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
      return {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
              VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                  VK_ACCESS_COLOR_ATTACHMENT_READ_BIT};

    default:
      // Unsupported layout for this helper.
      std::abort();
    }
  };

  auto [sourceStage, sourceAccess] = layoutToStageAccess(oldLayout);
  auto [destinationStage, destinationAccess] = layoutToStageAccess(newLayout);

  barrier.srcAccessMask = sourceAccess;
  barrier.dstAccessMask = destinationAccess;

  vkCmdPipelineBarrier(commandBuffer, sourceStage, destinationStage, 0, 0,
                       nullptr, 0, nullptr, 1, &barrier);
}
// Presents one guest display buffer: records a blit/flip of the buffer
// into the swapchain image, submits it chained to the pipe scheduler's
// timeline, and schedules the bridge flip bookkeeping to run on completion.
// Returns false if the process is not attached to a VM.
bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
                  VkCommandBuffer commandBuffer, VkImage swapchainImage,
                  VkImageView swapchainImageView, VkFence fence) {
  auto &pipe = graphicsPipes[0];
  auto &scheduler = pipe.scheduler;
  auto &process = processInfo[pid];
  if (process.vmId < 0) {
    return false;
  }

  auto &buffer = process.buffers[bufferIndex];
  auto &bufferAttr = process.bufferAttributes[buffer.attrId];

  // Translate the guest pixel format into gnm formats and component swap.
  // NOTE(review): SNormNoZero for display surfaces looks suspicious — one
  // would expect an unorm/srgb interpretation; confirm against the flip
  // shader's expectations.
  gnm::DataFormat dfmt;
  gnm::NumericFormat nfmt;
  CbCompSwap compSwap;
  switch (bufferAttr.pixelFormat) {
  case 0x80000000:
    // bgra
    dfmt = gnm::kDataFormat8_8_8_8;
    nfmt = gnm::kNumericFormatSNormNoZero;
    compSwap = CbCompSwap::Alt;
    break;

  case 0x80002200:
    // rgba
    dfmt = gnm::kDataFormat8_8_8_8;
    nfmt = gnm::kNumericFormatSNormNoZero;
    compSwap = CbCompSwap::Std;
    break;

  case 0x88060000:
    // bgra
    dfmt = gnm::kDataFormat2_10_10_10;
    nfmt = gnm::kNumericFormatSNormNoZero;
    compSwap = CbCompSwap::Alt;
    break;

  default:
    rx::die("unimplemented color buffer format %x", bufferAttr.pixelFormat);
  }

  // std::printf("displaying buffer %lx\n", buffer.address);

  VkCommandBufferBeginInfo beginInfo{};
  beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
  beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
  vkBeginCommandBuffer(commandBuffer, &beginInfo);

  auto cacheTag = getCacheTag(process.vmId, scheduler);

  // Primary path: render the guest buffer to the swapchain via the flip
  // shader. The else branch (blit through a cached image) is kept disabled
  // as a debugging fallback.
  if (true) {
    transitionImageLayout(commandBuffer, swapchainImage,
                          VK_IMAGE_LAYOUT_UNDEFINED,
                          VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                          {
                              .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                              .levelCount = 1,
                              .layerCount = 1,
                          });

    amdgpu::flip(cacheTag, commandBuffer, vk::context->swapchainExtent,
                 buffer.address, swapchainImageView,
                 {bufferAttr.width, bufferAttr.height}, compSwap,
                 getDefaultTileModes()[13], dfmt, nfmt);

    transitionImageLayout(commandBuffer, swapchainImage,
                          VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                          VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
                          {
                              .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                              .levelCount = 1,
                              .layerCount = 1,
                          });
  } else {
    ImageKey frameKey{
        .address = buffer.address,
        .type = gnm::TextureType::Dim2D,
        .dfmt = dfmt,
        .nfmt = nfmt,
        .tileMode = getDefaultTileModes()[13],
        .extent =
            {
                .width = bufferAttr.width,
                .height = bufferAttr.height,
                .depth = 1,
            },
        .pitch = bufferAttr.width,
        .mipCount = 1,
        .arrayLayerCount = 1,
    };

    auto image = cacheTag.getImage(frameKey, Access::Read);

    // Ensure the cached image upload completed before blitting from it.
    scheduler.submit();
    scheduler.wait();

    transitionImageLayout(commandBuffer, swapchainImage,
                          VK_IMAGE_LAYOUT_UNDEFINED,
                          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                          {
                              .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                              .levelCount = 1,
                              .layerCount = 1,
                          });

    // Scale the guest buffer to the swapchain size.
    VkImageBlit region{
        .srcSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                           .mipLevel = 0,
                           .baseArrayLayer = 0,
                           .layerCount = 1},
        .srcOffsets = {{},
                       {static_cast<int32_t>(bufferAttr.width),
                        static_cast<int32_t>(bufferAttr.height), 1}},
        .dstSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                           .mipLevel = 0,
                           .baseArrayLayer = 0,
                           .layerCount = 1},
        .dstOffsets =
            {{},
             {static_cast<int32_t>(vk::context->swapchainExtent.width),
              static_cast<int32_t>(vk::context->swapchainExtent.height), 1}},
    };

    vkCmdBlitImage(commandBuffer, image.handle, VK_IMAGE_LAYOUT_GENERAL,
                   swapchainImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
                   &region, VK_FILTER_LINEAR);

    transitionImageLayout(commandBuffer, swapchainImage,
                          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                          VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
                          {
                              .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                              .levelCount = 1,
                              .layerCount = 1,
                          });
  }

  // Submit the present command buffer chained to the pipe's timeline:
  // wait for the previous scheduler submit and the acquire semaphore,
  // signal render-complete plus the next timeline value.
  auto submitCompleteTask = scheduler.createExternalSubmit();

  {
    vkEndCommandBuffer(commandBuffer);

    VkSemaphoreSubmitInfo signalSemSubmitInfos[] = {
        {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
            .semaphore = vk::context->renderCompleteSemaphore,
            .value = 1,
            .stageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
        },
        {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
            .semaphore = scheduler.getSemaphoreHandle(),
            .value = submitCompleteTask,
            .stageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
        },
    };

    VkSemaphoreSubmitInfo waitSemSubmitInfos[] = {
        {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
            .semaphore = vk::context->presentCompleteSemaphore,
            .value = 1,
            .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
        },
        {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
            .semaphore = scheduler.getSemaphoreHandle(),
            // Previous timeline value: all prior scheduler work.
            .value = submitCompleteTask - 1,
            .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
        },
    };

    VkCommandBufferSubmitInfo cmdBufferSubmitInfo{
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
        .commandBuffer = commandBuffer,
    };

    // NOTE(review): waitSemaphoreInfoCount is 1 although two wait infos are
    // declared — the scheduler-timeline wait is never registered; confirm
    // whether this should be 2.
    VkSubmitInfo2 submitInfo{
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
        .waitSemaphoreInfoCount = 1,
        .pWaitSemaphoreInfos = waitSemSubmitInfos,
        .commandBufferInfoCount = 1,
        .pCommandBufferInfos = &cmdBufferSubmitInfo,
        .signalSemaphoreInfoCount = 2,
        .pSignalSemaphoreInfos = signalSemSubmitInfos,
    };

    vkQueueSubmit2(vk::context->presentQueue, 1, &submitInfo, fence);
    // vkQueueWaitIdle(queue);
  }

  // After the GPU work completes: publish the flip to the bridge and mark
  // the guest buffer as no longer in use. cacheTag is kept alive inside
  // the callback so cached resources survive until then.
  scheduler.then([=, this, cacheTag = std::move(cacheTag)] {
    bridge->flipBuffer[process.vmId] = bufferIndex;
    bridge->flipArg[process.vmId] = arg;
    bridge->flipCount[process.vmId] = bridge->flipCount[process.vmId] + 1;

    auto mem = RemoteMemory{process.vmId};
    auto bufferInUse =
        mem.getPointer<std::uint64_t>(bridge->bufferInUseAddress[process.vmId]);
    if (bufferInUse != nullptr) {
      bufferInUse[bufferIndex] = 0;
    }
  });

  return true;
}
// Records a guest mapping in the process VM table and, if the process is
// attached to a VM, mirrors it as a host mmap inside the VM window.
// memoryType >= 0 selects direct memory backed by dmemFd[dmemIndex];
// otherwise the mapping is backed by the process shared memory.
void Device::mapMemory(std::int64_t pid, std::uint64_t address,
                       std::uint64_t size, int memoryType, int dmemIndex,
                       int prot, std::int64_t offset) {
  auto &process = processInfo[pid];

  process.vmTable.map(address, address + size,
                      VmMapSlot{
                          .memoryType = memoryType >= 0 ? dmemIndex : -1,
                          .prot = prot,
                          .offset = offset,
                          .baseAddress = address,
                      });

  if (process.vmId < 0) {
    // Not attached yet; the mapping is replayed later by mapProcess().
    return;
  }

  auto memory = amdgpu::RemoteMemory{process.vmId};

  int mapFd = process.vmFd;

  if (memoryType >= 0) {
    mapFd = dmemFd[dmemIndex];
  }

  // Upper bits of prot carry the GPU protection.
  auto mmapResult = ::mmap(memory.getPointer(address), size, prot >> 4,
                           MAP_FIXED | MAP_SHARED, mapFd, offset);

  if (mmapResult == MAP_FAILED) {
    rx::die("failed to map process %x memory, address %lx-%lx, type %x",
            (int)pid, address, address + size, memoryType);
  }

  handleProtectChange(process.vmId, address, size, prot);
}
void Device::registerBuffer(std::int64_t pid, bridge::CmdBuffer buffer) {
auto &process = processInfo[pid];
if (buffer.attrId >= 10 || buffer.index >= 10) {
rx::die("out of buffers %u, %u", buffer.attrId, buffer.index);
}
process.buffers[buffer.index] = buffer;
}
void Device::registerBufferAttribute(std::int64_t pid,
bridge::CmdBufferAttribute attr) {
auto &process = processInfo[pid];
if (attr.attrId >= 10) {
rx::die("out of buffer attributes %u", attr.attrId);
}
process.bufferAttributes[attr.attrId] = attr;
}
// Hook invoked whenever a guest mapping's GPU protection changes (called
// from mapProcess/mapMemory/protectMemory). Currently a no-op stub kept as
// an extension point, e.g. for cache invalidation on protection changes.
void Device::handleProtectChange(int vmId, std::uint64_t address,
                                 std::uint64_t size, int prot) {}

91
rpcsx-gpu2/Device.hpp Normal file
View file

@ -0,0 +1,91 @@
#pragma once
#include "Cache.hpp"
#include "Pipe.hpp"
#include "amdgpu/bridge/bridge.hpp"
#include "amdgpu/tiler_vulkan.hpp"
#include "gnm/descriptors.hpp"
#include "rx/MemoryTable.hpp"
#include "shader/SemanticInfo.hpp"
#include "shader/SpvConverter.hpp"
#include "shader/gcn.hpp"
#include <unordered_map>
#include <vulkan/vulkan_core.h>
namespace amdgpu {
// One entry of a process VM table: how a guest address range is backed.
struct VmMapSlot {
  int memoryType;            // dmem index for direct memory, -1 for process shm
  int prot;                  // protection; bits 4+ hold the GPU protection (prot >> 4)
  std::int64_t offset;       // backing-file offset of baseAddress
  std::uint64_t baseAddress; // guest address where this mapping started

  auto operator<=>(const VmMapSlot &) const = default;
};
// Per-guest-process state tracked by the Device.
struct ProcessInfo {
  int vmId = -1; // GPU VM slot the process is attached to, -1 if detached
  int vmFd = -1; // shm fd backing the process memory, -1 if not open
  // Fixed tables of display buffers and their attributes (10 slots each,
  // bounds-checked in Device::registerBuffer*).
  amdgpu::bridge::CmdBufferAttribute bufferAttributes[10];
  amdgpu::bridge::CmdBuffer buffers[10];
  rx::MemoryTableWithPayload<VmMapSlot> vmTable;
};
// Non-owning view of one guest address space. Each VM occupies its own
// 1 TiB (1 << 40) window of the host address space selected by vmId, so a
// guest address translates to host by OR-ing it into that window.
struct RemoteMemory {
  int vmId;

  // Translates a guest address to a host pointer inside this VM's window.
  // A zero guest address translates to nullptr.
  template <typename T = void> T *getPointer(std::uint64_t address) const {
    if (address == 0) {
      return nullptr;
    }

    auto window = static_cast<std::uint64_t>(vmId) << 40;
    return reinterpret_cast<T *>(window | address);
  }
};
// Top-level GPU emulator state: the shader-translation semantics, the
// graphics pipes, per-VM resource caches and per-guest-process bookkeeping.
struct Device {
  // kComputePipeCount is currently unused — compute pipes are still
  // commented out below.
  static constexpr auto kComputePipeCount = 8;
  static constexpr auto kGfxPipeCount = 2;

  // Built-in RDNA semantic module data, loaded once in the constructor and
  // shared by all shader translations.
  shader::SemanticInfo gcnSemantic;
  shader::spv::Context shaderSemanticContext;
  shader::gcn::SemanticModuleInfo gcnSemanticModuleInfo;

  // Shared-memory bridge to the OS-side process (flip state, buffers).
  amdgpu::bridge::BridgeHeader *bridge;
  Registers::Config config;
  GpuTiler tiler;
  GraphicsPipe graphicsPipes[kGfxPipeCount]{0, 1};
  // ComputePipe computePipes[kComputePipeCount]{0, 1, 2, 3, 4, 5, 6, 7};

  // Direct-memory backing descriptors, one per dmem area; -1 = not open.
  int dmemFd[3] = {-1, -1, -1};
  std::unordered_map<std::int64_t, ProcessInfo> processInfo;

  // One resource cache per GPU VM slot.
  Cache caches[6]{
      {this, 0}, {this, 1}, {this, 2}, {this, 3}, {this, 4}, {this, 5},
  };

  Device();
  ~Device();

  // Creates a cache access tag for the given VM bound to a scheduler.
  Cache::Tag getCacheTag(int vmId, Scheduler &scheduler) {
    return caches[vmId].createTag(scheduler);
  }

  void mapProcess(std::int64_t pid, int vmId, const char *shmName);
  void unmapProcess(std::int64_t pid);
  void protectMemory(int pid, std::uint64_t address, std::uint64_t size,
                     int prot);
  void onCommandBuffer(std::int64_t pid, int cmdHeader, std::uint64_t address,
                       std::uint64_t size);
  // Returns true when all pipe rings are fully drained.
  bool processPipes();
  // Presents a guest display buffer; returns false if pid is not mapped.
  bool flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
            VkCommandBuffer commandBuffer, VkImage swapchainImage,
            VkImageView swapchainImageView, VkFence fence);
  void mapMemory(std::int64_t pid, std::uint64_t address, std::uint64_t size,
                 int memoryType, int dmemIndex, int prot, std::int64_t offset);
  void registerBuffer(std::int64_t pid, bridge::CmdBuffer buffer);
  void registerBufferAttribute(std::int64_t pid,
                               bridge::CmdBufferAttribute attr);
  void handleProtectChange(int vmId, std::uint64_t address, std::uint64_t size,
                           int prot);
};
} // namespace amdgpu

987
rpcsx-gpu2/Pipe.cpp Normal file
View file

@ -0,0 +1,987 @@
#include "Pipe.hpp"
#include "Device.hpp"
#include "Registers.hpp"
#include "Renderer.hpp"
#include "gnm/mmio.hpp"
#include "gnm/pm4.hpp"
#include "vk.hpp"
#include <cstdio>
#include <rx/bits.hpp>
#include <rx/die.hpp>
#include <vulkan/vulkan_core.h>
using namespace amdgpu;
// Builds the Vulkan scheduler for a graphics pipe. Pipe 0 uses the present
// queue; other pipes prefer a graphics queue from a different queue family
// when one exists (the loop keeps the last such queue).
static Scheduler createGfxScheduler(int index) {
  auto queue = vk::context->presentQueue;
  auto family = vk::context->presentQueueFamily;

  if (index != 0) {
    for (auto [otherQueue, otherFamily] : vk::context->graphicsQueues) {
      if (family != otherFamily) {
        queue = otherQueue;
        family = otherFamily;
      }
    }
  }

  return Scheduler{queue, family};
}
// Builds the Vulkan scheduler for a compute pipe, round-robining pipes
// over the available compute queues.
static Scheduler createComputeScheduler(int index) {
  auto &available = vk::context->computeQueues;
  auto [selectedQueue, selectedFamily] = available[index % available.size()];
  return Scheduler{selectedQueue, selectedFamily};
}
// Evaluates a PM4 WAIT_REG_MEM-style comparison between a polled value and
// a reference, both masked first. Functions: 0 = always, 1 = <, 2 = <=,
// 3 = ==, 4 = !=, 5 = >=, 6 = >; anything else compares false.
static bool compare(int cmpFn, std::uint32_t poll, std::uint32_t mask,
                    std::uint32_t ref) {
  const std::uint32_t lhs = poll & mask;
  const std::uint32_t rhs = ref & mask;

  if (cmpFn == 0) {
    return true;
  }
  if (cmpFn == 1) {
    return lhs < rhs;
  }
  if (cmpFn == 2) {
    return lhs <= rhs;
  }
  if (cmpFn == 3) {
    return lhs == rhs;
  }
  if (cmpFn == 4) {
    return lhs != rhs;
  }
  if (cmpFn == 5) {
    return lhs >= rhs;
  }
  if (cmpFn == 6) {
    return lhs > rhs;
  }

  return false;
}
// Initializes a compute pipe: every PM4 opcode defaults to the
// unknownPacket trap; only IT_NOP has a real handler so far.
ComputePipe::ComputePipe(int index) : scheduler(createComputeScheduler(index)) {
  for (auto &handler : commandHandlers) {
    handler = &ComputePipe::unknownPacket;
  }

  commandHandlers[gnm::IT_NOP] = &ComputePipe::handleNop;
}
bool ComputePipe::processAllRings() {
bool allProcessed = true;
for (auto &ring : queues) {
processRing(ring);
if (ring.rptr != ring.wptr) {
allProcessed = false;
break;
}
}
return allProcessed;
}
// Drains one compute ring: decodes PM4 packets between rptr and wptr and
// dispatches type-3 packets through commandHandlers. A handler returning
// false pauses processing without consuming the packet (it is retried on
// the next pass).
void ComputePipe::processRing(Queue &queue) {
  while (queue.rptr != queue.wptr) {
    // Wrap the read pointer at the end of the ring buffer.
    if (queue.rptr >= queue.base + queue.size) {
      queue.rptr = queue.base;
    }

    auto header = *queue.rptr;
    auto type = rx::getBits(header, 31, 30);

    if (type == 3) {
      auto op = rx::getBits(header, 15, 8);
      // Packet length in dwords, including the header dword.
      auto len = rx::getBits(header, 29, 16) + 2;

      // std::fprintf(stderr, "queue %d: %s\n", queue.indirectLevel,
      // gnm::pm4OpcodeToString(op));

      if (op == gnm::IT_COND_EXEC) {
        rx::die("unimplemented COND_EXEC");
      }

      auto handler = commandHandlers[op];

      if (!(this->*handler)(queue)) {
        // Handler stalled; leave rptr so the packet is retried.
        return;
      }

      queue.rptr += len;
      continue;
    }

    if (type == 2) {
      // Type-2 packets are single-dword filler; just skip them.
      ++queue.rptr;
      continue;
    }

    rx::die("unexpected pm4 packet type %u", type);
  }
}
// Trap handler for PM4 opcodes without an implementation: aborts with the
// opcode name and the queue's indirection level.
bool ComputePipe::unknownPacket(Queue &queue) {
  auto op = rx::getBits(queue.rptr[0], 15, 8);

  rx::die("unimplemented compute pm4 packet: %s, queue %u\n",
          gnm::pm4OpcodeToString(op), queue.indirectLevel);
  return true;
}
bool ComputePipe::handleNop(Queue &queue) { return true; }
// Initializes a graphics pipe: fills every handler table slot with the
// unknownPacket trap, then installs the implemented PM4 handlers. The
// commented opcode names document packets that are known but not yet
// implemented. Handler tables are indexed by processor:
//   [0] = constant engine (CE), [1] = draw engine (DE), [2] = data.
GraphicsPipe::GraphicsPipe(int index) : scheduler(createGfxScheduler(index)) {
  for (auto &processorHandlers : commandHandlers) {
    for (auto &handler : processorHandlers) {
      handler = &GraphicsPipe::unknownPacket;
    }

    processorHandlers[gnm::IT_NOP] = &GraphicsPipe::handleNop;
  }

  // NOTE(review): dataHandlers is currently unused — no data-processor
  // packets are installed yet.
  auto &dataHandlers = commandHandlers[2];
  auto &deHandlers = commandHandlers[1];
  auto &ceHandlers = commandHandlers[0];

  // Draw-engine packets.
  deHandlers[gnm::IT_SET_BASE] = &GraphicsPipe::setBase;
  deHandlers[gnm::IT_CLEAR_STATE] = &GraphicsPipe::clearState;
  deHandlers[gnm::IT_INDEX_BUFFER_SIZE] = &GraphicsPipe::indexBufferSize;
  deHandlers[gnm::IT_DISPATCH_DIRECT] = &GraphicsPipe::dispatchDirect;
  deHandlers[gnm::IT_DISPATCH_INDIRECT] = &GraphicsPipe::dispatchIndirect;

  // IT_ATOMIC_GDS
  // IT_OCCLUSION_QUERY

  deHandlers[gnm::IT_SET_PREDICATION] = &GraphicsPipe::setPredication;

  // IT_REG_RMW
  // IT_COND_EXEC
  // IT_PRED_EXEC

  deHandlers[gnm::IT_DRAW_INDIRECT] = &GraphicsPipe::drawIndirect;
  deHandlers[gnm::IT_DRAW_INDEX_INDIRECT] = &GraphicsPipe::drawIndexIndirect;
  deHandlers[gnm::IT_INDEX_BASE] = &GraphicsPipe::indexBase;
  deHandlers[gnm::IT_DRAW_INDEX_2] = &GraphicsPipe::drawIndex2;
  deHandlers[gnm::IT_CONTEXT_CONTROL] = &GraphicsPipe::contextControl;
  deHandlers[gnm::IT_INDEX_TYPE] = &GraphicsPipe::indexType;

  // IT_DRAW_INDIRECT_MULTI

  deHandlers[gnm::IT_DRAW_INDEX_AUTO] = &GraphicsPipe::drawIndexAuto;
  deHandlers[gnm::IT_NUM_INSTANCES] = &GraphicsPipe::numInstances;
  deHandlers[gnm::IT_DRAW_INDEX_MULTI_AUTO] = &GraphicsPipe::drawIndexMultiAuto;

  // IT_INDIRECT_BUFFER_CNST
  // IT_STRMOUT_BUFFER_UPDATE

  deHandlers[gnm::IT_DRAW_INDEX_OFFSET_2] = &GraphicsPipe::drawIndexOffset2;
  deHandlers[gnm::IT_DRAW_PREAMBLE] = &GraphicsPipe::drawPreamble;
  deHandlers[gnm::IT_WRITE_DATA] = &GraphicsPipe::writeData;
  deHandlers[gnm::IT_MEM_SEMAPHORE] = &GraphicsPipe::memSemaphore;

  // IT_COPY_DW

  deHandlers[gnm::IT_WAIT_REG_MEM] = &GraphicsPipe::waitRegMem;
  deHandlers[gnm::IT_INDIRECT_BUFFER] = &GraphicsPipe::indirectBuffer;

  // IT_COPY_DATA

  deHandlers[gnm::IT_PFP_SYNC_ME] = &GraphicsPipe::pfpSyncMe;

  // IT_SURFACE_SYNC

  deHandlers[gnm::IT_COND_WRITE] = &GraphicsPipe::condWrite;
  deHandlers[gnm::IT_EVENT_WRITE] = &GraphicsPipe::eventWrite;
  deHandlers[gnm::IT_EVENT_WRITE_EOP] = &GraphicsPipe::eventWriteEop;
  deHandlers[gnm::IT_EVENT_WRITE_EOS] = &GraphicsPipe::eventWriteEos;
  deHandlers[gnm::IT_RELEASE_MEM] = &GraphicsPipe::releaseMem;

  // IT_PREAMBLE_CNTL

  deHandlers[gnm::IT_DMA_DATA] = &GraphicsPipe::dmaData;
  deHandlers[gnm::IT_ACQUIRE_MEM] = &GraphicsPipe::acquireMem;

  // IT_REWIND
  // IT_LOAD_UCONFIG_REG
  // IT_LOAD_SH_REG
  // IT_LOAD_CONFIG_REG
  // IT_LOAD_CONTEXT_REG

  // Register write packets.
  deHandlers[gnm::IT_SET_CONFIG_REG] = &GraphicsPipe::setConfigReg;
  deHandlers[gnm::IT_SET_CONTEXT_REG] = &GraphicsPipe::setContextReg;

  // IT_SET_CONTEXT_REG_INDIRECT

  deHandlers[gnm::IT_SET_SH_REG] = &GraphicsPipe::setShReg;

  // IT_SET_SH_REG_OFFSET
  // IT_SET_QUEUE_REG

  deHandlers[gnm::IT_SET_UCONFIG_REG] = &GraphicsPipe::setUConfigReg;

  // IT_SCRATCH_RAM_WRITE
  // IT_SCRATCH_RAM_READ

  // CE/DE synchronization counters.
  deHandlers[gnm::IT_INCREMENT_DE_COUNTER] = &GraphicsPipe::incrementDeCounter;
  deHandlers[gnm::IT_WAIT_ON_CE_COUNTER] = &GraphicsPipe::waitOnCeCounter;
  deHandlers[gnm::IT_SET_CE_DE_COUNTERS] = &GraphicsPipe::setCeDeCounters;

  // IT_WAIT_ON_AVAIL_BUFFER
  // IT_SWITCH_BUFFER
  // IT_SET_RESOURCES
  // IT_MAP_PROCESS
  // IT_MAP_QUEUES
  // IT_UNMAP_QUEUES
  // IT_QUERY_STATUS
  // IT_RUN_LIST
  // IT_DISPATCH_DRAW_PREAMBLE
  // IT_DISPATCH_DRAW

  // Constant-engine packets.
  ceHandlers[gnm::IT_WAIT_ON_DE_COUNTER_DIFF] =
      &GraphicsPipe::waitOnDeCounterDiff;
  ceHandlers[gnm::IT_INCREMENT_CE_COUNTER] = &GraphicsPipe::incrementCeCounter;
  ceHandlers[gnm::IT_LOAD_CONST_RAM] = &GraphicsPipe::loadConstRam;
  ceHandlers[gnm::IT_WRITE_CONST_RAM] = &GraphicsPipe::writeConstRam;
  ceHandlers[gnm::IT_DUMP_CONST_RAM] = &GraphicsPipe::dumpConstRam;
}
void GraphicsPipe::setCeQueue(Queue queue) {
  // CE rings are tagged with indirect level -1; processRing uses this to
  // select the constant-engine handler table (cp = 0).
  queue.indirectLevel = -1;
  ceQueue = queue;
}
void GraphicsPipe::setDeQueue(Queue queue, int ring) {
  // Only three DE rings exist: the primary ring and two indirect levels.
  // NOTE(review): indirectLevel is stored as 2 - ring, so ring 0 gets the
  // deepest level — confirm this inversion is intended (see processRing's
  // cp selection and indirectBuffer's ring computation).
  rx::dieIf(ring > 2, "out of indirect gfx rings, %u", ring);
  queue.indirectLevel = 2 - ring;
  deQueues[ring] = queue;
}
// Translates a memory-mapped register dword address into a pointer inside
// the matching register block (SH / UConfig / Context). Dies on addresses
// outside all known blocks.
std::uint32_t *GraphicsPipe::getMmRegister(std::uint32_t dwAddress) {
  // if (dwAddress >= Registers::Config::kMmioOffset &&
  //     dwAddress < Registers::Config::kMmioOffset +
  //     sizeof(Registers::Config) / sizeof(std::uint32_t)) {
  //   return reinterpret_cast<std::uint32_t *>(&config) + (dwAddress -
  //   Registers::Config::kMmioOffset);
  // }
  if (dwAddress >= Registers::ShaderConfig::kMmioOffset &&
      dwAddress < Registers::ShaderConfig::kMmioOffset +
                      sizeof(Registers::ShaderConfig) / sizeof(std::uint32_t)) {
    return reinterpret_cast<std::uint32_t *>(&sh) +
           (dwAddress - Registers::ShaderConfig::kMmioOffset);
  }
  if (dwAddress >= Registers::UConfig::kMmioOffset &&
      dwAddress < Registers::UConfig::kMmioOffset +
                      sizeof(Registers::UConfig) / sizeof(std::uint32_t)) {
    return reinterpret_cast<std::uint32_t *>(&uConfig) +
           (dwAddress - Registers::UConfig::kMmioOffset);
  }
  if (dwAddress >= Registers::Context::kMmioOffset &&
      dwAddress < Registers::Context::kMmioOffset +
                      sizeof(Registers::Context) / sizeof(std::uint32_t)) {
    return reinterpret_cast<std::uint32_t *>(&context) +
           (dwAddress - Registers::Context::kMmioOffset);
  }
  rx::die("unexpected memory mapped register address %x, %s", dwAddress,
          gnm::mmio::registerName(dwAddress));
}
// Pumps the CE ring, then the DE rings in index order. Returns true only
// when every ring has been fully drained (rptr caught up with wptr).
bool GraphicsPipe::processAllRings() {
  bool allProcessed = true;
  if (ceQueue.rptr != ceQueue.wptr) {
    processRing(ceQueue);
    if (ceQueue.rptr != ceQueue.wptr) {
      allProcessed = false;
    }
  }
  for (int i = 0; i < 3; ++i) {
    auto &queue = deQueues[i];
    processRing(queue);
    if (queue.rptr != queue.wptr) {
      // A stalled DE ring blocks the later ones; they are not touched until
      // this ring can make progress again.
      allProcessed = false;
      break;
    }
  }
  return allProcessed;
}
// Decodes and dispatches PM4 packets from one ring until it drains, a
// handler stalls, or an indirect buffer is launched.
void GraphicsPipe::processRing(Queue &queue) {
  // Handler table selector: 0 = constant engine, 2 = deepest indirect DE
  // level, 1 = every other DE ring.
  auto cp = 1;
  if (queue.indirectLevel < 0) {
    cp = 0;
  } else if (queue.indirectLevel == 2) {
    cp = 2;
  }
  while (queue.rptr != queue.wptr) {
    // Wrap the read pointer at the end of the ring buffer.
    if (queue.rptr >= queue.base + queue.size) {
      queue.rptr = queue.base;
    }
    auto header = *queue.rptr;
    auto type = rx::getBits(header, 31, 30);
    if (type == 3) {
      auto op = rx::getBits(header, 15, 8);
      // Type-3 COUNT field encodes (packet size - 2); len is the full
      // packet size in dwords, header included.
      auto len = rx::getBits(header, 29, 16) + 2;
      // std::fprintf(stderr, "queue %d: %s\n", queue.indirectLevel,
      // gnm::pm4OpcodeToString(op));
      if (op == gnm::IT_COND_EXEC) {
        rx::die("unimplemented COND_EXEC");
      }
      auto handler = commandHandlers[cp][op];
      // A handler returning false means the packet cannot complete yet
      // (e.g. an unsatisfied wait); keep rptr in place and retry later.
      if (!(this->*handler)(queue)) {
        return;
      }
      queue.rptr += len;
      // After launching an indirect buffer, yield so the nested ring is
      // processed before the remainder of this one.
      if (op == gnm::IT_INDIRECT_BUFFER || op == gnm::IT_INDIRECT_BUFFER_CNST) {
        break;
      }
      continue;
    }
    if (type == 2) {
      // Type-2 packets are single-dword filler.
      ++queue.rptr;
      continue;
    }
    rx::die("unexpected pm4 packet type %u", type);
  }
}
bool GraphicsPipe::handleNop(Queue &queue) { return true; }
// PM4 SET_BASE: latches one of the patch/partition base addresses selected
// by the index in dw1.
bool GraphicsPipe::setBase(Queue &queue) {
  auto baseIndex = queue.rptr[1] & 0xf;
  switch (baseIndex) {
  case 0: { // display-list patch table base
    auto address0 = queue.rptr[2] & ~3;
    auto address1 = queue.rptr[3] & ((1 << 16) - 1);
    displayListPatchBase =
        address0 | (static_cast<std::uint64_t>(address1) << 32);
    break;
  }
  case 1: { // draw-index-indirect patch table base
    auto address0 = queue.rptr[2] & ~3;
    auto address1 = queue.rptr[3] & ((1 << 16) - 1);
    drawIndexIndirPatchBase =
        address0 | (static_cast<std::uint64_t>(address1) << 32);
    break;
  }
  case 2: { // GDS partition bases for the two CS pipes
    auto cs1Index = queue.rptr[2] & ((1 << 16) - 1);
    auto cs2Index = queue.rptr[3] & ((1 << 16) - 1);
    gdsPartitionBases[0] = cs1Index;
    gdsPartitionBases[1] = cs2Index;
    break;
  }
  case 3: { // CE partition bases
    auto cs1Index = queue.rptr[2] & ((1 << 16) - 1);
    auto cs2Index = queue.rptr[3] & ((1 << 16) - 1);
    cePartitionBases[0] = cs1Index;
    cePartitionBases[1] = cs2Index;
    break;
  }
  default:
    rx::die("pm4: unknown SET_BASE index %u", baseIndex);
  }
  return true;
}
// PM4 CLEAR_STATE: reset all context registers to their hardware defaults.
bool GraphicsPipe::clearState(Queue &queue) {
  context = Registers::Context::Default;
  return true;
}
bool GraphicsPipe::contextControl(Queue &queue) { return true; }
bool GraphicsPipe::acquireMem(Queue &queue) { return true; }
// PM4 RELEASE_MEM: end-of-pipe event that optionally writes a fence value
// or timestamp to memory. Packet layout: dw1 = event control, dw2 = data
// control, dw3 = address low, dw4 = address high, dw5/dw6 = payload lo/hi.
bool GraphicsPipe::releaseMem(Queue &queue) {
  auto eventCntl = queue.rptr[1];
  auto dataCntl = queue.rptr[2];
  auto addressLo = queue.rptr[3] & ~3;
  // FIX: the address high half lives in dw4 (the original re-read dw3, so
  // the upper address bits were garbage and the payload dwords were read
  // one slot too early).
  auto addressHi = queue.rptr[4] & ((1 << 16) - 1);
  auto dataLo = queue.rptr[5];
  auto dataHi = queue.rptr[6];
  auto eventIndex = rx::getBits(eventCntl, 11, 8);
  auto eventType = rx::getBits(eventCntl, 5, 0);
  auto dataSel = rx::getBits(dataCntl, 31, 29);
  auto intSel = rx::getBits(dataCntl, 25, 24);
  auto address = addressLo | (static_cast<std::uint64_t>(addressHi) << 32);
  auto pointer = RemoteMemory{queue.vmId}.getPointer<std::uint64_t>(address);
  context.vgtEventInitiator = eventType;
  switch (dataSel) {
  case 0: // none
    break;
  case 1: // 32 bit, low
    *reinterpret_cast<std::uint32_t *>(pointer) = dataLo;
    break;
  case 2: // 64 bit
    *pointer = dataLo | (static_cast<std::uint64_t>(dataHi) << 32);
    break;
  case 3: // 64 bit, global GPU clock
    *pointer = std::chrono::duration_cast<std::chrono::nanoseconds>(
                   std::chrono::system_clock::now().time_since_epoch())
                   .count();
    break;
  case 4: // 64 bit, perf counter
    *pointer = std::chrono::duration_cast<std::chrono::nanoseconds>(
                   std::chrono::steady_clock::now().time_since_epoch())
                   .count();
    break;
  default:
    rx::die("unimplemented event release mem data %#x", dataSel);
  }
  return true;
}
bool GraphicsPipe::drawPreamble(Queue &queue) { return true; }
// INDEX_BUFFER_SIZE: latch the index buffer dword count for later draws.
bool GraphicsPipe::indexBufferSize(Queue &queue) {
  vgtIndexBufferSize = queue.rptr[1];
  return true;
}
// DISPATCH_DIRECT: compute dispatch with inline grid dimensions.
// The dispatch itself is not yet implemented; only the initiator register
// is latched.
bool GraphicsPipe::dispatchDirect(Queue &queue) {
  auto dimX = queue.rptr[1];
  auto dimY = queue.rptr[2];
  auto dimZ = queue.rptr[3];
  auto dispatchInitiator = queue.rptr[4];
  sh.compute.computeDispatchInitiator = dispatchInitiator;
  // FIXME
  return true;
}
// DISPATCH_INDIRECT: compute dispatch with grid dimensions fetched from
// guest memory at (drawIndexIndirPatchBase + offset). Dispatch itself is
// not yet implemented.
bool GraphicsPipe::dispatchIndirect(Queue &queue) {
  auto offset = queue.rptr[1];
  auto dispatchInitiator = queue.rptr[2];
  sh.compute.computeDispatchInitiator = dispatchInitiator;
  auto buffer = RemoteMemory{queue.vmId}.getPointer<std::uint32_t>(
      drawIndexIndirPatchBase + offset);
  auto dimX = buffer[0];
  auto dimY = buffer[1];
  auto dimZ = buffer[2];
  // FIXME
  return true;
}
// SET_PREDICATION: decode only; predicated rendering is not implemented.
// NOTE(review): predBool (bit 8) and hint (bit 12) overlap the
// startAddressHi field (bits 15:0) — confirm the bit layout against the
// PM4 spec before relying on these decodes.
bool GraphicsPipe::setPredication(Queue &queue) {
  auto startAddressLo = queue.rptr[1] & ~0xf;
  auto predProperties = queue.rptr[2];
  auto startAddressHi = rx::getBits(predProperties, 15, 0);
  auto predBool = rx::getBit(predProperties, 8);
  auto hint = rx::getBit(predProperties, 12);
  auto predOp = rx::getBits(predProperties, 18, 16);
  auto cont = rx::getBit(predProperties, 31);
  switch (predOp) {
  case 0: // clear predicate
  case 1: // set ZPass predicate
  case 2: // set PrimCount predicate
    break;
  }
  // TODO
  return true;
}
// DRAW_INDIRECT: non-indexed draw whose arguments live in guest memory at
// (drawIndexIndirPatchBase + dataOffset). Currently unimplemented — dies.
bool GraphicsPipe::drawIndirect(Queue &queue) {
  auto dataOffset = queue.rptr[1];
  auto baseVtxLoc = queue.rptr[2] & ((1 << 16) - 1);
  auto startInstLoc = queue.rptr[3] & ((1 << 16) - 1);
  auto drawInitiator = queue.rptr[4];
  context.vgtDrawInitiator = drawInitiator;
  auto buffer = RemoteMemory{queue.vmId}.getPointer<std::uint32_t>(
      drawIndexIndirPatchBase + dataOffset);
  std::uint32_t vertexCountPerInstance = buffer[0];
  std::uint32_t instanceCount = buffer[1];
  std::uint32_t startVertexLocation = buffer[2];
  std::uint32_t startInstanceLocation = buffer[3];
  // FIXME
  rx::die("drawIndirect");
  return true;
}
// DRAW_INDEX_INDIRECT: indexed draw whose arguments live in guest memory
// at (drawIndexIndirPatchBase + dataOffset). Currently unimplemented — dies.
bool GraphicsPipe::drawIndexIndirect(Queue &queue) {
  auto dataOffset = queue.rptr[1];
  auto baseVtxLoc = queue.rptr[2] & ((1 << 16) - 1);
  auto drawInitiator = queue.rptr[3];
  auto buffer = RemoteMemory{queue.vmId}.getPointer<std::uint32_t>(
      drawIndexIndirPatchBase + dataOffset);
  context.vgtDrawInitiator = drawInitiator;
  std::uint32_t indexCountPerInstance = buffer[0];
  std::uint32_t instanceCount = buffer[1];
  std::uint32_t startIndexLocation = buffer[2];
  std::uint32_t baseVertexLocation = buffer[3];
  std::uint32_t startInstanceLocation = buffer[4];
  // FIXME
  rx::die("drawIndexIndirect");
  return true;
}
// INDEX_BASE: latch the index buffer base address for DRAW_INDEX_2.
// NOTE(review): dw1 is shifted left by one here, which treats the field as
// address >> 1; the PM4 spec describes it as the address with bit 0
// reserved (i.e. `& ~1`) — confirm against a captured command stream.
bool GraphicsPipe::indexBase(Queue &queue) {
  auto addressLo = queue.rptr[1] << 1;
  auto addressHi = queue.rptr[2] & ((1 << 16) - 1);
  auto address = addressLo | (static_cast<std::uint64_t>(addressHi) << 32);
  vgtIndexBase = address;
  return true;
}
// DRAW_INDEX_2: indexed draw from the buffer latched by INDEX_BASE.
// maxSize bounds the fetchable index count; indexOffset is added to the
// latched base.
bool GraphicsPipe::drawIndex2(Queue &queue) {
  auto maxSize = queue.rptr[1];
  auto indexOffset = queue.rptr[2];
  auto indexCount = queue.rptr[3];
  auto drawInitiator = queue.rptr[4];
  context.vgtDrawInitiator = drawInitiator;
  uConfig.vgtNumIndices = indexCount;
  draw(*this, queue.vmId, 0, indexCount, 0, uConfig.vgtNumInstances,
       vgtIndexBase + indexOffset, maxSize);
  return true;
}
// INDEX_TYPE: select 16- vs 32-bit indices (only bit 0 is honored).
bool GraphicsPipe::indexType(Queue &queue) {
  uConfig.vgtIndexType = static_cast<gnm::IndexType>(queue.rptr[1] & 1);
  return true;
}
// DRAW_INDEX_AUTO: non-indexed draw; indices are auto-generated 0..N-1
// (index base/size of 0 signals auto-generation to draw()).
bool GraphicsPipe::drawIndexAuto(Queue &queue) {
  auto indexCount = queue.rptr[1];
  auto drawInitiator = queue.rptr[2];
  uConfig.vgtNumIndices = indexCount;
  context.vgtDrawInitiator = drawInitiator;
  draw(*this, queue.vmId, 0, indexCount, 0, uConfig.vgtNumInstances, 0, 0);
  return true;
}
// NUM_INSTANCES: latch the instance count; hardware treats 0 as 1.
bool GraphicsPipe::numInstances(Queue &queue) {
  uConfig.vgtNumInstances = std::max(queue.rptr[1], 1u);
  return true;
}
// DRAW_INDEX_MULTI_AUTO: decode only; the actual multi-draw is not yet
// implemented.
bool GraphicsPipe::drawIndexMultiAuto(Queue &queue) {
  auto primCount = queue.rptr[1];
  auto drawInitiator = queue.rptr[2];
  auto control = queue.rptr[3];
  auto indexOffset = rx::getBits(control, 15, 0);
  auto primType = rx::getBits(control, 20, 16);
  auto indexCount = rx::getBits(control, 31, 21);
  context.vgtDrawInitiator = drawInitiator;
  uConfig.vgtPrimitiveType = static_cast<gnm::PrimitiveType>(primType);
  uConfig.vgtNumIndices = indexCount;
  // FIXME
  return true;
}
// DRAW_INDEX_OFFSET_2: decode only; the draw is not yet implemented.
bool GraphicsPipe::drawIndexOffset2(Queue &queue) {
  auto maxSize = queue.rptr[1];
  auto indexOffset = queue.rptr[2];
  auto indexCount = queue.rptr[3];
  auto drawInitiator = queue.rptr[4];
  context.vgtDrawInitiator = drawInitiator;
  // FIXME
  return true;
}
// PM4 WRITE_DATA: write a run of dwords either into a memory-mapped
// register or into guest memory. Layout: dw1 = control, dw2/dw3 =
// destination address lo/hi, dw4.. = payload.
bool GraphicsPipe::writeData(Queue &queue) {
  // The type-3 COUNT field N means the packet body is N + 1 dwords
  // (processRing advances by N + 2 including the header). Three body dwords
  // are control/address, so the payload is N - 2 dwords.
  // FIX: the original used N - 1, writing one dword past the payload.
  auto len = rx::getBits(queue.rptr[0], 29, 16) - 2;
  auto control = queue.rptr[1];
  auto dstAddressLo = queue.rptr[2];
  auto dstAddressHi = queue.rptr[3];
  auto data = queue.rptr + 4;
  // Decoded but not yet honored: target engine and write confirmation.
  auto engineSel = rx::getBits(control, 31, 30);
  auto wrConfirm = rx::getBit(control, 20);
  auto wrOneAddress = rx::getBit(control, 16);
  auto dstSel = rx::getBits(control, 11, 8);
  std::uint32_t *dstPointer = nullptr;
  switch (dstSel) {
  case 0: // memory mapped register
    dstPointer = getMmRegister(dstAddressLo & ((1 << 16) - 1));
    break;
  case 1:   // memory sync
  case 5: { // memory async
    auto address =
        (dstAddressLo & ~3) | (static_cast<std::uint64_t>(dstAddressHi) << 32);
    dstPointer = RemoteMemory{queue.vmId}.getPointer<std::uint32_t>(address);
    break;
  }
  default:
    rx::die("unimplemented write data, dst sel = %#x", dstSel);
  }
  if (wrOneAddress) {
    // WR_ONE_ADDR: all payload dwords go to the same destination dword.
    for (std::uint32_t i = 0; i < len; ++i) {
      *dstPointer = data[i];
    }
  } else {
    std::memcpy(dstPointer, data, len * sizeof(std::uint32_t));
  }
  return true;
}
// MEM_SEMAPHORE: semaphore signal/wait is not yet implemented.
bool GraphicsPipe::memSemaphore(Queue &queue) {
  // FIXME
  return true;
}
// WAIT_REG_MEM: poll a register or memory dword until (value & mask)
// satisfies the compare function. Returning false stalls the ring so the
// packet is retried on the next processRing pass.
bool GraphicsPipe::waitRegMem(Queue &queue) {
  auto engine = rx::getBit(queue.rptr[1], 8);
  auto memSpace = rx::getBit(queue.rptr[1], 4);
  auto function = rx::getBits(queue.rptr[1], 2, 0);
  auto pollAddressLo = queue.rptr[2];
  auto pollAddressHi = queue.rptr[3] & ((1 << 16) - 1);
  auto reference = queue.rptr[4];
  auto mask = queue.rptr[5];
  auto pollInterval = queue.rptr[6];
  std::uint32_t pollData;
  if (memSpace == 0) {
    // Register space: address is a dword register offset.
    pollData = *getMmRegister(pollAddressLo & ((1 << 16) - 1));
  } else {
    auto pollAddress = (pollAddressLo & ~3) |
                       (static_cast<std::uint64_t>(pollAddressHi) << 32);
    pollData = *RemoteMemory{queue.vmId}.getPointer<std::uint32_t>(pollAddress);
  }
  return compare(function, pollData, mask, reference);
}
// INDIRECT_BUFFER: queue a nested command buffer one indirection level
// deeper. NOTE(review): the vmid is decoded from dw3 but the size from
// dw4 — the standard PM4 packet packs both into dw3 (size bits 19:0,
// vmid bits 31:24); confirm the packet layout used here. The decoded vmId
// is also unused; the parent queue's vmId is reused instead.
bool GraphicsPipe::indirectBuffer(Queue &queue) {
  rx::dieIf(queue.indirectLevel < 0, "unexpected indirect buffer from CP");
  auto addressLo = queue.rptr[1] & ~3;
  auto addressHi = queue.rptr[2] & ((1 << 16) - 1);
  auto vmId = queue.rptr[3] >> 24;
  auto ibSize = queue.rptr[4] & ((1 << 20) - 1);
  auto address = addressLo | (static_cast<std::uint64_t>(addressHi) << 32);
  auto rptr = RemoteMemory{queue.vmId}.getPointer<std::uint32_t>(address);
  setDeQueue(Queue::createFromRange(queue.vmId, rptr, ibSize),
             queue.indirectLevel + 1);
  return true;
}
// PFP_SYNC_ME: PFP/ME synchronization is implicit in this sequential
// implementation; nothing to do.
bool GraphicsPipe::pfpSyncMe(Queue &queue) {
  // TODO
  return true;
}
// COND_WRITE: poll a register/memory dword; if the masked compare passes,
// write a dword to a register or memory destination.
bool GraphicsPipe::condWrite(Queue &queue) {
  auto writeSpace = rx::getBit(queue.rptr[1], 8);
  auto pollSpace = rx::getBit(queue.rptr[1], 4);
  auto function = rx::getBits(queue.rptr[1], 2, 0);
  auto pollAddressLo = queue.rptr[2];
  auto pollAddressHi = queue.rptr[3] & ((1 << 16) - 1);
  auto reference = queue.rptr[4];
  auto mask = queue.rptr[5];
  auto writeAddressLo = queue.rptr[6];
  auto writeAddressHi = queue.rptr[7] & ((1 << 16) - 1);
  auto writeData = queue.rptr[8];
  std::uint32_t pollData;
  if (pollSpace == 0) {
    // Register space: address is a dword register offset.
    pollData = *getMmRegister(pollAddressLo & ((1 << 16) - 1));
  } else {
    auto pollAddress = (pollAddressLo & ~3) |
                       (static_cast<std::uint64_t>(pollAddressHi) << 32);
    pollData = *RemoteMemory{queue.vmId}.getPointer<std::uint32_t>(pollAddress);
  }
  if (compare(function, pollData, mask, reference)) {
    if (writeSpace == 0) {
      *getMmRegister(writeAddressLo & ((1 << 16) - 1)) = writeData;
    } else {
      auto writeAddress = (writeAddressLo & ~3) |
                          (static_cast<std::uint64_t>(writeAddressHi) << 32);
      *RemoteMemory{queue.vmId}.getPointer<std::uint32_t>(writeAddress) =
          writeData;
    }
  }
  return true;
}
// EVENT_WRITE: latch the event initiator; events that carry a destination
// address (occlusion/pipeline/streamout samples) are not implemented.
bool GraphicsPipe::eventWrite(Queue &queue) {
  enum {
    kEventZPassDone = 1,
    kEventSamplePipelineStat = 2,
    kEventSampleStreamOutStat = 3,
    kEventPartialFlush = 4,
  };
  auto eventCntl = queue.rptr[1];
  auto invL2 = rx::getBit(eventCntl, 20);
  auto eventIndex = rx::getBits(eventCntl, 11, 8);
  auto eventType = rx::getBits(eventCntl, 5, 0);
  context.vgtEventInitiator = eventType;
  if (eventIndex == kEventZPassDone || eventIndex == kEventSamplePipelineStat ||
      eventIndex == kEventSampleStreamOutStat) {
    // These event kinds append an address payload (dw2/dw3).
    auto addressLo = queue.rptr[2] & ~7;
    auto addressHi = queue.rptr[3] & ((1 << 16) - 1);
    auto address = addressLo | (static_cast<std::uint64_t>(addressHi) << 32);
    rx::die("unimplemented event write, event index %#x, address %lx",
            eventIndex, address);
    return true;
  }
  // FIXME
  return true;
}
// EVENT_WRITE_EOP: end-of-pipe event that writes a fence value or
// timestamp. Unlike RELEASE_MEM, the address high half is packed into
// dw3 bits 15:0 alongside the data-select fields.
bool GraphicsPipe::eventWriteEop(Queue &queue) {
  auto eventCntl = queue.rptr[1];
  auto addressLo = queue.rptr[2] & ~3;
  auto dataCntl = queue.rptr[3];
  auto dataLo = queue.rptr[4];
  auto dataHi = queue.rptr[5];
  auto invL2 = rx::getBit(eventCntl, 20);
  auto eventIndex = rx::getBits(eventCntl, 11, 8);
  auto eventType = rx::getBits(eventCntl, 5, 0);
  auto dataSel = rx::getBits(dataCntl, 31, 29);
  auto intSel = rx::getBits(dataCntl, 25, 24);
  auto addressHi = rx::getBits(dataCntl, 15, 0);
  auto address = addressLo | (static_cast<std::uint64_t>(addressHi) << 32);
  auto pointer = RemoteMemory{queue.vmId}.getPointer<std::uint64_t>(address);
  context.vgtEventInitiator = eventType;
  switch (dataSel) {
  case 0: // none
    break;
  case 1: // 32 bit, low
    *reinterpret_cast<std::uint32_t *>(pointer) = dataLo;
    break;
  case 2: // 64 bit
    *pointer = dataLo | (static_cast<std::uint64_t>(dataHi) << 32);
    break;
  case 3: // 64 bit, global GPU clock
    *pointer = std::chrono::duration_cast<std::chrono::nanoseconds>(
                   std::chrono::system_clock::now().time_since_epoch())
                   .count();
    break;
  case 4: // 64 bit, perf counter
    *pointer = std::chrono::duration_cast<std::chrono::nanoseconds>(
                   std::chrono::steady_clock::now().time_since_epoch())
                   .count();
    break;
  default:
    rx::die("unimplemented event write eop data %#x", dataSel);
  }
  return true;
}
// EVENT_WRITE_EOS: end-of-shader event; supports the 32-bit fence write,
// GDS dump is not implemented.
bool GraphicsPipe::eventWriteEos(Queue &queue) {
  auto eventCntl = queue.rptr[1];
  auto addressLo = queue.rptr[2] & ~3;
  auto cmdInfo = queue.rptr[3];
  auto dataInfo = queue.rptr[4];
  auto eventIndex = rx::getBits(eventCntl, 11, 8);
  auto eventType = rx::getBits(eventCntl, 5, 0);
  auto cmd = rx::getBits(cmdInfo, 31, 29);
  auto addressHi = rx::getBits(cmdInfo, 15, 0);
  auto address = addressLo | (static_cast<std::uint64_t>(addressHi) << 32);
  auto pointer = RemoteMemory{queue.vmId}.getPointer<std::uint32_t>(address);
  context.vgtEventInitiator = eventType;
  switch (cmd) {
  case 1: { // store GDS data to memory
    auto sizeDw = rx::getBits(dataInfo, 31, 16);
    auto gdsIndexDw = rx::getBits(dataInfo, 15, 0);
    rx::die("unimplemented event write eos gds data");
    break;
  }
  case 2: // after GDS writes confirm, store 32 bit DATA to memory as fence
    *pointer = dataInfo;
    break;
  default:
    rx::die("unexpected event write eos command: %#x", cmd);
  }
  return true;
}
// DMA_DATA: CP DMA transfers are not yet implemented; the packet is
// silently consumed.
bool GraphicsPipe::dmaData(Queue &queue) {
  // FIXME
  return true;
}
// SET_CONFIG_REG: burst write into the device-global config register
// block. dw1 carries the dword offset, dw2.. the values. Only the primary
// (non-indirect) DE ring may touch these registers.
bool GraphicsPipe::setConfigReg(Queue &queue) {
  rx::dieIf(queue.indirectLevel != 0, "setConfigReg from queue %d",
            queue.indirectLevel);
  auto count = rx::getBits(queue.rptr[0], 29, 16);
  auto regOffset = queue.rptr[1];
  auto values = queue.rptr + 2;
  rx::dieIf(
      (regOffset + count) * sizeof(std::uint32_t) > sizeof(device->config),
      "out of Config regs, offset: %u, count %u, %s\n", regOffset, count,
      gnm::mmio::registerName(decltype(device->config)::kMmioOffset +
                              regOffset));
  auto dst = reinterpret_cast<std::uint32_t *>(&device->config) + regOffset;
  std::memcpy(dst, values, sizeof(std::uint32_t) * count);
  return true;
}
// SET_SH_REG: burst write into the persistent-state (SH) register block.
// dw1 carries the dword offset relative to the block, dw2.. the values.
bool GraphicsPipe::setShReg(Queue &queue) {
  auto count = rx::getBits(queue.rptr[0], 29, 16);
  auto regOffset = queue.rptr[1];
  auto values = queue.rptr + 2;
  rx::dieIf((regOffset + count) * sizeof(std::uint32_t) > sizeof(sh),
            "out of SH regs, offset: %u, count %u, %s\n", regOffset, count,
            gnm::mmio::registerName(decltype(sh)::kMmioOffset + regOffset));
  auto dst = reinterpret_cast<std::uint32_t *>(&sh) + regOffset;
  std::memcpy(dst, values, sizeof(std::uint32_t) * count);
  return true;
}
// SET_UCONFIG_REG: burst write into the user-config register block.
// dw1 carries the dword offset relative to the block, dw2.. the values.
bool GraphicsPipe::setUConfigReg(Queue &queue) {
  auto count = rx::getBits(queue.rptr[0], 29, 16);
  auto regOffset = queue.rptr[1];
  auto values = queue.rptr + 2;
  rx::dieIf((regOffset + count) * sizeof(std::uint32_t) > sizeof(uConfig),
            "out of UConfig regs, offset: %u, count %u, %s\n", regOffset,
            count,
            gnm::mmio::registerName(decltype(uConfig)::kMmioOffset +
                                    regOffset));
  auto dst = reinterpret_cast<std::uint32_t *>(&uConfig) + regOffset;
  std::memcpy(dst, values, sizeof(std::uint32_t) * count);
  return true;
}
// SET_CONTEXT_REG: burst write into the render-context register block.
// dw1 carries the dword offset relative to the block, dw2.. the values.
bool GraphicsPipe::setContextReg(Queue &queue) {
  auto count = rx::getBits(queue.rptr[0], 29, 16);
  auto regOffset = queue.rptr[1];
  auto values = queue.rptr + 2;
  rx::dieIf((regOffset + count) * sizeof(std::uint32_t) > sizeof(context),
            "out of Context regs, offset: %u, count %u, %s\n", regOffset,
            count,
            gnm::mmio::registerName(decltype(context)::kMmioOffset +
                                    regOffset));
  auto dst = reinterpret_cast<std::uint32_t *>(&context) + regOffset;
  std::memcpy(dst, values, sizeof(std::uint32_t) * count);
  return true;
}
// SET_CE_DE_COUNTERS: initialize both CE and DE counters to one value.
bool GraphicsPipe::setCeDeCounters(Queue &queue) {
  auto counterLo = queue.rptr[1];
  auto counterHi = queue.rptr[2];
  auto counter = counterLo | (static_cast<std::uint64_t>(counterHi) << 32);
  deCounter = counter;
  ceCounter = counter;
  return true;
}
// WAIT_ON_CE_COUNTER (DE side): stall the DE ring until the DE counter
// reaches the requested value; false stalls and retries later.
bool GraphicsPipe::waitOnCeCounter(Queue &queue) {
  auto counterLo = queue.rptr[1];
  auto counterHi = queue.rptr[2];
  auto counter = counterLo | (static_cast<std::uint64_t>(counterHi) << 32);
  return deCounter >= counter;
}
// WAIT_ON_DE_COUNTER_DIFF (CE side): stall the CE ring while it is more
// than waitDiff packets ahead of the DE.
bool GraphicsPipe::waitOnDeCounterDiff(Queue &queue) {
  auto waitDiff = queue.rptr[1];
  auto diff = ceCounter - deCounter;
  return diff < waitDiff;
}
// INCREMENT_CE_COUNTER: bump the constant-engine progress counter.
bool GraphicsPipe::incrementCeCounter(Queue &queue) {
  ceCounter++;
  return true;
}
// INCREMENT_DE_COUNTER: bump the draw-engine progress counter.
bool GraphicsPipe::incrementDeCounter(Queue &queue) {
  deCounter++;
  return true;
}
// LOAD_CONST_RAM: copy numDw dwords from guest memory into CE constant
// RAM at the byte offset given in dw4.
bool GraphicsPipe::loadConstRam(Queue &queue) {
  std::uint32_t srcLo = queue.rptr[1];
  std::uint32_t srcHi = queue.rptr[2];
  std::uint32_t numDw = queue.rptr[3] & ((1 << 15) - 1);
  std::uint32_t dstDw =
      (queue.rptr[4] & ((1 << 16) - 1)) / sizeof(std::uint32_t);
  auto srcAddress = srcLo | (static_cast<std::uint64_t>(srcHi) << 32);
  auto src = RemoteMemory{queue.vmId}.getPointer(srcAddress);
  std::memcpy(constantMemory + dstDw, src, numDw * sizeof(std::uint32_t));
  return true;
}
// WRITE_CONST_RAM: store one immediate dword into CE constant RAM at the
// byte offset given in dw1.
bool GraphicsPipe::writeConstRam(Queue &queue) {
  std::uint32_t dstDw =
      (queue.rptr[1] & ((1 << 16) - 1)) / sizeof(std::uint32_t);
  std::uint32_t value = queue.rptr[2];
  std::memcpy(constantMemory + dstDw, &value, sizeof(std::uint32_t));
  return true;
}
// DUMP_CONST_RAM: copy numDw dwords out of CE constant RAM into guest
// memory at the address given in dw3/dw4.
bool GraphicsPipe::dumpConstRam(Queue &queue) {
  std::uint32_t srcDw =
      (queue.rptr[1] & ((1 << 16) - 1)) / sizeof(std::uint32_t);
  std::uint32_t numDw = queue.rptr[2] & ((1 << 15) - 1);
  std::uint32_t dstLo = queue.rptr[3];
  std::uint32_t dstHi = queue.rptr[4];
  auto dstAddress = dstLo | (static_cast<std::uint64_t>(dstHi) << 32);
  auto dst = RemoteMemory{queue.vmId}.getPointer(dstAddress);
  std::memcpy(dst, constantMemory + srcDw, numDw * sizeof(std::uint32_t));
  return true;
}
// Fallback handler for opcodes without an implementation; always fatal.
bool GraphicsPipe::unknownPacket(Queue &queue) {
  auto op = rx::getBits(queue.rptr[0], 15, 8);
  rx::die("unimplemented gfx pm4 packet: %s, queue %u\n",
          gnm::pm4OpcodeToString(op), queue.indirectLevel);
}

135
rpcsx-gpu2/Pipe.hpp Normal file
View file

@ -0,0 +1,135 @@
#pragma once
#include "Registers.hpp"
#include "Scheduler.hpp"
#include <cstdint>
#include <vulkan/vulkan_core.h>
namespace amdgpu {
class Device;
// One command ring: a dword buffer plus read/write cursors.
struct Queue {
  // Guest address-space id this queue's packets refer to; -1 = unset.
  int vmId = -1;
  // -1 = constant engine, 0.. = draw-engine indirection depth.
  int indirectLevel = -1;
  // Optional doorbell dword used by user-level queues.
  std::uint32_t *doorbell{};
  std::uint32_t *base{};
  // Ring size in dwords.
  std::uint64_t size{};
  std::uint32_t *rptr{};
  std::uint32_t *wptr{};

  // Builds a queue over [base, base + size); wptr starts at the end, so
  // the whole range is treated as pending work (used for indirect buffers).
  static Queue createFromRange(int vmId, std::uint32_t *base,
                               std::uint64_t size, int indirectLevel = 0,
                               std::uint32_t *doorbell = nullptr) {
    Queue result;
    result.vmId = vmId;
    result.indirectLevel = indirectLevel;
    result.doorbell = doorbell;
    result.base = base;
    result.size = size;
    result.rptr = base;
    result.wptr = base + size;
    return result;
  }
};
// One asynchronous compute pipe: 8 user queues dispatched through a
// per-opcode handler table.
struct ComputePipe {
  Device *device;
  Scheduler scheduler;

  // Member-function handler per PM4 opcode; returns false to stall.
  using CommandHandler = bool (ComputePipe::*)(Queue &);
  CommandHandler commandHandlers[255];
  Queue queues[8];
  Registers::ComputeConfig computeConfig;

  ComputePipe(int index);

  bool processAllRings();
  void processRing(Queue &queue);
  void mapQueue(int queueId, Queue queue);
  bool setShReg(Queue &queue);
  bool unknownPacket(Queue &queue);
  bool handleNop(Queue &queue);
};
// The graphics pipe: one constant-engine ring plus three draw-engine rings
// (primary + two indirect levels), with PM4 handlers for each opcode.
struct GraphicsPipe {
  Device *device;
  Scheduler scheduler;

  // CE/DE progress counters used by the CE/DE synchronization packets.
  std::uint64_t ceCounter = 0;
  std::uint64_t deCounter = 0;
  // Base addresses latched by SET_BASE.
  std::uint64_t displayListPatchBase = 0;
  std::uint64_t drawIndexIndirPatchBase = 0;
  std::uint64_t gdsPartitionBases[2]{};
  std::uint64_t cePartitionBases[2]{};
  // Index buffer state latched by INDEX_BASE / INDEX_BUFFER_SIZE.
  std::uint64_t vgtIndexBase = 0;
  std::uint32_t vgtIndexBufferSize = 0;
  // 48 KiB of constant-engine RAM.
  std::uint32_t constantMemory[(48 * 1024) / sizeof(std::uint32_t)]{};

  // Register blocks addressed by getMmRegister / SET_*_REG packets.
  Registers::ShaderConfig sh;
  Registers::Context context;
  Registers::UConfig uConfig;

  Queue deQueues[3];
  Queue ceQueue;

  // Member-function handler per PM4 opcode; returns false to stall.
  // Indexed [cp][op] where cp selects the CE/DE handler table.
  using CommandHandler = bool (GraphicsPipe::*)(Queue &);
  CommandHandler commandHandlers[3][255];

  GraphicsPipe(int index);

  void setCeQueue(Queue queue);
  void setDeQueue(Queue queue, int ring);

  bool processAllRings();
  void processRing(Queue &queue);

  bool drawPreamble(Queue &queue);
  bool indexBufferSize(Queue &queue);
  bool handleNop(Queue &queue);
  bool contextControl(Queue &queue);
  bool acquireMem(Queue &queue);
  bool releaseMem(Queue &queue);
  bool dispatchDirect(Queue &queue);
  bool dispatchIndirect(Queue &queue);
  bool writeData(Queue &queue);
  bool memSemaphore(Queue &queue);
  bool waitRegMem(Queue &queue);
  bool indirectBuffer(Queue &queue);
  bool condWrite(Queue &queue);
  bool eventWrite(Queue &queue);
  bool eventWriteEop(Queue &queue);
  bool eventWriteEos(Queue &queue);
  bool dmaData(Queue &queue);
  bool setBase(Queue &queue);
  bool clearState(Queue &queue);
  bool setPredication(Queue &queue);
  bool drawIndirect(Queue &queue);
  bool drawIndexIndirect(Queue &queue);
  bool indexBase(Queue &queue);
  bool drawIndex2(Queue &queue);
  bool indexType(Queue &queue);
  bool drawIndexAuto(Queue &queue);
  bool numInstances(Queue &queue);
  bool drawIndexMultiAuto(Queue &queue);
  bool drawIndexOffset2(Queue &queue);
  bool pfpSyncMe(Queue &queue);
  bool setCeDeCounters(Queue &queue);
  bool waitOnCeCounter(Queue &queue);
  bool waitOnDeCounterDiff(Queue &queue);
  bool incrementCeCounter(Queue &queue);
  bool incrementDeCounter(Queue &queue);
  bool loadConstRam(Queue &queue);
  bool writeConstRam(Queue &queue);
  bool dumpConstRam(Queue &queue);
  bool setConfigReg(Queue &queue);
  bool setShReg(Queue &queue);
  bool setUConfigReg(Queue &queue);
  bool setContextReg(Queue &queue);
  bool unknownPacket(Queue &queue);

  std::uint32_t *getMmRegister(std::uint32_t dwAddress);
};
} // namespace amdgpu

52
rpcsx-gpu2/Registers.cpp Normal file
View file

@ -0,0 +1,52 @@
#include "Registers.hpp"
// Hardware reset values for the context register block, applied by
// CLEAR_STATE. NOTE(review): the magic numbers (incl. the unk_* fields)
// look like values captured from real hardware — confirm against a
// register dump before changing any of them.
amdgpu::Registers::Context amdgpu::Registers::Context::Default = [] {
  amdgpu::Registers::Context result{};
  // Scissors default to a fully open 16384x16384 window.
  result.paScScreenScissor.bottom = 0x4000;
  result.paScScreenScissor.right = 0x4000;
  result.paScWindowScissor.top = 0x8000;
  result.paScWindowScissor.bottom = 0x4000;
  result.paScWindowScissor.right = 0x4000;
  for (auto &clipRect : result.paScClipRect) {
    clipRect.bottom = 0x4000;
    clipRect.right = 0x4000;
  }
  result.unk_8c = 0xaa99aaaa;
  result.paScGenericScissor.top = 0x8000;
  result.paScGenericScissor.bottom = 0x4000;
  result.paScGenericScissor.right = 0x4000;
  for (auto &vportScissor : result.paScVportScissor) {
    vportScissor.top = 0x8000;
    vportScissor.bottom = 0x4000;
    vportScissor.right = 0x4000;
  }
  // Viewport depth range defaults to [0, 1].
  for (auto &vportZ : result.paScVportZ) {
    vportZ.min = 0.0f;
    vportZ.max = 1.0f;
  }
  result.unk_d4 = 0x2a00161a;
  result.spiPsInControl = 2;
  result.paClClipCntl = 0x0009'0000;
  result.paSuScModeCntl.polyMode = gnm::PolyMode::Dual;
  result.vgtGsPerEs = 256;
  result.vgtEsPerGs = 128;
  result.vgtGsPerVs = 2;
  result.iaMultiVgtParam = 0xff;
  result.unk_2f7 = 0x00001000;
  result.paSuVtxCntl.pixCenterHalf = true;
  result.paSuVtxCntl.roundMode = gnm::RoundMode::RoundToEven;
  // Guard-band clip/discard adjust default to 1.0 (no guard band).
  result.paClGbVertClipAdj = 1.0f;
  result.paClGbVertDiscAdj = 1.0f;
  result.paClGbHorzClipAdj = 1.0f;
  result.paClGbHorzDiscAdj = 1.0f;
  result.unk_316 = 0xe;
  result.vgtOutDeallocCntl = 0x10;
  return result;
}();

931
rpcsx-gpu2/Registers.hpp Normal file
View file

@ -0,0 +1,931 @@
#pragma once
#include "amdgpu/tiler.hpp"
#include "gnm/constants.hpp"
#include <array>
#include <cstdint>
#include <type_traits>
namespace amdgpu {
// Command-processor micro-engines: micro engine, prefetch parser,
// constant engine.
enum class Engine {
  ME,
  PFP,
  CE,
};
// EVENT_WRITE event-index values.
// NOTE(review): SAMAPE_PIPELINE_STAT is a typo for SAMPLE_PIPELINE_STAT;
// kept as-is because other translation units may already reference it.
enum class EventIndex {
  OTHER,
  ZPASS_DONE,
  SAMAPE_PIPELINE_STAT,
  SAMPLE_STREAM_OUT_STATS,
  CS_VS_PS_PARTIAL_FLUSH,
  ANY_EOP_TIMESTAMP,
  CS_PS_EOS,
};
// Access direction reported in a VM protection fault status register.
enum class ProtectionFaultAccess : std::uint32_t {
  Read = 0,
  Write = 1,
};
namespace detail {
// Everything below is packed so register structs mirror the MMIO layout
// dword-for-dword. NOTE(review): the matching #pragma pack(pop) must
// appear after the last register struct — verify it exists further down.
#pragma pack(push, 1)
// Occupies exactly Count dwords; used to place a Register at its offset.
template <std::size_t Count> struct Padding {
private:
  std::uint32_t _[Count];
};
} // namespace detail
// A register placed Offset dwords from the start of its block; for struct
// payloads it inherits ImplT so fields are accessed directly.
template <std::size_t Offset, typename ImplT = std::uint32_t>
struct Register : detail::Padding<Offset>, ImplT {
  Register() = default;
  Register(const Register &) = default;
  Register &operator=(const Register &) = default;
  Register &operator=(const ImplT &newValue) {
    *static_cast<ImplT *>(this) = newValue;
    return *this;
  }
};
// Specialization for scalar payloads (ints, floats, enums): stores the
// value directly and converts implicitly on read.
template <std::size_t Offset, typename ImplT>
  requires(std::is_integral_v<ImplT> || std::is_floating_point_v<ImplT> ||
           std::is_enum_v<ImplT>)
struct Register<Offset, ImplT> : detail::Padding<Offset> {
  ImplT value;

  Register() = default;
  Register(const Register &) = default;
  Register &operator=(const Register &) = default;
  Register &operator=(ImplT newValue) {
    value = newValue;
    return *this;
  }
  // FIX: const-qualified so the register can be read through a const
  // register block (the non-const operator made const reads ill-formed).
  operator ImplT() const { return value; }
};
// CB_COLOR*_ATTRIB: render-target tiling and multisample attributes.
struct CbColorAttrib {
  union {
    struct {
      std::uint32_t tileModeIndex : 5;
      std::uint32_t fmaskTileModeIndex : 4;
      std::uint32_t : 3;
      std::uint32_t numSamples : 3;   // log2 of the sample count
      std::uint32_t numFragments : 2; // log2 of the fragment count
      std::uint32_t forceDstAlpha1 : 1;
    };
    std::uint32_t raw;
  };
};
// CB_COLOR*_VIEW: slice range of the render target that is rendered to.
struct CbColorView {
  union {
    struct {
      std::uint32_t sliceStart : 11;
      std::uint32_t : 2;
      std::uint32_t sliceMax : 11;
    };
    std::uint32_t raw;
  };
};
// CB_COLOR_CONTROL: global color-buffer mode and raster logic op.
struct CbColorControl {
  union {
    struct {
      std::uint32_t : 3;
      std::uint32_t degammaEnable : 1;
      gnm::CbMode mode : 3;
      std::uint32_t : 9;
      std::uint32_t rop3 : 8; // raster operation code
    };
    std::uint32_t raw;
  };
};
// CB_SHADER_MASK: per-MRT 4-bit component enables for shader exports.
struct CbShaderMask {
  union {
    struct {
      std::uint32_t output0Enable : 4;
      std::uint32_t output1Enable : 4;
      std::uint32_t output2Enable : 4;
      std::uint32_t output3Enable : 4;
      std::uint32_t output4Enable : 4;
      std::uint32_t output5Enable : 4;
      std::uint32_t output6Enable : 4;
      std::uint32_t output7Enable : 4;
    };
    std::uint32_t raw;
  };
};
// CB_TARGET_MASK: per-render-target 4-bit component write masks.
struct CbTargetMask {
  union {
    struct {
      std::uint32_t target0Enable : 4;
      std::uint32_t target1Enable : 4;
      std::uint32_t target2Enable : 4;
      std::uint32_t target3Enable : 4;
      std::uint32_t target4Enable : 4;
      std::uint32_t target5Enable : 4;
      std::uint32_t target6Enable : 4;
      std::uint32_t target7Enable : 4;
    };
    std::uint32_t raw;
  };
};
// COMP_SWAP: component swizzle applied to color-buffer stores.
enum class CbCompSwap : std::uint32_t {
  Std,
  Alt,
  StdRev,
  AltRev,
};
// CB_COLOR*_INFO: render-target format, swizzle and compression controls.
struct CbColorInfo {
  union {
    struct {
      std::uint32_t endian : 2;
      gnm::DataFormat dfmt : 5;
      std::uint32_t linearGeneral : 1;
      gnm::NumericFormat nfmt : 3;
      CbCompSwap compSwap : 2;
      std::uint32_t fastClear : 1;
      std::uint32_t compression : 1;
      std::uint32_t blendClamp : 1;
      std::uint32_t blendBypass : 1;
      std::uint32_t simpleFloat : 1;
      std::uint32_t roundMode : 1;
      std::uint32_t cmaskIsLinear : 1;
      std::uint32_t blendOptDontRdDst : 3;
      std::uint32_t blendOptDiscardPixel : 3;
    };
    std::uint32_t raw;
  };
};
// One render target's CB_COLOR* register group, in MMIO order.
// NOTE(review): cmaskSlice is declared as a lone 14-bit bitfield; it still
// occupies a full dword, but confirm this matches the register layout.
struct CbColor {
  std::uint32_t base; // address >> 8
  std::uint32_t pitch;
  std::uint32_t slice;
  CbColorView view;
  CbColorInfo info;
  CbColorAttrib attrib;
  std::uint32_t dccBase;
  std::uint32_t cmask;
  std::uint32_t cmaskSlice : 14;
  std::uint32_t fmask;
  std::uint32_t fmaskSlice;
  std::uint32_t clearWord0;
  std::uint32_t clearWord1;
  std::uint32_t clearWord2;
  std::uint32_t clearWord3;
};
// PA_CL_VPORT_*: viewport transform scale/offset per axis.
struct PaClVport {
  float xScale;
  float xOffset;
  float yScale;
  float yOffset;
  float zScale;
  float zOffset;
};
// PA_SC_VPORT_Z*: viewport depth range.
struct PaScVportZ {
  float min;
  float max;
};
// PA_SC_* rectangle: two packed 16-bit corner pairs (TL then BR dword).
struct PaScRect {
  std::uint16_t left;
  std::uint16_t top;
  std::uint16_t right;
  std::uint16_t bottom;
};
// SPI_SHADER_PGM_* group for one shader stage: program address, the two
// resource dwords (with per-stage overlay fields) and 16 user SGPR slots.
struct SpiShaderPgm {
  std::uint32_t rsrc3;
  std::uint64_t address; // shader code address >> 8
  union {
    struct {
      std::uint32_t vgprs : 6; // allocated VGPRs / 4 - 1
      std::uint32_t sgprs : 4; // allocated SGPRs / 8 - 1
      std::uint32_t priority : 2;
      std::uint32_t floatMode : 8;
      std::uint32_t priv : 1;
      std::uint32_t dx10Clamp : 1;
      std::uint32_t debugMode : 1;
      std::uint32_t ieeeMode : 1;
    };
    // Per-stage overlays for the bits above 24.
    struct {
      std::uint32_t : 24;
      std::uint32_t cuGroupEnable : 1;
    } es;
    struct {
      std::uint32_t : 24;
      std::uint32_t cuGroupEnable : 1;
    } gs;
    struct {
      std::uint32_t : 24;
      std::uint32_t vgprCompCnt : 2;
    } ls;
    struct {
      std::uint32_t : 24;
      std::uint32_t cuGroupDisable : 1;
    } ps;
    struct {
      std::uint32_t : 24;
      std::uint32_t vgprCompCnt : 2;
      std::uint32_t cuGroupEnable : 1;
    } vs;

    // Decoded allocation sizes.
    std::uint8_t getVGprCount() const { return (vgprs + 1) * 4; }
    std::uint8_t getSGprCount() const { return (sgprs + 1) * 8; }

    std::uint32_t raw;
  } rsrc1;
  union {
    struct {
      std::uint32_t scratchEn : 1;
      std::uint32_t userSgpr : 5; // number of user SGPRs loaded by SPI
      std::uint32_t trapPresent : 1;
    };
    // Per-stage overlays for the bits above 7.
    struct {
      std::uint32_t : 7;
      std::uint32_t ocLdsEn : 1;
      std::uint32_t soBase0En : 1;
      std::uint32_t soBase1En : 1;
      std::uint32_t soBase2En : 1;
      std::uint32_t soBase3En : 1;
      std::uint32_t soEn : 1;
      std::uint32_t excpEn : 7;
    } vs;
    struct {
      std::uint32_t : 7;
      std::uint32_t ocLdsEn : 1;
      std::uint32_t excpEn : 7;
    } es;
    struct {
      std::uint32_t : 7;
      std::uint32_t excpEn : 7;
    } gs;
    struct {
      std::uint32_t : 7;
      std::uint32_t ocLdsEn : 1;
      std::uint32_t tgSizeEn : 1;
      std::uint32_t excpEn : 7;
    } hs;
    struct {
      std::uint32_t : 7;
      std::uint32_t ldsSize : 9;
      std::uint32_t excpEn : 7;
    } ls;
    std::uint32_t raw;
  } rsrc2;
  std::array<std::uint32_t, 16> userData;
};
// VM_CONTEXT*_PROTECTION_FAULT_STATUS decode.
struct VmProtectionFault {
  std::uint32_t protection : 8;
  std::uint32_t : 4;
  std::uint32_t client : 8; // memory client that faulted
  std::uint32_t : 4;
  ProtectionFaultAccess rw : 1;
  std::uint32_t vmid : 4;
  std::uint32_t : 3;
};
// LS_EN field of VGT_SHADER_STAGES_EN: role of the LS hardware stage.
enum class LsStage : std::uint32_t {
  LsOff,
  LsOn,
  CsOn,
};
// ES_EN field of VGT_SHADER_STAGES_EN: role of the ES hardware stage.
enum class EsStage : std::uint32_t {
  EsOff,
  EsDs,
  EsReal,
};
// VS_EN field of VGT_SHADER_STAGES_EN: role of the VS hardware stage.
enum class VsStage : std::uint32_t {
  VsReal,
  VsDs,
  VsCopy,
};
// VGT_SHADER_STAGES_EN: selects which hardware shader stages are active
// and what role each one plays in the current pipeline configuration.
struct VgtShaderStagesEn {
  union {
    struct {
      LsStage lsEn : 2;
      bool hsEn : 1;
      EsStage esEn : 2;
      bool gsEn : 1;
      VsStage vsEn : 2;
      bool dynamicHs : 1;
    };
    std::uint32_t raw;
  };
};
// Framebuffer location info; `base` stores the address shifted right by 24.
struct FbInfo {
  std::uint16_t base; // address >> 24
  std::uint16_t unk;  // NOTE(review): meaning unknown — not yet decoded
};
// DB_DEPTH_CONTROL: depth/stencil test enables and compare functions.
struct DbDepthControl {
  union {
    struct {
      bool stencilEnable : 1;
      bool depthEnable : 1;
      bool depthWriteEnable : 1;
      bool depthBoundsEnable : 1;
      gnm::CompareFunc zFunc : 3;
      bool backFaceEnable : 1; // use separate back-face stencil state
      gnm::CompareFunc stencilFunc : 3;
      std::uint32_t : 9;
      gnm::CompareFunc stencilFuncBackFace : 3;
      std::uint32_t : 7;
      bool enableColorWritesOnDepthFail : 1;
      bool disableColorWritesOnDepthPass : 1;
    };
    std::uint32_t raw;
  };
};
// DB_Z_INFO: depth-surface format, sampling and tiling parameters.
struct DbZInfo {
  union {
    struct {
      gnm::ZFormat format : 2;
      std::uint32_t numSamples : 2;
      std::uint32_t : 16;
      std::uint32_t tileModeIndex : 3;
      std::uint32_t : 4;
      bool allowExpClear : 1;
      std::uint32_t readSize : 1; // 0 - 256 bit, 1 - 512 bit
      bool tileSurfaceEnable : 1;
      std::uint32_t : 1;
      bool zRangePrecision : 1;
    };
    std::uint32_t raw;
  };
};
// DB_RENDER_CONTROL: fast-clear, copy and compression controls for the
// depth/stencil block.
struct DbRenderControl {
  union {
    struct {
      bool depthClearEnable : 1;
      bool stencilClearEnable : 1;
      bool depthCopy : 1;
      bool stencilCopy : 1;
      bool resummarizeEnable : 1;
      bool stencilCompressDisable : 1;
      bool depthCompressDisable : 1;
      bool copyCentroid : 1;
      std::uint32_t copySample : 4;
    };
    std::uint32_t raw;
  };
};
// CB_BLEND*_CONTROL: per-render-target blend equation (separate color and
// alpha source/destination multipliers plus combine functions).
struct CbBlendControl {
  union {
    struct {
      gnm::BlendMultiplier colorSrcBlend : 5;
      gnm::BlendFunc colorCombFcn : 3;
      gnm::BlendMultiplier colorDstBlend : 5;
      std::uint32_t : 3;
      gnm::BlendMultiplier alphaSrcBlend : 5;
      gnm::BlendFunc alphaCombFcn : 3;
      gnm::BlendMultiplier alphaDstBlend : 5;
      bool separateAlphaBlend : 1; // use the alpha* fields instead of color*
      bool enable : 1;
      bool disableRop3 : 1;
    };
    std::uint32_t raw;
  };
};
// PA_SU_SC_MODE_CNTL: rasterizer state — culling, polygon mode and
// polygon-offset enables.
struct PaSuScModeCntl {
  union {
    struct {
      bool cullFront : 1;
      bool cullBack : 1;
      gnm::Face face : 1; // front-face winding
      gnm::PolyMode polyMode : 2;
      gnm::PolyModePtype polyModeFrontPtype : 3;
      gnm::PolyModePtype polyModeBackPtype : 3;
      bool polyOffsetFrontEnable : 1;
      bool polyOffsetBackEnable : 1;
      bool polyOffsetParaEnable : 1;
      std::uint32_t : 2;
      bool vtxWindowOffsetEnable : 1;
      std::uint32_t : 2;
      bool provokingVtxLast : 1;
      bool perspCorrDis : 1;
      bool multiPrimIbEna : 1;
    };
    std::uint32_t raw;
  };
};
// PA_SU_VTX_CNTL: vertex position quantization and rounding behavior.
struct PaSuVtxCntl {
  union {
    struct {
      bool pixCenterHalf : 1; // pixel center at .5 rather than .0
      gnm::RoundMode roundMode : 2;
      gnm::QuantMode quantMode : 3;
    };
    std::uint32_t raw;
  };
};
// SPI_PS_INPUT_ENA / SPI_PS_INPUT_ADDR: which interpolants and system
// values the pixel shader receives in its input VGPRs.
struct SpiPsInput {
  union {
    struct {
      bool perspSampleEna : 1;
      bool perspCenterEna : 1;
      bool perspCentroidEna : 1;
      bool perspPullModelEna : 1;
      bool linearSampleEna : 1;
      bool linearCenterEna : 1;
      bool linearCentroidEna : 1;
      bool lineStippleTexEna : 1;
      bool posXFloatEna : 1;
      bool posYFloatEna : 1;
      bool posZFloatEna : 1;
      bool posWFloatEna : 1;
      bool frontFaceEna : 1;
      bool ancillaryEna : 1;
      bool sampleCoverageEna : 1;
      bool posFixedPtEna : 1;
    };
    std::uint32_t raw;
  };
};
// Default value substituted for a PS input when useDefaultVal is set
// (encodes the constant written to x/y/z/w).
enum class SpiPsDefaultVal : std::uint8_t {
  X0_Y0_Z0_W0,
  X0_Y0_Z0_W1,
  X1_Y1_Z1_W0,
  X1_Y1_Z1_W1,
};
// SPI_PS_INPUT_CNTL_n: per-attribute interpolation control for PS inputs.
struct SpiPsInputCntl {
  union {
    struct {
      std::uint32_t offset : 4; // parameter-cache offset of this attribute
      bool useDefaultVal : 1;
      std::uint32_t : 3;
      SpiPsDefaultVal defaultVal : 2;
      bool flatShade : 1;
      std::uint32_t : 2;
      std::uint32_t cylWrap : 4;
      bool ptSpriteTex : 1; // replace attribute with point-sprite coords
    };
    std::uint32_t raw;
  };
};
// Full MMIO register file of the emulated GPU. Each Register<offset, T>
// member overlays `raw` at the given dword offset; the nested structs group
// registers by hardware block (each with its own kMmioOffset base).
// Offsets are dword indices into `raw`, not byte addresses.
struct Registers {
  static constexpr auto kRegisterCount = 0xf000; // total dwords in the file
  // Global configuration block (tile modes, ring sizes, resource words).
  struct Config {
    static constexpr auto kMmioOffset = 0x2000;
    Register<0xad, std::array<std::uint32_t, 3>> cpPrtLodStatsCntls;
    Register<0x1c0> cpRbRptr;
    Register<0x1bf> cpRb1Rptr;
    Register<0x1be> cpRb2Rptr;
    Register<0x232> vgtEsGsRingSize;
    Register<0x233> vgtGsVsRingSize;
    Register<0x262> vgtTfRingSize;
    Register<0x26e> vgtTfMemoryBase;
    Register<0x3c0, std::array<std::uint32_t, 4>> sqBufRsrcWords;
    Register<0x3c4, std::array<std::uint32_t, 7>> sqImgRsrcWords;
    Register<0x3cc, std::array<std::uint32_t, 4>> sqImgSampWords;
    Register<0x644, std::array<TileMode, 32>> gbTileModes;
    Register<0x664, std::array<MacroTileMode, 16>> gbMacroTileModes;
  };
  // Compute-dispatch state; laid out as a flat struct (with explicit
  // padding) because the registers are contiguous in MMIO space.
  struct ComputeConfig {
    static constexpr auto kMmioOffset = 0x2e00;
    std::uint32_t computeDispatchInitiator;
    std::uint32_t _pad0[6];
    std::uint32_t computeNumThreadX;
    std::uint32_t computeNumThreadY;
    std::uint32_t computeNumThreadZ;
    std::uint32_t _pad1[2];
    std::uint32_t computePgmLo;
    std::uint32_t computePgmHi;
    std::uint32_t _pad2[4];
    std::uint32_t computePgmRsrc1;
    std::uint32_t computePgmRsrc2;
    std::uint32_t _pad3[1];
    std::uint32_t computeResourceLimits;
    std::uint32_t computeStaticThreadMgmtSe0;
    std::uint32_t computeStaticThreadMgmtSe1;
    std::uint32_t computeTmpRingSize;
    std::uint32_t _pad4[39];
    std::array<std::uint32_t, 16> userData;
  };
  // Per-stage shader program registers (SPI_SHADER_PGM_*) plus compute.
  struct ShaderConfig {
    static constexpr auto kMmioOffset = 0x2c00;
    union {
      Register<0x7, SpiShaderPgm> spiShaderPgmPs;
      Register<0x47, SpiShaderPgm> spiShaderPgmVs;
      Register<0x87, SpiShaderPgm> spiShaderPgmGs;
      Register<0xc7, SpiShaderPgm> spiShaderPgmEs;
      Register<0x107, SpiShaderPgm> spiShaderPgmHs;
      Register<0x147, SpiShaderPgm> spiShaderPgmLs;
      Register<0x200, ComputeConfig> compute;
    };
  };
  // Draw-context registers (depth, blend, scissor, VGT draw state, ...).
  struct Context {
    static constexpr auto kMmioOffset = 0xa000;
    static Context Default; // hardware reset values
    union {
      Register<0x0, DbRenderControl> dbRenderControl;
      Register<0x1> dbCountControl;
      Register<0x2> dbDepthView;
      Register<0x3> dbRenderOverride;
      Register<0x4> dbRenderOverride2;
      Register<0x5> dbHTileDataBase;
      Register<0x8, float> dbDepthBoundsMin;
      Register<0x9, float> dbDepthBoundsMax;
      Register<0xa> dbStencilClear;
      Register<0xb, float> dbDepthClear;
      Register<0xc, PaScRect> paScScreenScissor;
      Register<0xf> dbDepthInfo;
      Register<0x10, DbZInfo> dbZInfo;
      Register<0x11> dbStencilInfo;
      Register<0x12> dbZReadBase;
      Register<0x13> dbStencilReadBase;
      Register<0x14> dbZWriteBase;
      Register<0x15> dbStencilWriteBase;
      Register<0x16> dbDepthSize;
      Register<0x17> dbDepthSlice;
      Register<0x20> taBcBaseAddr;
      Register<0x80> paScWindowOffset;
      Register<0x81, PaScRect> paScWindowScissor;
      Register<0x83> paScClipRectRule;
      Register<0x84, std::array<PaScRect, 4>> paScClipRect;
      Register<0x8c> unk_8c;
      Register<0x8d> paSuHardwareScreenOffset;
      Register<0x8e, CbTargetMask> cbTargetMask;
      Register<0x8f, CbShaderMask> cbShaderMask;
      Register<0x90, PaScRect> paScGenericScissor;
      Register<0x94, std::array<PaScRect, 16>> paScVportScissor;
      Register<0xb4, std::array<PaScVportZ, 16>> paScVportZ;
      Register<0xd4> unk_d4;
      Register<0xd8> cpPerfMonCntxCntl;
      Register<0x100> vgtMaxVtxIndx;
      Register<0x101> vgtMinVtxIndx;
      Register<0x102> vgtIndxOffset;
      Register<0x103> vgtMultiPrimIbResetIndx;
      Register<0x105, float> cbBlendRed;
      Register<0x106, float> cbBlendGreen;
      Register<0x107, float> cbBlendBlue;
      Register<0x108, float> cbBlendAlpha;
      Register<0x10b> dbStencilControl;
      Register<0x10c> dbStencilRefMask;
      Register<0x10d> dbStencilRefMaskBf;
      Register<0x10f, std::array<PaClVport, 16>> paClVports;
      Register<0x16f> paClUcp0X;
      Register<0x170> paClUcp0Y;
      Register<0x171> paClUcp0Z;
      Register<0x172> paClUcp0W;
      Register<0x191, std::array<SpiPsInputCntl, 32>> spiPsInputCntl;
      Register<0x1b1> spiVsOutConfig;
      Register<0x1b3, SpiPsInput> spiPsInputEna;
      Register<0x1b4, SpiPsInput> spiPsInputAddr;
      Register<0x1b6> spiPsInControl;
      Register<0x1b8> spiBarycCntl;
      Register<0x1ba> spiTmpRingSize;
      Register<0x1c3> spiShaderPosFormat;
      Register<0x1c4> spiShaderZFormat;
      Register<0x1c5> spiShaderColFormat;
      Register<0x1e0, std::array<CbBlendControl, 8>> cbBlendControl;
      Register<0x1f9> vgtDmaBaseHi;
      Register<0x1fa> vgtDmaBase;
      Register<0x1fc> vgtDrawInitiator;
      Register<0x1fd> vgtImmedData;
      Register<0x200, DbDepthControl> dbDepthControl;
      Register<0x201> dbEqaa;
      Register<0x202, CbColorControl> cbColorControl;
      Register<0x203> dbShaderControl;
      Register<0x204> paClClipCntl;
      Register<0x205, PaSuScModeCntl> paSuScModeCntl;
      Register<0x206> paClVteCntl;
      Register<0x207> paClVsOutCntl;
      Register<0x280> paSuPointSize;
      Register<0x281> paSuPointMinmax;
      Register<0x282> paSuLineCntl;
      Register<0x284> vgtOutputPathCntl;
      Register<0x286> vgtHosMaxTessLevel;
      Register<0x287> vgtHosMinTessLevel;
      Register<0x290> vgtGsMode;
      Register<0x291> vgtGsOnChipCntl;
      Register<0x292> paScModeCntl0;
      Register<0x293> paScModeCntl1;
      Register<0x295> vgtGsPerEs;
      Register<0x296> vgtEsPerGs;
      Register<0x297> vgtGsPerVs;
      Register<0x298, std::array<std::uint32_t, 3>> vgtGsVsRingOffsets;
      Register<0x29b> vgtGsOutPrimType;
      Register<0x29d> vgtDmaSize;
      Register<0x29e> vgtDmaMaxSize;
      Register<0x29f> vgtDmaIndexType;
      Register<0x2a1> vgtPrimitiveIdEn;
      Register<0x2a2> vgtDmaNumInstances;
      Register<0x2a4> vgtEventInitiator;
      Register<0x2a5> vgtMultiPrimIbResetEn;
      Register<0x2a8> vgtInstanceStepRate0;
      Register<0x2a9> vgtInstanceStepRate1;
      Register<0x2aa> iaMultiVgtParam;
      Register<0x2ab> vgtEsGsRingItemSize;
      Register<0x2ac> vgtGsVsRingItemSize;
      Register<0x2ad> vgtReuseOff;
      Register<0x2ae> vgtVtxCntEn;
      Register<0x2af> dbHTileSurface;
      Register<0x2b0> dbSResultsCompareState0;
      Register<0x2b1> dbSResultsCompareState1;
      Register<0x2b4> vgtStrmOutBufferSize0;
      Register<0x2b5> vgtStrmOutVtxStride0;
      Register<0x2b8> vgtStrmOutBufferSize1;
      Register<0x2b9> vgtStrmOutVtxStride1;
      Register<0x2bc> vgtStrmOutBufferSize2;
      Register<0x2bd> vgtStrmOutVtxStride2;
      Register<0x2c0> vgtStrmOutBufferSize3;
      Register<0x2c1> vgtStrmOutVtxStride3;
      Register<0x2ca> vgtStrmOutDrawOpaqueOffset;
      Register<0x2cb> vgtStrmOutDrawOpaqueBufferFilledSize;
      Register<0x2cc> vgtStrmOutDrawOpaqueVertexStride;
      Register<0x2ce> vgtGsMaxVertOut;
      Register<0x2d5, VgtShaderStagesEn> vgtShaderStagesEn;
      Register<0x2d6> vgtLsHsConfig;
      Register<0x2d7, std::array<std::uint32_t, 4>> vgtGsVertItemSizes;
      Register<0x2db> vgtTfParam;
      Register<0x2dc> dbAlphaToMask;
      Register<0x2dd> vgtDispatchDrawIndex;
      Register<0x2de> paSuPolyOffsetDbFmtCntl;
      Register<0x2df> paSuPolyOffsetClamp;
      Register<0x2e0> paSuPolyOffsetFrontScale;
      Register<0x2e1> paSuPolyOffsetFrontOffset;
      Register<0x2e2> paSuPolyOffsetBackScale;
      Register<0x2e3> paSuPolyOffsetBackOffset;
      Register<0x2e4> vgtGsInstanceCnt;
      Register<0x2e5> vgtStrmOutConfig;
      Register<0x2e6> vgtStrmOutBufferConfig;
      Register<0x2f5> paScCentroidPriority0;
      Register<0x2f6> paScCentroidPriority1;
      Register<0x2f7> unk_2f7;
      Register<0x2f8> paScAaConfig;
      Register<0x2f9, PaSuVtxCntl> paSuVtxCntl;
      Register<0x2fa, float> paClGbVertClipAdj;
      Register<0x2fb, float> paClGbVertDiscAdj;
      Register<0x2fc, float> paClGbHorzClipAdj;
      Register<0x2fd, float> paClGbHorzDiscAdj;
      Register<0x2fe, std::array<std::uint32_t, 4>> paScAaSampleLocsPixelX0Y0;
      Register<0x302, std::array<std::uint32_t, 4>> paScAaSampleLocsPixelX1Y0;
      Register<0x306, std::array<std::uint32_t, 4>> paScAaSampleLocsPixelX0Y1;
      Register<0x30a, std::array<std::uint32_t, 4>> paScAaSampleLocsPixelX1Y1;
      Register<0x30e> paScAaMaskX0Y0_X1Y0;
      Register<0x30f> paScAaMaskX0Y1_X1Y1;
      Register<0x316> unk_316;
      Register<0x317> vgtOutDeallocCntl;
      Register<0x318, std::array<CbColor, 8>> cbColor;
    };
  };
  // User-config registers (primitive/index type, coherency, GDS OA).
  struct UConfig {
    static constexpr auto kMmioOffset = 0xc000;
    union {
      Register<0x3f> cpStrmOutCntl;
      Register<0x79> cpCoherBaseHi;
      Register<0x7d> cpCoherSize;
      Register<0x7e> cpCoherBase;
      Register<0x8b> cpDmaReadTags;
      Register<0x8c> cpCoherSizeHi;
      Register<0x200> grbmGfxIndex;
      Register<0x242, gnm::PrimitiveType> vgtPrimitiveType;
      Register<0x243, gnm::IndexType> vgtIndexType;
      Register<0x24c> vgtNumIndices;
      Register<0x24d> vgtNumInstances;
      Register<0x340, std::array<std::uint32_t, 4>> sqThreadTraceUserdata;
      Register<0x41d> gdsOaCntl;
      Register<0x41e> gdsOaCounter;
      Register<0x41f> gdsOaAddress;
    };
  };
  // Performance-counter registers and their select controls.
  struct Counters {
    static constexpr auto kMmioOffset = 0xd000;
    union {
      Register<0x0, std::uint64_t> cpgPerfCounter1;
      Register<0x2, std::uint64_t> cpgPerfCounter0;
      Register<0x4, std::uint64_t> cpcPerfCounter1;
      Register<0x6, std::uint64_t> cpcPerfCounter0;
      Register<0x8, std::uint64_t> cpfPerfCounter1;
      Register<0xa, std::uint64_t> cpfPerfCounter0;
      Register<0x80, std::array<std::uint64_t, 4>> wdPerfCounters;
      Register<0x88, std::array<std::uint64_t, 4>> iaPerfCounters;
      Register<0x90, std::array<std::uint64_t, 4>> vgtPerfCounters;
      Register<0x100, std::array<std::uint64_t, 4>> paSuPerfCounters;
      Register<0x140, std::array<std::uint64_t, 8>> paScPerfCounters;
      Register<0x180> spiPerfCounter0Hi;
      Register<0x181> spiPerfCounter0Lo;
      Register<0x182> spiPerfCounter1Hi;
      Register<0x183> spiPerfCounter1Lo;
      Register<0x184> spiPerfCounter2Hi;
      Register<0x185> spiPerfCounter2Lo;
      Register<0x186> spiPerfCounter3Hi;
      Register<0x187> spiPerfCounter3Lo;
      Register<0x188> spiPerfCounter4Hi;
      Register<0x189> spiPerfCounter4Lo;
      Register<0x18a> spiPerfCounter5Hi;
      Register<0x18b> spiPerfCounter5Lo;
      Register<0x1c0, std::array<std::uint64_t, 16>> sqPerfCounters;
      Register<0x240, std::array<std::uint64_t, 4>> sxPerfCounters;
      Register<0x280, std::array<std::uint64_t, 4>> gdsPerfCounters;
      Register<0x2c0, std::array<std::uint64_t, 2>> taPerfCounters;
      Register<0x300, std::array<std::uint64_t, 2>> tdPerfCounters;
      Register<0x340, std::array<std::uint64_t, 4>> tcpPerfCounters;
      Register<0x380, std::array<std::uint64_t, 4>> tccPerfCounters;
      Register<0x390, std::array<std::uint64_t, 4>> tcaPerfCounters;
      Register<0x3a0, std::array<std::uint64_t, 4>> tcsPerfCounters;
      Register<0x406, std::array<std::uint64_t, 4>> cbPerfCounters;
      Register<0x440, std::array<std::uint64_t, 4>> dbPerfCounters;
      Register<0x800> cpgPerfCounter1Select;
      Register<0x801> cpgPerfCounter0Select1;
      Register<0x802> cpgPerfCounter0Select;
      Register<0x803> cpcPerfCounter1Select;
      Register<0x804> cpcPerfCounter0Select1;
      Register<0x805> cpfPerfCounter1Select;
      Register<0x806> cpfPerfCounter0Select1;
      Register<0x807> cpfPerfCounter0Select;
      Register<0x808> cpPerfMonCntl;
      Register<0x809> cpcPerfCounter0Select;
      Register<0x880> wdPerfCounter0Select;
      Register<0x881> wdPerfCounter1Select;
      Register<0x882> wdPerfCounter2Select;
      Register<0x883> wdPerfCounter3Select;
      Register<0x884> iaPerfCounter0Select;
      Register<0x885> iaPerfCounter1Select;
      Register<0x886> iaPerfCounter2Select;
      Register<0x887> iaPerfCounter3Select;
      Register<0x888> iaPerfCounter0Select1;
      Register<0x88c> vgtPerfCounter0Select;
      Register<0x88d> vgtPerfCounter1Select;
      Register<0x88e> vgtPerfCounter2Select;
      Register<0x88f> vgtPerfCounter3Select;
      Register<0x890> vgtPerfCounter0Select1;
      Register<0x891> vgtPerfCounter1Select1;
      Register<0x900> paSuPerfCounter0Select;
      Register<0x901> paSuPerfCounter0Select1;
      Register<0x902> paSuPerfCounter1Select;
      Register<0x903> paSuPerfCounter1Select1;
      Register<0x904> paSuPerfCounter2Select;
      Register<0x905> paSuPerfCounter3Select;
      Register<0x940> paScPerfCounter0Select;
      Register<0x941> paScPerfCounter0Select1;
      Register<0x942> paScPerfCounter1Select;
      Register<0x943> paScPerfCounter2Select;
      Register<0x944> paScPerfCounter3Select;
      Register<0x945> paScPerfCounter4Select;
      Register<0x946> paScPerfCounter5Select;
      Register<0x947> paScPerfCounter6Select;
      Register<0x948> paScPerfCounter7Select;
      Register<0x980> spiPerfCounter0Select;
      Register<0x981> spiPerfCounter1Select;
      Register<0x982> spiPerfCounter2Select;
      Register<0x983> spiPerfCounter3Select;
      Register<0x984> spiPerfCounter0Select1;
      Register<0x985> spiPerfCounter1Select1;
      Register<0x986> spiPerfCounter2Select1;
      Register<0x987> spiPerfCounter3Select1;
      Register<0x988> spiPerfCounter4Select;
      Register<0x989> spiPerfCounter5Select;
      Register<0x98a> spiPerfCounterBins;
      Register<0x9c0, std::array<std::uint32_t, 16>> sqPerfCountersSelect;
      Register<0x9e0> sqPerfCounterCtrl;
      Register<0xa40> sxPerfCounter0Select;
      Register<0xa41> sxPerfCounter1Select;
      Register<0xa42> sxPerfCounter2Select;
      Register<0xa43> sxPerfCounter3Select;
      Register<0xa44> sxPerfCounter0Select1;
      Register<0xa45> sxPerfCounter1Select1;
      Register<0xa80> gdsPerfCounter0Select;
      Register<0xa81> gdsPerfCounter1Select;
      Register<0xa82> gdsPerfCounter2Select;
      Register<0xa83> gdsPerfCounter3Select;
      Register<0xa84> gdsPerfCounter0Select1;
      Register<0xac0> taPerfCounter0Select;
      Register<0xac1> taPerfCounter0Select1;
      Register<0xac2> taPerfCounter1Select;
      Register<0xb00> tdPerfCounter0Select;
      Register<0xb01> tdPerfCounter0Select1;
      Register<0xb02> tdPerfCounter1Select;
      Register<0xb40> tcpPerfCounter0Select;
      Register<0xb41> tcpPerfCounter0Select1;
      Register<0xb42> tcpPerfCounter1Select;
      Register<0xb43> tcpPerfCounter1Select1;
      Register<0xb44> tcpPerfCounter2Select;
      Register<0xb45> tcpPerfCounter3Select;
      Register<0xb80> tccPerfCounter0Select;
      Register<0xb81> tccPerfCounter0Select1;
      Register<0xb82> tccPerfCounter1Select;
      Register<0xb83> tccPerfCounter1Select1;
      Register<0xb84> tccPerfCounter2Select;
      Register<0xb85> tccPerfCounter3Select;
      Register<0xb90> tcaPerfCounter0Select;
      Register<0xb91> tcaPerfCounter0Select1;
      Register<0xb92> tcaPerfCounter1Select;
      Register<0xb93> tcaPerfCounter1Select1;
      Register<0xb94> tcaPerfCounter2Select;
      Register<0xb95> tcaPerfCounter3Select;
      Register<0xba0> tcsPerfCounter0Select;
      Register<0xba1> tcsPerfCounter0Select1;
      Register<0xba2> tcsPerfCounter1Select;
      Register<0xba3> tcsPerfCounter2Select;
      Register<0xba4> tcsPerfCounter3Select;
      Register<0xc00> cbPerfCounterFilter;
      Register<0xc01> cbPerfCounter0Select;
      Register<0xc02> cbPerfCounter0Select1;
      Register<0xc03> cbPerfCounter1Select;
      Register<0xc04> cbPerfCounter2Select;
      Register<0xc05> cbPerfCounter3Select;
      Register<0xc40> dbPerfCounter0Select;
      Register<0xc41> dbPerfCounter0Select1;
      Register<0xc42> dbPerfCounter1Select;
      Register<0xc43> dbPerfCounter1Select1;
      Register<0xc44> dbPerfCounter2Select;
      Register<0xc46> dbPerfCounter3Select;
    };
  };
  // Top-level view: all blocks overlaid on the flat `raw` dword array.
  union {
    Register<0x50c, std::uint32_t> vmContext0ProtectionIntrCtl;
    Register<0x50d, std::uint32_t> vmContext1ProtectionIntrCtl;
    Register<0x536, VmProtectionFault> vmContext0ProtectionFault;
    Register<0x537, VmProtectionFault> vmContext1ProtectionFault;
    Register<0x53e, std::uint32_t>
        vmContext0ProtectionFaultPage; // address >> 12
    Register<0x53f, std::uint32_t>
        vmContext1ProtectionFaultPage; // address >> 12
    Register<0x809, FbInfo> fbInfo;
    Register<0xf82, std::uint32_t> ihRptr;
    Register<0xf83, std::uint32_t> ihWptr;
    Register<Config::kMmioOffset, Config> config;
    Register<ShaderConfig::kMmioOffset, ShaderConfig> sh;
    Register<0x3045> cpRbWptr;
    Register<0x3064> cpRb1Wptr;
    Register<0x3069> cpRb2Wptr;
    Register<0x3049> cpIntCntl;
    Register<0x304a> cpIntStatus;
    Register<0x306a, std::array<std::uint32_t, 3>> cpIntCntlRings;
    Register<0x306d, std::array<std::uint32_t, 3>> cpIntStatusRings;
    Register<0x324b> cpHqdQueuePriority;
    Register<0x324c> cpHqdQuantum;
    Register<Context::kMmioOffset, Context> context;
    Register<UConfig::kMmioOffset, UConfig> uconfig;
    Register<Counters::kMmioOffset, Counters> counters;
    std::uint32_t raw[kRegisterCount];
  };
};
#pragma pack(pop)
} // namespace amdgpu

1273
rpcsx-gpu2/Renderer.cpp Normal file

File diff suppressed because it is too large Load diff

17
rpcsx-gpu2/Renderer.hpp Normal file
View file

@ -0,0 +1,17 @@
#pragma once
#include "Cache.hpp"
#include "Pipe.hpp"
#include <cstdint>
#include <vulkan/vulkan_core.h>
namespace amdgpu {
void draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
std::uint32_t vertexCount, std::uint32_t firstInstance,
std::uint32_t instanceCount, std::uint64_t indiciesAddress,
std::uint32_t indexCount);
void flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer,
VkExtent2D targetExtent, std::uint64_t address, VkImageView target,
VkExtent2D imageExtent, CbCompSwap compSwap, TileMode tileMode,
gnm::DataFormat dfmt, gnm::NumericFormat nfmt);
} // namespace amdgpu

View file

@ -0,0 +1,4 @@
# GPU support libraries: surface tiler, GCN shader translator,
# Vulkan helpers and GNM type definitions.
add_subdirectory(amdgpu-tiler)
add_subdirectory(gcn-shader)
add_subdirectory(vk)
add_subdirectory(gnm)

View file

@ -0,0 +1,22 @@
# Compile the (de)tiling compute shaders to SPIR-V headers.
add_precompiled_vulkan_spirv(amdgpu_tiler_vulkan_shaders
  shaders/tiler1d.comp.glsl
  shaders/tiler2d.comp.glsl
  shaders/tilerLinear.comp.glsl
  shaders/detiler1d.comp.glsl
  shaders/detiler2d.comp.glsl
  shaders/detilerLinear.comp.glsl
)
# Core tiler math plus CPU and Vulkan (GPU compute) backends.
add_library(amdgpu_tiler STATIC src/tiler.cpp)
target_include_directories(amdgpu_tiler PUBLIC include)
add_library(amdgpu_tiler_cpu STATIC src/tiler_cpu.cpp)
add_library(amdgpu_tiler_vulkan STATIC src/tiler_vulkan.cpp)
target_link_libraries(amdgpu_tiler PUBLIC gnm)
target_link_libraries(amdgpu_tiler_cpu PUBLIC amdgpu_tiler)
target_link_libraries(amdgpu_tiler_vulkan PUBLIC amdgpu_tiler amdgpu_tiler_vulkan_shaders vk)
# Namespaced aliases used by consumers (e.g. rpcsx-gpu2).
add_library(amdgpu::tiler ALIAS amdgpu_tiler)
add_library(amdgpu::tiler::cpu ALIAS amdgpu_tiler_cpu)
add_library(amdgpu::tiler::vulkan ALIAS amdgpu_tiler_vulkan)

View file

@ -0,0 +1,505 @@
#pragma once
#include <array>
#include <bit> // std::countr_zero (used by getBankIndex)
#include <cstdint>
#include <cstdlib>
#include <gnm/constants.hpp>
#include <gnm/descriptors.hpp>
namespace amdgpu {
inline constexpr uint32_t kMicroTileWidth = 8;
inline constexpr uint32_t kMicroTileHeight = 8;
inline constexpr uint32_t kDramRowSize = 0x400;
inline constexpr uint32_t kPipeInterleaveBytes = 256;
// Surface array (macro-level tiling) modes; values match the hardware
// ARRAY_MODE encoding in the tile-mode registers.
enum ArrayMode {
  kArrayModeLinearGeneral = 0x00000000,
  kArrayModeLinearAligned = 0x00000001,
  kArrayMode1dTiledThin = 0x00000002,
  kArrayMode1dTiledThick = 0x00000003,
  kArrayMode2dTiledThin = 0x00000004,
  kArrayModeTiledThinPrt = 0x00000005,
  kArrayMode2dTiledThinPrt = 0x00000006,
  kArrayMode2dTiledThick = 0x00000007,
  kArrayMode2dTiledXThick = 0x00000008,
  kArrayModeTiledThickPrt = 0x00000009,
  kArrayMode2dTiledThickPrt = 0x0000000a,
  kArrayMode3dTiledThinPrt = 0x0000000b,
  kArrayMode3dTiledThin = 0x0000000c,
  kArrayMode3dTiledThick = 0x0000000d,
  kArrayMode3dTiledXThick = 0x0000000e,
  kArrayMode3dTiledThickPrt = 0x0000000f,
};
// Element ordering inside an 8x8 micro tile (MICRO_TILE_MODE encoding).
enum MicroTileMode {
  kMicroTileModeDisplay = 0x00000000,
  kMicroTileModeThin = 0x00000001,
  kMicroTileModeDepth = 0x00000002,
  kMicroTileModeRotated = 0x00000003,
  kMicroTileModeThick = 0x00000004,
};
// Pipe interleave configurations supported by the tiler
// (only the subset that appears in the default tile-mode tables).
enum PipeConfig {
  kPipeConfigP8_32x32_8x16 = 0x0000000a,
  kPipeConfigP8_32x32_16x16 = 0x0000000c,
  kPipeConfigP16 = 0x00000012,
};
// TILE_SPLIT encoding: tile-split size in bytes (64B << value).
enum TileSplit {
  kTileSplit64B = 0x00000000,
  kTileSplit128B = 0x00000001,
  kTileSplit256B = 0x00000002,
  kTileSplit512B = 0x00000003,
  kTileSplit1KB = 0x00000004,
  kTileSplit2KB = 0x00000005,
  kTileSplit4KB = 0x00000006,
};
// SAMPLE_SPLIT encoding: samples per split (1 << value).
enum SampleSplit {
  kSampleSplit1 = 0x00000000,
  kSampleSplit2 = 0x00000001,
  kSampleSplit4 = 0x00000002,
  kSampleSplit8 = 0x00000003,
};
// NUM_BANKS encoding: bank count = 2 << value.
enum NumBanks {
  kNumBanks2 = 0x00000000,
  kNumBanks4 = 0x00000001,
  kNumBanks8 = 0x00000002,
  kNumBanks16 = 0x00000003,
};
// BANK_WIDTH encoding: width = 1 << value (in macro-tile units).
enum BankWidth {
  kBankWidth1 = 0x00000000,
  kBankWidth2 = 0x00000001,
  kBankWidth4 = 0x00000002,
  kBankWidth8 = 0x00000003,
};
// BANK_HEIGHT encoding: height = 1 << value (in macro-tile units).
enum BankHeight {
  kBankHeight1 = 0x00000000,
  kBankHeight2 = 0x00000001,
  kBankHeight4 = 0x00000002,
  kBankHeight8 = 0x00000003,
};
// MACRO_TILE_ASPECT encoding: aspect ratio = 1 << value.
enum MacroTileAspect {
  kMacroTileAspect1 = 0x00000000,
  kMacroTileAspect2 = 0x00000001,
  kMacroTileAspect4 = 0x00000002,
  kMacroTileAspect8 = 0x00000003,
};
// Packed GB_TILE_MODEn register value with typed accessors for each
// bit field. Setters return *this to allow chaining. The only change
// from the original is explicit parentheses around the shifted-and-masked
// terms: `a | b & c` parses as `a | (b & c)` so behavior is identical,
// but the intent is now unambiguous (and -Wparentheses clean).
struct TileMode {
  std::uint32_t raw;

  constexpr ArrayMode arrayMode() const {
    return ArrayMode((raw & 0x0000003c) >> 2);
  }

  constexpr PipeConfig pipeConfig() const {
    return PipeConfig((raw & 0x000007c0) >> 6);
  }

  constexpr TileSplit tileSplit() const {
    return TileSplit((raw & 0x00003800) >> 11);
  }

  constexpr MicroTileMode microTileMode() const {
    return MicroTileMode((raw & 0x01c00000) >> 22);
  }

  constexpr SampleSplit sampleSplit() const {
    return SampleSplit((raw & 0x06000000) >> 25);
  }

  constexpr std::uint32_t altPipeConfig() const {
    return (raw & 0xf8000000) >> 27;
  }

  constexpr TileMode &arrayMode(ArrayMode mode) {
    raw = (raw & ~0x0000003c) |
          ((static_cast<std::uint32_t>(mode) << 2) & 0x0000003c);
    return *this;
  }

  constexpr TileMode &pipeConfig(PipeConfig mode) {
    raw = (raw & ~0x000007c0) |
          ((static_cast<std::uint32_t>(mode) << 6) & 0x000007c0);
    return *this;
  }

  constexpr TileMode &tileSplit(TileSplit mode) {
    raw = (raw & ~0x00003800) |
          ((static_cast<std::uint32_t>(mode) << 11) & 0x00003800);
    return *this;
  }

  constexpr TileMode &microTileMode(MicroTileMode mode) {
    raw = (raw & ~0x01c00000) |
          ((static_cast<std::uint32_t>(mode) << 22) & 0x01c00000);
    return *this;
  }

  constexpr TileMode &sampleSplit(SampleSplit mode) {
    raw = (raw & ~0x06000000) |
          ((static_cast<std::uint32_t>(mode) << 25) & 0x06000000);
    return *this;
  }
};
// Packed GB_MACROTILE_MODEn register value with typed accessors.
// Width/height/bank getters return the raw 2-bit encodings (interpret
// via the BankWidth/BankHeight/NumBanks enums: actual = 1 or 2 << value).
struct MacroTileMode {
  std::uint32_t raw;

  constexpr std::uint32_t bankWidth() const { return (raw & 0x00000003) >> 0; }

  constexpr std::uint32_t bankHeight() const { return (raw & 0x0000000c) >> 2; }

  constexpr MacroTileAspect macroTileAspect() const {
    return MacroTileAspect((raw & 0x00000030) >> 4);
  }

  constexpr std::uint32_t numBanks() const { return (raw & 0x000000c0) >> 6; }

  // "alt" fields: alternate encodings stored in the upper bits.
  constexpr std::uint32_t altBankHeight() const {
    return (raw & 0x00000300) >> 8;
  }

  constexpr std::uint32_t altMacroTileAspect() const {
    return (raw & 0x00000c00) >> 10;
  }

  constexpr std::uint32_t altNumBanks() const {
    return (raw & 0x00003000) >> 12;
  }
};
// Computed layout of a (possibly mipmapped, arrayed) surface:
// overall dimensions plus per-mip offsets and sizes.
struct SurfaceInfo {
  std::uint32_t width;
  std::uint32_t height;
  std::uint32_t depth;
  std::uint32_t pitch; // row pitch in elements
  int arrayLayerCount;
  int numFragments;
  int bitsPerElement;
  std::uint64_t totalSize; // total byte size of the tiled surface

  struct SubresourceInfo {
    std::uint32_t dataWidth;  // padded width used for layout
    std::uint32_t dataHeight;
    std::uint32_t dataDepth;
    std::uint64_t offset;     // byte offset of this mip within the surface
    std::uint64_t tiledSize;  // byte size in tiled layout
    std::uint64_t linearSize; // byte size when detiled to linear
  };

  // Indexed by mip level; no bounds check — callers must pass 0..15.
  SubresourceInfo subresources[16];

  void setSubresourceInfo(int mipLevel, const SubresourceInfo &subresource) {
    subresources[mipLevel] = subresource;
  }

  const SubresourceInfo &getSubresourceInfo(int mipLevel) const {
    return subresources[mipLevel];
  }
};
// Number of depth slices stored interleaved in one micro tile:
// 1 for linear/thin modes, 4 for "thick", 8 for "xthick".
// Aborts on an out-of-range value.
constexpr uint32_t getMicroTileThickness(ArrayMode arrayMode) {
  switch (arrayMode) {
  case kArrayModeLinearGeneral:
  case kArrayModeLinearAligned:
  case kArrayMode1dTiledThin:
  case kArrayMode2dTiledThin:
  case kArrayModeTiledThinPrt:
  case kArrayMode2dTiledThinPrt:
  case kArrayMode3dTiledThinPrt:
  case kArrayMode3dTiledThin:
    return 1;

  case kArrayMode1dTiledThick:
  case kArrayMode2dTiledThick:
  case kArrayMode3dTiledThick:
  case kArrayModeTiledThickPrt:
  case kArrayMode2dTiledThickPrt:
  case kArrayMode3dTiledThickPrt:
    return 4;

  case kArrayMode2dTiledXThick:
  case kArrayMode3dTiledXThick:
    return 8;
  }

  std::abort();
}
// True for 2D/3D (macro) tiled modes, false for linear and 1D tiled.
// Aborts on an out-of-range value.
constexpr bool isMacroTiled(ArrayMode arrayMode) {
  switch (arrayMode) {
  case kArrayMode2dTiledThin:
  case kArrayModeTiledThinPrt:
  case kArrayMode2dTiledThinPrt:
  case kArrayMode2dTiledThick:
  case kArrayMode2dTiledXThick:
  case kArrayModeTiledThickPrt:
  case kArrayMode2dTiledThickPrt:
  case kArrayMode3dTiledThinPrt:
  case kArrayMode3dTiledThin:
  case kArrayMode3dTiledThick:
  case kArrayMode3dTiledXThick:
  case kArrayMode3dTiledThickPrt:
    return true;

  case kArrayModeLinearGeneral:
  case kArrayModeLinearAligned:
  case kArrayMode1dTiledThin:
  case kArrayMode1dTiledThick:
    return false;
  }

  std::abort();
}
// True for partially-resident-texture (PRT) array modes.
// Aborts on an out-of-range value.
constexpr bool isPrt(ArrayMode arrayMode) {
  switch (arrayMode) {
  case kArrayModeTiledThinPrt:
  case kArrayMode2dTiledThinPrt:
  case kArrayModeTiledThickPrt:
  case kArrayMode2dTiledThickPrt:
  case kArrayMode3dTiledThinPrt:
  case kArrayMode3dTiledThickPrt:
    return true;

  case kArrayModeLinearGeneral:
  case kArrayModeLinearAligned:
  case kArrayMode1dTiledThin:
  case kArrayMode1dTiledThick:
  case kArrayMode2dTiledThin:
  case kArrayMode2dTiledThick:
  case kArrayMode2dTiledXThick:
  case kArrayMode3dTiledThin:
  case kArrayMode3dTiledThick:
  case kArrayMode3dTiledXThick:
    return false;
  }

  std::abort();
}
// Default GB_MACROTILE_MODE table (16 entries of raw register values),
// used when the guest has not programmed the registers itself.
constexpr std::array<MacroTileMode, 16> getDefaultMacroTileModes() {
  return {{
      {.raw = 0x26e8},
      {.raw = 0x26d4},
      {.raw = 0x21d0},
      {.raw = 0x21d0},
      {.raw = 0x2080},
      {.raw = 0x2040},
      {.raw = 0x1000},
      {.raw = 0x0000},
      {.raw = 0x36ec},
      {.raw = 0x26e8},
      {.raw = 0x21d4},
      {.raw = 0x20d0},
      {.raw = 0x1080},
      {.raw = 0x1040},
      {.raw = 0x0000},
      {.raw = 0x0000},
  }};
}
// Default GB_TILE_MODE table (32 entries of raw register values),
// used when the guest has not programmed the registers itself.
constexpr std::array<TileMode, 32> getDefaultTileModes() {
  return {{
      {.raw = 0x90800310}, {.raw = 0x90800b10}, {.raw = 0x90801310},
      {.raw = 0x90801b10}, {.raw = 0x90802310}, {.raw = 0x90800308},
      {.raw = 0x90801318}, {.raw = 0x90802318}, {.raw = 0x90000304},
      {.raw = 0x90000308}, {.raw = 0x92000310}, {.raw = 0x92000294},
      {.raw = 0x92000318}, {.raw = 0x90400308}, {.raw = 0x92400310},
      {.raw = 0x924002b0}, {.raw = 0x92400294}, {.raw = 0x92400318},
      {.raw = 0x9240032c}, {.raw = 0x9100030c}, {.raw = 0x9100031c},
      {.raw = 0x910002b4}, {.raw = 0x910002a4}, {.raw = 0x91000328},
      {.raw = 0x910002bc}, {.raw = 0x91000320}, {.raw = 0x910002b8},
      {.raw = 0x90c00308}, {.raw = 0x92c00310}, {.raw = 0x92c00294},
      {.raw = 0x92c00318}, {.raw = 0x00000000},
  }};
}
// Computes the bit-interleaved index of element (x, y, z) within one
// micro tile, per the hardware swizzle for the given micro-tile mode,
// element size and array mode. The case fallthroughs in the thin/thick
// switches are intentional (the xthick modes add a z>>2 bit and then
// share the thick handling); they are now annotated with [[fallthrough]]
// to document that and silence -Wimplicit-fallthrough. Logic unchanged.
constexpr std::uint32_t getElementIndex(std::uint32_t x, std::uint32_t y,
                                        std::uint32_t z,
                                        std::uint32_t bitsPerElement,
                                        MicroTileMode microTileMode,
                                        ArrayMode arrayMode) {
  std::uint32_t elem = 0;

  if (microTileMode == kMicroTileModeDisplay) {
    // Display swizzle depends on the element size.
    switch (bitsPerElement) {
    case 8:
      elem |= ((x >> 0) & 0x1) << 0;
      elem |= ((x >> 1) & 0x1) << 1;
      elem |= ((x >> 2) & 0x1) << 2;
      elem |= ((y >> 1) & 0x1) << 3;
      elem |= ((y >> 0) & 0x1) << 4;
      elem |= ((y >> 2) & 0x1) << 5;
      break;
    case 16:
      elem |= ((x >> 0) & 0x1) << 0;
      elem |= ((x >> 1) & 0x1) << 1;
      elem |= ((x >> 2) & 0x1) << 2;
      elem |= ((y >> 0) & 0x1) << 3;
      elem |= ((y >> 1) & 0x1) << 4;
      elem |= ((y >> 2) & 0x1) << 5;
      break;
    case 32:
      elem |= ((x >> 0) & 0x1) << 0;
      elem |= ((x >> 1) & 0x1) << 1;
      elem |= ((y >> 0) & 0x1) << 2;
      elem |= ((x >> 2) & 0x1) << 3;
      elem |= ((y >> 1) & 0x1) << 4;
      elem |= ((y >> 2) & 0x1) << 5;
      break;
    case 64:
      elem |= ((x >> 0) & 0x1) << 0;
      elem |= ((y >> 0) & 0x1) << 1;
      elem |= ((x >> 1) & 0x1) << 2;
      elem |= ((x >> 2) & 0x1) << 3;
      elem |= ((y >> 1) & 0x1) << 4;
      elem |= ((y >> 2) & 0x1) << 5;
      break;
    default:
      std::abort();
    }
  } else if (microTileMode == kMicroTileModeThin ||
             microTileMode == kMicroTileModeDepth) {
    // Thin/depth: fixed x/y interleave, plus z bits for thick modes.
    elem |= ((x >> 0) & 0x1) << 0;
    elem |= ((y >> 0) & 0x1) << 1;
    elem |= ((x >> 1) & 0x1) << 2;
    elem |= ((y >> 1) & 0x1) << 3;
    elem |= ((x >> 2) & 0x1) << 4;
    elem |= ((y >> 2) & 0x1) << 5;

    switch (arrayMode) {
    case kArrayMode2dTiledXThick:
    case kArrayMode3dTiledXThick:
      elem |= ((z >> 2) & 0x1) << 8;
      [[fallthrough]]; // xthick also gets the thick z bits below
    case kArrayMode1dTiledThick:
    case kArrayMode2dTiledThick:
    case kArrayMode3dTiledThick:
    case kArrayModeTiledThickPrt:
    case kArrayMode2dTiledThickPrt:
    case kArrayMode3dTiledThickPrt:
      elem |= ((z >> 0) & 0x1) << 6;
      elem |= ((z >> 1) & 0x1) << 7;
      [[fallthrough]]; // thin modes contribute no z bits
    default:
      break;
    }
  } else if (microTileMode == kMicroTileModeThick) {
    // Thick: z bits are interleaved with x/y, position depends on size.
    switch (arrayMode) {
    case kArrayMode2dTiledXThick:
    case kArrayMode3dTiledXThick:
      elem |= ((z >> 2) & 0x1) << 8;
      [[fallthrough]]; // xthick shares the thick swizzle below
    case kArrayMode1dTiledThick:
    case kArrayMode2dTiledThick:
    case kArrayMode3dTiledThick:
    case kArrayModeTiledThickPrt:
    case kArrayMode2dTiledThickPrt:
    case kArrayMode3dTiledThickPrt:
      if (bitsPerElement == 8 || bitsPerElement == 16) {
        elem |= ((x >> 0) & 0x1) << 0;
        elem |= ((y >> 0) & 0x1) << 1;
        elem |= ((x >> 1) & 0x1) << 2;
        elem |= ((y >> 1) & 0x1) << 3;
        elem |= ((z >> 0) & 0x1) << 4;
        elem |= ((z >> 1) & 0x1) << 5;
        elem |= ((x >> 2) & 0x1) << 6;
        elem |= ((y >> 2) & 0x1) << 7;
      } else if (bitsPerElement == 32) {
        elem |= ((x >> 0) & 0x1) << 0;
        elem |= ((y >> 0) & 0x1) << 1;
        elem |= ((x >> 1) & 0x1) << 2;
        elem |= ((z >> 0) & 0x1) << 3;
        elem |= ((y >> 1) & 0x1) << 4;
        elem |= ((z >> 1) & 0x1) << 5;
        elem |= ((x >> 2) & 0x1) << 6;
        elem |= ((y >> 2) & 0x1) << 7;
      } else if (bitsPerElement == 64 || bitsPerElement == 128) {
        elem |= ((x >> 0) & 0x1) << 0;
        elem |= ((y >> 0) & 0x1) << 1;
        elem |= ((z >> 0) & 0x1) << 2;
        elem |= ((x >> 1) & 0x1) << 3;
        elem |= ((y >> 1) & 0x1) << 4;
        elem |= ((z >> 1) & 0x1) << 5;
        elem |= ((x >> 2) & 0x1) << 6;
        elem |= ((y >> 2) & 0x1) << 7;
      } else {
        std::abort();
      }
      break;
    default:
      std::abort();
    }
  }

  return elem;
}
// Selects the memory pipe for macro-tile coordinates (x, y) using the
// hardware's XOR-based hashing for the given pipe configuration.
// Aborts on an unsupported configuration.
constexpr uint32_t getPipeIndex(uint32_t x, uint32_t y, PipeConfig pipeCfg) {
  uint32_t pipe = 0;
  switch (pipeCfg) {
  case kPipeConfigP8_32x32_8x16:
    pipe |= (((x >> 4) ^ (y >> 3) ^ (x >> 5)) & 0x1) << 0;
    pipe |= (((x >> 3) ^ (y >> 4)) & 0x1) << 1;
    pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
    break;
  case kPipeConfigP8_32x32_16x16:
    pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0;
    pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1;
    pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
    break;
  case kPipeConfigP16:
    // Same low three bits as P8_32x32_16x16 plus a fourth pipe bit.
    pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0;
    pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1;
    pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
    pipe |= (((x >> 6) ^ (y >> 5)) & 0x1) << 3;
    break;
  default:
    std::abort();
  }
  return pipe;
}
// Selects the memory bank for coordinates (x, y) using the hardware's
// XOR-based bank hashing. bank_width * num_pipes and bank_height must be
// powers of two (their log2 is taken via std::countr_zero).
// Aborts on an unsupported bank count.
constexpr uint32_t getBankIndex(std::uint32_t x, std::uint32_t y,
                                std::uint32_t bank_width,
                                std::uint32_t bank_height,
                                std::uint32_t num_banks,
                                std::uint32_t num_pipes) {
  std::uint32_t x_shift_offset = std::countr_zero(bank_width * num_pipes);
  std::uint32_t y_shift_offset = std::countr_zero(bank_height);
  std::uint32_t xs = x >> x_shift_offset;
  std::uint32_t ys = y >> y_shift_offset;
  std::uint32_t bank = 0;
  switch (num_banks) {
  case 2:
    bank |= (((xs >> 3) ^ (ys >> 3)) & 0x1) << 0;
    break;
  case 4:
    bank |= (((xs >> 3) ^ (ys >> 4)) & 0x1) << 0;
    bank |= (((xs >> 4) ^ (ys >> 3)) & 0x1) << 1;
    break;
  case 8:
    bank |= (((xs >> 3) ^ (ys >> 5)) & 0x1) << 0;
    bank |= (((xs >> 4) ^ (ys >> 4) ^ (ys >> 5)) & 0x1) << 1;
    bank |= (((xs >> 5) ^ (ys >> 3)) & 0x1) << 2;
    break;
  case 16:
    bank |= (((xs >> 3) ^ (ys >> 6)) & 0x1) << 0;
    bank |= (((xs >> 4) ^ (ys >> 5) ^ (ys >> 6)) & 0x1) << 1;
    bank |= (((xs >> 5) ^ (ys >> 4)) & 0x1) << 2;
    bank |= (((xs >> 6) ^ (ys >> 3)) & 0x1) << 3;
    break;
  default:
    std::abort();
  }
  return bank;
}
// Number of memory pipes implied by a pipe configuration.
// Aborts on an unsupported configuration.
constexpr std::uint32_t getPipeCount(PipeConfig pipeConfig) {
  if (pipeConfig == kPipeConfigP16) {
    return 16;
  }

  if (pipeConfig == kPipeConfigP8_32x32_8x16 ||
      pipeConfig == kPipeConfigP8_32x32_16x16) {
    return 8;
  }

  std::abort();
}
// Computes the full layout (per-mip offsets/sizes, total size) of a surface
// described either by explicit parameters or by a gnm::TBuffer descriptor.
// Implemented in src/tiler.cpp.
SurfaceInfo computeSurfaceInfo(TileMode tileMode, gnm::TextureType type,
                               gnm::DataFormat dfmt, std::uint32_t width,
                               std::uint32_t height, std::uint32_t depth,
                               std::uint32_t pitch, int baseArrayLayer,
                               int arrayCount, int baseMipLevel, int mipCount,
                               bool pow2pad);
SurfaceInfo computeSurfaceInfo(const gnm::TBuffer &tbuffer, TileMode tileMode);
} // namespace amdgpu

View file

@ -0,0 +1,14 @@
#pragma once
#include "gnm/constants.hpp"
#include "tiler.hpp"
#include <cstdint>
namespace amdgpu {
// Computes the byte offset of texel/fragment (x, y, z, fragmentIndex) of the
// given mip level and array slice inside a tiled surface, for CPU-side
// (de)tiling.
std::uint64_t getTiledOffset(gnm::TextureType texType, bool isPow2Padded,
                             int numFragments, gnm::DataFormat dfmt,
                             amdgpu::TileMode tileMode,
                             amdgpu::MacroTileMode macroTileMode, int mipLevel,
                             int arraySlice, int width, int height, int depth,
                             int pitch, int x, int y, int z, int fragmentIndex);
} // namespace amdgpu

View file

@ -0,0 +1,24 @@
#pragma once
#include "tiler.hpp"
#include <Scheduler.hpp>
#include <memory>
namespace amdgpu {
// GPU-side tiler: converts surfaces between tiled layouts and linear memory
// by submitting work through the given Scheduler. Implementation details
// (pipelines, descriptors) are hidden behind the pimpl.
struct GpuTiler {
  struct Impl; // private implementation state
  GpuTiler();
  ~GpuTiler();
  // Reads the tiled surface at srcTiledAddress and writes mip `mipLevel` of
  // slices [baseArray, baseArray + arrayCount) in linear layout to
  // dstLinearAddress, using the layout described by `info`/`tileMode`.
  void detile(Scheduler &scheduler, const amdgpu::SurfaceInfo &info,
              amdgpu::TileMode tileMode, std::uint64_t srcTiledAddress,
              std::uint64_t dstLinearAddress, int mipLevel, int baseArray,
              int arrayCount);
  // Inverse of detile: reads linear data at srcLinearAddress and writes the
  // tiled representation to dstTiledAddress.
  void tile(Scheduler &scheduler, const amdgpu::SurfaceInfo &info,
            amdgpu::TileMode tileMode, std::uint64_t srcLinearAddress,
            std::uint64_t dstTiledAddress, int mipLevel, int baseArray,
            int arrayCount);
private:
  std::unique_ptr<Impl> mImpl; // owned implementation
};
} // namespace amdgpu

View file

@ -0,0 +1,76 @@
#version 460
#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_shader_atomic_int64 : enable
#extension GL_EXT_shader_atomic_float : enable
#extension GL_EXT_shader_image_load_formatted : enable
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_EXT_shared_memory_block : enable
#extension GL_EXT_scalar_block_layout : enable
#extension GL_EXT_null_initializer : enable
#extension GL_EXT_buffer_reference2 : enable
#extension GL_EXT_buffer_reference_uvec2 : enable
#include "tiler.glsl"
// Detiles one element per invocation: reads the element at
// gl_GlobalInvocationID from the 1D-(micro)tiled surface at config.srcAddress
// and stores it at the matching row-major position at config.dstAddress.
void main() {
  uvec3 pos = gl_GlobalInvocationID;
  uint64_t tiledSliceOffset = 0;
  uint64_t linearSliceOffset = 0;
  // A non-zero per-slice size means z selects an array slice whose layout
  // repeats: turn z into explicit byte offsets and address within slice 0.
  // Widen to 64 bits first so sliceIndex * sliceSize cannot overflow 32 bits
  // on large surfaces.
  if (config.tiledSurfaceSize != 0) {
    tiledSliceOffset = uint64_t(pos.z) * config.tiledSurfaceSize;
    linearSliceOffset = uint64_t(pos.z) * config.linearSurfaceSize;
    pos.z = 0;
  }
  uint64_t tiledByteOffset = getTiledBitOffset1D(
    config.tileMode,
    pos,
    config.dataSize,
    config.bitsPerElement
  ) / 8;
  tiledByteOffset += tiledSliceOffset;
  uint64_t linearByteOffset = computeLinearElementByteOffset(
    pos,
    0,
    config.dataSize.x,
    config.dataSize.x * config.dataSize.y,
    config.bitsPerElement,
    1 << config.numFragments // numFragments is log2(fragment count)
  );
  linearByteOffset += linearSliceOffset;
  // Copy one element of (bitsPerElement+7)/8 bytes through buffer_reference
  // pointers; elements wider than 8 bytes move as multiple 64-bit words.
  switch ((config.bitsPerElement + 7) / 8) {
  case 1:
    buffer_reference_uint8_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint8_t(config.srcAddress + tiledByteOffset).data;
    break;
  case 2:
    buffer_reference_uint16_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint16_t(config.srcAddress + tiledByteOffset).data;
    break;
  case 4:
    buffer_reference_uint32_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint32_t(config.srcAddress + tiledByteOffset).data;
    break;
  case 8:
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset).data;
    break;
  case 16:
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset).data;
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 8).data;
    break;
  case 32:
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset).data;
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 8).data;
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 16).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 16).data;
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 24).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 24).data;
    break;
  }
}

View file

@ -0,0 +1,76 @@
#version 460
#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_shader_atomic_int64 : enable
#extension GL_EXT_shader_atomic_float : enable
#extension GL_EXT_shader_image_load_formatted : enable
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_EXT_shared_memory_block : enable
#extension GL_EXT_scalar_block_layout : enable
#extension GL_EXT_null_initializer : enable
#extension GL_EXT_buffer_reference2 : enable
#extension GL_EXT_buffer_reference_uvec2 : enable
#include "tiler.glsl"
// Detiles one element per invocation: reads the element at
// gl_GlobalInvocationID from the 1D-(micro)tiled surface at config.srcAddress
// and stores it at the matching row-major position at config.dstAddress.
void main() {
  uvec3 pos = gl_GlobalInvocationID;
  uint64_t tiledSliceOffset = 0;
  uint64_t linearSliceOffset = 0;
  // A non-zero per-slice size means z selects an array slice whose layout
  // repeats: turn z into explicit byte offsets and address within slice 0.
  // Widen to 64 bits first so sliceIndex * sliceSize cannot overflow 32 bits
  // on large surfaces.
  if (config.tiledSurfaceSize != 0) {
    tiledSliceOffset = uint64_t(pos.z) * config.tiledSurfaceSize;
    linearSliceOffset = uint64_t(pos.z) * config.linearSurfaceSize;
    pos.z = 0;
  }
  uint64_t tiledByteOffset = getTiledBitOffset1D(
    config.tileMode,
    pos,
    config.dataSize,
    config.bitsPerElement
  ) / 8;
  tiledByteOffset += tiledSliceOffset;
  uint64_t linearByteOffset = computeLinearElementByteOffset(
    pos,
    0,
    config.dataSize.x,
    config.dataSize.x * config.dataSize.y,
    config.bitsPerElement,
    1 << config.numFragments // numFragments is log2(fragment count)
  );
  linearByteOffset += linearSliceOffset;
  // Copy one element of (bitsPerElement+7)/8 bytes through buffer_reference
  // pointers; elements wider than 8 bytes move as multiple 64-bit words.
  switch ((config.bitsPerElement + 7) / 8) {
  case 1:
    buffer_reference_uint8_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint8_t(config.srcAddress + tiledByteOffset).data;
    break;
  case 2:
    buffer_reference_uint16_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint16_t(config.srcAddress + tiledByteOffset).data;
    break;
  case 4:
    buffer_reference_uint32_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint32_t(config.srcAddress + tiledByteOffset).data;
    break;
  case 8:
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset).data;
    break;
  case 16:
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset).data;
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 8).data;
    break;
  case 32:
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset).data;
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 8).data;
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 16).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 16).data;
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 24).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 24).data;
    break;
  }
}

View file

@ -0,0 +1,76 @@
#version 460
#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_shader_atomic_int64 : enable
#extension GL_EXT_shader_atomic_float : enable
#extension GL_EXT_shader_image_load_formatted : enable
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_EXT_shared_memory_block : enable
#extension GL_EXT_scalar_block_layout : enable
#extension GL_EXT_null_initializer : enable
#extension GL_EXT_buffer_reference2 : enable
#extension GL_EXT_buffer_reference_uvec2 : enable
#include "tiler.glsl"
// Detiles one element per invocation for linear-aligned array modes: the
// "tiled" source is itself row-major, but padded to the surface pitch, so
// the source offset comes from computeLinearOffset rather than a micro-tile
// swizzle. Result is written densely at config.dstAddress.
void main() {
  uvec3 pos = gl_GlobalInvocationID;
  uint64_t tiledSliceOffset = 0;
  uint64_t linearSliceOffset = 0;
  // A non-zero per-slice size means z selects an array slice whose layout
  // repeats: turn z into explicit byte offsets and address within slice 0.
  // Widen to 64 bits first so sliceIndex * sliceSize cannot overflow 32 bits
  // on large surfaces.
  if (config.tiledSurfaceSize != 0) {
    tiledSliceOffset = uint64_t(pos.z) * config.tiledSurfaceSize;
    linearSliceOffset = uint64_t(pos.z) * config.linearSurfaceSize;
    pos.z = 0;
  }
  uint64_t tiledByteOffset = computeLinearOffset(
    config.bitsPerElement,
    config.dataSize.y,
    config.dataSize.x,
    pos
  ) / 8;
  tiledByteOffset += tiledSliceOffset;
  uint64_t linearByteOffset = computeLinearElementByteOffset(
    pos,
    0,
    config.dataSize.x,
    config.dataSize.x * config.dataSize.y,
    config.bitsPerElement,
    1 << config.numFragments // numFragments is log2(fragment count)
  );
  linearByteOffset += linearSliceOffset;
  // Copy one element of (bitsPerElement+7)/8 bytes through buffer_reference
  // pointers; elements wider than 8 bytes move as multiple 64-bit words.
  switch ((config.bitsPerElement + 7) / 8) {
  case 1:
    buffer_reference_uint8_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint8_t(config.srcAddress + tiledByteOffset).data;
    break;
  case 2:
    buffer_reference_uint16_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint16_t(config.srcAddress + tiledByteOffset).data;
    break;
  case 4:
    buffer_reference_uint32_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint32_t(config.srcAddress + tiledByteOffset).data;
    break;
  case 8:
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset).data;
    break;
  case 16:
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset).data;
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 8).data;
    break;
  case 32:
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset).data;
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 8).data;
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 16).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 16).data;
    buffer_reference_uint64_t(config.dstAddress + linearByteOffset + 24).data = buffer_reference_uint64_t(config.srcAddress + tiledByteOffset + 24).data;
    break;
  }
}

View file

@ -0,0 +1,716 @@
// Applies OP to every scalar type a tiler copy may move.
// NOTE: the last expansion line must NOT carry a trailing backslash — a
// continuation there would splice the next preprocessor line into this
// macro's body.
#define FOR_ALL_BASE_TYPES(OP) \
  OP(int8_t) \
  OP(uint8_t) \
  OP(int16_t) \
  OP(uint16_t) \
  OP(float16_t) \
  OP(int32_t) \
  OP(uint32_t) \
  OP(float32_t) \
  OP(int64_t) \
  OP(uint64_t) \
  OP(float64_t)

// Declares buffer_reference_<TYPE>: a physical-pointer wrapper used to read
// or write a single TYPE at an arbitrary 64-bit device address.
#define DEFINE_BUFFER_REFERENCE(TYPE) \
  layout(buffer_reference) buffer buffer_reference_##TYPE { \
    TYPE data; \
  };

FOR_ALL_BASE_TYPES(DEFINE_BUFFER_REFERENCE)

// Fetches BITCOUNT bits starting at absolute bit START from an array of
// 32-/64-bit words. The field must not straddle a word boundary, and
// BITCOUNT must be smaller than the word size.
#define U32ARRAY_FETCH_BITS(ARRAY, START, BITCOUNT) ((ARRAY[(START) >> 5] >> ((START) & 31)) & ((1 << (BITCOUNT)) - 1))
#define U64ARRAY_FETCH_BITS(ARRAY, START, BITCOUNT) ((ARRAY[(START) >> 6] >> ((START) & 63)) & ((uint64_t(1) << (BITCOUNT)) - 1))
// Decoders for the 256-bit T# (image resource) descriptor, passed as four
// 64-bit words. Each accessor extracts one bitfield with
// U64ARRAY_FETCH_BITS; every field below lies within a single 64-bit word,
// as that macro requires.
// NOTE(review): bit positions look like the GCN T# layout — confirm against
// the ISA reference when touching them.
uint64_t tbuffer_base(u64vec4 tbuffer) {
  return U64ARRAY_FETCH_BITS(tbuffer, 0, 38);
}
uint32_t tbuffer_mtype_L2(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 38, 2));
}
uint32_t tbuffer_min_lod(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 40, 12));
}
// Data format (kDataFormat*) and numeric format (kNumericFormat*).
uint32_t tbuffer_dfmt(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 52, 6));
}
uint32_t tbuffer_nfmt(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 58, 4));
}
// Split field: low two bits at 62..63, third bit stored at bit 122.
uint32_t tbuffer_mtype_l1(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 62, 2) | (U64ARRAY_FETCH_BITS(tbuffer, 122, 1) << 2));
}
uint32_t tbuffer_width(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 64, 14));
}
uint32_t tbuffer_height(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 78, 14));
}
uint32_t tbuffer_perfMod(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 92, 3));
}
bool tbuffer_interlaced(u64vec4 tbuffer) {
  return U64ARRAY_FETCH_BITS(tbuffer, 95, 1) != 0;
}
// Per-channel component swizzle selectors.
uint32_t tbuffer_dst_sel_x(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 96, 3));
}
uint32_t tbuffer_dst_sel_y(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 99, 3));
}
uint32_t tbuffer_dst_sel_z(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 102, 3));
}
uint32_t tbuffer_dst_sel_w(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 105, 3));
}
// Mip range covered by the resource.
uint32_t tbuffer_base_level(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 108, 4));
}
uint32_t tbuffer_last_level(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 112, 4));
}
// Index into the tile-mode table (selects the TileMode register value).
uint32_t tbuffer_tiling_idx(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 116, 5));
}
bool tbuffer_pow2pad(u64vec4 tbuffer) {
  return U64ARRAY_FETCH_BITS(tbuffer, 121, 1) != 0;
}
// Resource dimensionality (kTextureType*).
uint32_t tbuffer_type(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 124, 4));
}
uint32_t tbuffer_depth(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 128, 13));
}
uint32_t tbuffer_pitch(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 141, 14));
}
// Array-slice range covered by the resource.
uint32_t tbuffer_base_array(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 160, 13));
}
uint32_t tbuffer_last_array(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 173, 13));
}
uint32_t tbuffer_min_lod_warn(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 192, 12));
}
uint32_t tbuffer_counter_bank_id(u64vec4 tbuffer) {
  return uint32_t(U64ARRAY_FETCH_BITS(tbuffer, 204, 8));
}
bool tbuffer_LOD_hdw_cnt_en(u64vec4 tbuffer) {
  return U64ARRAY_FETCH_BITS(tbuffer, 212, 1) != 0;
}
// T# resource type field values (tbuffer_type).
const int kTextureType1D = 8;
const int kTextureType2D = 9;
const int kTextureType3D = 10;
const int kTextureTypeCube = 11;
const int kTextureTypeArray1D = 12;
const int kTextureTypeArray2D = 13;
const int kTextureTypeMsaa2D = 14;
const int kTextureTypeMsaaArray2D = 15;
// Fixed tiling geometry: micro tiles are 8x8 elements.
const uint32_t kMicroTileWidth = 8;
const uint32_t kMicroTileHeight = 8;
const uint32_t kDramRowSize = 0x400;
const uint32_t kPipeInterleaveBytes = 256;
// Data formats (T# dfmt field values).
const uint32_t kDataFormatInvalid = 0x00000000;
const uint32_t kDataFormat8 = 0x00000001;
const uint32_t kDataFormat16 = 0x00000002;
const uint32_t kDataFormat8_8 = 0x00000003;
const uint32_t kDataFormat32 = 0x00000004;
const uint32_t kDataFormat16_16 = 0x00000005;
const uint32_t kDataFormat10_11_11 = 0x00000006;
const uint32_t kDataFormat11_11_10 = 0x00000007;
const uint32_t kDataFormat10_10_10_2 = 0x00000008;
const uint32_t kDataFormat2_10_10_10 = 0x00000009;
const uint32_t kDataFormat8_8_8_8 = 0x0000000a;
const uint32_t kDataFormat32_32 = 0x0000000b;
const uint32_t kDataFormat16_16_16_16 = 0x0000000c;
const uint32_t kDataFormat32_32_32 = 0x0000000d;
const uint32_t kDataFormat32_32_32_32 = 0x0000000e;
const uint32_t kDataFormat5_6_5 = 0x00000010;
const uint32_t kDataFormat1_5_5_5 = 0x00000011;
const uint32_t kDataFormat5_5_5_1 = 0x00000012;
const uint32_t kDataFormat4_4_4_4 = 0x00000013;
const uint32_t kDataFormat8_24 = 0x00000014;
const uint32_t kDataFormat24_8 = 0x00000015;
const uint32_t kDataFormatX24_8_32 = 0x00000016;
const uint32_t kDataFormatGB_GR = 0x00000020;
const uint32_t kDataFormatBG_RG = 0x00000021;
const uint32_t kDataFormat5_9_9_9 = 0x00000022;
// Block-compressed formats (4x4 texel blocks).
const uint32_t kDataFormatBc1 = 0x00000023;
const uint32_t kDataFormatBc2 = 0x00000024;
const uint32_t kDataFormatBc3 = 0x00000025;
const uint32_t kDataFormatBc4 = 0x00000026;
const uint32_t kDataFormatBc5 = 0x00000027;
const uint32_t kDataFormatBc6 = 0x00000028;
const uint32_t kDataFormatBc7 = 0x00000029;
// FMASK formats: Fmask<bits>_S<samples>_F<fragments>.
const uint32_t kDataFormatFmask8_S2_F1 = 0x0000002C;
const uint32_t kDataFormatFmask8_S4_F1 = 0x0000002D;
const uint32_t kDataFormatFmask8_S8_F1 = 0x0000002E;
const uint32_t kDataFormatFmask8_S2_F2 = 0x0000002F;
const uint32_t kDataFormatFmask8_S4_F2 = 0x00000030;
const uint32_t kDataFormatFmask8_S4_F4 = 0x00000031;
const uint32_t kDataFormatFmask16_S16_F1 = 0x00000032;
const uint32_t kDataFormatFmask16_S8_F2 = 0x00000033;
const uint32_t kDataFormatFmask32_S16_F2 = 0x00000034;
const uint32_t kDataFormatFmask32_S8_F4 = 0x00000035;
const uint32_t kDataFormatFmask32_S8_F8 = 0x00000036;
const uint32_t kDataFormatFmask64_S16_F4 = 0x00000037;
const uint32_t kDataFormatFmask64_S16_F8 = 0x00000038;
const uint32_t kDataFormat4_4 = 0x00000039;
const uint32_t kDataFormat6_5_5 = 0x0000003A;
const uint32_t kDataFormat1 = 0x0000003B;
const uint32_t kDataFormat1Reversed = 0x0000003C;
// Numeric formats (T# nfmt field values).
const uint32_t kNumericFormatUNorm = 0x00000000;
const uint32_t kNumericFormatSNorm = 0x00000001;
const uint32_t kNumericFormatUScaled = 0x00000002;
const uint32_t kNumericFormatSScaled = 0x00000003;
const uint32_t kNumericFormatUInt = 0x00000004;
const uint32_t kNumericFormatSInt = 0x00000005;
const uint32_t kNumericFormatSNormNoZero = 0x00000006;
const uint32_t kNumericFormatFloat = 0x00000007;
const uint32_t kNumericFormatSrgb = 0x00000009;
const uint32_t kNumericFormatUBNorm = 0x0000000A;
const uint32_t kNumericFormatUBNormNoZero = 0x0000000B;
const uint32_t kNumericFormatUBInt = 0x0000000C;
const uint32_t kNumericFormatUBScaled = 0x0000000D;
// Array (tiling) modes: linear, 1D micro-tiled, 2D/3D macro-tiled, PRT.
const uint32_t kArrayModeLinearGeneral = 0x00000000;
const uint32_t kArrayModeLinearAligned = 0x00000001;
const uint32_t kArrayMode1dTiledThin = 0x00000002;
const uint32_t kArrayMode1dTiledThick = 0x00000003;
const uint32_t kArrayMode2dTiledThin = 0x00000004;
const uint32_t kArrayModeTiledThinPrt = 0x00000005;
const uint32_t kArrayMode2dTiledThinPrt = 0x00000006;
const uint32_t kArrayMode2dTiledThick = 0x00000007;
const uint32_t kArrayMode2dTiledXThick = 0x00000008;
const uint32_t kArrayModeTiledThickPrt = 0x00000009;
const uint32_t kArrayMode2dTiledThickPrt = 0x0000000a;
const uint32_t kArrayMode3dTiledThinPrt = 0x0000000b;
const uint32_t kArrayMode3dTiledThin = 0x0000000c;
const uint32_t kArrayMode3dTiledThick = 0x0000000d;
const uint32_t kArrayMode3dTiledXThick = 0x0000000e;
const uint32_t kArrayMode3dTiledThickPrt = 0x0000000f;
// Micro tile modes: select the element interleave inside an 8x8 micro tile.
const uint32_t kMicroTileModeDisplay = 0x00000000;
const uint32_t kMicroTileModeThin = 0x00000001;
const uint32_t kMicroTileModeDepth = 0x00000002;
const uint32_t kMicroTileModeRotated = 0x00000003;
const uint32_t kMicroTileModeThick = 0x00000004;
// Pipe configurations used by this tiler.
const uint32_t kPipeConfigP8_32x32_8x16 = 0x0000000a;
const uint32_t kPipeConfigP8_32x32_16x16 = 0x0000000c;
const uint32_t kPipeConfigP16 = 0x00000012;
// Number of z-slices stored in one micro tile for the given array mode:
// 8 for xthick modes, 4 for thick modes, 1 for everything else (including
// unknown values).
uint32_t getMicroTileThickness(uint32_t arrayMode) {
  if (arrayMode == kArrayMode2dTiledXThick ||
      arrayMode == kArrayMode3dTiledXThick) {
    return 8;
  }
  if (arrayMode == kArrayMode1dTiledThick ||
      arrayMode == kArrayMode2dTiledThick ||
      arrayMode == kArrayMode3dTiledThick ||
      arrayMode == kArrayModeTiledThickPrt ||
      arrayMode == kArrayMode2dTiledThickPrt ||
      arrayMode == kArrayMode3dTiledThickPrt) {
    return 4;
  }
  return 1;
}
// True for the 2D/3D/PRT macro-tiled array modes; linear modes, 1D
// micro-tiled modes and unknown values are not macro tiled.
bool isMacroTiled(uint32_t arrayMode) {
  switch (arrayMode) {
  case kArrayMode2dTiledThin:
  case kArrayMode2dTiledThick:
  case kArrayMode2dTiledXThick:
  case kArrayMode3dTiledThin:
  case kArrayMode3dTiledThick:
  case kArrayMode3dTiledXThick:
  case kArrayModeTiledThinPrt:
  case kArrayModeTiledThickPrt:
  case kArrayMode2dTiledThinPrt:
  case kArrayMode2dTiledThickPrt:
  case kArrayMode3dTiledThinPrt:
  case kArrayMode3dTiledThickPrt:
    return true;
  }
  return false;
}
// True for the partially-resident-texture (PRT) array modes; all other
// modes (and unknown values) are non-PRT.
bool isPrt(uint32_t arrayMode) {
  switch (arrayMode) {
  case kArrayModeTiledThinPrt:
  case kArrayModeTiledThickPrt:
  case kArrayMode2dTiledThinPrt:
  case kArrayMode2dTiledThickPrt:
  case kArrayMode3dTiledThinPrt:
  case kArrayMode3dTiledThickPrt:
    return true;
  }
  return false;
}
// Texels packed into one stored element: BC blocks hold 4x4 texels, 1-bit
// formats pack 8 per byte, GB_GR/BG_RG share chroma across 2 texels; all
// remaining formats store one texel per element.
int getTexelsPerElement(uint32_t dfmt) {
  if (dfmt >= kDataFormatBc1 && dfmt <= kDataFormatBc7) {
    return 16;
  }
  if (dfmt == kDataFormat1 || dfmt == kDataFormat1Reversed) {
    return 8;
  }
  if (dfmt == kDataFormatGB_GR || dfmt == kDataFormatBG_RG) {
    return 2;
  }
  return 1;
}
// Storage bits per element for a data format (per element, not per texel —
// e.g. BC1 packs 16 texels at 4 bits each). Returns -1 for unknown formats.
int getBitsPerElement(uint32_t dfmt) {
  switch (dfmt) {
  case kDataFormatInvalid:
    return 0;
  case kDataFormat1:
  case kDataFormat1Reversed:
    return 1;
  case kDataFormatBc1:
  case kDataFormatBc4:
    return 4;
  case kDataFormat8:
  case kDataFormatBc2:
  case kDataFormatBc3:
  case kDataFormatBc5:
  case kDataFormatBc6:
  case kDataFormatBc7:
  case kDataFormatFmask8_S2_F1:
  case kDataFormatFmask8_S4_F1:
  case kDataFormatFmask8_S8_F1:
  case kDataFormatFmask8_S2_F2:
  case kDataFormatFmask8_S4_F2:
  case kDataFormatFmask8_S4_F4:
  case kDataFormat4_4:
    return 8;
  case kDataFormat16:
  case kDataFormat8_8:
  case kDataFormat5_6_5:
  case kDataFormat1_5_5_5:
  case kDataFormat5_5_5_1:
  case kDataFormat4_4_4_4:
  case kDataFormatGB_GR:
  case kDataFormatBG_RG:
  case kDataFormatFmask16_S16_F1:
  case kDataFormatFmask16_S8_F2:
  case kDataFormat6_5_5:
    return 16;
  case kDataFormat32:
  case kDataFormat16_16:
  case kDataFormat10_11_11:
  case kDataFormat11_11_10:
  case kDataFormat10_10_10_2:
  case kDataFormat2_10_10_10:
  case kDataFormat8_8_8_8:
  case kDataFormat8_24:
  case kDataFormat24_8:
  case kDataFormat5_9_9_9:
  case kDataFormatFmask32_S16_F2:
  case kDataFormatFmask32_S8_F4:
  case kDataFormatFmask32_S8_F8:
    return 32;
  case kDataFormat32_32:
  case kDataFormat16_16_16_16:
  case kDataFormatX24_8_32:
  case kDataFormatFmask64_S16_F4:
  case kDataFormatFmask64_S16_F8:
    return 64;
  case kDataFormat32_32_32:
    return 96;
  case kDataFormat32_32_32_32:
    return 128;
  }
  return -1;
}
// Total bits an element occupies across all texels it packs
// (per-element bits times texels per element).
int getTotalBitsPerElement(uint32_t dfmt) {
  int bits = getBitsPerElement(dfmt);
  int texels = getTexelsPerElement(dfmt);
  return bits * texels;
}
// Number of color/data components encoded by a data format.
// Returns -1 for unknown formats, 0 for the invalid format.
int getNumComponentsPerElement(uint32_t dfmt) {
  switch (dfmt) {
  case kDataFormatInvalid:
    return 0;
  case kDataFormat8:
  case kDataFormat16:
  case kDataFormat32:
  case kDataFormatBc4:
  case kDataFormat1:
  case kDataFormat1Reversed:
    return 1;
  case kDataFormat8_8:
  case kDataFormat16_16:
  case kDataFormat32_32:
  case kDataFormat8_24:
  case kDataFormat24_8:
  case kDataFormatX24_8_32:
  case kDataFormatBc5:
  case kDataFormatFmask8_S2_F1:
  case kDataFormatFmask8_S4_F1:
  case kDataFormatFmask8_S8_F1:
  case kDataFormatFmask8_S2_F2:
  case kDataFormatFmask8_S4_F2:
  case kDataFormatFmask8_S4_F4:
  case kDataFormatFmask16_S16_F1:
  case kDataFormatFmask16_S8_F2:
  case kDataFormatFmask32_S16_F2:
  case kDataFormatFmask32_S8_F4:
  case kDataFormatFmask32_S8_F8:
  case kDataFormatFmask64_S16_F4:
  case kDataFormatFmask64_S16_F8:
  case kDataFormat4_4:
    return 2;
  case kDataFormat10_11_11:
  case kDataFormat11_11_10:
  case kDataFormat32_32_32:
  case kDataFormat5_6_5:
  case kDataFormatGB_GR:
  case kDataFormatBG_RG:
  case kDataFormat5_9_9_9:
  case kDataFormatBc6:
  case kDataFormat6_5_5:
    return 3;
  case kDataFormat10_10_10_2:
  case kDataFormat2_10_10_10:
  case kDataFormat8_8_8_8:
  case kDataFormat16_16_16_16:
  case kDataFormat32_32_32_32:
  case kDataFormat1_5_5_5:
  case kDataFormat5_5_5_1:
  case kDataFormat4_4_4_4:
  case kDataFormatBc1:
  case kDataFormatBc2:
  case kDataFormatBc3:
  case kDataFormatBc7:
    return 4;
  }
  return -1;
}
// Field extractors for a packed tile-mode register value.
uint32_t tileMode_getArrayMode(uint32_t tileMode) {
  return (tileMode & 0x0000003c) >> 2; // bits [5:2] -> kArrayMode*
}
uint32_t tileMode_getPipeConfig(uint32_t tileMode) {
  return (tileMode & 0x000007c0) >> 6; // bits [10:6] -> kPipeConfig*
}
uint32_t tileMode_getTileSplit(uint32_t tileMode) {
  return (tileMode & 0x00003800) >> 11; // bits [13:11]
}
uint32_t tileMode_getMicroTileMode(uint32_t tileMode) {
  return (tileMode & 0x01c00000) >> 22; // bits [24:22] -> kMicroTileMode*
}
uint32_t tileMode_getSampleSplit(uint32_t tileMode) {
  return (tileMode & 0x06000000) >> 25; // bits [26:25]
}
// Rounds x up to the next power of two; values that are already powers of
// two are returned unchanged. Note: bit_ceil(0) yields 0 (wraps), matching
// the original bit-smearing implementation.
uint32_t bit_ceil(uint32_t x) {
  uint32_t v = x - 1;
  for (uint32_t shift = 1; shift <= 16; shift <<= 1) {
    v |= v >> shift;
  }
  return v + 1;
}
// Index of an element inside its micro tile. The x/y (and, for thick
// modes, z) coordinate bits are interleaved; the exact pattern depends on
// the micro tile mode and, for display mode, on the element size.
uint32_t getElementIndex(uvec3 pos, uint32_t bitsPerElement, uint32_t microTileMode, uint32_t arrayMode) {
  uint32_t elem = 0;
  if (microTileMode == kMicroTileModeDisplay) {
    // Display mode uses a different x/y bit order per element width.
    switch (bitsPerElement) {
    case 8:
      elem |= ((pos.x >> 0) & 0x1) << 0;
      elem |= ((pos.x >> 1) & 0x1) << 1;
      elem |= ((pos.x >> 2) & 0x1) << 2;
      elem |= ((pos.y >> 1) & 0x1) << 3;
      elem |= ((pos.y >> 0) & 0x1) << 4;
      elem |= ((pos.y >> 2) & 0x1) << 5;
      break;
    case 16:
      elem |= ((pos.x >> 0) & 0x1) << 0;
      elem |= ((pos.x >> 1) & 0x1) << 1;
      elem |= ((pos.x >> 2) & 0x1) << 2;
      elem |= ((pos.y >> 0) & 0x1) << 3;
      elem |= ((pos.y >> 1) & 0x1) << 4;
      elem |= ((pos.y >> 2) & 0x1) << 5;
      break;
    case 32:
      elem |= ((pos.x >> 0) & 0x1) << 0;
      elem |= ((pos.x >> 1) & 0x1) << 1;
      elem |= ((pos.y >> 0) & 0x1) << 2;
      elem |= ((pos.x >> 2) & 0x1) << 3;
      elem |= ((pos.y >> 1) & 0x1) << 4;
      elem |= ((pos.y >> 2) & 0x1) << 5;
      break;
    case 64:
      elem |= ((pos.x >> 0) & 0x1) << 0;
      elem |= ((pos.y >> 0) & 0x1) << 1;
      elem |= ((pos.x >> 1) & 0x1) << 2;
      elem |= ((pos.x >> 2) & 0x1) << 3;
      elem |= ((pos.y >> 1) & 0x1) << 4;
      elem |= ((pos.y >> 2) & 0x1) << 5;
      break;
    }
  } else if (microTileMode == kMicroTileModeThin ||
             microTileMode == kMicroTileModeDepth) {
    // Thin/depth modes share one x/y interleave regardless of element size.
    elem |= ((pos.x >> 0) & 0x1) << 0;
    elem |= ((pos.y >> 0) & 0x1) << 1;
    elem |= ((pos.x >> 1) & 0x1) << 2;
    elem |= ((pos.y >> 1) & 0x1) << 3;
    elem |= ((pos.x >> 2) & 0x1) << 4;
    elem |= ((pos.y >> 2) & 0x1) << 5;
    switch (arrayMode) {
    // Intentional fallthrough: xthick modes add z bit 2, then fall into the
    // thick cases which add z bits 0 and 1.
    case kArrayMode2dTiledXThick:
    case kArrayMode3dTiledXThick:
      elem |= ((pos.z >> 2) & 0x1) << 8;
    case kArrayMode1dTiledThick:
    case kArrayMode2dTiledThick:
    case kArrayMode3dTiledThick:
    case kArrayModeTiledThickPrt:
    case kArrayMode2dTiledThickPrt:
    case kArrayMode3dTiledThickPrt:
      elem |= ((pos.z >> 0) & 0x1) << 6;
      elem |= ((pos.z >> 1) & 0x1) << 7;
    default:
      break;
    }
  } else if (microTileMode == kMicroTileModeThick) {
    switch (arrayMode) {
    // Intentional fallthrough, as above: xthick adds z bit 2 first.
    case kArrayMode2dTiledXThick:
    case kArrayMode3dTiledXThick:
      elem |= ((pos.z >> 2) & 0x1) << 8;
    case kArrayMode1dTiledThick:
    case kArrayMode2dTiledThick:
    case kArrayMode3dTiledThick:
    case kArrayModeTiledThickPrt:
    case kArrayMode2dTiledThickPrt:
    case kArrayMode3dTiledThickPrt:
      // Thick mode interleaves z into the low bits; the split point depends
      // on the element width.
      if (bitsPerElement == 8 || bitsPerElement == 16) {
        elem |= ((pos.x >> 0) & 0x1) << 0;
        elem |= ((pos.y >> 0) & 0x1) << 1;
        elem |= ((pos.x >> 1) & 0x1) << 2;
        elem |= ((pos.y >> 1) & 0x1) << 3;
        elem |= ((pos.z >> 0) & 0x1) << 4;
        elem |= ((pos.z >> 1) & 0x1) << 5;
        elem |= ((pos.x >> 2) & 0x1) << 6;
        elem |= ((pos.y >> 2) & 0x1) << 7;
      } else if (bitsPerElement == 32) {
        elem |= ((pos.x >> 0) & 0x1) << 0;
        elem |= ((pos.y >> 0) & 0x1) << 1;
        elem |= ((pos.x >> 1) & 0x1) << 2;
        elem |= ((pos.z >> 0) & 0x1) << 3;
        elem |= ((pos.y >> 1) & 0x1) << 4;
        elem |= ((pos.z >> 1) & 0x1) << 5;
        elem |= ((pos.x >> 2) & 0x1) << 6;
        elem |= ((pos.y >> 2) & 0x1) << 7;
      } else if (bitsPerElement == 64 || bitsPerElement == 128) {
        elem |= ((pos.x >> 0) & 0x1) << 0;
        elem |= ((pos.y >> 0) & 0x1) << 1;
        elem |= ((pos.z >> 0) & 0x1) << 2;
        elem |= ((pos.x >> 1) & 0x1) << 3;
        elem |= ((pos.y >> 1) & 0x1) << 4;
        elem |= ((pos.z >> 1) & 0x1) << 5;
        elem |= ((pos.x >> 2) & 0x1) << 6;
        elem |= ((pos.y >> 2) & 0x1) << 7;
      }
      break;
    }
  }
  return elem;
}
// Byte offset of element (pos, fragmentIndex) in a linear surface whose
// fragments are interleaved per pixel.
// Widen the coordinate products to 64 bits before summing: with 32-bit
// arithmetic, pos.z * slicePitchElems (or pos.y * pitch) can overflow for
// large array/volume surfaces before the assignment to uint64_t happens.
uint64_t computeLinearElementByteOffset(
    uvec3 pos, uint32_t fragmentIndex, uint32_t pitch,
    uint32_t slicePitchElems, uint32_t bitsPerElement,
    uint32_t numFragmentsPerPixel) {
  uint64_t absoluteElementIndex =
      uint64_t(pos.z) * slicePitchElems + uint64_t(pos.y) * pitch + pos.x;
  return ((absoluteElementIndex * bitsPerElement * numFragmentsPerPixel) +
          (bitsPerElement * fragmentIndex)) / 8;
}
// Bit offset of element pos in a row-major layout with the given pitch and
// height (used for the linear-aligned array modes).
uint64_t computeLinearOffset(uint32_t bitsPerElement, uint height, uint pitch, uvec3 pos) {
  uint paddedHeight = height;
  uint paddedWidth = pitch;
  if (bitsPerElement == 1) {
    // 1-bit formats pack 8 texels per byte: re-express the row in bytes.
    bitsPerElement *= 8;
    paddedWidth = max((paddedWidth + 7) / 8, 1);
  }
  uint64_t tiledRowSizeBits = uint64_t(bitsPerElement) * paddedWidth;
  uint64_t tiledSliceBits = uint64_t(paddedWidth) * paddedHeight * bitsPerElement;
  return tiledSliceBits * pos.z + tiledRowSizeBits * pos.y + bitsPerElement * pos.x;
}
// Bit offset of element pos inside a 1D-tiled surface (micro tiling only,
// no pipe/bank macro tiling) of dataSize.x by dataSize.y elements.
uint64_t getTiledBitOffset1D(uint32_t tileMode, uvec3 pos, uvec2 dataSize, uint32_t bitsPerElement) {
  uint32_t arrayMode = tileMode_getArrayMode(tileMode);
  uint32_t paddedWidth = dataSize.x;
  uint32_t paddedHeight = dataSize.y;
  // 1D-thick micro tiles hold 4 z-slices; thin ones hold 1.
  int tileThickness = (arrayMode == kArrayMode1dTiledThick) ? 4 : 1;
  uint64_t tileBytes = (kMicroTileWidth * kMicroTileHeight * tileThickness * bitsPerElement + 7) / 8;
  uint32_t tilesPerRow = paddedWidth / kMicroTileWidth;
  uint32_t tilesPerSlice = max(tilesPerRow * (paddedHeight / kMicroTileHeight), 1);
  // Position of the element within its 8x8(xN) micro tile.
  uint64_t elementIndex = getElementIndex(pos, bitsPerElement,
                                          tileMode_getMicroTileMode(tileMode), arrayMode);
  uint64_t sliceOffset = (pos.z / tileThickness) * tilesPerSlice * tileBytes;
  uint64_t tileRowIndex = pos.y / kMicroTileHeight;
  uint64_t tileColumnIndex = pos.x / kMicroTileWidth;
  uint64_t tileOffset =
      (tileRowIndex * tilesPerRow + tileColumnIndex) * tileBytes;
  uint64_t elementOffset = elementIndex * bitsPerElement;
  return (sliceOffset + tileOffset) * 8 + elementOffset;
}
// Per-dispatch parameters shared by the tiler shaders. Addresses are raw
// 64-bit device pointers dereferenced through buffer_reference casts.
layout(binding=0) uniform Config {
  uint64_t srcAddress;        // copy source (tiled or linear, per shader)
  uint64_t dstAddress;        // copy destination
  uvec2 dataSize;             // surface extent in elements (x also used as pitch)
  uint32_t tileMode;          // packed tile-mode register value
  uint32_t numFragments;      // log2 of fragments per pixel (shaders use 1 << numFragments)
  uint32_t bitsPerElement;    // element size in bits
  uint32_t tiledSurfaceSize;  // bytes per tiled slice; 0 -> z addressed directly
  uint32_t linearSurfaceSize; // bytes per linear slice
} config;

View file

@ -0,0 +1,76 @@
#version 460
#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_shader_atomic_int64 : enable
#extension GL_EXT_shader_atomic_float : enable
#extension GL_EXT_shader_image_load_formatted : enable
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_EXT_shared_memory_block : enable
#extension GL_EXT_scalar_block_layout : enable
#extension GL_EXT_null_initializer : enable
#extension GL_EXT_buffer_reference2 : enable
#extension GL_EXT_buffer_reference_uvec2 : enable
#include "tiler.glsl"
// Tiles one element per invocation: reads the element at
// gl_GlobalInvocationID from the row-major linear buffer at config.srcAddress
// and stores it at its 1D-(micro)tiled position at config.dstAddress.
void main() {
  uvec3 pos = gl_GlobalInvocationID;
  uint64_t tiledSliceOffset = 0;
  uint64_t linearSliceOffset = 0;
  // A non-zero per-slice size means z selects an array slice whose layout
  // repeats: turn z into explicit byte offsets and address within slice 0.
  // Widen to 64 bits first so sliceIndex * sliceSize cannot overflow 32 bits
  // on large surfaces.
  if (config.tiledSurfaceSize != 0) {
    tiledSliceOffset = uint64_t(pos.z) * config.tiledSurfaceSize;
    linearSliceOffset = uint64_t(pos.z) * config.linearSurfaceSize;
    pos.z = 0;
  }
  uint64_t tiledByteOffset = getTiledBitOffset1D(
    config.tileMode,
    pos,
    config.dataSize,
    config.bitsPerElement
  ) / 8;
  tiledByteOffset += tiledSliceOffset;
  uint64_t linearByteOffset = computeLinearElementByteOffset(
    pos,
    0,
    config.dataSize.x,
    config.dataSize.x * config.dataSize.y,
    config.bitsPerElement,
    1 << config.numFragments // numFragments is log2(fragment count)
  );
  linearByteOffset += linearSliceOffset;
  // Copy one element of (bitsPerElement+7)/8 bytes through buffer_reference
  // pointers; elements wider than 8 bytes move as multiple 64-bit words.
  switch ((config.bitsPerElement + 7) / 8) {
  case 1:
    buffer_reference_uint8_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint8_t(config.srcAddress + linearByteOffset).data;
    break;
  case 2:
    buffer_reference_uint16_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint16_t(config.srcAddress + linearByteOffset).data;
    break;
  case 4:
    buffer_reference_uint32_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint32_t(config.srcAddress + linearByteOffset).data;
    break;
  case 8:
    buffer_reference_uint64_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset).data;
    break;
  case 16:
    buffer_reference_uint64_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset).data;
    buffer_reference_uint64_t(config.dstAddress + tiledByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset + 8).data;
    break;
  case 32:
    buffer_reference_uint64_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset).data;
    buffer_reference_uint64_t(config.dstAddress + tiledByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset + 8).data;
    buffer_reference_uint64_t(config.dstAddress + tiledByteOffset + 16).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset + 16).data;
    buffer_reference_uint64_t(config.dstAddress + tiledByteOffset + 24).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset + 24).data;
    break;
  }
}

View file

@ -0,0 +1,76 @@
#version 460

#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_shader_atomic_int64 : enable
#extension GL_EXT_shader_atomic_float : enable
#extension GL_EXT_shader_image_load_formatted : enable
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_EXT_shared_memory_block : enable
#extension GL_EXT_scalar_block_layout : enable
#extension GL_EXT_null_initializer : enable
#extension GL_EXT_buffer_reference2 : enable
#extension GL_EXT_buffer_reference_uvec2 : enable

#include "tiler.glsl"

// One invocation per element: reads the element at the linear (row-major)
// position from the source buffer and stores it at its 1D-tiled position in
// the destination buffer.
void main() {
  uvec3 coord = gl_GlobalInvocationID;

  // For multi-slice surfaces each z layer is an independent slice; fold the
  // per-slice byte offsets out and address the remaining plane at z == 0.
  uint64_t tiledBase = 0;
  uint64_t linearBase = 0;
  if (config.tiledSurfaceSize != 0) {
    tiledBase = coord.z * config.tiledSurfaceSize;
    linearBase = coord.z * config.linearSurfaceSize;
    coord.z = 0;
  }

  uint64_t dst = config.dstAddress + tiledBase +
                 getTiledBitOffset1D(config.tileMode, coord, config.dataSize,
                                     config.bitsPerElement) / 8;
  uint64_t src = config.srcAddress + linearBase +
                 computeLinearElementByteOffset(
                     coord, 0, config.dataSize.x,
                     config.dataSize.x * config.dataSize.y,
                     config.bitsPerElement, 1 << config.numFragments);

  // Copy exactly one element; the copy width follows the element byte size.
  switch ((config.bitsPerElement + 7) / 8) {
  case 1:
    buffer_reference_uint8_t(dst).data = buffer_reference_uint8_t(src).data;
    break;
  case 2:
    buffer_reference_uint16_t(dst).data = buffer_reference_uint16_t(src).data;
    break;
  case 4:
    buffer_reference_uint32_t(dst).data = buffer_reference_uint32_t(src).data;
    break;
  case 8:
    buffer_reference_uint64_t(dst).data = buffer_reference_uint64_t(src).data;
    break;
  case 16:
    buffer_reference_uint64_t(dst).data = buffer_reference_uint64_t(src).data;
    buffer_reference_uint64_t(dst + 8).data =
        buffer_reference_uint64_t(src + 8).data;
    break;
  case 32:
    buffer_reference_uint64_t(dst).data = buffer_reference_uint64_t(src).data;
    buffer_reference_uint64_t(dst + 8).data =
        buffer_reference_uint64_t(src + 8).data;
    buffer_reference_uint64_t(dst + 16).data =
        buffer_reference_uint64_t(src + 16).data;
    buffer_reference_uint64_t(dst + 24).data =
        buffer_reference_uint64_t(src + 24).data;
    break;
  }
}

View file

@ -0,0 +1,76 @@
#version 460

#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_shader_atomic_int64 : enable
#extension GL_EXT_shader_atomic_float : enable
#extension GL_EXT_shader_image_load_formatted : enable
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_EXT_shared_memory_block : enable
#extension GL_EXT_scalar_block_layout : enable
#extension GL_EXT_null_initializer : enable
#extension GL_EXT_buffer_reference2 : enable
#extension GL_EXT_buffer_reference_uvec2 : enable

#include "tiler.glsl"

// One invocation per element: copies from the tightly packed linear source
// into the (linearly addressed, possibly padded-pitch) destination surface.
void main() {
  uvec3 coord = gl_GlobalInvocationID;

  // Multi-slice surfaces: separate the per-slice byte offsets, then address
  // the rest of the element at z == 0.
  uint64_t tiledBase = 0;
  uint64_t linearBase = 0;
  if (config.tiledSurfaceSize != 0) {
    tiledBase = coord.z * config.tiledSurfaceSize;
    linearBase = coord.z * config.linearSurfaceSize;
    coord.z = 0;
  }

  uint64_t dst = config.dstAddress + tiledBase +
                 computeLinearOffset(config.bitsPerElement, config.dataSize.y,
                                     config.dataSize.x, coord) / 8;
  uint64_t src = config.srcAddress + linearBase +
                 computeLinearElementByteOffset(
                     coord, 0, config.dataSize.x,
                     config.dataSize.x * config.dataSize.y,
                     config.bitsPerElement, 1 << config.numFragments);

  // Copy exactly one element; the copy width follows the element byte size.
  switch ((config.bitsPerElement + 7) / 8) {
  case 1:
    buffer_reference_uint8_t(dst).data = buffer_reference_uint8_t(src).data;
    break;
  case 2:
    buffer_reference_uint16_t(dst).data = buffer_reference_uint16_t(src).data;
    break;
  case 4:
    buffer_reference_uint32_t(dst).data = buffer_reference_uint32_t(src).data;
    break;
  case 8:
    buffer_reference_uint64_t(dst).data = buffer_reference_uint64_t(src).data;
    break;
  case 16:
    buffer_reference_uint64_t(dst).data = buffer_reference_uint64_t(src).data;
    buffer_reference_uint64_t(dst + 8).data =
        buffer_reference_uint64_t(src + 8).data;
    break;
  case 32:
    buffer_reference_uint64_t(dst).data = buffer_reference_uint64_t(src).data;
    buffer_reference_uint64_t(dst + 8).data =
        buffer_reference_uint64_t(src + 8).data;
    buffer_reference_uint64_t(dst + 16).data =
        buffer_reference_uint64_t(src + 16).data;
    buffer_reference_uint64_t(dst + 24).data =
        buffer_reference_uint64_t(src + 24).data;
    break;
  }
}

View file

@ -0,0 +1,387 @@
#include "gnm/constants.hpp"
#include <amdgpu/tiler.hpp>
#include <gnm/gnm.hpp>
#include <bit>
using namespace amdgpu;
// Computes the memory layout of a 1D-(micro)tiled surface: per-mip data
// dimensions, per-mip tiled/linear byte sizes, per-mip offsets, and the
// total size across all array slices. All sizes/offsets are in bytes.
static constexpr SurfaceInfo
computeTexture1dInfo(ArrayMode arrayMode, gnm::TextureType type,
                     gnm::DataFormat dfmt, std::uint32_t width,
                     std::uint32_t height, std::uint32_t depth,
                     std::uint32_t pitch, int baseArrayLayer, int arrayCount,
                     int baseMipLevel, int mipCount, bool pow2pad) {
  bool isCubemap = type == gnm::TextureType::Cube;
  bool isVolume = type == gnm::TextureType::Dim3D;
  auto bitsPerFragment = getBitsPerElement(dfmt);

  // For non-volume textures the incoming `depth` carries the array layer
  // count; a cubemap stores six faces per layer, a volume has exactly one
  // array slice.
  std::uint32_t arraySliceCount = depth;
  if (isCubemap) {
    arraySliceCount *= 6;
  } else if (isVolume) {
    arraySliceCount = 1;
  }

  // NOTE(review): for MSAA types log2(fragments per pixel) is derived from
  // the array range (baseArrayLayer + arrayCount - 1) -- confirm this
  // matches the T# encoding used by callers.
  int numFragments = (type == gnm::TextureType::Msaa2D ||
                      type == gnm::TextureType::MsaaArray2D)
                         ? (baseArrayLayer + arrayCount - 1)
                         : 0;
  auto numFragmentsPerPixel = 1 << numFragments;
  auto isBlockCompressed = getTexelsPerElement(dfmt) > 1;
  auto bitsPerElement = bitsPerFragment;
  // From here on `depth` is only the z extent of a volume (1 otherwise).
  depth = isVolume ? depth : 1;

  // Block-compressed formats are addressed per element (one compressed
  // block), not per texel: scale the element size up accordingly. 1-bpp
  // packs 8 texels per byte; 4/8-bpp BC formats pack 16 texels per block.
  if (isBlockCompressed) {
    switch (bitsPerFragment) {
    case 1:
      bitsPerElement *= 8;
      break;
    case 4:
    case 8:
      bitsPerElement *= 16;
      break;
    case 16:
      // 16-bpp block-compressed formats are not supported here.
      std::abort();
      break;
    default:
      std::abort();
      break;
    }
  }

  if (pow2pad) {
    arraySliceCount = std::bit_ceil(arraySliceCount);
  }

  std::uint64_t surfaceOffset = 0; // running byte offset of the current mip
  std::uint64_t surfaceSize = 0;   // tiled byte size of one slice of the mip
  SurfaceInfo result;
  result.width = width;
  result.height = height;
  result.depth = depth;
  result.pitch = pitch;
  result.numFragments = numFragments;
  result.bitsPerElement = bitsPerElement;
  result.arrayLayerCount = arraySliceCount;

  // Micro tile thickness: number of z slices interleaved within one micro
  // tile for this array mode.
  auto thickness = getMicroTileThickness(arrayMode);

  for (int mipLevel = 0; mipLevel < baseMipLevel + mipCount; mipLevel++) {
    std::uint32_t elemWidth = std::max<std::uint64_t>(width >> mipLevel, 1);
    std::uint32_t elemPitch = std::max<std::uint64_t>(pitch >> mipLevel, 1);
    std::uint32_t elemHeight = std::max<std::uint64_t>(height >> mipLevel, 1);
    std::uint32_t elemDepth = std::max<std::uint64_t>(depth >> mipLevel, 1);

    std::uint32_t linearPitch = elemPitch;
    std::uint32_t linearWidth = elemWidth;
    std::uint32_t linearHeight = elemHeight;
    std::uint32_t linearDepth = elemDepth;

    // Convert texel dimensions into element (compressed block) dimensions.
    if (isBlockCompressed) {
      switch (bitsPerFragment) {
      case 1:
        linearWidth = std::max<std::uint64_t>((linearWidth + 7) / 8, 1);
        linearPitch = std::max<std::uint64_t>((linearPitch + 7) / 8, 1);
        break;
      case 4:
      case 8:
        linearWidth = std::max<std::uint64_t>((linearWidth + 3) / 4, 1);
        linearPitch = std::max<std::uint64_t>((linearPitch + 3) / 4, 1);
        linearHeight = std::max<std::uint64_t>((linearHeight + 3) / 4, 1);
        break;
      case 16:
        std::abort();
        break;
      default:
        std::abort();
        break;
      }
    }

    if (pow2pad) {
      linearPitch = std::bit_ceil(linearPitch);
      linearWidth = std::bit_ceil(linearWidth);
      linearHeight = std::bit_ceil(linearHeight);
      linearDepth = std::bit_ceil(linearDepth);
    }

    // An explicit pitch only constrains the base level; non-base mips are
    // packed to their own width.
    if (mipLevel > 0 && pitch > 0) {
      linearPitch = linearWidth;
    }

    // Pad pitch/height to whole micro tiles.
    std::uint32_t paddedPitch =
        (linearPitch + kMicroTileWidth - 1) & ~(kMicroTileWidth - 1);
    std::uint32_t paddedHeight =
        (linearHeight + kMicroTileHeight - 1) & ~(kMicroTileHeight - 1);
    std::uint32_t paddedDepth = linearDepth;
    // NOTE(review): depth padding is skipped only for a cubemap's base
    // level (depth == 1 there) -- confirm this condition against the
    // reference tiler.
    if (!isCubemap || (mipLevel > 0 && linearDepth > 1)) {
      if (isCubemap) {
        linearDepth = std::bit_ceil(linearDepth);
      }
      paddedDepth = (linearDepth + thickness - 1) & ~(thickness - 1);
    }

    // Grow the pitch in micro-tile steps until one physical slice is a
    // multiple of the pipe interleave size.
    std::uint32_t tempPitch = paddedPitch;
    std::uint64_t logicalSliceSizeBytes = std::uint64_t(tempPitch) *
                                          paddedHeight * bitsPerElement *
                                          numFragmentsPerPixel;
    logicalSliceSizeBytes = (logicalSliceSizeBytes + 7) / 8;
    uint64_t physicalSliceSizeBytes = logicalSliceSizeBytes * thickness;
    while ((physicalSliceSizeBytes % kPipeInterleaveBytes) != 0) {
      tempPitch += kMicroTileWidth;
      logicalSliceSizeBytes = std::uint64_t(tempPitch) * paddedHeight *
                              bitsPerElement * numFragmentsPerPixel;
      logicalSliceSizeBytes = (logicalSliceSizeBytes + 7) / 8;
      physicalSliceSizeBytes = logicalSliceSizeBytes * thickness;
    }
    surfaceSize = logicalSliceSizeBytes * paddedDepth;

    // Unpadded size of this mip -- the byte count a tightly packed linear
    // copy of it occupies.
    auto linearSize =
        linearDepth *
        (linearPitch * linearHeight * bitsPerElement * numFragmentsPerPixel +
         7) /
        8;

    result.setSubresourceInfo(mipLevel, {
                                            .dataWidth = linearPitch,
                                            .dataHeight = linearHeight,
                                            .dataDepth = linearDepth,
                                            .offset = surfaceOffset,
                                            .tiledSize = surfaceSize,
                                            .linearSize = linearSize,
                                        });

    // All array slices of a mip are stored contiguously before the next mip.
    surfaceOffset += arraySliceCount * surfaceSize;
  }

  result.totalSize = surfaceOffset;
  return result;
}
// Computes the memory layout of a linearly addressed surface.
// LinearGeneral uses the caller's pitch exactly as given; LinearAligned
// pads the pitch and slice size to the hardware alignment requirements.
static constexpr SurfaceInfo computeTextureLinearInfo(
    ArrayMode arrayMode, gnm::TextureType type, gnm::DataFormat dfmt,
    std::uint32_t width, std::uint32_t height, std::uint32_t depth,
    std::uint32_t pitch, int baseArrayLayer, int arrayCount, int baseMipLevel,
    int mipCount, bool pow2pad) {
  bool isCubemap = type == gnm::TextureType::Cube;
  bool isVolume = type == gnm::TextureType::Dim3D;
  auto bitsPerFragment = getBitsPerElement(dfmt);

  // Non-volume `depth` is the array layer count (x6 faces for cubemaps).
  std::uint32_t arraySliceCount = depth;
  if (isCubemap) {
    arraySliceCount *= 6;
  } else if (isVolume) {
    arraySliceCount = 1;
  }

  // NOTE(review): MSAA log2(fragment count) derived from the array range --
  // confirm against the T# encoding (same scheme as computeTexture1dInfo).
  int numFragments = (type == gnm::TextureType::Msaa2D ||
                      type == gnm::TextureType::MsaaArray2D)
                         ? (baseArrayLayer + arrayCount - 1)
                         : 0;
  auto numFragmentsPerPixel = 1 << numFragments;
  auto isBlockCompressed = getTexelsPerElement(dfmt) > 1;
  auto bitsPerElement = bitsPerFragment;
  // From here on `depth` is only the volume z extent (1 otherwise).
  depth = isVolume ? depth : 1;

  // Block-compressed formats address whole blocks: scale the element size.
  if (isBlockCompressed) {
    switch (bitsPerFragment) {
    case 1:
      bitsPerElement *= 8;
      break;
    case 4:
    case 8:
      bitsPerElement *= 16;
      break;
    case 16:
      // 16-bpp block-compressed formats are not supported here.
      std::abort();
      break;
    default:
      std::abort();
      break;
    }
  }

  if (pow2pad) {
    arraySliceCount = std::bit_ceil(arraySliceCount);
  }

  std::uint64_t surfaceOffset = 0; // running byte offset of the current mip
  std::uint64_t surfaceSize = 0;   // byte size of one slice of the mip
  SurfaceInfo result;
  result.width = width;
  result.height = height;
  result.depth = depth;
  result.pitch = pitch;
  result.numFragments = numFragments;
  result.bitsPerElement = bitsPerElement;
  result.arrayLayerCount = arraySliceCount;

  for (int mipLevel = 0; mipLevel < baseMipLevel + mipCount; mipLevel++) {
    std::uint32_t elemWidth = std::max<std::uint64_t>(width >> mipLevel, 1);
    std::uint32_t elemPitch = std::max<std::uint64_t>(pitch >> mipLevel, 1);
    std::uint32_t elemHeight = std::max<std::uint64_t>(height >> mipLevel, 1);
    std::uint32_t elemDepth = std::max<std::uint64_t>(depth >> mipLevel, 1);

    std::uint32_t linearPitch = elemPitch;
    std::uint32_t linearWidth = elemWidth;
    std::uint32_t linearHeight = elemHeight;
    std::uint32_t linearDepth = elemDepth;

    // Texel dimensions -> element (compressed block) dimensions.
    if (isBlockCompressed) {
      switch (bitsPerFragment) {
      case 1:
        linearWidth = std::max<std::uint64_t>((linearWidth + 7) / 8, 1);
        linearPitch = std::max<std::uint64_t>((linearPitch + 7) / 8, 1);
        break;
      case 4:
      case 8:
        linearWidth = std::max<std::uint64_t>((linearWidth + 3) / 4, 1);
        linearPitch = std::max<std::uint64_t>((linearPitch + 3) / 4, 1);
        linearHeight = std::max<std::uint64_t>((linearHeight + 3) / 4, 1);
        break;
      case 16:
        std::abort();
        break;
      default:
        std::abort();
        break;
      }
    }

    if (pow2pad) {
      linearPitch = std::bit_ceil(linearPitch);
      linearWidth = std::bit_ceil(linearWidth);
      linearHeight = std::bit_ceil(linearHeight);
      linearDepth = std::bit_ceil(linearDepth);
    }

    // Explicit pitch only applies to the base level.
    if (mipLevel > 0 && pitch > 0) {
      linearPitch = linearWidth;
    }

    if (arrayMode == kArrayModeLinearGeneral) {
      // LinearGeneral: no alignment at all, size is exactly
      // pitch * height * depth elements.
      surfaceSize = (static_cast<uint64_t>(linearPitch) *
                         (linearHeight) * bitsPerElement * numFragmentsPerPixel +
                     7) /
                    8;
      surfaceSize *= linearDepth;
      result.setSubresourceInfo(mipLevel, {
                                              .dataWidth = linearPitch,
                                              .dataHeight = linearHeight,
                                              .dataDepth = linearDepth,
                                              .offset = surfaceOffset,
                                              .tiledSize = surfaceSize,
                                              .linearSize = surfaceSize,
                                          });
    } else {
      // Redundant: already applied above before the branch; harmless.
      if (mipLevel > 0 && pitch > 0) {
        linearPitch = linearWidth;
      }
      // LinearAligned: pitch aligned to 8 elements or 64 bytes, whichever
      // is larger in elements.
      auto pitchAlign = std::max(8UL, 64UL / ((bitsPerElement + 7) / 8UL));
      std::uint32_t paddedPitch =
          (linearPitch + pitchAlign - 1) & ~(pitchAlign - 1);
      std::uint32_t paddedHeight = linearHeight;
      std::uint32_t paddedDepth = linearDepth;
      // NOTE(review): same cubemap depth-padding exception as in
      // computeTexture1dInfo -- confirm intent.
      if (!isCubemap || (mipLevel > 0 && linearDepth > 1)) {
        if (isCubemap) {
          linearDepth = std::bit_ceil(linearDepth);
        }
        auto thickness = getMicroTileThickness(arrayMode);
        paddedDepth = (linearDepth + thickness - 1) & ~(thickness - 1);
      }
      // Grow the pitch until a slice holds a whole number of pipe
      // interleaves (at least 64 pixels).
      std::uint32_t pixelsPerPipeInterleave =
          kPipeInterleaveBytes / ((bitsPerElement + 7) / 8);
      std::uint32_t sliceAlignInPixel =
          pixelsPerPipeInterleave < 64 ? 64 : pixelsPerPipeInterleave;
      auto pixelsPerSlice = static_cast<uint64_t>(paddedPitch) * paddedHeight *
                            numFragmentsPerPixel;
      while (pixelsPerSlice % sliceAlignInPixel) {
        paddedPitch += pitchAlign;
        pixelsPerSlice = static_cast<uint64_t>(paddedPitch) * paddedHeight *
                         numFragmentsPerPixel;
      }
      surfaceSize = (pixelsPerSlice * bitsPerElement + 7) / 8 * paddedDepth;
      result.setSubresourceInfo(mipLevel, {
                                              .dataWidth = paddedPitch,
                                              .dataHeight = paddedHeight,
                                              .dataDepth = paddedDepth,
                                              .offset = surfaceOffset,
                                              .tiledSize = surfaceSize,
                                              .linearSize = surfaceSize,
                                          });
    }

    // All array slices of a mip precede the next mip.
    surfaceOffset += arraySliceCount * surfaceSize;
  }

  result.totalSize = surfaceOffset;
  return result;
}
// Computes the memory layout of a surface by dispatching on its array mode.
// Linear and 1D-tiled layouts are implemented; 2D/3D/PRT macro-tiled
// layouts are not supported yet and abort.
SurfaceInfo amdgpu::computeSurfaceInfo(
    TileMode tileMode, gnm::TextureType type, gnm::DataFormat dfmt,
    std::uint32_t width, std::uint32_t height, std::uint32_t depth,
    std::uint32_t pitch, int baseArrayLayer, int arrayCount, int baseMipLevel,
    int mipCount, bool pow2pad) {
  auto const mode = tileMode.arrayMode();

  if (mode == kArrayModeLinearGeneral || mode == kArrayModeLinearAligned) {
    return computeTextureLinearInfo(mode, type, dfmt, width, height, depth,
                                    pitch, baseArrayLayer, arrayCount,
                                    baseMipLevel, mipCount, pow2pad);
  }

  if (mode == kArrayMode1dTiledThin || mode == kArrayMode1dTiledThick) {
    return computeTexture1dInfo(mode, type, dfmt, width, height, depth, pitch,
                                baseArrayLayer, arrayCount, baseMipLevel,
                                mipCount, pow2pad);
  }

  // 2D/3D tiled and PRT modes: unimplemented.
  std::abort();
}
// Convenience overload: unpacks a T# image descriptor into explicit
// parameters. Dimension and range fields in the descriptor are stored
// biased by one.
SurfaceInfo amdgpu::computeSurfaceInfo(const gnm::TBuffer &tbuffer,
                                       TileMode tileMode) {
  auto const arrayCount = tbuffer.last_array - tbuffer.base_array + 1;
  auto const mipCount = tbuffer.last_level - tbuffer.base_level + 1;

  return computeSurfaceInfo(tileMode, tbuffer.type, tbuffer.dfmt,
                            tbuffer.width + 1, tbuffer.height + 1,
                            tbuffer.depth + 1, tbuffer.pitch + 1,
                            tbuffer.base_array, arrayCount,
                            tbuffer.base_level, mipCount,
                            tbuffer.pow2pad != 0);
}

View file

@ -0,0 +1,441 @@
#include "amdgpu/tiler_cpu.hpp"
#include "amdgpu/tiler.hpp"
#include "gnm/gnm.hpp"
// Computes the BIT offset of texel (x, y, z) within a 1D-(micro)tiled
// resource, including the offset of the requested mip level and array
// slice. Note the result is in bits (see the *8 in the return); callers
// divide by 8 for a byte offset.
constexpr std::uint64_t
getTiledOffset1D(gnm::TextureType texType, bool isPow2Padded,
                 gnm::DataFormat dfmt, amdgpu::TileMode tileMode, int mipLevel,
                 int arraySlice, int numFragments, int width, int height,
                 int depth, int pitch, int x, int y, int z) {
  using namespace amdgpu;
  bool isCubemap = texType == gnm::TextureType::Cube;
  bool isVolume = texType == gnm::TextureType::Dim3D;
  auto bitsPerFragment = getBitsPerElement(dfmt);

  // Non-volume `depth` is the array layer count (x6 faces for cubemaps).
  uint32_t arraySliceCount = depth;
  if (isCubemap) {
    arraySliceCount *= 6;
  } else if (isVolume) {
    arraySliceCount = 1;
  }
  auto numFragmentsPerPixel = 1 << numFragments;
  auto isBlockCompressed = getTexelsPerElement(dfmt) > 1;
  auto arrayMode = tileMode.arrayMode();
  auto bitsPerElement = bitsPerFragment;
  // Dimensions of the requested mip level; an explicit pitch replaces the
  // width for non-base mips.
  auto paddedWidth = std::max((mipLevel != 0 ? pitch : width) >> mipLevel, 1);
  auto paddedHeight = std::max(height >> mipLevel, 1);
  // Thick 1D tiling interleaves 4 z slices per micro tile.
  auto tileThickness = (arrayMode == amdgpu::kArrayMode1dTiledThick) ? 4 : 1;

  // Block-compressed formats are addressed per block element.
  if (isBlockCompressed) {
    switch (bitsPerFragment) {
    case 1:
      bitsPerElement *= 8;
      paddedWidth = std::max((paddedWidth + 7) / 8, 1);
      break;
    case 4:
    case 8:
      bitsPerElement *= 16;
      paddedWidth = std::max((paddedWidth + 3) / 4, 1);
      paddedHeight = std::max((paddedHeight + 3) / 4, 1);
      break;
    case 16:
      // 16-bpp block-compressed formats are not supported here.
      std::abort();
      break;
    default:
      std::abort();
      break;
    }
  }

  if (isPow2Padded) {
    arraySliceCount = std::bit_ceil(arraySliceCount);
    paddedWidth = std::bit_ceil(unsigned(paddedWidth));
    paddedHeight = std::bit_ceil(unsigned(paddedHeight));
  }

  // Walk the mip chain up to the requested level, accumulating the offsets
  // of all preceding mips (each mip stores all its array slices first).
  // This mirrors the size computation in computeTexture1dInfo.
  uint64_t finalSurfaceOffset = 0;
  uint64_t finalSurfaceSize = 0;
  auto thickness = getMicroTileThickness(arrayMode);
  for (int i = 0; i <= mipLevel; i++) {
    finalSurfaceOffset += arraySliceCount * finalSurfaceSize;
    std::uint32_t elemWidth =
        std::max<std::uint64_t>((i > 0 ? pitch : width) >> i, 1);
    std::uint32_t elemHeight = std::max<std::uint64_t>(height >> i, 1);
    std::uint32_t elemDepth =
        std::max<std::uint64_t>((isVolume ? depth : 1) >> i, 1);

    if (isBlockCompressed) {
      switch (bitsPerFragment) {
      case 1:
        elemWidth = std::max<std::uint64_t>((elemWidth + 7) / 8, 1);
        break;
      case 4:
      case 8:
        elemWidth = std::max<std::uint64_t>((elemWidth + 3) / 4, 1);
        elemHeight = std::max<std::uint64_t>((elemHeight + 3) / 4, 1);
        break;
      case 16:
        std::abort();
        break;
      default:
        std::abort();
        break;
      }
    }
    if (isPow2Padded) {
      elemWidth = std::bit_ceil(elemWidth);
      elemHeight = std::bit_ceil(elemHeight);
      elemDepth = std::bit_ceil(elemDepth);
    }

    // Pad to whole micro tiles in x/y and to the tile thickness in z.
    elemWidth = (elemWidth + kMicroTileWidth - 1) & ~(kMicroTileWidth - 1);
    elemHeight = (elemHeight + kMicroTileHeight - 1) & ~(kMicroTileHeight - 1);
    elemDepth = (elemDepth + thickness - 1) & ~(thickness - 1);

    // Grow the pitch until one physical slice is pipe-interleave aligned.
    std::uint32_t tempPitch = elemWidth;
    std::uint64_t logicalSliceSizeBytes = std::uint64_t(tempPitch) *
                                          elemHeight * bitsPerElement *
                                          numFragmentsPerPixel;
    logicalSliceSizeBytes = (logicalSliceSizeBytes + 7) / 8;
    uint64_t physicalSliceSizeBytes = logicalSliceSizeBytes * thickness;
    while ((physicalSliceSizeBytes % kPipeInterleaveBytes) != 0) {
      tempPitch += 8;
      logicalSliceSizeBytes = std::uint64_t(tempPitch) * elemHeight *
                              bitsPerElement * numFragmentsPerPixel;
      logicalSliceSizeBytes = (logicalSliceSizeBytes + 7) / 8;
      physicalSliceSizeBytes = logicalSliceSizeBytes * thickness;
    }
    finalSurfaceSize = logicalSliceSizeBytes * elemDepth;
  }
  // Skip the preceding array slices of the requested mip.
  finalSurfaceOffset += finalSurfaceSize * (uint64_t)arraySlice;

  // Address within the mip: micro tiles are laid out row-major; the element
  // order inside a tile comes from getElementIndex.
  auto tileBytes =
      (kMicroTileWidth * kMicroTileHeight * tileThickness * bitsPerElement +
       7) /
      8;
  auto tilesPerRow = paddedWidth / kMicroTileWidth;
  auto tilesPerSlice =
      std::max(tilesPerRow * (paddedHeight / kMicroTileHeight), 1U);
  uint64_t elementIndex = getElementIndex(x, y, z, bitsPerElement,
                                          tileMode.microTileMode(), arrayMode);
  uint64_t sliceOffset = (z / tileThickness) * tilesPerSlice * tileBytes;
  uint64_t tileRowIndex = y / kMicroTileHeight;
  uint64_t tileColumnIndex = x / kMicroTileWidth;
  uint64_t tileOffset =
      (tileRowIndex * tilesPerRow + tileColumnIndex) * tileBytes;
  uint64_t elementOffset = elementIndex * bitsPerElement;
  uint64_t finalOffset = (sliceOffset + tileOffset) * 8 + elementOffset;
  // Bit offset: byte offsets scaled by 8 plus the in-tile bit offset.
  return finalOffset + finalSurfaceOffset * 8;
}
// Computes the BIT offset of texel (x, y, z) in a linear (row-major)
// surface: slice-major, then row, then column.
constexpr std::uint64_t getTiledOffsetLinear(gnm::DataFormat dfmt, int height,
                                             int pitch, int x, int y, int z) {
  auto bitsPerElement = getBitsPerElement(dfmt);
  auto rowElements = pitch;

  // 1-bpp formats pack 8 texels into a single byte-sized element.
  if (getBitsPerElement(dfmt) == 1) {
    bitsPerElement *= 8;
    rowElements = std::max((rowElements + 7) / 8, 1);
  }

  uint64_t rowBits = bitsPerElement * rowElements;
  uint64_t sliceBits = rowElements * height * bitsPerElement;
  return sliceBits * z + rowBits * y + bitsPerElement * x;
}
// Computes the BIT offset of texel (x, y, z) / fragment `fragmentIndex` in
// a 2D/3D macro-tiled surface, applying the GCN bank/pipe swizzle. The
// final address is assembled as
//   [offset | bank | pipe | pipe-interleave] bytes, plus a sub-byte bit
// offset.
// NOTE(review): mipLevel, isPow2Padded, width and depth are currently
// unused here -- only the base level appears to be addressed; confirm
// whether mip support is still TODO.
constexpr std::uint64_t
getTiledOffset2D(gnm::TextureType texType, bool isPow2Padded,
                 gnm::DataFormat dfmt, amdgpu::TileMode tileMode,
                 amdgpu::MacroTileMode macroTileMode, int mipLevel,
                 int arraySlice, int numFragments, int width, int height,
                 int depth, int pitch, int x, int y, int z, int fragmentIndex) {
  using namespace amdgpu;
  bool isCubemap = texType == gnm::TextureType::Cube;
  bool isVolume = texType == gnm::TextureType::Dim3D;
  auto m_bitsPerFragment = getBitsPerElement(dfmt);
  auto m_isBlockCompressed = getTexelsPerElement(dfmt) > 1;
  // Surface tile swizzle; always zero here (no BASE_256B swizzle applied).
  auto tileSwizzleMask = 0;
  auto numFragmentsPerPixel = 1 << numFragments;
  auto arrayMode = tileMode.arrayMode();

  // Micro tile thickness (z slices per tile): thin = 1, thick = 4,
  // xthick = 8.
  auto tileThickness = 1;
  switch (arrayMode) {
  case amdgpu::kArrayMode2dTiledThin:
  case amdgpu::kArrayMode3dTiledThin:
  case amdgpu::kArrayModeTiledThinPrt:
  case amdgpu::kArrayMode2dTiledThinPrt:
  case amdgpu::kArrayMode3dTiledThinPrt:
    tileThickness = 1;
    break;
  case amdgpu::kArrayMode1dTiledThick:
  case amdgpu::kArrayMode2dTiledThick:
  case amdgpu::kArrayMode3dTiledThick:
  case amdgpu::kArrayModeTiledThickPrt:
  case amdgpu::kArrayMode2dTiledThickPrt:
  case amdgpu::kArrayMode3dTiledThickPrt:
    tileThickness = 4;
    break;
  case amdgpu::kArrayMode2dTiledXThick:
  case amdgpu::kArrayMode3dTiledXThick:
    tileThickness = 8;
    break;
  default:
    break;
  }

  // Block-compressed formats are addressed per block element.
  auto bitsPerElement = m_bitsPerFragment;
  auto paddedWidth = pitch;
  auto paddedHeight = height;
  if (m_isBlockCompressed) {
    switch (m_bitsPerFragment) {
    case 1:
      bitsPerElement *= 8;
      paddedWidth = std::max((paddedWidth + 7) / 8, 1);
      break;
    case 4:
    case 8:
      bitsPerElement *= 16;
      paddedWidth = std::max((paddedWidth + 3) / 4, 1);
      paddedHeight = std::max((paddedHeight + 3) / 4, 1);
      break;
    case 16:
      // 16-bpp block-compressed formats are not supported here.
      std::abort();
      break;
    default:
      std::abort();
      break;
    }
  }

  // Macro tile geometry from the macro tile mode register (fields are
  // log2-encoded; numBanks is 2 << n).
  auto bankWidthHW = macroTileMode.bankWidth();
  auto bankHeightHW = macroTileMode.bankHeight();
  auto macroAspectHW = macroTileMode.macroTileAspect();
  auto numBanksHW = macroTileMode.numBanks();
  auto bankWidth = 1 << bankWidthHW;
  auto bankHeight = 1 << bankHeightHW;
  unsigned numBanks = 2 << numBanksHW;
  auto macroTileAspect = 1 << macroAspectHW;

  // Tile split: depth surfaces use the explicit TILE_SPLIT field, others
  // derive it from the sample split, clamped to the DRAM row size.
  uint32_t tileBytes1x =
      (tileThickness * bitsPerElement * kMicroTileWidth * kMicroTileHeight +
       7) /
      8;
  auto sampleSplitHw = tileMode.sampleSplit();
  auto tileSplitHw = tileMode.tileSplit();
  uint32_t sampleSplit = 1 << sampleSplitHw;
  uint32_t tileSplitC =
      (tileMode.microTileMode() == amdgpu::kMicroTileModeDepth)
          ? (64 << tileSplitHw)
          : std::max(256U, tileBytes1x * sampleSplit);
  auto tileSplitBytes = std::min(kDramRowSize, tileSplitC);

  auto numPipes = getPipeCount(tileMode.pipeConfig());
  auto pipeInterleaveBits = std::countr_zero(kPipeInterleaveBytes);
  auto pipeInterleaveMask = (1 << pipeInterleaveBits) - 1;
  auto pipeBits = std::countr_zero(numPipes);
  auto bankBits = std::countr_zero(numBanks);
  // auto pipeMask = (numPipes - 1) << pipeInterleaveBits;
  auto bankSwizzleMask = tileSwizzleMask;
  auto pipeSwizzleMask = 0;
  auto macroTileWidth =
      (kMicroTileWidth * bankWidth * numPipes) * macroTileAspect;
  auto macroTileHeight =
      (kMicroTileHeight * bankHeight * numBanks) / macroTileAspect;

  auto microTileMode = tileMode.microTileMode();
  uint64_t elementIndex =
      getElementIndex(x, y, z, bitsPerElement, microTileMode, arrayMode);

  // PRT surfaces repeat the swizzle pattern per macro tile.
  uint32_t xh = x, yh = y;
  if (arrayMode == amdgpu::kArrayModeTiledThinPrt ||
      arrayMode == amdgpu::kArrayModeTiledThickPrt) {
    xh %= macroTileWidth;
    yh %= macroTileHeight;
  }
  uint64_t pipe = getPipeIndex(xh, yh, tileMode.pipeConfig());
  uint64_t bank =
      getBankIndex(xh, yh, bankWidth, bankHeight, numBanks, numPipes);

  uint32_t tileBytes = (kMicroTileWidth * kMicroTileHeight * tileThickness *
                        bitsPerElement * numFragmentsPerPixel +
                        7) /
                       8;

  // Depth micro tiling interleaves fragments per pixel; display/other modes
  // store each fragment's plane contiguously.
  uint64_t elementOffset = 0;
  if (microTileMode == amdgpu::kMicroTileModeDepth) {
    uint64_t pixelOffset = elementIndex * bitsPerElement * numFragmentsPerPixel;
    elementOffset = pixelOffset + (fragmentIndex * bitsPerElement);
  } else {
    uint64_t fragmentOffset =
        fragmentIndex * (tileBytes / numFragmentsPerPixel) * 8;
    elementOffset = fragmentOffset + (elementIndex * bitsPerElement);
  }

  // Split oversized thin tiles across multiple slices.
  uint64_t slicesPerTile = 1;
  uint64_t tileSplitSlice = 0;
  if (tileBytes > tileSplitBytes && tileThickness == 1) {
    slicesPerTile = tileBytes / tileSplitBytes;
    tileSplitSlice = elementOffset / (tileSplitBytes * 8);
    elementOffset %= (tileSplitBytes * 8);
    tileBytes = tileSplitBytes;
  }

  // Byte offset of the containing macro tile (macro tiles are row-major;
  // each stores only 1/(numPipes*numBanks) of its bytes per channel).
  uint64_t macroTileBytes = (macroTileWidth / kMicroTileWidth) *
                            (macroTileHeight / kMicroTileHeight) * tileBytes /
                            (numPipes * numBanks);
  uint64_t macroTilesPerRow = paddedWidth / macroTileWidth;
  uint64_t macroTileRowIndex = y / macroTileHeight;
  uint64_t macroTileColumnIndex = x / macroTileWidth;
  uint64_t macroTileIndex =
      (macroTileRowIndex * macroTilesPerRow) + macroTileColumnIndex;
  uint64_t macro_tile_offset = macroTileIndex * macroTileBytes;
  uint64_t macroTilesPerSlice =
      macroTilesPerRow * (paddedHeight / macroTileHeight);
  uint64_t sliceBytes = macroTilesPerSlice * macroTileBytes;

  // NOTE(review): sliceOffset is computed from z before `slice` is replaced
  // by arraySlice; the replaced value only feeds the swizzle rotations
  // below -- confirm against the reference addrlib behavior.
  uint32_t slice = z;
  uint64_t sliceOffset =
      (tileSplitSlice + slicesPerTile * slice / tileThickness) * sliceBytes;
  if (arraySlice != 0) {
    slice = arraySlice;
  }

  // Micro tile position within its macro tile / bank.
  uint64_t tileRowIndex = (y / kMicroTileHeight) % bankHeight;
  uint64_t tileColumnIndex = ((x / kMicroTileWidth) / numPipes) % bankWidth;
  uint64_t tileIndex = (tileRowIndex * bankWidth) + tileColumnIndex;
  uint64_t tileOffset = tileIndex * tileBytes;

  // Per-slice pipe rotation (3D modes only).
  uint64_t bankSwizzle = bankSwizzleMask;
  uint64_t pipeSwizzle = pipeSwizzleMask;
  uint64_t pipeSliceRotation = 0;
  switch (arrayMode) {
  case amdgpu::kArrayMode3dTiledThin:
  case amdgpu::kArrayMode3dTiledThick:
  case amdgpu::kArrayMode3dTiledXThick:
    pipeSliceRotation =
        std::max(1UL, (numPipes / 2UL) - 1UL) * (slice / tileThickness);
    break;
  default:
    break;
  }
  pipeSwizzle += pipeSliceRotation;
  pipeSwizzle &= (numPipes - 1);
  pipe = pipe ^ pipeSwizzle;

  // Per-slice bank rotation (2D and 3D families rotate differently).
  uint32_t sliceRotation = 0;
  switch (arrayMode) {
  case amdgpu::kArrayMode2dTiledThin:
  case amdgpu::kArrayMode2dTiledThick:
  case amdgpu::kArrayMode2dTiledXThick:
    sliceRotation = ((numBanks / 2) - 1) * (slice / tileThickness);
    break;
  case amdgpu::kArrayMode3dTiledThin:
  case amdgpu::kArrayMode3dTiledThick:
  case amdgpu::kArrayMode3dTiledXThick:
    sliceRotation = std::max(1UL, (numPipes / 2UL) - 1UL) *
                    (slice / tileThickness) / numPipes;
    break;
  default:
    break;
  }

  // Additional bank rotation for tile-split slices (thin modes).
  uint64_t tileSplitSliceRotation = 0;
  switch (arrayMode) {
  case amdgpu::kArrayMode2dTiledThin:
  case amdgpu::kArrayMode3dTiledThin:
  case amdgpu::kArrayMode2dTiledThinPrt:
  case amdgpu::kArrayMode3dTiledThinPrt:
    tileSplitSliceRotation = ((numBanks / 2) + 1) * tileSplitSlice;
    break;
  default:
    break;
  }
  bank ^= bankSwizzle + sliceRotation;
  bank ^= tileSplitSliceRotation;
  bank &= (numBanks - 1);

  // Merge pipe/bank bits back into the byte address above the pipe
  // interleave, keeping the sub-byte bit offset separate.
  uint64_t totalOffset =
      (sliceOffset + macro_tile_offset + tileOffset) * 8 + elementOffset;
  uint64_t bitOffset = totalOffset & 0x7;
  totalOffset /= 8;
  uint64_t pipeInterleaveOffset = totalOffset & pipeInterleaveMask;
  uint64_t offset = totalOffset >> pipeInterleaveBits;
  uint64_t finalByteOffset =
      pipeInterleaveOffset | (pipe << (pipeInterleaveBits)) |
      (bank << (pipeInterleaveBits + pipeBits)) |
      (offset << (pipeInterleaveBits + pipeBits + bankBits));
  return (finalByteOffset << 3) | bitOffset;
}
// Computes the BIT offset of texel (x, y, z) by routing to the addressing
// routine that matches the tile mode's array-mode family. Aborts on an
// array mode outside the known enumerators.
std::uint64_t amdgpu::getTiledOffset(gnm::TextureType texType,
                                     bool isPow2Padded, int numFragments,
                                     gnm::DataFormat dfmt,
                                     amdgpu::TileMode tileMode,
                                     amdgpu::MacroTileMode macroTileMode,
                                     int mipLevel, int arraySlice, int width,
                                     int height, int depth, int pitch, int x,
                                     int y, int z, int fragmentIndex) {
  auto const mode = tileMode.arrayMode();

  switch (mode) {
  case kArrayModeLinearGeneral:
  case kArrayModeLinearAligned:
    // Linear layouts ignore the tiling parameters entirely.
    return getTiledOffsetLinear(dfmt, height, pitch, x, y, z);

  case kArrayMode1dTiledThin:
  case kArrayMode1dTiledThick:
    // Micro-tiled only; no bank/pipe swizzle.
    return getTiledOffset1D(texType, isPow2Padded, dfmt, tileMode, mipLevel,
                            arraySlice, numFragments, width, height, depth,
                            pitch, x, y, z);

  case kArrayMode2dTiledThin:
  case kArrayMode2dTiledThick:
  case kArrayMode2dTiledXThick:
  case kArrayMode3dTiledThin:
  case kArrayMode3dTiledThick:
  case kArrayMode3dTiledXThick:
  case kArrayModeTiledThinPrt:
  case kArrayModeTiledThickPrt:
  case kArrayMode2dTiledThinPrt:
  case kArrayMode2dTiledThickPrt:
  case kArrayMode3dTiledThinPrt:
  case kArrayMode3dTiledThickPrt:
    // Macro-tiled: bank/pipe swizzled addressing.
    return getTiledOffset2D(texType, isPow2Padded, dfmt, tileMode,
                            macroTileMode, mipLevel, arraySlice, numFragments,
                            width, height, depth, pitch, x, y, z,
                            fragmentIndex);
  }

  std::abort();
}

View file

@ -0,0 +1,354 @@
#include "amdgpu/tiler_vulkan.hpp"
#include "Scheduler.hpp"
#include "amdgpu/tiler.hpp"
#include <bit>
#include <cstring>
#include <memory>
#include <vk.hpp>
#include <shaders/detiler1d.comp.h>
#include <shaders/detiler2d.comp.h>
#include <shaders/detilerLinear.comp.h>
#include <shaders/tiler1d.comp.h>
#include <shaders/tiler2d.comp.h>
#include <shaders/tilerLinear.comp.h>
// RAII wrapper for the tiler's descriptor set layout: a single uniform
// buffer at binding 0, visible to the compute stage.
// NOTE(review): type name is missing an 's' ("Decriptor"); kept as-is for
// source compatibility with existing users.
struct TilerDecriptorSetLayout {
  VkDescriptorSetLayout layout;

  TilerDecriptorSetLayout() {
    VkDescriptorSetLayoutBinding binding{
        .binding = 0,
        .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
        .descriptorCount = 1,
        .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
    };

    VkDescriptorSetLayoutCreateInfo layoutInfo{
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        .bindingCount = 1,
        .pBindings = &binding,
    };

    // Create with the same allocation callbacks the destructor passes to
    // vkDestroyDescriptorSetLayout: creating with a null allocator and
    // destroying with a non-null one violates the Vulkan allocator rules.
    VK_VERIFY(vkCreateDescriptorSetLayout(vk::context->device, &layoutInfo,
                                          vk::context->allocator, &layout));
  }

  // Owns a Vulkan handle; copying would double-destroy it.
  TilerDecriptorSetLayout(const TilerDecriptorSetLayout &) = delete;
  TilerDecriptorSetLayout &operator=(const TilerDecriptorSetLayout &) = delete;

  ~TilerDecriptorSetLayout() {
    vkDestroyDescriptorSetLayout(vk::context->device, layout,
                                 vk::context->allocator);
  }
};
// RAII wrapper for one tiler compute shader (VK_EXT_shader_object),
// created from a precompiled SPIR-V blob against the shared set layout.
struct TilerShader {
  VkShaderEXT shader;

  TilerShader(TilerDecriptorSetLayout &setLayout,
              std::span<const std::uint32_t> spirv) {
    VkShaderCreateInfoEXT shaderInfo{
        .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT,
        .flags = 0,
        .stage = VK_SHADER_STAGE_COMPUTE_BIT,
        .nextStage = 0,
        .codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT,
        .codeSize = spirv.size_bytes(),
        .pCode = spirv.data(),
        .pName = "main",
        .setLayoutCount = 1,
        .pSetLayouts = &setLayout.layout,
        .pushConstantRangeCount = 0,
        .pPushConstantRanges = nullptr,
        .pSpecializationInfo = nullptr,
    };

    // Create with the same allocation callbacks the destructor passes to
    // DestroyShaderEXT; mixing null/non-null allocators is invalid.
    VK_VERIFY(vk::CreateShadersEXT(vk::context->device, 1, &shaderInfo,
                                   vk::context->allocator, &shader));
  }

  // Owns a Vulkan handle; copying would double-destroy it.
  TilerShader(const TilerShader &) = delete;
  TilerShader &operator=(const TilerShader &) = delete;

  ~TilerShader() {
    vk::DestroyShaderEXT(vk::context->device, shader, vk::context->allocator);
  }
};
struct amdgpu::GpuTiler::Impl {
TilerDecriptorSetLayout descriptorSetLayout;
std::mutex descriptorMtx;
VkDescriptorSet descriptorSets[4]{};
VkDescriptorPool descriptorPool;
std::uint32_t inUseDescriptorSets = 0;
vk::Buffer configData;
// One compute shader per layout family and direction; each is built from
// its matching precompiled SPIR-V blob (all six are generated, see the
// shaders/*.comp.h includes above).
TilerShader detilerLinear{descriptorSetLayout, spirv_detilerLinear_comp};
TilerShader detiler1d{descriptorSetLayout, spirv_detiler1d_comp};
// Fixed copy/paste mismatch: was spirv_detilerLinear_comp.
TilerShader detiler2d{descriptorSetLayout, spirv_detiler2d_comp};
// Fixed copy/paste mismatch: was spirv_tiler2d_comp.
TilerShader tilerLinear{descriptorSetLayout, spirv_tilerLinear_comp};
TilerShader tiler1d{descriptorSetLayout, spirv_tiler1d_comp};
TilerShader tiler2d{descriptorSetLayout, spirv_tiler2d_comp};
VkPipelineLayout pipelineLayout;
struct Config {
uint64_t srcAddress;
uint64_t dstAddress;
uint32_t dataWidth;
uint32_t dataHeight;
uint32_t tileMode;
uint32_t numFragments;
uint32_t bitsPerElement;
uint32_t tiledSurfaceSize;
uint32_t linearSurfaceSize;
};
Impl() {
std::size_t count = 256;
configData = vk::Buffer::Allocate(
vk::getHostVisibleMemory(), sizeof(Config) * count,
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
VkPipelineLayoutCreateInfo piplineLayoutInfo{
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &descriptorSetLayout.layout,
};
VK_VERIFY(vkCreatePipelineLayout(vk::context->device, &piplineLayoutInfo,
nullptr, &pipelineLayout));
{
VkDescriptorPoolSize poolSizes[]{{
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.descriptorCount = 1,
}};
VkDescriptorPoolCreateInfo info{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.maxSets = static_cast<std::uint32_t>(std::size(descriptorSets)) * 4,
.poolSizeCount = static_cast<uint32_t>(std::size(poolSizes)),
.pPoolSizes = poolSizes,
};
VK_VERIFY(vkCreateDescriptorPool(
vk::context->device, &info, vk::context->allocator, &descriptorPool));
}
VkDescriptorSetAllocateInfo info{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.descriptorPool = descriptorPool,
.descriptorSetCount = 1,
.pSetLayouts = &descriptorSetLayout.layout,
};
for (std::size_t i = 0; i < std::size(descriptorSets); ++i) {
VK_VERIFY(vkAllocateDescriptorSets(vk::context->device, &info,
descriptorSets + i));
}
}
~Impl() {
vkDestroyDescriptorPool(vk::context->device, descriptorPool,
vk::context->allocator);
vkDestroyPipelineLayout(vk::context->device, pipelineLayout,
vk::context->allocator);
}
std::uint32_t allocateDescriptorSlot() {
std::lock_guard lock(descriptorMtx);
auto result = std::countl_one(inUseDescriptorSets);
rx::dieIf(result >= std::size(descriptorSets),
"out of tiler descriptor sets");
inUseDescriptorSets |= (1 << result);
return result;
}
void releaseDescriptorSlot(std::uint32_t slot) {
std::lock_guard lock(descriptorMtx);
inUseDescriptorSets &= ~(1u << slot);
}
};
// Constructs the pimpl; all Vulkan objects are created eagerly by Impl().
amdgpu::GpuTiler::GpuTiler() : mImpl(std::make_unique<Impl>()) {}
// Defined out of line so std::unique_ptr<Impl> is destroyed where Impl is a
// complete type.
amdgpu::GpuTiler::~GpuTiler() = default;
// Records a compute dispatch that converts one mip level of a tiled surface
// to linear layout. With arrayCount > 1 a range of array slices starting at
// baseArray is converted (one Z workgroup layer per slice); otherwise a
// single, possibly volume, subresource is converted (one Z layer per depth
// slice). The claimed descriptor slot is held until the command buffer is
// submitted.
void amdgpu::GpuTiler::detile(Scheduler &scheduler,
const amdgpu::SurfaceInfo &info,
amdgpu::TileMode tileMode,
std::uint64_t srcTiledAddress,
std::uint64_t dstLinearAddress, int mipLevel,
int baseArray, int arrayCount) {
auto commandBuffer = scheduler.getCommandBuffer();
auto slot = mImpl->allocateDescriptorSlot();
auto configOffset = slot * sizeof(Impl::Config);
// The Config record lives in host-visible memory: written now, read by the
// shader when the dispatch executes, so the slot must stay reserved until
// submission (see afterSubmit below).
auto config = reinterpret_cast<Impl::Config *>(mImpl->configData.getData() +
configOffset);
auto &subresource = info.getSubresourceInfo(mipLevel);
// NOTE(review): the mip offset is added to the tiled source only, while the
// linear destination gets just the per-slice offset -- presumably callers
// pass a destination that already points at this mip; confirm.
config->srcAddress = srcTiledAddress + subresource.offset +
(subresource.tiledSize * baseArray);
config->dstAddress = dstLinearAddress + (subresource.linearSize * baseArray);
config->dataWidth = subresource.dataWidth;
config->dataHeight = subresource.dataHeight;
config->tileMode = tileMode.raw;
config->numFragments = info.numFragments;
config->bitsPerElement = info.bitsPerElement;
// Nonzero per-slice sizes tell the shader to step between array slices.
uint32_t groupCountZ = subresource.dataDepth;
if (arrayCount > 1) {
config->tiledSurfaceSize = subresource.tiledSize;
config->linearSurfaceSize = subresource.linearSize;
groupCountZ = arrayCount;
} else {
config->tiledSurfaceSize = 0;
config->linearSurfaceSize = 0;
}
// Bind the detiler variant matching the surface's array mode.
VkShaderStageFlagBits stages[]{VK_SHADER_STAGE_COMPUTE_BIT};
switch (tileMode.arrayMode()) {
case amdgpu::kArrayModeLinearGeneral:
case amdgpu::kArrayModeLinearAligned:
vk::CmdBindShadersEXT(commandBuffer, 1, stages,
&mImpl->detilerLinear.shader);
break;
case amdgpu::kArrayMode1dTiledThin:
case amdgpu::kArrayMode1dTiledThick:
vk::CmdBindShadersEXT(commandBuffer, 1, stages, &mImpl->detiler1d.shader);
break;
case amdgpu::kArrayMode2dTiledThin:
case amdgpu::kArrayModeTiledThinPrt:
case amdgpu::kArrayMode2dTiledThinPrt:
case amdgpu::kArrayMode2dTiledThick:
case amdgpu::kArrayMode2dTiledXThick:
case amdgpu::kArrayModeTiledThickPrt:
case amdgpu::kArrayMode2dTiledThickPrt:
case amdgpu::kArrayMode3dTiledThinPrt:
case amdgpu::kArrayMode3dTiledThin:
case amdgpu::kArrayMode3dTiledThick:
case amdgpu::kArrayMode3dTiledXThick:
case amdgpu::kArrayMode3dTiledThickPrt:
// 2d/3d detiling is not implemented yet: abort on purpose. The bind
// below is unreachable until the abort is removed.
std::abort();
vk::CmdBindShadersEXT(commandBuffer, 1, stages, &mImpl->detiler2d.shader);
break;
}
// Point this slot's descriptor set at the freshly written Config record.
VkDescriptorBufferInfo bufferInfo{
.buffer = mImpl->configData.getHandle(),
.offset = configOffset,
.range = sizeof(Impl::Config),
};
VkWriteDescriptorSet writeDescSet{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = mImpl->descriptorSets[slot],
.dstBinding = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.pBufferInfo = &bufferInfo,
};
vkUpdateDescriptorSets(vk::context->device, 1, &writeDescSet, 0, nullptr);
vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
mImpl->pipelineLayout, 0, 1,
&mImpl->descriptorSets[slot], 0, nullptr);
// Workgroup counts are the subresource dimensions in X/Y, slices in Z.
vkCmdDispatch(commandBuffer, subresource.dataWidth, subresource.dataHeight,
groupCountZ);
// Recycle the descriptor slot once the GPU work has been submitted.
scheduler.afterSubmit([this, slot] { mImpl->releaseDescriptorSlot(slot); });
}
// Records a compute dispatch that converts one mip level from linear to
// tiled layout -- the inverse of detile() above; the Z dimension covers
// either depth slices or (when arrayCount > 1) array slices. The claimed
// descriptor slot is held until the command buffer is submitted.
void amdgpu::GpuTiler::tile(Scheduler &scheduler,
const amdgpu::SurfaceInfo &info,
amdgpu::TileMode tileMode,
std::uint64_t srcLinearAddress,
std::uint64_t dstTiledAddress, int mipLevel,
int baseArray, int arrayCount) {
auto commandBuffer = scheduler.getCommandBuffer();
auto slot = mImpl->allocateDescriptorSlot();
auto configOffset = slot * sizeof(Impl::Config);
// Host-visible Config record written now, read at dispatch execution.
auto config = reinterpret_cast<Impl::Config *>(mImpl->configData.getData() +
configOffset);
auto &subresource = info.getSubresourceInfo(mipLevel);
config->srcAddress = srcLinearAddress + subresource.offset +
subresource.linearSize * baseArray;
// NOTE(review): unlike detile(), the tiled destination gets neither the
// mip offset nor the baseArray slice offset -- presumably the shader or
// caller accounts for it; confirm before using baseArray > 0 here.
config->dstAddress = dstTiledAddress;
config->dataWidth = subresource.dataWidth;
config->dataHeight = subresource.dataHeight;
config->tileMode = tileMode.raw;
config->numFragments = info.numFragments;
config->bitsPerElement = info.bitsPerElement;
// Nonzero per-slice sizes tell the shader to step between array slices.
uint32_t groupCountZ = subresource.dataDepth;
if (arrayCount > 1) {
config->tiledSurfaceSize = subresource.tiledSize;
config->linearSurfaceSize = subresource.linearSize;
groupCountZ = arrayCount;
} else {
config->tiledSurfaceSize = 0;
config->linearSurfaceSize = 0;
}
// Bind the tiler variant matching the surface's array mode.
VkShaderStageFlagBits stages[]{VK_SHADER_STAGE_COMPUTE_BIT};
switch (tileMode.arrayMode()) {
case amdgpu::kArrayModeLinearGeneral:
case amdgpu::kArrayModeLinearAligned:
vk::CmdBindShadersEXT(commandBuffer, 1, stages, &mImpl->tilerLinear.shader);
break;
case amdgpu::kArrayMode1dTiledThin:
case amdgpu::kArrayMode1dTiledThick:
vk::CmdBindShadersEXT(commandBuffer, 1, stages, &mImpl->tiler1d.shader);
break;
case amdgpu::kArrayMode2dTiledThin:
case amdgpu::kArrayModeTiledThinPrt:
case amdgpu::kArrayMode2dTiledThinPrt:
case amdgpu::kArrayMode2dTiledThick:
case amdgpu::kArrayMode2dTiledXThick:
case amdgpu::kArrayModeTiledThickPrt:
case amdgpu::kArrayMode2dTiledThickPrt:
case amdgpu::kArrayMode3dTiledThinPrt:
case amdgpu::kArrayMode3dTiledThin:
case amdgpu::kArrayMode3dTiledThick:
case amdgpu::kArrayMode3dTiledXThick:
case amdgpu::kArrayMode3dTiledThickPrt:
// 2d/3d tiling is not implemented yet: abort on purpose. The bind below
// is unreachable until the abort is removed.
std::abort();
vk::CmdBindShadersEXT(commandBuffer, 1, stages, &mImpl->tiler2d.shader);
break;
}
// Point this slot's descriptor set at the freshly written Config record.
VkDescriptorBufferInfo bufferInfo{
.buffer = mImpl->configData.getHandle(),
.offset = configOffset,
.range = sizeof(Impl::Config),
};
VkWriteDescriptorSet writeDescSet{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = mImpl->descriptorSets[slot],
.dstBinding = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.pBufferInfo = &bufferInfo,
};
vkUpdateDescriptorSets(vk::context->device, 1, &writeDescSet, 0, nullptr);
vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
mImpl->pipelineLayout, 0, 1,
&mImpl->descriptorSets[slot], 0, nullptr);
vkCmdDispatch(commandBuffer, subresource.dataWidth, subresource.dataHeight,
groupCountZ);
// Recycle the descriptor slot once the GPU work has been submitted.
scheduler.afterSubmit([this, slot] { mImpl->releaseDescriptorSlot(slot); });
}

View file

@ -0,0 +1,48 @@
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/dialect/)
# Generate the SPIR-V dialect header with the spv-gen tool. The generator
# runs inside the SPIRV-Headers unified1 grammar directory so it can read
# the machine-readable grammar files.
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include/dialect/spv.hpp
COMMAND $<TARGET_FILE:spv-gen> ${CMAKE_CURRENT_BINARY_DIR}/include/dialect/spv.hpp
DEPENDS spv-gen
WORKING_DIRECTORY $<TARGET_PROPERTY:SPIRV-Headers,INTERFACE_INCLUDE_DIRECTORIES>/spirv/unified1
COMMENT "Generating ${CMAKE_CURRENT_BINARY_DIR}/include/dialect/spv.hpp..."
)
# Interface target that carries the generated header and its include path;
# consumers depend on it to force generation before compilation.
add_custom_target(shader-spv-dialect-gen DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/include/dialect/spv.hpp)
add_library(shader-spv-dialect INTERFACE)
add_dependencies(shader-spv-dialect shader-spv-dialect-gen)
target_include_directories(shader-spv-dialect INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/include/)
# The GCN -> SPIR-V shader translator library.
add_library(gcn-shader STATIC
src/analyze.cpp
src/eval.cpp
src/Evaluator.cpp
src/gcn.cpp
src/GcnConverter.cpp
src/GcnInstruction.cpp
src/glsl.cpp
src/ModuleInfo.cpp
src/opt.cpp
src/SemanticModuleInfo.cpp
src/spv.cpp
src/SpvConverter.cpp
src/SpvTypeInfo.cpp
src/transform.cpp
)
# Public headers live in include/; sources include project headers via the
# shorter include/shader prefix.
target_include_directories(gcn-shader PUBLIC include PRIVATE include/shader)
target_link_libraries(gcn-shader
PUBLIC
shader-spv-dialect
rx
PRIVATE
glslang::glslang
glslang::SPIRV
SPIRV-Tools
SPIRV-Tools-opt
spirv-cross-c-shared
)
add_subdirectory(shaders)

View file

@ -0,0 +1,26 @@
#pragma once

namespace shader {

// Bit flags describing how a resource is accessed.
enum class Access {
  None = 0,
  Read = 1 << 0,
  Write = 1 << 1,
  ReadWrite = Read | Write
};

// Flag-style bitwise operators so Access values compose like plain bitmasks.
constexpr Access operator|(Access a, Access b) {
  return static_cast<Access>(static_cast<int>(a) | static_cast<int>(b));
}

constexpr Access operator&(Access a, Access b) {
  return static_cast<Access>(static_cast<int>(a) & static_cast<int>(b));
}

constexpr Access operator~(Access a) {
  return static_cast<Access>(~static_cast<int>(a));
}

constexpr Access &operator|=(Access &a, Access b) {
  a = a | b;
  return a;
}

constexpr Access &operator&=(Access &a, Access b) {
  a = a & b;
  return a;
}

} // namespace shader

View file

@ -0,0 +1,20 @@
#pragma once
#include "eval.hpp"
#include <map>
namespace shader::eval {
// Memoizing evaluator over IR values: results are cached per ir::Value, and
// subclasses can override the virtual eval overloads to extend evaluation.
class Evaluator {
// Cache of already evaluated nodes.
std::map<ir::Value, Value> values;
public:
virtual ~Evaluator() = default;
// Drops the cached result for `node`, forcing re-evaluation on next eval.
void invalidate(ir::Value node) { values.erase(node); }
// Stores/overrides the value associated with `node`.
void setValue(ir::Value node, Value value) { values[node] = value; }
// Evaluates an operand, optionally coercing to `type` (see eval.cpp).
Value eval(const ir::Operand &op, ir::Value type = nullptr);
virtual Value eval(ir::Value op);
virtual Value eval(ir::InstructionId instId,
std::span<const ir::Operand> operands);
};
} // namespace shader::eval

View file

@ -0,0 +1,131 @@
#pragma once
#include "gcn.hpp"
#include "rx/MemoryTable.hpp"
#include <cstdint>
#include <optional>
#include <vector>
namespace shader::gcn {
// VGPR inputs preloaded by hardware for a pixel shader, in declaration
// order; used to describe which v-registers the converted shader expects.
enum class PsVGprInput {
IPerspSample,
JPerspSample,
IPerspCenter,
JPerspCenter,
IPerspCentroid,
JPerspCentroid,
IW,
JW,
_1W,
ILinearSample,
JLinearSample,
ILinearCenter,
JLinearCenter,
ILinearCentroid,
JLinearCentroid,
X,
Y,
Z,
W,
FrontFace,
Ancillary,
SampleCoverage,
PosFixed,
Count
};
// Kind of runtime value a shader config slot carries; `data` in ConfigSlot
// is interpreted according to this type.
enum class ConfigType {
Imm,
UserSgpr,
ResourceSlot,
MemoryTable,
Gds,
PsInputVGpr,
VsPrimType,
CbCompSwap,
ViewPortOffsetX,
ViewPortOffsetY,
ViewPortOffsetZ,
ViewPortScaleX,
ViewPortScaleY,
ViewPortScaleZ,
};
// One entry of the shader configuration buffer: a type tag plus a payload
// whose meaning depends on that tag.
struct ConfigSlot {
ConfigType type;
std::uint64_t data;
};
// Resources (pointers, textures, buffers, samplers) referenced by a
// converted shader, each tied to a resource slot.
struct Resources {
struct Resource {
std::uint32_t resourceSlot;
};
struct Pointer : Resource {
std::uint32_t size;
ir::Value base;
ir::Value offset;
};
struct Texture : Resource {
Access access;
// Raw descriptor words (up to 256-bit image descriptors).
ir::Value words[8];
};
struct Buffer : Resource {
Access access;
ir::Value words[4];
};
struct Sampler : Resource {
bool unorm;
ir::Value words[4];
};
spv::Context context;
// Set when a resource could not be identified statically.
bool hasUnknown = false;
std::uint32_t slots = 0;
std::vector<Pointer> pointers;
std::vector<Texture> textures;
std::vector<Buffer> buffers;
std::vector<Sampler> samplers;
void print(std::ostream &os, ir::NameStorage &ns) const;
void dump();
};
// Aggregated metadata produced while converting a shader.
struct ShaderInfo {
std::vector<ConfigSlot> configSlots;
rx::MemoryAreaTable<> memoryMap;
std::vector<std::pair<int, std::uint32_t>> requiredSgprs;
Resources resources;
// Returns the index of an existing slot with identical type/data, or
// appends a new slot and returns its index (slots are deduplicated).
std::uint32_t create(ConfigType type, std::uint64_t data) {
for (std::size_t slotIndex = 0; auto &slotInfo : configSlots) {
if (slotInfo.type == type && slotInfo.data == data) {
return slotIndex;
}
slotIndex++;
}
configSlots.push_back({
.type = type,
.data = data,
});
return configSlots.size() - 1;
}
};
// Result of a successful conversion: SPIR-V words plus shader metadata.
struct ConvertedShader {
std::vector<std::uint32_t> spv;
ShaderInfo info;
};
// Converts a GCN IR body to SPIR-V; returns std::nullopt on failure.
std::optional<ConvertedShader>
convertToSpv(Context &context, ir::Region body,
const SemanticModuleInfo &semanticModule, Stage stage,
const Environment &state);
} // namespace shader::gcn

View file

@ -0,0 +1,256 @@
#pragma once
#include "dialect.hpp"
#include "ir/Kind.hpp"
#include <functional>
#include <ostream>
#include <span>
#include <type_traits>
namespace shader {
// One operand of a decoded GCN instruction. The anonymous union below holds
// the payload; which members are active depends on `kind`.
struct GcnOperand {
enum class Kind : std::uint8_t {
Invalid,
Constant,
Immediate,
VccLo,
VccHi,
M0,
ExecLo,
ExecHi,
Scc,
VccZ,
ExecZ,
LdsDirect,
Vgpr,
Sgpr,
Attr,
Buffer,
Texture128,
Texture256,
Sampler,
Pointer,
};
// Access flag bits stored in `access`.
static constexpr auto R = 1 << 0;
static constexpr auto W = 1 << 1;
union {
std::uint32_t value;
std::uint64_t address = 0;
// Active for Kind::Attr.
struct {
std::uint16_t attrId;
std::uint16_t attrChannel;
};
// Active for composite kinds (Buffer/Texture*/Sampler/Pointer): the
// first register holding the descriptor plus kind-specific extras.
struct {
Kind firstRegisterKind;
union {
struct {
Kind pointerOffsetKind;
std::uint16_t pointeeSize;
};
bool samplerUnorm;
};
std::uint32_t firstRegisterIndex;
union {
std::uint32_t pointerOffsetValue;
std::uint64_t pointerOffsetAddress;
};
};
};
Kind kind = Kind::Invalid;
std::uint8_t access = 0;
// VOP output modifier (omod) and input modifiers, packed as bitfields.
std::uint8_t omod : 4 = 0;
bool abs : 1 = false;
bool clamp : 1 = false;
bool neg : 1 = false;
// Returns the `offset`-th register of a composite operand as a plain
// register operand.
constexpr GcnOperand getUnderlyingOperand(int offset = 0) const {
return {
.value = firstRegisterIndex + offset,
.kind = firstRegisterKind,
};
}
// Returns the pointer-offset part of a Kind::Pointer operand.
constexpr GcnOperand getPointerOffsetOperand() const {
return {
.address = pointerOffsetAddress,
.kind = pointerOffsetKind,
};
}
static constexpr GcnOperand createImmediateConstant(std::uint64_t address) {
return GcnOperand{
.address = address,
.kind = Kind::Immediate,
.access = R,
};
}
static constexpr GcnOperand createConstant(std::uint32_t value) {
return GcnOperand{
.value = value,
.kind = Kind::Constant,
.access = R,
};
}
static constexpr GcnOperand createConstant(bool value) {
return createConstant(std::uint32_t(value ? 1 : 0));
}
static constexpr GcnOperand createConstant(float value) {
return createConstant(std::bit_cast<std::uint32_t>(value));
}
static constexpr GcnOperand createVgpr(std::uint32_t index) {
return {
.value = index,
.kind = Kind::Vgpr,
};
}
static constexpr GcnOperand createSgpr(std::uint32_t index) {
return {
.value = index,
.kind = Kind::Sgpr,
};
}
// NOTE(review): the composite factories below squeeze firstReg.value
// through std::uint8_t before storing it in the 32-bit firstRegisterIndex,
// which is lossy for indices > 255 -- presumably GCN register indices
// always fit, but confirm.
static constexpr GcnOperand createSampler(GcnOperand firstReg, bool unorm) {
return {
.firstRegisterKind = firstReg.kind,
.samplerUnorm = unorm,
.firstRegisterIndex = static_cast<std::uint8_t>(firstReg.value),
.kind = Kind::Sampler,
};
}
static constexpr GcnOperand createTexture(GcnOperand firstReg, bool is128) {
return {
.firstRegisterKind = firstReg.kind,
.firstRegisterIndex = static_cast<std::uint8_t>(firstReg.value),
.kind = (is128 ? Kind::Texture128 : Kind::Texture256),
};
}
static constexpr GcnOperand createBuffer(GcnOperand firstReg) {
return {
.firstRegisterKind = firstReg.kind,
.firstRegisterIndex = static_cast<std::uint8_t>(firstReg.value),
.kind = Kind::Buffer,
};
}
static constexpr GcnOperand
createPointer(GcnOperand firstReg, std::uint16_t size, GcnOperand offset) {
return {
.firstRegisterKind = firstReg.kind,
.pointerOffsetKind = offset.kind,
.pointeeSize = size,
.firstRegisterIndex = static_cast<std::uint8_t>(firstReg.value),
.pointerOffsetAddress = offset.address,
.kind = Kind::Pointer,
};
}
static constexpr GcnOperand createAttr(std::uint16_t id,
std::uint16_t channel) {
return {
.attrId = id,
.attrChannel = channel,
.kind = Kind::Attr,
};
}
// Fluent copies with modified access/modifier flags.
constexpr GcnOperand withRW() const { return withAccess(R | W); }
constexpr GcnOperand withR() const { return withAccess(R); }
constexpr GcnOperand withW() const { return withAccess(W); }
constexpr GcnOperand withAccess(std::uint8_t access) const {
GcnOperand result = *this;
result.access = access;
return result;
}
constexpr GcnOperand withNeg(bool value) const {
GcnOperand result = *this;
result.neg = value;
return result;
}
constexpr GcnOperand withAbs(bool value) const {
GcnOperand result = *this;
result.abs = value;
return result;
}
constexpr GcnOperand withClamp(bool value) const {
GcnOperand result = *this;
result.clamp = value;
return result;
}
constexpr GcnOperand withOutputModifier(std::uint8_t value) const {
GcnOperand result = *this;
result.omod = value;
return result;
}
// Special-register factories.
static constexpr GcnOperand createVccLo() { return {.kind = Kind::VccLo}; }
static constexpr GcnOperand createVccHi() { return {.kind = Kind::VccHi}; }
static constexpr GcnOperand createM0() { return {.kind = Kind::M0}; }
static constexpr GcnOperand createExecLo() { return {.kind = Kind::ExecLo}; }
static constexpr GcnOperand createExecHi() { return {.kind = Kind::ExecHi}; }
static constexpr GcnOperand createVccZ() { return {.kind = Kind::VccZ}; }
static constexpr GcnOperand createExecZ() { return {.kind = Kind::ExecZ}; }
static constexpr GcnOperand createScc() { return {.kind = Kind::Scc}; }
static constexpr GcnOperand createLdsDirect() {
return {.kind = Kind::LdsDirect};
}
void print(std::ostream &os) const;
void dump() const;
};
// A decoded GCN instruction: dialect kind + opcode plus up to 16 operands
// stored inline.
struct GcnInstruction {
ir::Kind kind = ir::Kind::Builtin;
unsigned op = ir::builtin::INVALID_INSTRUCTION;
GcnOperand operands[16];
std::size_t operandCount{};
std::span<const GcnOperand> getOperands() const {
return {operands, operandCount};
}
// Bounds-checked operand access; aborts on an out-of-range index.
const GcnOperand &getOperand(std::size_t index) const {
if (index >= operandCount) {
std::abort();
}
return operands[index];
}
// Appends an operand; aborts when the fixed-size operand array is full.
void addOperand(GcnOperand op) {
if (operandCount >= std::size(operands)) {
std::abort();
}
operands[operandCount++] = op;
}
// Compares against a dialect opcode constant; the opcode's dialect Kind is
// derived from the constant's type via kOpToKind.
template <typename T>
bool operator==(T testOp)
requires(ir::kOpToKind<std::remove_cvref_t<T>> != ir::Kind::Count)
{
return ir::kOpToKind<std::remove_cvref_t<T>> == kind && op == testOp;
}
void print(std::ostream &os) const;
void dump() const;
};
// Decodes one instruction at `address` (advancing it past the instruction),
// fetching 32-bit words through `readMemory`.
void readGcnInst(GcnInstruction &isaInst, std::uint64_t &address,
const std::function<std::uint32_t(std::uint64_t)> &readMemory);
} // namespace shader

View file

@ -0,0 +1,28 @@
#pragma once
#include "Access.hpp"
#include "ir/Value.hpp"
#include "spv.hpp"
#include <map>
#include <vector>
namespace shader {
// Per-module access information: for each function, which variables it
// touches (and how) plus its signature types.
struct ModuleInfo {
struct Param {
ir::Value type;
Access access = Access::None;
};
struct Function {
// Module-level variables accessed by the function body.
std::map<ir::Value, Access> variables;
std::vector<Param> parameters;
ir::Value returnType;
};
std::map<ir::Value, Function> functions;
};
// Collects access information for a single function into moduleInfo and
// returns the stored entry.
ModuleInfo::Function &collectFunctionInfo(ModuleInfo &moduleInfo,
ir::Value function);
// Collects access information for every function in the binary layout.
void collectModuleInfo(ModuleInfo &moduleInfo, const spv::BinaryLayout &layout);
} // namespace shader

View file

@ -0,0 +1,46 @@
#pragma once

#include "ModuleInfo.hpp"
#include "SpvTypeInfo.hpp"

namespace shader {

// Module info extended with an instruction-id -> semantic-body mapping.
struct SemanticModuleInfo : ModuleInfo {
  std::unordered_map<ir::InstructionId, ir::Value> semantics;

  // Returns the semantic implementation registered for `sem`, or a null
  // value when none exists.
  ir::Value findSemanticOf(ir::InstructionId sem) const {
    if (auto it = semantics.find(sem); it != semantics.end()) {
      return it->second;
    }
    return nullptr;
  }
};

// Lightweight, type-level description of instruction semantics.
struct SemanticInfo {
  struct Param {
    spv::TypeInfo type;
    Access access = Access::None;
  };

  struct Function {
    std::unordered_map<int, Access> registerAccesses;
    std::vector<Param> parameters;
    spv::TypeInfo returnType;
    Access bufferAccess = Access::None;
  };

  std::unordered_map<ir::InstructionId, Function> semantics;

  // Returns the semantic description for `sem`, or nullptr when unknown.
  const Function *findSemantic(ir::InstructionId sem) const {
    auto it = semantics.find(sem);
    if (it == semantics.end()) {
      return nullptr;
    }
    return &it->second;
  }
};

void collectSemanticModuleInfo(SemanticModuleInfo &moduleInfo,
                               const spv::BinaryLayout &layout);

} // namespace shader

View file

@ -0,0 +1,154 @@
#pragma once
#include "SpvTypeInfo.hpp"
#include "dialect/spv.hpp"
#include "spv.hpp"
namespace shader::spv {
// Clone map used when importing IR nodes from another context.
struct Import : ir::CloneMap {
ir::Node getOrCloneImpl(ir::Context &context, ir::Node node,
bool isOperand) override;
};
// IR context specialized for SPIR-V construction: tracks the module layout,
// deduplicated globals/constants/types and the shader interface variables
// (inputs, outputs, per-vertex block).
struct Context : ir::Context {
BinaryLayout layout;
ir::Location rootLocation;
ir::NameStorage ns;
ir::Value perVertex;
// Interface variables keyed by location index.
std::map<int, ir::Value> outputs;
std::map<int, ir::Value> inputs;
ir::RegionLike localVariables;
ir::RegionLike epilogue;
ir::Value entryPoint;
// Deduplicated global declarations/constants, grouped by instruction id.
std::map<ir::InstructionId, std::vector<ir::Value>> globals;
std::map<ir::InstructionId, std::vector<ir::Value>> constants;
Context();
ir::Value createRegionWithLabel(ir::Location loc);
void setName(ir::spv::IdRef inst, std::string name);
void setConstantName(ir::Value constant);
ir::Value getOrCreateConstant(ir::Value typeValue, const ir::Operand &value);
ir::Value getType(ir::spv::Op baseType, int width, bool isSigned);
ir::Value getType(const TypeInfo &info);
// Constant helpers: deduplicated immediates of the named type/width.
ir::Value imm64(std::uint64_t value) {
return getOrCreateConstant(getTypeUInt64(), value);
}
ir::Value imm32(std::uint32_t value) {
return getOrCreateConstant(getTypeUInt32(), value);
}
ir::Value simm64(std::int64_t value) {
return getOrCreateConstant(getTypeSInt64(), value);
}
ir::Value simm32(std::int32_t value) {
return getOrCreateConstant(getTypeSInt32(), value);
}
ir::Value fimm64(double value) {
return getOrCreateConstant(getTypeFloat(64), value);
}
ir::Value fimm32(float value) {
return getOrCreateConstant(getTypeFloat(32), value);
}
ir::Value getBool(bool value) { return value ? getTrue() : getFalse(); }
ir::Value getTrue() {
return getOrCreateGlobal(ir::spv::OpConstantTrue, {{getTypeBool()}});
}
ir::Value getFalse() {
return getOrCreateGlobal(ir::spv::OpConstantFalse, {{getTypeBool()}});
}
ir::Value getIndex(std::int32_t index) { return simm32(index); }
void setTypeName(ir::Value type);
// Registers an already-built global declaration and names it.
void addGlobal(ir::Value type) {
globals[type.getInstId()].push_back(type);
setTypeName(type);
}
ir::Value findGlobal(ir::spv::Op op,
std::span<const ir::Operand> operands = {}) const;
ir::Value createGlobal(ir::spv::Op op, std::span<const ir::Operand> operands);
ir::Value getOrCreateGlobal(ir::spv::Op op,
std::span<const ir::Operand> operands = {});
// Type helpers: deduplicated SPIR-V type declarations.
ir::Value getTypeInt(int width, bool sign) {
return getOrCreateGlobal(ir::spv::OpTypeInt, {{width, sign ? 1 : 0}});
}
ir::Value getTypeFloat(int width) {
return getOrCreateGlobal(ir::spv::OpTypeFloat, {{width}});
}
ir::Value getTypeVoid() { return getOrCreateGlobal(ir::spv::OpTypeVoid); }
ir::Value getTypeBool() { return getOrCreateGlobal(ir::spv::OpTypeBool); }
ir::Value getTypeSampler() {
return getOrCreateGlobal(ir::spv::OpTypeSampler);
}
ir::Value getTypeArray(ir::Value elementType, ir::Value count) {
return getOrCreateGlobal(ir::spv::OpTypeArray, {{elementType, count}});
}
ir::Value getTypeVector(ir::Value elementType, int count) {
return getOrCreateGlobal(ir::spv::OpTypeVector, {{elementType, count}});
}
ir::Value getTypeStruct(auto... elements) {
return getOrCreateGlobal(ir::spv::OpTypeStruct, {{elements...}});
}
ir::Value getTypeSInt8() { return getTypeInt(8, true); }
ir::Value getTypeUInt8() { return getTypeInt(8, false); }
ir::Value getTypeSInt16() { return getTypeInt(16, true); }
ir::Value getTypeUInt16() { return getTypeInt(16, false); }
ir::Value getTypeSInt32() { return getTypeInt(32, true); }
ir::Value getTypeUInt32() { return getTypeInt(32, false); }
ir::Value getTypeSInt64() { return getTypeInt(64, true); }
ir::Value getTypeUInt64() { return getTypeInt(64, false); }
ir::Value getTypeFloat16() { return getTypeFloat(16); }
ir::Value getTypeFloat32() { return getTypeFloat(32); }
ir::Value getTypeFloat64() { return getTypeFloat(64); }
ir::Value getTypeFunction(ir::Value returnType,
std::span<const ir::Value> params) {
std::vector<ir::Operand> operands;
operands.reserve(1 + params.size());
operands.push_back(returnType);
for (auto param : params) {
operands.push_back(param);
}
return getOrCreateGlobal(ir::spv::OpTypeFunction, operands);
}
ir::Value getTypePointer(ir::spv::StorageClass storageClass,
ir::spv::IdRef pointeeType) {
return getOrCreateGlobal(ir::spv::OpTypePointer,
{{storageClass, pointeeType}});
}
ir::Value getTypeImage(ir::spv::IdRef sampledType, ir::spv::Dim dim,
std::int32_t depth, bool arrayed, bool multisampled,
std::int32_t sampled, ir::spv::ImageFormat format) {
return getOrCreateGlobal(
ir::spv::OpTypeImage,
{{sampledType, dim, depth, arrayed, multisampled, sampled, format}});
}
ir::Value getOperandValue(const ir::Operand &op, ir::Value type = {});
// Shader-interface construction helpers (implemented in SpvConverter.cpp).
void createPerVertex();
ir::Value createUniformBuffer(int descriptorSet, int binding,
ir::Value structType);
ir::Value createRuntimeArrayUniformBuffer(int descriptorSet, int binding,
ir::Value elementType);
ir::Value createOutput(ir::Location loc, int index);
ir::Value createInput(ir::Location loc, int index);
ir::Value createAttr(ir::Location loc, int attrId, bool perVertex, bool flat);
};
} // namespace shader::spv

View file

@ -0,0 +1,18 @@
#pragma once
#include "dialect/spv.hpp"
namespace shader::spv {
// Decomposed description of a SPIR-V scalar or vector type: the base type
// opcode, its component opcode/width/count and signedness.
struct TypeInfo {
ir::spv::Op baseType = {};
ir::spv::Op componentType = {};
// Bit width of one component; componentsCount is 1 for scalars.
int componentWidth = 0;
int componentsCount = 1;
bool isSigned = false;
// Total bit width of the whole type.
int width() const { return componentWidth * componentsCount; }
bool operator==(const TypeInfo &other) const = default;
};
// Computes the TypeInfo of a SPIR-V type value.
TypeInfo getTypeInfo(ir::Value type);
} // namespace shader::spv

View file

@ -0,0 +1,129 @@
#pragma once
#include <array>
#include <cstdint>
namespace shader {
// Fixed-size numeric vector with GLSL-like element-wise operators. Derives
// from std::array, so aggregate initialization and the array interface are
// available.
template <typename T, std::size_t N> struct Vector : std::array<T, N> {
using std::array<T, N>::array;
// Explicit element-wise conversion to a vector of another element type.
template<typename U>
constexpr explicit operator Vector<U, N>() const {
Vector<U, N> result;
for (std::size_t i = 0; i < N; ++i) {
result[i] = static_cast<U>((*this)[i]);
}
return result;
}
// Element-wise binary operator, enabled only when T supports OP. Two
// overloads: vector OP vector and vector OP scalar. The result element
// type follows the expression's type (so comparisons yield Vector<bool>).
#define DEFINE_BINOP(OP) \
  constexpr auto operator OP(const Vector &other) const \
    requires requires(T lhs, T rhs) { lhs OP rhs; } \
  { \
    using ResultElementT = \
        std::remove_cvref_t<decltype(std::declval<T>() OP std::declval<T>())>; \
    Vector<ResultElementT, N> result; \
    for (std::size_t i = 0; i < N; ++i) { \
      result[i] = (*this)[i] OP other[i]; \
    } \
    return result; \
  } \
  constexpr auto operator OP(const T &other) const \
    requires requires(T lhs, T rhs) { lhs OP rhs; } \
  { \
    using ResultElementT = \
        std::remove_cvref_t<decltype(std::declval<T>() OP std::declval<T>())>; \
    Vector<ResultElementT, N> result; \
    for (std::size_t i = 0; i < N; ++i) { \
      result[i] = (*this)[i] OP other; \
    } \
    return result; \
  }
// Element-wise unary operator, enabled only when T supports OP.
#define DEFINE_UNOP(OP) \
  constexpr auto operator OP() const \
    requires requires(T rhs) { OP rhs; } \
  { \
    using ResultElementT = \
        std::remove_cvref_t<decltype(OP std::declval<T>())>; \
    Vector<ResultElementT, N> result; \
    for (std::size_t i = 0; i < N; ++i) { \
      result[i] = OP(*this)[i]; \
    } \
    return result; \
  }
DEFINE_BINOP(+)
DEFINE_BINOP(-)
DEFINE_BINOP(*)
DEFINE_BINOP(/)
DEFINE_BINOP(%)
DEFINE_BINOP(&)
DEFINE_BINOP(|)
DEFINE_BINOP(^)
DEFINE_BINOP(>>)
DEFINE_BINOP(<<)
DEFINE_BINOP(&&)
DEFINE_BINOP(||)
DEFINE_BINOP(<)
DEFINE_BINOP(>)
DEFINE_BINOP(<=)
DEFINE_BINOP(>=)
DEFINE_BINOP(==)
DEFINE_BINOP(!=)
DEFINE_UNOP(-)
DEFINE_UNOP(~)
DEFINE_UNOP(!)
#undef DEFINE_BINOP
#undef DEFINE_UNOP
};
// _Float16 is a compiler extension type; availability depends on the
// target/toolchain.
using float16_t = _Float16;
using float32_t = float;
using float64_t = double;
// GLSL-style aliases for the common element types and sizes.
using u8vec2 = Vector<std::uint8_t, 2>;
using u8vec3 = Vector<std::uint8_t, 3>;
using u8vec4 = Vector<std::uint8_t, 4>;
using i8vec2 = Vector<std::int8_t, 2>;
using i8vec3 = Vector<std::int8_t, 3>;
using i8vec4 = Vector<std::int8_t, 4>;
using u16vec2 = Vector<std::uint16_t, 2>;
using u16vec3 = Vector<std::uint16_t, 3>;
using u16vec4 = Vector<std::uint16_t, 4>;
using i16vec2 = Vector<std::int16_t, 2>;
using i16vec3 = Vector<std::int16_t, 3>;
using i16vec4 = Vector<std::int16_t, 4>;
using u32vec2 = Vector<std::uint32_t, 2>;
using u32vec3 = Vector<std::uint32_t, 3>;
using u32vec4 = Vector<std::uint32_t, 4>;
using i32vec2 = Vector<std::int32_t, 2>;
using i32vec3 = Vector<std::int32_t, 3>;
using i32vec4 = Vector<std::int32_t, 4>;
using u64vec2 = Vector<std::uint64_t, 2>;
using u64vec3 = Vector<std::uint64_t, 3>;
using u64vec4 = Vector<std::uint64_t, 4>;
using i64vec2 = Vector<std::int64_t, 2>;
using i64vec3 = Vector<std::int64_t, 3>;
using i64vec4 = Vector<std::int64_t, 4>;
using f32vec2 = Vector<float32_t, 2>;
using f32vec3 = Vector<float32_t, 3>;
using f32vec4 = Vector<float32_t, 4>;
using f64vec2 = Vector<float64_t, 2>;
using f64vec3 = Vector<float64_t, 3>;
using f64vec4 = Vector<float64_t, 4>;
using f16vec2 = Vector<float16_t, 2>;
using f16vec3 = Vector<float16_t, 3>;
using f16vec4 = Vector<float16_t, 4>;
using bvec2 = Vector<bool, 2>;
using bvec3 = Vector<bool, 3>;
using bvec4 = Vector<bool, 4>;
} // namespace shader

View file

@ -0,0 +1,445 @@
#pragma once
#include "ModuleInfo.hpp"
#include "SemanticInfo.hpp"
#include "dialect/memssa.hpp"
#include "graph.hpp"
#include "ir/Instruction.hpp"
#include "ir/Value.hpp"
#include "rx/FunctionRef.hpp"
#include "rx/TypeId.hpp"
#include <map>
#include <ostream>
#include <utility>
#include <vector>
namespace shader {
struct DomTree;
struct PostDomTree;
// Control-flow graph over IR basic blocks. A node corresponds to a label
// value and records its terminator plus predecessor/successor sets; the CFG
// additionally caches pre/postorder traversals.
class CFG {
public:
class Node {
ir::Value mLabel;
ir::Instruction mTerminator;
std::unordered_set<Node *> mPredecessors;
std::unordered_set<Node *> mSuccessors;
public:
using Iterator = std::unordered_set<Node *>::iterator;
Node() = default;
Node(ir::Value label) : mLabel(label) {}
ir::Value getLabel() { return mLabel; }
void setTerminator(ir::Instruction inst) { mTerminator = inst; }
bool hasTerminator() { return mTerminator != nullptr; }
ir::Instruction getTerminator() { return mTerminator; }
// Adds a directed edge this -> to, updating both adjacency sets.
void addEdge(Node *to) {
to->mPredecessors.insert(this);
mSuccessors.insert(to);
}
bool hasPredecessor(Node *node) { return mPredecessors.contains(node); }
bool hasSuccessor(Node *node) { return mSuccessors.contains(node); }
auto &getPredecessors() { return mPredecessors; }
auto &getSuccessors() { return mSuccessors; }
std::size_t getPredecessorCount() { return mPredecessors.size(); }
std::size_t getSuccessorCount() { return mSuccessors.size(); }
bool hasPredecessors() { return !mPredecessors.empty(); }
bool hasSuccessors() { return !mSuccessors.empty(); }
// Iteration over the block's instructions; the variants exclude the
// label and/or the terminator from the range.
template <typename T = ir::Instruction> auto range() {
return ir::range<T>(mLabel, mTerminator.getNext());
}
template <typename T = ir::Instruction> auto rangeWithoutLabel() {
return ir::range<T>(mLabel.getNext(),
mTerminator ? mTerminator.getNext() : nullptr);
}
template <typename T = ir::Instruction> auto rangeWithoutTerminator() {
return ir::range<T>(mLabel, mTerminator);
}
template <typename T = ir::Instruction>
auto rangeWithoutLabelAndTerminator() {
return ir::range<T>(mLabel.getNext(), mTerminator);
}
};
private:
std::map<ir::Value, Node> mNodes;
std::vector<Node *> mPreorderNodes;
std::vector<Node *> mPostorderNodes;
Node *mEntryNode = nullptr;
public:
bool empty() { return mNodes.empty(); }
void clear() {
mNodes.clear();
mPreorderNodes.clear();
mPostorderNodes.clear();
mEntryNode = nullptr;
}
void addPreorderNode(Node *node) { mPreorderNodes.push_back(node); }
void addPostorderNode(Node *node) { mPostorderNodes.push_back(node); }
Node *getEntryNode() { return mEntryNode; }
ir::Value getEntryLabel() { return getEntryNode()->getLabel(); }
void setEntryNode(Node *node) { mEntryNode = node; }
std::span<Node *> getPreorderNodes() { return mPreorderNodes; }
std::span<Node *> getPostorderNodes() { return mPostorderNodes; }
// Returns the node for `label`, creating an empty one if necessary.
Node *getOrCreateNode(ir::Value label) {
return &mNodes.emplace(label, label).first->second;
}
Node *getNode(ir::Value label) {
if (auto it = mNodes.find(label); it != mNodes.end()) {
return &it->second;
}
return nullptr;
}
auto &getSuccessors(ir::Value label) {
return getNode(label)->getSuccessors();
}
auto &getPredecessors(ir::Value label) {
return getNode(label)->getPredecessors();
}
void print(std::ostream &os, ir::NameStorage &ns, bool subgraph = false,
std::string_view nameSuffix = "");
std::string genTest();
// Builds a sub-CFG rooted at `from`, optionally bounded by stop labels and
// a continue label (see analyze.cpp).
CFG buildView(CFG::Node *from, PostDomTree *domTree = nullptr,
const std::unordered_set<ir::Value> &stopLabels = {},
ir::Value continueLabel = nullptr);
CFG buildView(ir::Value from, PostDomTree *domTree = nullptr,
const std::unordered_set<ir::Value> &stopLabels = {},
ir::Value continueLabel = nullptr) {
return buildView(getNode(from), domTree, stopLabels, continueLabel);
}
};
/// Memory-SSA form built over a CFG: maps IR variables/pointers to SSA
/// variables and records, per user instruction, the reaching definition of
/// each variable it touches.
class MemorySSA {
public:
  ir::Context context;
  ir::Region region;
  std::map<ir::Value, ir::memssa::Var> variableToVar;
  std::map<ir::Instruction, std::map<ir::memssa::Var, ir::memssa::Def>>
      userDefs;

  ir::memssa::Var getVar(ir::Value variable, std::span<const ir::Operand> path);
  ir::memssa::Var getVar(ir::Value pointer);

  /// Reaching definition recorded for `var` at `user`; null Def when none.
  ir::memssa::Def getDef(ir::Instruction user, ir::memssa::Var var) {
    auto userIt = userDefs.find(user);
    if (userIt != userDefs.end()) {
      auto &defs = userIt->second;
      if (auto defIt = defs.find(var); defIt != defs.end()) {
        return defIt->second;
      }
    }
    return {};
  }

  /// Reaching definition for the variable behind `pointer`; null Def when
  /// the pointer has no SSA variable or no definition was recorded.
  ir::memssa::Def getDef(ir::Instruction user, ir::Value pointer) {
    auto var = getVar(pointer);
    if (!var) {
      return {};
    }
    return getDef(user, var);
  }

  /// Instruction that produced the reaching definition, or null.
  ir::Instruction getDefInst(ir::Instruction user, ir::Value pointer) {
    auto def = getDef(user, pointer);
    if (!def) {
      return {};
    }
    return def.getLinkedInst();
  }

  void print(std::ostream &os, ir::Region irRegion, ir::NameStorage &ns);
  void print(std::ostream &os, ir::NameStorage &ns);
  void dump();

private:
  ir::memssa::Var getVarImpl(ir::Value variable);
};
bool isWithoutSideEffects(ir::InstructionId id);
bool isTerminator(ir::Instruction inst);
bool isBranch(ir::Instruction inst);
ir::Value unwrapPointer(ir::Value pointer);
graph::DomTree<ir::Value> buildDomTree(CFG &cfg, ir::Value root = nullptr);
graph::DomTree<ir::Value> buildPostDomTree(CFG &cfg, ir::Value root);
CFG buildCFG(ir::Instruction firstInstruction,
const std::unordered_set<ir::Value> &exitLabels = {},
ir::Value continueLabel = nullptr);
MemorySSA buildMemorySSA(CFG &cfg, ModuleInfo *moduleInfo = nullptr);
MemorySSA buildMemorySSA(CFG &cfg, const SemanticInfo &instructionSemantic,
std::function<ir::Value(int)> getRegisterVarCb);
bool dominates(ir::Instruction a, ir::Instruction b, bool isPostDom,
graph::DomTree<ir::Value> &domTree);
ir::Value findNearestCommonDominator(ir::Instruction a, ir::Instruction b,
graph::DomTree<ir::Value> &domTree);
/// Stores, per header label, the set of labels that jump back to it.
class BackEdgeStorage {
  std::unordered_map<ir::Value, std::unordered_set<ir::Value>> backEdges;

public:
  BackEdgeStorage() = default;
  BackEdgeStorage(CFG &cfg);

  /// Back edges targeting `value`, or nullptr when it has none recorded.
  const std::unordered_set<ir::Value> *get(ir::Value value) {
    auto it = backEdges.find(value);
    if (it == backEdges.end()) {
      return nullptr;
    }
    return &it->second;
  }

  auto &all() { return backEdges; }
};
// Type-erased cache of analysis results keyed by rx::TypeId. Entries are
// never destroyed on invalidation; they are rebuilt in place on the next
// get<T>() call, so pointers/references returned earlier stay valid.
struct AnalysisStorage {
  // Marks each listed analysis stale. Returns true if at least one entry
  // transitioned from valid to invalid.
  template <typename... T>
    requires(sizeof...(T) > 0)
  bool invalidate() {
    bool invalidated = false;
    ((invalidated = invalidate(rx::TypeId::get<T>()) || invalidated), ...);
    return invalidated;
  }
  // Marks one analysis stale by type id; returns true on a valid->invalid
  // transition, false when absent or already invalid.
  bool invalidate(rx::TypeId id) {
    if (auto it = mStorage.find(id); it != mStorage.end()) {
      return std::exchange(it->second.invalid, true) == false;
    }
    return false;
  }
  // Marks every cached analysis stale without destroying any of them.
  void invalidateAll() {
    for (auto &entry : mStorage) {
      entry.second.invalid = true;
    }
  }
  // Returns the cached T, constructing it from `args` on first use and
  // re-assigning it in place when the entry was invalidated.
  // NOTE: `args` are only consumed when (re)construction happens.
  template <typename T, typename... ArgsT>
  T &get(ArgsT &&...args)
    requires requires { T(std::forward<ArgsT>(args)...); }
  {
    void *result = getImpl(
        rx::TypeId::get<T>(), getDeleter<T>(),
        [&] {
          return std::make_unique<T>(std::forward<ArgsT>(args)...).release();
        },
        [&](void *object) {
          *reinterpret_cast<T *>(object) = T(std::forward<ArgsT>(args)...);
        });
    return *static_cast<T *>(result);
  }
  // Same as above, but the value comes from a lazily-invoked builder, so
  // expensive construction is skipped entirely on a cache hit.
  template <typename T, typename BuilderFn>
  T &get(BuilderFn &&builder)
    requires requires { T(std::forward<BuilderFn>(builder)()); }
  {
    void *result = getImpl(
        rx::TypeId::get<T>(), getDeleter<T>(),
        [&] {
          return std::make_unique<T>(std::forward<BuilderFn>(builder)())
              .release();
        },
        [&](void *object) {
          *reinterpret_cast<T *>(object) = std::forward<BuilderFn>(builder)();
        });
    return *static_cast<T *>(result);
  }

private:
  // Type-erased deleter for T, stored alongside the owning unique_ptr.
  template <typename T> static void (*getDeleter())(void *) {
    return +[](void *data) { delete static_cast<T *>(data); };
  }
  // Shared slow path: inserts a placeholder entry, then either constructs
  // a fresh object (first use) or assigns over the stale one (invalidated).
  void *getImpl(rx::TypeId typeId, void (*deleter)(void *),
                rx::FunctionRef<void *()> constructor,
                rx::FunctionRef<void(void *)> placementConstructor) {
    auto [it, inserted] = mStorage.emplace(typeId, getNullPointer());
    if (inserted) {
      it->second.object =
          std::unique_ptr<void, void (*)(void *)>(constructor(), deleter);
    } else if (it->second.invalid) {
      placementConstructor(it->second.object.get());
      it->second.invalid = false;
    }
    return it->second.object.get();
  }
  // Empty owner with a no-op deleter, used as the placeholder value.
  static constexpr std::unique_ptr<void, void (*)(void *)> getNullPointer() {
    return {nullptr, [](void *) {}};
  }
  struct Entry {
    std::unique_ptr<void, void (*)(void *)> object;
    bool invalid = false; // stale: rebuild in place on next get<T>()
  };
  std::map<rx::TypeId, Entry> mStorage;
};
// Thin strong-typedef over DomTree<ir::Value> so AnalysisStorage can cache
// dominator and post-dominator trees as distinct types.
struct PostDomTree : graph::DomTree<ir::Value> {
  PostDomTree() = default;
  PostDomTree(graph::DomTree<ir::Value> &&other)
      : graph::DomTree<ir::Value>::DomTree(std::move(other)) {}
  // Builds the post-dominator tree of `cfg` rooted at `root`.
  PostDomTree(CFG &cfg, ir::Value root)
      : PostDomTree(buildPostDomTree(cfg, root)) {}
};
// Strong typedef for forward dominator trees; see PostDomTree above for
// the caching rationale.
struct DomTree : graph::DomTree<ir::Value> {
  DomTree() = default;
  DomTree(graph::DomTree<ir::Value> &&other)
      : graph::DomTree<ir::Value>::DomTree(std::move(other)) {}
  // Builds the dominator tree of `cfg`; null root means the CFG entry.
  DomTree(CFG &cfg, ir::Value root = nullptr)
      : DomTree(buildDomTree(cfg, root)) {}
};
// Tag<T, N>: a distinct type that behaves exactly like T. Used to cache
// several variants of the same analysis type (e.g. CFG with and without
// the loop-continue edge) under different keys in AnalysisStorage.
template <typename T, std::size_t> struct Tag : T {
  using T::T;
  using T::operator=;
  Tag(T &&other) : T(std::move(other)) {}
  Tag(const T &other) : T(other) {}
  Tag &operator=(T &&other) {
    T::operator=(std::move(other));
    return *this;
  }
  Tag &operator=(const T &other) {
    T::operator=(other);
    return *this;
  }
};
/// A node in the structured-control-flow construct tree. Each construct is
/// a single-entry region [header, merge); loop constructs additionally
/// carry the body and continue labels. Derived analyses (CFG views,
/// dom/post-dom trees, back edges) are cached lazily per construct.
struct Construct {
  // Initialized to nullptr: children.emplace_front() and the stack-local
  // in createTemporaryChild() default-construct Construct, which previously
  // left this raw pointer indeterminate until assigned.
  Construct *parent = nullptr;
  std::forward_list<Construct> children;
  ir::Value header;       // entry label of the region
  ir::Value merge;        // label where control re-converges (exclusive)
  ir::Value loopBody;     // loop constructs only
  ir::Value loopContinue; // loop constructs only
  AnalysisStorage analysis;

  /// Creates the root construct for `region`, seeding its CFG cache from
  /// the region's first instruction and taking the entry label as header.
  static std::unique_ptr<Construct> createRoot(ir::RegionLike region,
                                               ir::Value merge) {
    auto result = std::make_unique<Construct>();
    auto &cfg =
        result->analysis.get<CFG>([&] { return buildCFG(region.getFirst()); });
    result->header = cfg.getEntryLabel();
    result->merge = merge;
    return result;
  }

  /// Creates a child construct (selection) owned by this node.
  Construct *createChild(ir::Value header, ir::Value merge) {
    auto &result = children.emplace_front();
    result.parent = this;
    result.header = header;
    result.merge = merge;
    return &result;
  }

  /// Creates a child loop construct owned by this node.
  Construct *createChild(ir::Value header, ir::Value merge,
                         ir::Value loopContinue, ir::Value loopBody) {
    auto &result = children.emplace_front();
    result.parent = this;
    result.header = header;
    result.merge = merge;
    result.loopContinue = loopContinue;
    result.loopBody = loopBody;
    return &result;
  }

  /// Creates a construct that is NOT linked into `children` — useful for
  /// speculative analysis that may be discarded.
  Construct createTemporaryChild(ir::Value header, ir::Value merge) {
    Construct result;
    result.parent = this;
    result.header = header;
    result.merge = merge;
    return result;
  }

  /// CFG restricted to this construct: a view of the parent CFG bounded by
  /// {header, merge}, or a freshly built CFG for the root.
  CFG &getCfg() {
    return analysis.get<CFG>([this] {
      if (parent != nullptr) {
        return parent->getCfg().buildView(
            header,
            &parent->getPostDomTree(),
            {header, merge});
      }
      return buildCFG(header);
    });
  }

  /// Like getCfg(), but with the loop-continue edge cut; falls back to
  /// getCfg() for non-loop constructs.
  CFG &getCfgWithoutContinue() {
    if (loopContinue == nullptr) {
      return getCfg();
    }
    return analysis.get<Tag<CFG, kWithoutContinue>>([this] {
      if (parent != nullptr) {
        return parent->getCfg().buildView(
            header,
            &parent->getPostDomTree(),
            {header, merge}, loopContinue);
      }
      return buildCFG(header, {}, loopContinue);
    });
  }

  DomTree &getDomTree() { return analysis.get<DomTree>(getCfg(), header); }
  PostDomTree &getPostDomTree() {
    return analysis.get<PostDomTree>(getCfg(), merge);
  }
  BackEdgeStorage &getBackEdgeStorage() {
    return analysis.get<BackEdgeStorage>(getCfg());
  }
  BackEdgeStorage &getBackEdgeWithoutContinueStorage() {
    if (loopContinue == nullptr) {
      return getBackEdgeStorage();
    }
    return analysis.get<Tag<BackEdgeStorage, kWithoutContinue>>(
        getCfgWithoutContinue());
  }

  auto getBackEdges(ir::Value node) { return getBackEdgeStorage().get(node); }
  auto getBackEdgesWithoutContinue(ir::Value node) {
    return getBackEdgeWithoutContinueStorage().get(node);
  }
  auto getBackEdges() { return getBackEdges(header); }

  void invalidate();
  void invalidateAll();

  bool isNull() const { return header == nullptr; }
  void removeLastChild() { children.pop_front(); }

private:
  enum {
    kWithoutContinue, // AnalysisStorage tag for continue-edge-free variants
  };
};
} // namespace shader

View file

@ -0,0 +1,78 @@
#pragma once
#include "dialect/builtin.hpp" // IWYU pragma: export
#include "dialect/ds.hpp" // IWYU pragma: export
#include "dialect/exp.hpp" // IWYU pragma: export
#include "dialect/memssa.hpp" // IWYU pragma: export
#include "dialect/mimg.hpp" // IWYU pragma: export
#include "dialect/mtbuf.hpp" // IWYU pragma: export
#include "dialect/mubuf.hpp" // IWYU pragma: export
#include "dialect/smrd.hpp" // IWYU pragma: export
#include "dialect/sop1.hpp" // IWYU pragma: export
#include "dialect/sop2.hpp" // IWYU pragma: export
#include "dialect/sopc.hpp" // IWYU pragma: export
#include "dialect/sopk.hpp" // IWYU pragma: export
#include "dialect/sopp.hpp" // IWYU pragma: export
#include "dialect/vintrp.hpp" // IWYU pragma: export
#include "dialect/vop1.hpp" // IWYU pragma: export
#include "dialect/vop2.hpp" // IWYU pragma: export
#include "dialect/vop3.hpp" // IWYU pragma: export
#include "dialect/vopc.hpp" // IWYU pragma: export
#include "dialect/spv.hpp" // IWYU pragma: export
#include "dialect/amdgpu.hpp" // IWYU pragma: export
#include <concepts>
namespace shader::ir {
// Opcode-enum -> dialect Kind mapping. One specialization per dialect;
// getInstructionId() below relies on these to encode (kind, op) pairs.
template <> inline constexpr Kind kOpToKind<spv::Op> = Kind::Spv;
template <> inline constexpr Kind kOpToKind<builtin::Op> = Kind::Builtin;
template <> inline constexpr Kind kOpToKind<amdgpu::Op> = Kind::AmdGpu;
template <> inline constexpr Kind kOpToKind<vop2::Op> = Kind::Vop2;
template <> inline constexpr Kind kOpToKind<sop2::Op> = Kind::Sop2;
template <> inline constexpr Kind kOpToKind<sopk::Op> = Kind::Sopk;
template <> inline constexpr Kind kOpToKind<smrd::Op> = Kind::Smrd;
template <> inline constexpr Kind kOpToKind<vop3::Op> = Kind::Vop3;
template <> inline constexpr Kind kOpToKind<mubuf::Op> = Kind::Mubuf;
template <> inline constexpr Kind kOpToKind<mtbuf::Op> = Kind::Mtbuf;
template <> inline constexpr Kind kOpToKind<mimg::Op> = Kind::Mimg;
template <> inline constexpr Kind kOpToKind<ds::Op> = Kind::Ds;
template <> inline constexpr Kind kOpToKind<vintrp::Op> = Kind::Vintrp;
template <> inline constexpr Kind kOpToKind<exp::Op> = Kind::Exp;
template <> inline constexpr Kind kOpToKind<vop1::Op> = Kind::Vop1;
template <> inline constexpr Kind kOpToKind<vopc::Op> = Kind::Vopc;
template <> inline constexpr Kind kOpToKind<sop1::Op> = Kind::Sop1;
template <> inline constexpr Kind kOpToKind<sopc::Op> = Kind::Sopc;
template <> inline constexpr Kind kOpToKind<sopp::Op> = Kind::Sopp;
template <> inline constexpr Kind kOpToKind<memssa::Op> = Kind::MemSSA;
// Builds a full InstructionId from a dialect opcode, inferring the Kind
// from the enum's kOpToKind mapping. Only enabled for mapped enum types.
template <typename T>
  requires(kOpToKind<std::remove_cvref_t<T>> != Kind::Count)
constexpr InstructionId getInstructionId(T op) {
  return getInstructionId(kOpToKind<std::remove_cvref_t<T>>, op);
}
// An instruction equals an InstructionId when it is non-null and carries
// that id; a null instruction compares unequal to every id.
constexpr bool operator==(ir::Instruction lhs, InstructionId rhs) {
  if (!lhs) {
    return false;
  }
  return lhs.getInstId() == rhs;
}
// Heterogeneous comparison via instruction ids: enabled whenever
// getInstructionId(lhs) is comparable to rhs (e.g. comparing a dialect
// opcode against an Instruction). Constrained to L != R so it never
// competes with regular same-type operator==.
template <typename L, typename R>
constexpr bool operator==(L lhs, R rhs)
  requires requires {
    requires(!std::is_same_v<L, R>);
    { getInstructionId(lhs) == rhs } -> std::convertible_to<bool>;
  }
{
  return getInstructionId(lhs) == rhs;
}
// Analogous heterogeneous comparison for type ids.
template <typename L, typename R>
constexpr bool operator==(L lhs, R rhs)
  requires requires {
    requires(!std::is_same_v<L, R>);
    { getTypeId(lhs) == rhs } -> std::convertible_to<bool>;
  }
{
  return getTypeId(lhs) == rhs;
}
} // namespace shader::ir

View file

@ -0,0 +1,57 @@
#pragma once
namespace shader::ir::amdgpu {
// Synthetic ops introduced during GCN -> SPIR-V translation.
enum Op {
  EXEC_TEST,
  BRANCH,
  IMM,
  USER_SGPR,
  VBUFFER,
  SAMPLER,
  TBUFFER,
  POINTER,
  OMOD,
  NEG_ABS,
  PS_INPUT_VGPR,
  PS_COMP_SWAP,
  VS_GET_INDEX,
  RESOURCE_PHI,
  OpCount,
};

/// Mnemonic for a synthetic amdgpu op; nullptr for out-of-range ids.
inline const char *getInstructionName(unsigned op) {
  // Indexed by Op; keep in sync with the enum above.
  static constexpr const char *kNames[] = {
      "exec_test",    "branch",       "imm",          "user_sgpr",
      "vbuffer",      "sampler",      "tbuffer",      "pointer",
      "omod",         "neg_abs",      "ps_input_vgpr", "ps_comp_swap",
      "vs_get_index", "resource_phi",
  };
  static_assert(sizeof(kNames) / sizeof(kNames[0]) == OpCount);
  if (op >= OpCount) {
    return nullptr;
  }
  return kNames[op];
}
} // namespace shader::ir::amdgpu

View file

@ -0,0 +1,193 @@
#pragma once
#include "../ir/Block.hpp"
#include "../ir/Builder.hpp"
#include "../ir/Value.hpp"
namespace shader::ir {
// Primary template of the opcode-enum -> dialect Kind map. Kind::Count
// means "no mapping"; each dialect header adds a specialization (see
// instructions.hpp).
template <typename T> inline constexpr Kind kOpToKind = Kind::Count;
}
namespace shader::ir::builtin {
// Structural pseudo-ops used by the structurizer.
enum Op {
  INVALID_INSTRUCTION,
  BLOCK,
  IF_ELSE,
  LOOP,
};

/// Mnemonic for a builtin op; nullptr for out-of-range ids.
inline const char *getInstructionName(unsigned id) {
  static constexpr const char *kNames[] = {
      "<invalid instruction>",
      "block",
      "ifElse",
      "loop",
  };
  if (id >= sizeof(kNames) / sizeof(kNames[0])) {
    return nullptr;
  }
  return kNames[id];
}
template <typename ImplT>
struct Builder : BuilderFacade<Builder<ImplT>, ImplT> {
  /**
   * Creates an invalid instruction with the given location.
   *
   * @param location the location of the instruction
   *
   * @return the created invalid instruction
   */
  Instruction createInvalidInstruction(Location location) {
    return this->template create<Instruction>(location, Kind::Builtin,
                                              INVALID_INSTRUCTION);
  }

  /**
   * Creates an ifElse instruction from a condition and one or two blocks.
   * The false block is only appended as an operand when present.
   */
  Instruction createIfElse(Location location, Value cond, Block ifTrue,
                           Block ifFalse = {}) {
    std::vector<Operand> operands = {{cond, ifTrue}};
    if (ifFalse) {
      operands.push_back(ifFalse);
    }
    return this->template create<Instruction>(location, Kind::Builtin, IF_ELSE,
                                              operands);
  }

  /**
   * Creates a loop instruction wrapping the given body block.
   * Bug fix: previously emitted IF_ELSE as the opcode, so "loops" were
   * indistinguishable from selections.
   */
  Instruction createLoop(Location location, Block body) {
    return this->template create<Instruction>(location, Kind::Builtin, LOOP,
                                              {{body}});
  }

  /** Creates an empty block at the given location. */
  auto createBlock(Location location) {
    return this->template create<Block>(location);
  }

  /** Creates an empty region at the given location. */
  auto createRegion(Location location) {
    return this->getContext().template create<Region>(location);
  }

  /**
   * Creates an instruction with the given location, kind, op, and operands.
   *
   * @param location the location of the instruction
   * @param kind the kind of the instruction
   * @param op the opcode of the instruction
   * @param operands the operands of the instruction
   *
   * @return the created instruction
   */
  Instruction createInstruction(Location location, Kind kind, unsigned op,
                                std::span<const Operand> operands = {}) {
    return this->template create<Instruction>(location, kind, op, operands);
  }

  /**
   * Overload inferring the Kind from the opcode's enum type (kOpToKind).
   */
  template <typename OpT>
  Instruction createInstruction(Location location, OpT &&op,
                                std::span<const Operand> operands = {})
    requires requires {
      this->template create<Instruction>(
          location, kOpToKind<std::remove_cvref_t<OpT>>, op, operands);
    }
  {
    return this->template create<Instruction>(
        location, kOpToKind<std::remove_cvref_t<OpT>>, op, operands);
  }

  /**
   * Creates an Instruction object with the given location, kind, opcode, and
   * operands.
   *
   * @param location the location of the instruction
   * @param kind the kind of the instruction
   * @param op the opcode of the instruction
   * @param operands variadic parameter pack of operands for the instruction
   *
   * @return the created Instruction object
   */
  template <typename... T>
  Instruction createInstruction(Location location, Kind kind, unsigned op,
                                T &&...operands)
    requires requires {
      createInstruction(location, kind, op,
                        {{Operand(std::forward<T>(operands))...}});
    }
  {
    return createInstruction(location, kind, op,
                             {{Operand(std::forward<T>(operands))...}});
  }

  /** Variadic-operand overload with Kind inferred from the opcode type. */
  template <typename OpT, typename... T>
  Instruction createInstruction(Location location, OpT &&op, T &&...operands)
    requires requires {
      createInstruction(location, std::forward<OpT>(op),
                        {{Operand(std::forward<T>(operands))...}});
    }
  {
    return createInstruction(location, std::forward<OpT>(op),
                             {{Operand(std::forward<T>(operands))...}});
  }

  /**
   * Creates a Value object with the given location, kind, opcode, and
   * operands.
   *
   * @param location the location of the Value object
   * @param kind the kind of the Value object
   * @param op the opcode of the Value object
   * @param operands a span of operands for the Value object
   *
   * @return the created Value object
   */
  auto createValue(Location location, Kind kind, unsigned op,
                   std::span<const Operand> operands = {}) {
    return this->template create<Value>(location, kind, op, operands);
  }

  /** Overload inferring the Kind from the opcode's enum type. */
  template <typename OpT>
  auto createValue(Location location, OpT &&op,
                   std::span<const Operand> operands = {})
    requires requires {
      this->template create<Value>(
          location, kOpToKind<std::remove_cvref_t<OpT>>, op, operands);
    }
  {
    return this->template create<Value>(
        location, kOpToKind<std::remove_cvref_t<OpT>>, op, operands);
  }

  /**
   * Creates a Value object with the given location, kind, opcode, and
   * operands.
   *
   * @param location the location of the Value object
   * @param kind the kind of the Value object
   * @param op the opcode of the Value object
   * @param operands variadic parameter pack of operands for the Value object
   *
   * @return the created Value object
   */
  template <typename... T>
  auto createValue(Location location, Kind kind, unsigned op, T &&...operands)
    requires requires {
      createValue(location, kind, op,
                  {{Operand(std::forward<T>(operands))...}});
    }
  {
    return createValue(location, kind, op,
                       {{Operand(std::forward<T>(operands))...}});
  }

  /** Variadic-operand overload with Kind inferred from the opcode type. */
  template <typename OpT, typename... T>
    requires requires { kOpToKind<std::remove_cvref_t<OpT>>; }
  auto createValue(Location location, OpT &&op, T &&...operands)
    requires requires {
      createValue(location, std::forward<OpT>(op),
                  {{Operand(std::forward<T>(operands))...}});
    }
  {
    return createValue(location, std::forward<OpT>(op),
                       {{Operand(std::forward<T>(operands))...}});
  }
};
} // namespace shader::ir::builtin

View file

@ -0,0 +1,294 @@
#pragma once
namespace shader::ir::ds {
// LDS/GDS (DS-encoding) opcodes. Values mirror the hardware encoding, so
// explicit "= N" initializers mark gaps where encodings are reserved.
enum Op {
  ADD_U32,
  SUB_U32,
  RSUB_U32,
  INC_U32,
  DEC_U32,
  MIN_I32,
  MAX_I32,
  MIN_U32,
  MAX_U32,
  AND_B32,
  OR_B32,
  XOR_B32,
  MSKOR_B32,
  WRITE_B32,
  WRITE2_B32,
  WRITE2ST64_B32,
  CMPST_B32,
  CMPST_F32,
  MIN_F32,
  MAX_F32,
  NOP,
  GWS_SEMA_RELEASE_ALL = 24,
  GWS_INIT,
  GWS_SEMA_V,
  GWS_SEMA_BR,
  GWS_SEMA_P,
  GWS_BARRIER,
  WRITE_B8,
  WRITE_B16,
  ADD_RTN_U32,
  SUB_RTN_U32,
  RSUB_RTN_U32,
  INC_RTN_U32,
  DEC_RTN_U32,
  MIN_RTN_I32,
  MAX_RTN_I32,
  MIN_RTN_U32,
  MAX_RTN_U32,
  AND_RTN_B32,
  OR_RTN_B32,
  XOR_RTN_B32,
  MSKOR_RTN_B32,
  WRXCHG_RTN_B32,
  WRXCHG2_RTN_B32,
  WRXCHG2ST64_RTN_B32,
  CMPST_RTN_B32,
  CMPST_RTN_F32,
  MIN_RTN_F32,
  MAX_RTN_F32,
  WRAP_RTN_B32,
  SWIZZLE_B32,
  READ_B32,
  READ2_B32,
  READ2ST64_B32,
  READ_I8,
  READ_U8,
  READ_I16,
  READ_U16,
  CONSUME,
  APPEND,
  ORDERED_COUNT,
  ADD_U64,
  SUB_U64,
  RSUB_U64,
  INC_U64,
  DEC_U64,
  MIN_I64,
  MAX_I64,
  MIN_U64,
  MAX_U64,
  AND_B64,
  OR_B64,
  XOR_B64,
  MSKOR_B64,
  WRITE_B64,
  WRITE2_B64,
  WRITE2ST64_B64,
  CMPST_B64,
  CMPST_F64,
  MIN_F64,
  MAX_F64,
  ADD_RTN_U64 = 96,
  SUB_RTN_U64,
  RSUB_RTN_U64,
  INC_RTN_U64,
  DEC_RTN_U64,
  MIN_RTN_I64,
  MAX_RTN_I64,
  MIN_RTN_U64,
  MAX_RTN_U64,
  AND_RTN_B64,
  OR_RTN_B64,
  XOR_RTN_B64,
  MSKOR_RTN_B64,
  WRXCHG_RTN_B64,
  WRXCHG2_RTN_B64,
  WRXCHG2ST64_RTN_B64,
  CMPST_RTN_B64,
  CMPST_RTN_F64,
  MIN_RTN_F64,
  MAX_RTN_F64,
  READ_B64 = 118,
  READ2_B64,
  READ2ST64_B64,
  CONDXCHG32_RTN_B64 = 126,
  ADD_SRC2_U32 = 128,
  SUB_SRC2_U32,
  RSUB_SRC2_U32,
  INC_SRC2_U32,
  DEC_SRC2_U32,
  MIN_SRC2_I32,
  MAX_SRC2_I32,
  MIN_SRC2_U32,
  MAX_SRC2_U32,
  AND_SRC2_B32,
  OR_SRC2_B32,
  XOR_SRC2_B32,
  WRITE_SRC2_B32,
  MIN_SRC2_F32 = 146,
  MAX_SRC2_F32,
  ADD_SRC2_U64 = 192,
  SUB_SRC2_U64,
  RSUB_SRC2_U64,
  INC_SRC2_U64,
  DEC_SRC2_U64,
  MIN_SRC2_I64,
  MAX_SRC2_I64,
  MIN_SRC2_U64,
  MAX_SRC2_U64,
  AND_SRC2_B64,
  OR_SRC2_B64,
  XOR_SRC2_B64,
  WRITE_SRC2_B64,
  MIN_SRC2_F64 = 210,
  MAX_SRC2_F64,
  WRITE_B96 = 222,
  WRITE_B128,
  CONDXCHG32_RTN_B128 = 253,
  READ_B96,
  READ_B128,
  OpCount
};
// Mnemonic for a DS opcode. Returns nullptr for ids that fall in the
// reserved encoding gaps (see the enum above) or beyond OpCount.
inline const char *getInstructionName(unsigned id) {
  switch (id) {
  case ADD_U32: return "ds_add_u32";
  case SUB_U32: return "ds_sub_u32";
  case RSUB_U32: return "ds_rsub_u32";
  case INC_U32: return "ds_inc_u32";
  case DEC_U32: return "ds_dec_u32";
  case MIN_I32: return "ds_min_i32";
  case MAX_I32: return "ds_max_i32";
  case MIN_U32: return "ds_min_u32";
  case MAX_U32: return "ds_max_u32";
  case AND_B32: return "ds_and_b32";
  case OR_B32: return "ds_or_b32";
  case XOR_B32: return "ds_xor_b32";
  case MSKOR_B32: return "ds_mskor_b32";
  case WRITE_B32: return "ds_write_b32";
  case WRITE2_B32: return "ds_write2_b32";
  case WRITE2ST64_B32: return "ds_write2st64_b32";
  case CMPST_B32: return "ds_cmpst_b32";
  case CMPST_F32: return "ds_cmpst_f32";
  case MIN_F32: return "ds_min_f32";
  case MAX_F32: return "ds_max_f32";
  case NOP: return "ds_nop";
  case GWS_SEMA_RELEASE_ALL: return "ds_gws_sema_release_all";
  case GWS_INIT: return "ds_gws_init";
  case GWS_SEMA_V: return "ds_gws_sema_v";
  case GWS_SEMA_BR: return "ds_gws_sema_br";
  case GWS_SEMA_P: return "ds_gws_sema_p";
  case GWS_BARRIER: return "ds_gws_barrier";
  case WRITE_B8: return "ds_write_b8";
  case WRITE_B16: return "ds_write_b16";
  case ADD_RTN_U32: return "ds_add_rtn_u32";
  case SUB_RTN_U32: return "ds_sub_rtn_u32";
  case RSUB_RTN_U32: return "ds_rsub_rtn_u32";
  case INC_RTN_U32: return "ds_inc_rtn_u32";
  case DEC_RTN_U32: return "ds_dec_rtn_u32";
  case MIN_RTN_I32: return "ds_min_rtn_i32";
  case MAX_RTN_I32: return "ds_max_rtn_i32";
  case MIN_RTN_U32: return "ds_min_rtn_u32";
  case MAX_RTN_U32: return "ds_max_rtn_u32";
  case AND_RTN_B32: return "ds_and_rtn_b32";
  case OR_RTN_B32: return "ds_or_rtn_b32";
  case XOR_RTN_B32: return "ds_xor_rtn_b32";
  case MSKOR_RTN_B32: return "ds_mskor_rtn_b32";
  case WRXCHG_RTN_B32: return "ds_wrxchg_rtn_b32";
  case WRXCHG2_RTN_B32: return "ds_wrxchg2_rtn_b32";
  case WRXCHG2ST64_RTN_B32: return "ds_wrxchg2st64_rtn_b32";
  case CMPST_RTN_B32: return "ds_cmpst_rtn_b32";
  case CMPST_RTN_F32: return "ds_cmpst_rtn_f32";
  case MIN_RTN_F32: return "ds_min_rtn_f32";
  case MAX_RTN_F32: return "ds_max_rtn_f32";
  case WRAP_RTN_B32: return "ds_wrap_rtn_b32";
  case SWIZZLE_B32: return "ds_swizzle_b32";
  case READ_B32: return "ds_read_b32";
  case READ2_B32: return "ds_read2_b32";
  case READ2ST64_B32: return "ds_read2st64_b32";
  case READ_I8: return "ds_read_i8";
  case READ_U8: return "ds_read_u8";
  case READ_I16: return "ds_read_i16";
  case READ_U16: return "ds_read_u16";
  case CONSUME: return "ds_consume";
  case APPEND: return "ds_append";
  case ORDERED_COUNT: return "ds_ordered_count";
  case ADD_U64: return "ds_add_u64";
  case SUB_U64: return "ds_sub_u64";
  case RSUB_U64: return "ds_rsub_u64";
  case INC_U64: return "ds_inc_u64";
  case DEC_U64: return "ds_dec_u64";
  case MIN_I64: return "ds_min_i64";
  case MAX_I64: return "ds_max_i64";
  case MIN_U64: return "ds_min_u64";
  case MAX_U64: return "ds_max_u64";
  case AND_B64: return "ds_and_b64";
  case OR_B64: return "ds_or_b64";
  case XOR_B64: return "ds_xor_b64";
  case MSKOR_B64: return "ds_mskor_b64";
  case WRITE_B64: return "ds_write_b64";
  case WRITE2_B64: return "ds_write2_b64";
  case WRITE2ST64_B64: return "ds_write2st64_b64";
  case CMPST_B64: return "ds_cmpst_b64";
  case CMPST_F64: return "ds_cmpst_f64";
  case MIN_F64: return "ds_min_f64";
  case MAX_F64: return "ds_max_f64";
  case ADD_RTN_U64: return "ds_add_rtn_u64";
  case SUB_RTN_U64: return "ds_sub_rtn_u64";
  case RSUB_RTN_U64: return "ds_rsub_rtn_u64";
  case INC_RTN_U64: return "ds_inc_rtn_u64";
  case DEC_RTN_U64: return "ds_dec_rtn_u64";
  case MIN_RTN_I64: return "ds_min_rtn_i64";
  case MAX_RTN_I64: return "ds_max_rtn_i64";
  case MIN_RTN_U64: return "ds_min_rtn_u64";
  case MAX_RTN_U64: return "ds_max_rtn_u64";
  case AND_RTN_B64: return "ds_and_rtn_b64";
  case OR_RTN_B64: return "ds_or_rtn_b64";
  case XOR_RTN_B64: return "ds_xor_rtn_b64";
  case MSKOR_RTN_B64: return "ds_mskor_rtn_b64";
  case WRXCHG_RTN_B64: return "ds_wrxchg_rtn_b64";
  case WRXCHG2_RTN_B64: return "ds_wrxchg2_rtn_b64";
  case WRXCHG2ST64_RTN_B64: return "ds_wrxchg2st64_rtn_b64";
  case CMPST_RTN_B64: return "ds_cmpst_rtn_b64";
  case CMPST_RTN_F64: return "ds_cmpst_rtn_f64";
  case MIN_RTN_F64: return "ds_min_rtn_f64";
  case MAX_RTN_F64: return "ds_max_rtn_f64";
  case READ_B64: return "ds_read_b64";
  case READ2_B64: return "ds_read2_b64";
  case READ2ST64_B64: return "ds_read2st64_b64";
  case CONDXCHG32_RTN_B64: return "ds_condxchg32_rtn_b64";
  case ADD_SRC2_U32: return "ds_add_src2_u32";
  case SUB_SRC2_U32: return "ds_sub_src2_u32";
  case RSUB_SRC2_U32: return "ds_rsub_src2_u32";
  case INC_SRC2_U32: return "ds_inc_src2_u32";
  case DEC_SRC2_U32: return "ds_dec_src2_u32";
  case MIN_SRC2_I32: return "ds_min_src2_i32";
  case MAX_SRC2_I32: return "ds_max_src2_i32";
  case MIN_SRC2_U32: return "ds_min_src2_u32";
  case MAX_SRC2_U32: return "ds_max_src2_u32";
  case AND_SRC2_B32: return "ds_and_src2_b32";
  case OR_SRC2_B32: return "ds_or_src2_b32";
  case XOR_SRC2_B32: return "ds_xor_src2_b32";
  case WRITE_SRC2_B32: return "ds_write_src2_b32";
  case MIN_SRC2_F32: return "ds_min_src2_f32";
  case MAX_SRC2_F32: return "ds_max_src2_f32";
  case ADD_SRC2_U64: return "ds_add_src2_u64";
  case SUB_SRC2_U64: return "ds_sub_src2_u64";
  case RSUB_SRC2_U64: return "ds_rsub_src2_u64";
  case INC_SRC2_U64: return "ds_inc_src2_u64";
  case DEC_SRC2_U64: return "ds_dec_src2_u64";
  case MIN_SRC2_I64: return "ds_min_src2_i64";
  case MAX_SRC2_I64: return "ds_max_src2_i64";
  case MIN_SRC2_U64: return "ds_min_src2_u64";
  case MAX_SRC2_U64: return "ds_max_src2_u64";
  case AND_SRC2_B64: return "ds_and_src2_b64";
  case OR_SRC2_B64: return "ds_or_src2_b64";
  case XOR_SRC2_B64: return "ds_xor_src2_b64";
  case WRITE_SRC2_B64: return "ds_write_src2_b64";
  case MIN_SRC2_F64: return "ds_min_src2_f64";
  case MAX_SRC2_F64: return "ds_max_src2_f64";
  case WRITE_B96: return "ds_write_b96";
  case WRITE_B128: return "ds_write_b128";
  case CONDXCHG32_RTN_B128: return "ds_condxchg32_rtn_b128";
  case READ_B96: return "ds_read_b96";
  case READ_B128: return "ds_read_b128";
  }
  return nullptr;
}
} // namespace shader::ir::ds

View file

@ -0,0 +1,11 @@
#pragma once
namespace shader::ir::exp {
// EXP has a single opcode.
enum Op {
  EXP = 0,
  OpCount
};

/// Mnemonic for an EXP opcode. Now returns nullptr for unknown ids, for
/// consistency with every other dialect header (previously it returned
/// "exp" unconditionally, even for invalid opcodes).
inline const char *getInstructionName(unsigned op) {
  return op == EXP ? "exp" : nullptr;
}
} // namespace shader::ir::exp

View file

@ -0,0 +1,423 @@
#pragma once
#include "../ir/Block.hpp"
#include "../ir/Builder.hpp"
#include "../ir/Value.hpp"
#include "../ir/ValueImpl.hpp"
namespace shader::ir::memssa {
// Memory-SSA node kinds: variables, (phi-)definitions, uses, barriers and
// the scope terminators (jump/exit).
enum Op {
  OpVar,
  OpDef,
  OpPhi,
  OpUse,
  OpBarrier,
  OpJump,
  OpExit,
  OpCount,
};
// Mixin adding a back-link from a memory-SSA node to the IR instruction it
// models; the link is appended to the node's printed form.
template <typename BaseT> struct BaseImpl : BaseT {
  Instruction link;
  using BaseT::BaseT;
  using BaseT::operator=;
  void print(std::ostream &os, NameStorage &ns) const override {
    BaseT::print(os, ns);
    if (link) {
      os << " : ";
      link.print(os, ns);
    }
  }
};
// Wrapper-side accessor for the linked IR instruction.
template <typename ImplT, template <typename> typename BaseT>
struct BaseWrapper : BaseT<ImplT> {
  using BaseT<ImplT>::BaseT;
  using BaseT<ImplT>::operator=;
  Instruction getLinkedInst() const { return this->impl->link; }
};
// Impl types for the memory-SSA node kinds; each knows how to clone itself.
struct DefImpl : BaseImpl<ValueImpl> {
  using BaseImpl::BaseImpl;
  using BaseImpl::operator=;
  Node clone(Context &context, CloneMap &map) const override;
};
struct UseImpl : BaseImpl<InstructionImpl> {
  using BaseImpl::BaseImpl;
  using BaseImpl::operator=;
  Node clone(Context &context, CloneMap &map) const override;
};
struct VarImpl : BaseImpl<ValueImpl> {
  using BaseImpl::BaseImpl;
  using BaseImpl::operator=;
  Node clone(Context &context, CloneMap &map) const override;
};
// Phi shares DefImpl: a phi IS a definition with per-predecessor operands.
struct PhiImpl : DefImpl {
  using DefImpl::DefImpl;
  using DefImpl::operator=;
  Node clone(Context &context, CloneMap &map) const override;
};
using Use = BaseWrapper<UseImpl, InstructionWrapper>;
using Var = BaseWrapper<VarImpl, ValueWrapper>;
template <typename ImplT> struct DefWrapper : BaseWrapper<ImplT, ValueWrapper> {
  using BaseWrapper<ImplT, ValueWrapper>::BaseWrapper;
  using BaseWrapper<ImplT, ValueWrapper>::operator=;

  /// Adds `variable` and, transitively, all of its component variables as
  /// operands of this definition. The traversal mirrors buildMatchList in
  /// ScopeWrapper::findVarDef.
  void addVariable(Var variable) {
    this->addOperand(variable);
    std::vector<Var> workList;
    for (auto &comp : variable.getOperands()) {
      auto compVar = comp.getAsValue().staticCast<Var>();
      this->addOperand(compVar);
      if (compVar.getOperandCount() > 1) {
        workList.push_back(compVar);
      } else if (compVar.getOperandCount() == 1) {
        this->addOperand(compVar.getOperand(0).getAsValue().staticCast<Var>());
      }
    }
    while (!workList.empty()) {
      auto var = workList.back();
      workList.pop_back();
      for (auto &comp : var.getOperands()) {
        auto compVar = comp.getAsValue().staticCast<Var>();
        this->addOperand(compVar);
        if (compVar.getOperandCount() > 1) {
          // Bug fix: this used to push the already-popped `var`, which
          // re-enqueued the same node on every iteration (infinite loop)
          // and never descended into nested composites. Push the component
          // instead, matching the seeding loop above and findVarDef.
          workList.push_back(compVar);
        } else if (compVar.getOperandCount() == 1) {
          this->addOperand(
              compVar.getOperand(0).getAsValue().staticCast<Var>());
        }
      }
    }
  }

  /// The variable this node defines (operand 0).
  Var getRootVar() {
    return this->getOperand(0).getAsValue().template staticCast<Var>();
  }
  /// Component variable at `index` (0 is the root).
  Var getVar(std::size_t index) {
    return this->getOperand(index).getAsValue().template staticCast<Var>();
  }
};
// A memory-SSA scope: a block of memssa nodes linked to an IR label.
struct ScopeImpl : BaseImpl<ir::BlockImpl> {
  using BaseImpl::BaseImpl;
  using BaseImpl::operator=;
  Node clone(Context &context, CloneMap &map) const override;
};
template <typename ImplT> struct ScopeWrapper;
using Scope = ScopeWrapper<ScopeImpl>;
using Def = DefWrapper<DefImpl>;
template <typename ImplT> struct BarrierWrapper : DefWrapper<ImplT> {
  using DefWrapper<ImplT>::DefWrapper;
  using DefWrapper<ImplT>::operator=;
};
// NOTE(review): Barrier reuses PhiImpl rather than a dedicated impl —
// presumably intentional since a barrier defines "everything" like a phi
// merges definitions; confirm against the clone() implementations.
using Barrier = BarrierWrapper<PhiImpl>;
// Scope-level graph navigation and reaching-definition search.
template <typename ImplT>
struct ScopeWrapper : BaseWrapper<ImplT, ir::BlockWrapper> {
  using BaseWrapper<ImplT, ir::BlockWrapper>::BaseWrapper;
  using BaseWrapper<ImplT, ir::BlockWrapper>::operator=;
  // The unique successor scope, or null if the scope is empty, does not
  // end in a jump, or the jump has multiple targets.
  Scope getSingleSuccessor() {
    if (this->empty()) {
      return {};
    }
    auto terminator = this->getLast();
    if (terminator.getKind() != Kind::MemSSA || terminator.getOp() != OpJump) {
      return {};
    }
    if (terminator.getOperandCount() != 1) {
      return {};
    }
    return terminator.getOperand(0).getAsValue().template cast<Scope>();
  }
  // All successor scopes of the terminating jump (empty when no jump).
  std::vector<Scope> getSuccessors() {
    if (this->empty()) {
      return {};
    }
    auto terminator = this->getLast();
    if (terminator.getKind() != Kind::MemSSA || terminator.getOp() != OpJump) {
      return {};
    }
    std::vector<Scope> result;
    result.reserve(terminator.getOperandCount());
    for (auto &successor : terminator.getOperands()) {
      if (auto block = successor.getAsValue().template cast<Scope>()) {
        result.push_back(block);
      }
    }
    return result;
  }
  // Scopes whose jump instructions reference this scope.
  auto getPredecessors() {
    std::set<Scope> predecessors;
    for (auto &use : this->getUseList()) {
      if (use.user != OpJump) {
        continue;
      }
      if (auto userParent = use.user.getParent().template cast<Scope>()) {
        predecessors.insert(userParent);
      }
    }
    return predecessors;
  }
  // The unique predecessor scope, or null when there are zero or several.
  auto getSinglePredecessor() {
    Scope predecessor;
    for (auto &use : this->getUseList()) {
      if (use.user != OpJump) {
        continue;
      }
      if (auto userParent = use.user.getParent().template cast<Scope>()) {
        if (predecessor == nullptr) {
          predecessor = userParent;
        } else if (predecessor != userParent) {
          return Scope(nullptr);
        }
      }
    }
    return predecessor;
  }
  // Walks backwards from `point` (default: end of scope) looking for a
  // def/phi of `var` or of any of its component variables; a barrier
  // matches unconditionally since it defines everything. The component
  // set is built lazily on the first non-root candidate.
  Def findVarDef(Var var, Instruction point = nullptr) {
    if (point == nullptr) {
      point = this->getLast();
    }
    std::optional<std::set<Var>> compList;
    auto buildMatchList = [&] {
      std::set<Var> result;
      std::vector<Var> workList;
      for (auto comp : var.getOperands()) {
        auto compVar = comp.getAsValue().staticCast<Var>();
        result.insert(compVar);
        if (compVar.getOperandCount() > 1) {
          workList.push_back(compVar);
        } else if (compVar.getOperandCount() == 1) {
          result.insert(compVar.getOperand(0).getAsValue().staticCast<Var>());
        }
      }
      while (!workList.empty()) {
        auto var = workList.back();
        workList.pop_back();
        for (auto comp : var.getOperands()) {
          auto compVar = comp.getAsValue().staticCast<Var>();
          result.insert(compVar);
          if (compVar.getOperandCount() > 1) {
            workList.push_back(compVar);
          } else if (compVar.getOperandCount() == 1) {
            result.insert(compVar.getOperand(0).getAsValue().staticCast<Var>());
          }
        }
      }
      return result;
    };
    for (auto child : revRange(point)) {
      if (child.getKind() != Kind::MemSSA) {
        continue;
      }
      if (child.getOp() == OpDef || child.getOp() == OpPhi) {
        if (child.getOperand(0) == var) {
          return child.template staticCast<Def>();
        }
        if (!compList) {
          compList = buildMatchList();
        }
        if (compList->empty()) {
          continue;
        }
        if (compList->contains(
                child.getOperand(0).getAsValue().staticCast<Var>())) {
          return child.template staticCast<Def>();
        }
      }
      if (child.getOp() == OpBarrier) {
        // barrier is definition for everything
        return child.template staticCast<Def>();
      }
    }
    return {};
  }
};
// Phi node: operand 0 is the variable, followed by (scope, def) pairs,
// one per predecessor.
template <typename ImplT> struct PhiWrapper : ValueWrapper<ImplT> {
  using ValueWrapper<ImplT>::ValueWrapper;
  using ValueWrapper<ImplT>::operator=;
  // Appends a new (predecessor scope, definition) pair.
  void addValue(Scope scope, Def def) {
    this->addOperand(scope);
    this->addOperand(def);
  }
  // Set value for specified block or add new node
  // Returns true if node was added
  bool setValue(Scope pred, Def def) {
    for (std::size_t i = 1, end = this->getOperandCount(); i < end; i += 2) {
      if (pred == this->getOperand(i).getAsValue()) {
        this->replaceOperand(i + 1, def);
        return false;
      }
    }
    addValue(pred, def);
    return true;
  }
  // Definition incoming from predecessor `pred`, or null when absent.
  Def getDef(Scope pred) {
    for (std::size_t i = 1, end = this->getOperandCount(); i < end; i += 2) {
      if (pred == this->getOperand(i).getAsValue()) {
        return this->getOperand(i + 1).getAsValue().template staticCast<Def>();
      }
    }
    return {};
  }
  // True when the phi carries no (scope, def) pairs yet.
  bool empty() { return this->getOperandCount() < 2; }
  // If every incoming def is the same, returns it; otherwise null. Defs
  // live at even operand indices (2, 4, ...).
  Def getUniqDef() {
    if (empty()) {
      return {};
    }
    Def result = this->getOperand(2).getAsValue().template staticCast<Def>();
    for (std::size_t i = 4, end = this->getOperandCount(); i < end; i += 2) {
      if (this->getOperand(i) != result) {
        return {};
      }
    }
    return result;
  }
  // The merged variable (operand 0).
  Var getVar() {
    return this->getOperand(0).getAsValue().template staticCast<Var>();
  }
};
using Phi = PhiWrapper<PhiImpl>;
// Factory for memory-SSA nodes. Every node created here is tagged with
// Kind::MemSSA (where an opcode applies) and linked back to the IR
// instruction it mirrors via impl->link.
template <typename ImplT>
struct Builder : BuilderFacade<Builder<ImplT>, ImplT> {
  // Creates a Def node recording that `defInst` writes variable `var`.
  Def createDef(Instruction defInst, Var var) {
    Def node =
        this->template create<Def>(defInst.getLocation(), Kind::MemSSA, OpDef);
    node.impl->link = defInst;
    node.addOperand(var);
    return node;
  }

  // Creates a Scope node mirroring the label instruction `labelInst`.
  Scope createScope(ir::Instruction labelInst) {
    Scope node = this->template create<Scope>(labelInst.getLocation());
    node.impl->link = labelInst;
    return node;
  }

  // Creates a Phi node for `var`; incoming (scope, def) pairs are
  // attached later through the Phi wrapper.
  Phi createPhi(Var var) {
    Phi node =
        this->template create<Phi>(var.getLocation(), Kind::MemSSA, OpPhi);
    node.addOperand(var);
    return node;
  }

  // Creates a Use node recording that `useInst` reads memory.
  Use createUse(ir::Instruction useInst) {
    Use node =
        this->template create<Use>(useInst.getLocation(), Kind::MemSSA, OpUse);
    node.impl->link = useInst;
    return node;
  }

  // Convenience overload: creates a Use node already bound to the
  // definition `def` it reads.
  Use createUse(ir::Instruction useInst, Def def) {
    Use node = createUse(useInst);
    node.addOperand(def);
    return node;
  }

  // Creates a Var node mirroring the IR instruction `varInst`.
  Var createVar(ir::Instruction varInst) {
    Var node =
        this->template create<Var>(varInst.getLocation(), Kind::MemSSA, OpVar);
    node.impl->link = varInst;
    return node;
  }

  // Creates a Barrier node mirroring `barrierInst`.
  Barrier createBarrier(ir::Instruction barrierInst) {
    Barrier node = this->template create<Barrier>(barrierInst.getLocation(),
                                                  Kind::MemSSA, OpBarrier);
    node.impl->link = barrierInst;
    return node;
  }

  // Creates an unconditional control-flow marker at `loc`.
  Instruction createJump(Location loc) {
    return this->template create<Instruction>(loc, Kind::MemSSA, OpJump);
  }

  // Creates a function-exit marker at `loc`.
  Instruction createExit(Location loc) {
    return this->template create<Instruction>(loc, Kind::MemSSA, OpExit);
  }
};
// Returns the human-readable mnemonic for a memory-SSA opcode, or
// nullptr when `op` is not a memssa opcode.
inline const char *getInstructionName(unsigned op) {
  switch (op) {
  case OpVar: return "var";
  case OpDef: return "def";
  case OpPhi: return "phi";
  case OpUse: return "use";
  case OpBarrier: return "barrier";
  case OpJump: return "jump";
  case OpExit: return "exit";
  default: return nullptr;
  }
}
} // namespace shader::ir::memssa

View file

@ -0,0 +1,199 @@
#pragma once
namespace shader::ir::mimg {
// MIMG (image memory) opcodes; numeric values follow the GCN encoding,
// including the gaps between groups.
enum Op {
  LOAD,
  LOAD_MIP,
  LOAD_PCK,
  LOAD_PCK_SGN,
  LOAD_MIP_PCK,
  LOAD_MIP_PCK_SGN,
  STORE = 8,
  STORE_MIP,
  STORE_PCK,
  STORE_MIP_PCK,
  GET_RESINFO = 14,
  ATOMIC_SWAP,
  ATOMIC_CMPSWAP,
  ATOMIC_ADD,
  ATOMIC_SUB,
  ATOMIC_RSUB,
  ATOMIC_SMIN,
  ATOMIC_UMIN,
  ATOMIC_SMAX,
  ATOMIC_UMAX,
  ATOMIC_AND,
  ATOMIC_OR,
  ATOMIC_XOR,
  ATOMIC_INC,
  ATOMIC_DEC,
  ATOMIC_FCMPSWAP,
  ATOMIC_FMIN,
  ATOMIC_FMAX,
  SAMPLE,
  SAMPLE_CL,
  SAMPLE_D,
  SAMPLE_D_CL,
  SAMPLE_L,
  SAMPLE_B,
  SAMPLE_B_CL,
  SAMPLE_LZ,
  SAMPLE_C,
  SAMPLE_C_CL,
  SAMPLE_C_D,
  SAMPLE_C_D_CL,
  SAMPLE_C_L,
  SAMPLE_C_B,
  SAMPLE_C_B_CL,
  SAMPLE_C_LZ,
  SAMPLE_O,
  SAMPLE_CL_O,
  SAMPLE_D_O,
  SAMPLE_D_CL_O,
  SAMPLE_L_O,
  SAMPLE_B_O,
  SAMPLE_B_CL_O,
  SAMPLE_LZ_O,
  SAMPLE_C_O,
  SAMPLE_C_CL_O,
  SAMPLE_C_D_O,
  SAMPLE_C_D_CL_O,
  SAMPLE_C_L_O,
  SAMPLE_C_B_O,
  SAMPLE_C_B_CL_O,
  SAMPLE_C_LZ_O,
  GATHER4,
  GATHER4_CL,
  GATHER4_L = 68,
  GATHER4_B,
  GATHER4_B_CL,
  GATHER4_LZ,
  GATHER4_C,
  GATHER4_C_CL,
  GATHER4_C_L = 76,
  GATHER4_C_B,
  GATHER4_C_B_CL,
  GATHER4_C_LZ,
  GATHER4_O,
  GATHER4_CL_O,
  GATHER4_L_O = 84,
  GATHER4_B_O,
  GATHER4_B_CL_O,
  GATHER4_LZ_O,
  GATHER4_C_O,
  GATHER4_C_CL_O,
  GATHER4_C_L_O = 92,
  GATHER4_C_B_O,
  GATHER4_C_B_CL_O,
  GATHER4_C_LZ_O,
  GET_LOD,
  SAMPLE_CD = 104,
  SAMPLE_CD_CL,
  SAMPLE_C_CD,
  SAMPLE_C_CD_CL,
  SAMPLE_CD_O,
  SAMPLE_CD_CL_O,
  SAMPLE_C_CD_O,
  SAMPLE_C_CD_CL_O,
  OpCount
};

// Returns the assembly mnemonic for a MIMG opcode, or nullptr when `id`
// falls into an encoding gap or outside the opcode range. Opcodes are not
// contiguous, so the table is scanned; this is a debug/printing helper,
// not a hot path.
inline const char *getInstructionName(unsigned id) {
  static constexpr struct {
    unsigned op;
    const char *name;
  } kMnemonics[] = {
      {LOAD, "image_load"},
      {LOAD_MIP, "image_load_mip"},
      {LOAD_PCK, "image_load_pck"},
      {LOAD_PCK_SGN, "image_load_pck_sgn"},
      {LOAD_MIP_PCK, "image_load_mip_pck"},
      {LOAD_MIP_PCK_SGN, "image_load_mip_pck_sgn"},
      {STORE, "image_store"},
      {STORE_MIP, "image_store_mip"},
      {STORE_PCK, "image_store_pck"},
      {STORE_MIP_PCK, "image_store_mip_pck"},
      {GET_RESINFO, "image_get_resinfo"},
      {ATOMIC_SWAP, "image_atomic_swap"},
      {ATOMIC_CMPSWAP, "image_atomic_cmpswap"},
      {ATOMIC_ADD, "image_atomic_add"},
      {ATOMIC_SUB, "image_atomic_sub"},
      {ATOMIC_RSUB, "image_atomic_rsub"},
      {ATOMIC_SMIN, "image_atomic_smin"},
      {ATOMIC_UMIN, "image_atomic_umin"},
      {ATOMIC_SMAX, "image_atomic_smax"},
      {ATOMIC_UMAX, "image_atomic_umax"},
      {ATOMIC_AND, "image_atomic_and"},
      {ATOMIC_OR, "image_atomic_or"},
      {ATOMIC_XOR, "image_atomic_xor"},
      {ATOMIC_INC, "image_atomic_inc"},
      {ATOMIC_DEC, "image_atomic_dec"},
      {ATOMIC_FCMPSWAP, "image_atomic_fcmpswap"},
      {ATOMIC_FMIN, "image_atomic_fmin"},
      {ATOMIC_FMAX, "image_atomic_fmax"},
      {SAMPLE, "image_sample"},
      {SAMPLE_CL, "image_sample_cl"},
      {SAMPLE_D, "image_sample_d"},
      {SAMPLE_D_CL, "image_sample_d_cl"},
      {SAMPLE_L, "image_sample_l"},
      {SAMPLE_B, "image_sample_b"},
      {SAMPLE_B_CL, "image_sample_b_cl"},
      {SAMPLE_LZ, "image_sample_lz"},
      {SAMPLE_C, "image_sample_c"},
      {SAMPLE_C_CL, "image_sample_c_cl"},
      {SAMPLE_C_D, "image_sample_c_d"},
      {SAMPLE_C_D_CL, "image_sample_c_d_cl"},
      {SAMPLE_C_L, "image_sample_c_l"},
      {SAMPLE_C_B, "image_sample_c_b"},
      {SAMPLE_C_B_CL, "image_sample_c_b_cl"},
      {SAMPLE_C_LZ, "image_sample_c_lz"},
      {SAMPLE_O, "image_sample_o"},
      {SAMPLE_CL_O, "image_sample_cl_o"},
      {SAMPLE_D_O, "image_sample_d_o"},
      {SAMPLE_D_CL_O, "image_sample_d_cl_o"},
      {SAMPLE_L_O, "image_sample_l_o"},
      {SAMPLE_B_O, "image_sample_b_o"},
      {SAMPLE_B_CL_O, "image_sample_b_cl_o"},
      {SAMPLE_LZ_O, "image_sample_lz_o"},
      {SAMPLE_C_O, "image_sample_c_o"},
      {SAMPLE_C_CL_O, "image_sample_c_cl_o"},
      {SAMPLE_C_D_O, "image_sample_c_d_o"},
      {SAMPLE_C_D_CL_O, "image_sample_c_d_cl_o"},
      {SAMPLE_C_L_O, "image_sample_c_l_o"},
      {SAMPLE_C_B_O, "image_sample_c_b_o"},
      {SAMPLE_C_B_CL_O, "image_sample_c_b_cl_o"},
      {SAMPLE_C_LZ_O, "image_sample_c_lz_o"},
      {GATHER4, "image_gather4"},
      {GATHER4_CL, "image_gather4_cl"},
      {GATHER4_L, "image_gather4_l"},
      {GATHER4_B, "image_gather4_b"},
      {GATHER4_B_CL, "image_gather4_b_cl"},
      {GATHER4_LZ, "image_gather4_lz"},
      {GATHER4_C, "image_gather4_c"},
      {GATHER4_C_CL, "image_gather4_c_cl"},
      {GATHER4_C_L, "image_gather4_c_l"},
      {GATHER4_C_B, "image_gather4_c_b"},
      {GATHER4_C_B_CL, "image_gather4_c_b_cl"},
      {GATHER4_C_LZ, "image_gather4_c_lz"},
      {GATHER4_O, "image_gather4_o"},
      {GATHER4_CL_O, "image_gather4_cl_o"},
      {GATHER4_L_O, "image_gather4_l_o"},
      {GATHER4_B_O, "image_gather4_b_o"},
      {GATHER4_B_CL_O, "image_gather4_b_cl_o"},
      {GATHER4_LZ_O, "image_gather4_lz_o"},
      {GATHER4_C_O, "image_gather4_c_o"},
      {GATHER4_C_CL_O, "image_gather4_c_cl_o"},
      {GATHER4_C_L_O, "image_gather4_c_l_o"},
      {GATHER4_C_B_O, "image_gather4_c_b_o"},
      {GATHER4_C_B_CL_O, "image_gather4_c_b_cl_o"},
      {GATHER4_C_LZ_O, "image_gather4_c_lz_o"},
      {GET_LOD, "image_get_lod"},
      {SAMPLE_CD, "image_sample_cd"},
      {SAMPLE_CD_CL, "image_sample_cd_cl"},
      {SAMPLE_C_CD, "image_sample_c_cd"},
      {SAMPLE_C_CD_CL, "image_sample_c_cd_cl"},
      {SAMPLE_CD_O, "image_sample_cd_o"},
      {SAMPLE_CD_CL_O, "image_sample_cd_cl_o"},
      {SAMPLE_C_CD_O, "image_sample_c_cd_o"},
      {SAMPLE_C_CD_CL_O, "image_sample_c_cd_cl_o"},
  };
  for (const auto &entry : kMnemonics) {
    if (entry.op == id) {
      return entry.name;
    }
  }
  return nullptr;
}
} // namespace shader::ir::mimg

View file

@ -0,0 +1,37 @@
#pragma once
namespace shader::ir::mtbuf {
// MTBUF (typed buffer) opcodes; values are contiguous from zero per the
// GCN encoding.
enum Op {
  LOAD_FORMAT_X,
  LOAD_FORMAT_XY,
  LOAD_FORMAT_XYZ,
  LOAD_FORMAT_XYZW,
  STORE_FORMAT_X,
  STORE_FORMAT_XY,
  STORE_FORMAT_XYZ,
  STORE_FORMAT_XYZW,
  OpCount
};

// Returns the assembly mnemonic for an MTBUF opcode, or nullptr for
// out-of-range ids. Opcodes are contiguous, so the table is indexed
// directly.
inline const char *getInstructionName(unsigned id) {
  static constexpr const char *kMnemonics[] = {
      "tbuffer_load_format_x",   "tbuffer_load_format_xy",
      "tbuffer_load_format_xyz", "tbuffer_load_format_xyzw",
      "tbuffer_store_format_x",  "tbuffer_store_format_xy",
      "tbuffer_store_format_xyz", "tbuffer_store_format_xyzw",
  };
  static_assert(sizeof(kMnemonics) / sizeof(kMnemonics[0]) == OpCount,
                "mnemonic table out of sync with Op enum");
  return id < OpCount ? kMnemonics[id] : nullptr;
}
} // namespace shader::ir::mtbuf

View file

@ -0,0 +1,129 @@
#pragma once
namespace shader::ir::mubuf {
// MUBUF (untyped buffer) opcodes; numeric values follow the GCN encoding,
// including the gaps between groups.
enum Op {
  LOAD_FORMAT_X,
  LOAD_FORMAT_XY,
  LOAD_FORMAT_XYZ,
  LOAD_FORMAT_XYZW,
  STORE_FORMAT_X,
  STORE_FORMAT_XY,
  STORE_FORMAT_XYZ,
  STORE_FORMAT_XYZW,
  LOAD_UBYTE,
  LOAD_SBYTE,
  LOAD_USHORT,
  LOAD_SSHORT,
  LOAD_DWORD,
  LOAD_DWORDX2,
  LOAD_DWORDX4,
  LOAD_DWORDX3,
  STORE_BYTE = 24,
  STORE_SHORT = 26,
  STORE_DWORD = 28,
  STORE_DWORDX2,
  STORE_DWORDX4,
  STORE_DWORDX3,
  ATOMIC_SWAP = 48,
  ATOMIC_CMPSWAP,
  ATOMIC_ADD,
  ATOMIC_SUB,
  ATOMIC_RSUB,
  ATOMIC_SMIN,
  ATOMIC_UMIN,
  ATOMIC_SMAX,
  ATOMIC_UMAX,
  ATOMIC_AND,
  ATOMIC_OR,
  ATOMIC_XOR,
  ATOMIC_INC,
  ATOMIC_DEC,
  ATOMIC_FCMPSWAP,
  ATOMIC_FMIN,
  ATOMIC_FMAX,
  ATOMIC_SWAP_X2 = 80,
  ATOMIC_CMPSWAP_X2,
  ATOMIC_ADD_X2,
  ATOMIC_SUB_X2,
  ATOMIC_RSUB_X2,
  ATOMIC_SMIN_X2,
  ATOMIC_UMIN_X2,
  ATOMIC_SMAX_X2,
  ATOMIC_UMAX_X2,
  ATOMIC_AND_X2,
  ATOMIC_OR_X2,
  ATOMIC_XOR_X2,
  ATOMIC_INC_X2,
  ATOMIC_DEC_X2,
  ATOMIC_FCMPSWAP_X2,
  ATOMIC_FMIN_X2,
  ATOMIC_FMAX_X2,
  WBINVL1_SC_VOL = 112,
  WBINVL1,
  OpCount
};

// Returns the assembly mnemonic for a MUBUF opcode, or nullptr when `id`
// falls into an encoding gap or outside the opcode range. Opcodes are not
// contiguous, so the table is scanned; this is a debug/printing helper.
inline const char *getInstructionName(unsigned id) {
  static constexpr struct {
    unsigned op;
    const char *name;
  } kMnemonics[] = {
      {LOAD_FORMAT_X, "buffer_load_format_x"},
      {LOAD_FORMAT_XY, "buffer_load_format_xy"},
      {LOAD_FORMAT_XYZ, "buffer_load_format_xyz"},
      {LOAD_FORMAT_XYZW, "buffer_load_format_xyzw"},
      {STORE_FORMAT_X, "buffer_store_format_x"},
      {STORE_FORMAT_XY, "buffer_store_format_xy"},
      {STORE_FORMAT_XYZ, "buffer_store_format_xyz"},
      {STORE_FORMAT_XYZW, "buffer_store_format_xyzw"},
      {LOAD_UBYTE, "buffer_load_ubyte"},
      {LOAD_SBYTE, "buffer_load_sbyte"},
      {LOAD_USHORT, "buffer_load_ushort"},
      {LOAD_SSHORT, "buffer_load_sshort"},
      {LOAD_DWORD, "buffer_load_dword"},
      {LOAD_DWORDX2, "buffer_load_dwordx2"},
      {LOAD_DWORDX4, "buffer_load_dwordx4"},
      {LOAD_DWORDX3, "buffer_load_dwordx3"},
      {STORE_BYTE, "buffer_store_byte"},
      {STORE_SHORT, "buffer_store_short"},
      {STORE_DWORD, "buffer_store_dword"},
      {STORE_DWORDX2, "buffer_store_dwordx2"},
      {STORE_DWORDX4, "buffer_store_dwordx4"},
      {STORE_DWORDX3, "buffer_store_dwordx3"},
      {ATOMIC_SWAP, "buffer_atomic_swap"},
      {ATOMIC_CMPSWAP, "buffer_atomic_cmpswap"},
      {ATOMIC_ADD, "buffer_atomic_add"},
      {ATOMIC_SUB, "buffer_atomic_sub"},
      {ATOMIC_RSUB, "buffer_atomic_rsub"},
      {ATOMIC_SMIN, "buffer_atomic_smin"},
      {ATOMIC_UMIN, "buffer_atomic_umin"},
      {ATOMIC_SMAX, "buffer_atomic_smax"},
      {ATOMIC_UMAX, "buffer_atomic_umax"},
      {ATOMIC_AND, "buffer_atomic_and"},
      {ATOMIC_OR, "buffer_atomic_or"},
      {ATOMIC_XOR, "buffer_atomic_xor"},
      {ATOMIC_INC, "buffer_atomic_inc"},
      {ATOMIC_DEC, "buffer_atomic_dec"},
      {ATOMIC_FCMPSWAP, "buffer_atomic_fcmpswap"},
      {ATOMIC_FMIN, "buffer_atomic_fmin"},
      {ATOMIC_FMAX, "buffer_atomic_fmax"},
      {ATOMIC_SWAP_X2, "buffer_atomic_swap_x2"},
      {ATOMIC_CMPSWAP_X2, "buffer_atomic_cmpswap_x2"},
      {ATOMIC_ADD_X2, "buffer_atomic_add_x2"},
      {ATOMIC_SUB_X2, "buffer_atomic_sub_x2"},
      {ATOMIC_RSUB_X2, "buffer_atomic_rsub_x2"},
      {ATOMIC_SMIN_X2, "buffer_atomic_smin_x2"},
      {ATOMIC_UMIN_X2, "buffer_atomic_umin_x2"},
      {ATOMIC_SMAX_X2, "buffer_atomic_smax_x2"},
      {ATOMIC_UMAX_X2, "buffer_atomic_umax_x2"},
      {ATOMIC_AND_X2, "buffer_atomic_and_x2"},
      {ATOMIC_OR_X2, "buffer_atomic_or_x2"},
      {ATOMIC_XOR_X2, "buffer_atomic_xor_x2"},
      {ATOMIC_INC_X2, "buffer_atomic_inc_x2"},
      {ATOMIC_DEC_X2, "buffer_atomic_dec_x2"},
      {ATOMIC_FCMPSWAP_X2, "buffer_atomic_fcmpswap_x2"},
      {ATOMIC_FMIN_X2, "buffer_atomic_fmin_x2"},
      {ATOMIC_FMAX_X2, "buffer_atomic_fmax_x2"},
      {WBINVL1_SC_VOL, "buffer_wbinvl1_sc_vol"},
      {WBINVL1, "buffer_wbinvl1"},
  };
  for (const auto &entry : kMnemonics) {
    if (entry.op == id) {
      return entry.name;
    }
  }
  return nullptr;
}
} // namespace shader::ir::mubuf

View file

@ -0,0 +1,39 @@
#pragma once
namespace shader::ir::smrd {
// SMRD (scalar memory read) opcodes; numeric values follow the GCN
// encoding, including the gaps.
enum Op {
  LOAD_DWORD,
  LOAD_DWORDX2,
  LOAD_DWORDX4,
  LOAD_DWORDX8,
  LOAD_DWORDX16,
  BUFFER_LOAD_DWORD = 8,
  BUFFER_LOAD_DWORDX2,
  BUFFER_LOAD_DWORDX4,
  BUFFER_LOAD_DWORDX8,
  BUFFER_LOAD_DWORDX16,
  DCACHE_INV_VOL = 29,
  MEMTIME,
  DCACHE_INV,
  OpCount
};

// Returns the assembly mnemonic for an SMRD opcode, or nullptr when `id`
// falls into an encoding gap or outside the opcode range.
inline const char *getInstructionName(unsigned id) {
  static constexpr struct {
    unsigned op;
    const char *name;
  } kMnemonics[] = {
      {LOAD_DWORD, "s_load_dword"},
      {LOAD_DWORDX2, "s_load_dwordx2"},
      {LOAD_DWORDX4, "s_load_dwordx4"},
      {LOAD_DWORDX8, "s_load_dwordx8"},
      {LOAD_DWORDX16, "s_load_dwordx16"},
      {BUFFER_LOAD_DWORD, "s_buffer_load_dword"},
      {BUFFER_LOAD_DWORDX2, "s_buffer_load_dwordx2"},
      {BUFFER_LOAD_DWORDX4, "s_buffer_load_dwordx4"},
      {BUFFER_LOAD_DWORDX8, "s_buffer_load_dwordx8"},
      {BUFFER_LOAD_DWORDX16, "s_buffer_load_dwordx16"},
      {DCACHE_INV_VOL, "s_dcache_inv_vol"},
      {MEMTIME, "s_memtime"},
      {DCACHE_INV, "s_dcache_inv"},
  };
  for (const auto &entry : kMnemonics) {
    if (entry.op == id) {
      return entry.name;
    }
  }
  return nullptr;
}
} // namespace shader::ir::smrd

View file

@ -0,0 +1,109 @@
#pragma once
namespace shader::ir::sop1 {
// SOP1 (scalar, one source) opcodes; numeric values follow the GCN
// encoding, including the gaps.
enum Op {
  MOV_B32 = 3,
  MOV_B64,
  CMOV_B32,
  CMOV_B64,
  NOT_B32,
  NOT_B64,
  WQM_B32,
  WQM_B64,
  BREV_B32,
  BREV_B64,
  BCNT0_I32_B32,
  BCNT0_I32_B64,
  BCNT1_I32_B32,
  BCNT1_I32_B64,
  FF0_I32_B32,
  FF0_I32_B64,
  FF1_I32_B32,
  FF1_I32_B64,
  FLBIT_I32_B32,
  FLBIT_I32_B64,
  FLBIT_I32,
  FLBIT_I32_I64,
  SEXT_I32_I8,
  SEXT_I32_I16,
  BITSET0_B32,
  BITSET0_B64,
  BITSET1_B32,
  BITSET1_B64,
  GETPC_B64,
  SETPC_B64,
  SWAPPC_B64,
  AND_SAVEEXEC_B64 = 36,
  OR_SAVEEXEC_B64,
  XOR_SAVEEXEC_B64,
  ANDN2_SAVEEXEC_B64,
  ORN2_SAVEEXEC_B64,
  NAND_SAVEEXEC_B64,
  NOR_SAVEEXEC_B64,
  XNOR_SAVEEXEC_B64,
  QUADMASK_B32,
  QUADMASK_B64,
  MOVRELS_B32,
  MOVRELS_B64,
  MOVRELD_B32,
  MOVRELD_B64,
  CBRANCH_JOIN,
  ABS_I32 = 52,
  MOV_FED_B32,
  OpCount
};

// Returns the assembly mnemonic for a SOP1 opcode, or nullptr when `id`
// falls into an encoding gap or outside the opcode range.
inline const char *getInstructionName(unsigned id) {
  static constexpr struct {
    unsigned op;
    const char *name;
  } kMnemonics[] = {
      {MOV_B32, "s_mov_b32"},
      {MOV_B64, "s_mov_b64"},
      {CMOV_B32, "s_cmov_b32"},
      {CMOV_B64, "s_cmov_b64"},
      {NOT_B32, "s_not_b32"},
      {NOT_B64, "s_not_b64"},
      {WQM_B32, "s_wqm_b32"},
      {WQM_B64, "s_wqm_b64"},
      {BREV_B32, "s_brev_b32"},
      {BREV_B64, "s_brev_b64"},
      {BCNT0_I32_B32, "s_bcnt0_i32_b32"},
      {BCNT0_I32_B64, "s_bcnt0_i32_b64"},
      {BCNT1_I32_B32, "s_bcnt1_i32_b32"},
      {BCNT1_I32_B64, "s_bcnt1_i32_b64"},
      {FF0_I32_B32, "s_ff0_i32_b32"},
      {FF0_I32_B64, "s_ff0_i32_b64"},
      {FF1_I32_B32, "s_ff1_i32_b32"},
      {FF1_I32_B64, "s_ff1_i32_b64"},
      {FLBIT_I32_B32, "s_flbit_i32_b32"},
      {FLBIT_I32_B64, "s_flbit_i32_b64"},
      {FLBIT_I32, "s_flbit_i32"},
      {FLBIT_I32_I64, "s_flbit_i32_i64"},
      {SEXT_I32_I8, "s_sext_i32_i8"},
      {SEXT_I32_I16, "s_sext_i32_i16"},
      {BITSET0_B32, "s_bitset0_b32"},
      {BITSET0_B64, "s_bitset0_b64"},
      {BITSET1_B32, "s_bitset1_b32"},
      {BITSET1_B64, "s_bitset1_b64"},
      {GETPC_B64, "s_getpc_b64"},
      {SETPC_B64, "s_setpc_b64"},
      {SWAPPC_B64, "s_swappc_b64"},
      {AND_SAVEEXEC_B64, "s_and_saveexec_b64"},
      {OR_SAVEEXEC_B64, "s_or_saveexec_b64"},
      {XOR_SAVEEXEC_B64, "s_xor_saveexec_b64"},
      {ANDN2_SAVEEXEC_B64, "s_andn2_saveexec_b64"},
      {ORN2_SAVEEXEC_B64, "s_orn2_saveexec_b64"},
      {NAND_SAVEEXEC_B64, "s_nand_saveexec_b64"},
      {NOR_SAVEEXEC_B64, "s_nor_saveexec_b64"},
      {XNOR_SAVEEXEC_B64, "s_xnor_saveexec_b64"},
      {QUADMASK_B32, "s_quadmask_b32"},
      {QUADMASK_B64, "s_quadmask_b64"},
      {MOVRELS_B32, "s_movrels_b32"},
      {MOVRELS_B64, "s_movrels_b64"},
      {MOVRELD_B32, "s_movreld_b32"},
      {MOVRELD_B64, "s_movreld_b64"},
      {CBRANCH_JOIN, "s_cbranch_join"},
      {ABS_I32, "s_abs_i32"},
      {MOV_FED_B32, "s_mov_fed_b32"},
  };
  for (const auto &entry : kMnemonics) {
    if (entry.op == id) {
      return entry.name;
    }
  }
  return nullptr;
}
} // namespace shader::ir::sop1

View file

@ -0,0 +1,171 @@
#pragma once
#include "../ir.hpp"
namespace shader::ir::sop2 {
// SOP2 (scalar, two sources) opcodes; numeric values follow the GCN
// encoding, including the gap at 12-13.
enum Op {
  ADD_U32,
  SUB_U32,
  ADD_I32,
  SUB_I32,
  ADDC_U32,
  SUBB_U32,
  MIN_I32,
  MIN_U32,
  MAX_I32,
  MAX_U32,
  CSELECT_B32,
  CSELECT_B64,
  AND_B32 = 14,
  AND_B64,
  OR_B32,
  OR_B64,
  XOR_B32,
  XOR_B64,
  ANDN2_B32,
  ANDN2_B64,
  ORN2_B32,
  ORN2_B64,
  NAND_B32,
  NAND_B64,
  NOR_B32,
  NOR_B64,
  XNOR_B32,
  XNOR_B64,
  LSHL_B32,
  LSHL_B64,
  LSHR_B32,
  LSHR_B64,
  ASHR_I32,
  ASHR_I64,
  BFM_B32,
  BFM_B64,
  MUL_I32,
  BFE_U32,
  BFE_I32,
  BFE_U64,
  BFE_I64,
  CBRANCH_G_FORK,
  ABSDIFF_I32,
  LSHL1_ADD_U32,
  LSHL2_ADD_U32,
  LSHL3_ADD_U32,
  LSHL4_ADD_U32,
  PACK_LL_B32_B16,
  PACK_LH_B32_B16,
  PACK_HH_B32_B16,
  MUL_HI_U32,
  MUL_HI_I32,
  OpCount
};

// Returns the assembly mnemonic for a SOP2 opcode, or nullptr when `id`
// falls into an encoding gap or outside the opcode range.
inline const char *getInstructionName(unsigned id) {
  switch (id) {
  case ADD_U32: return "s_add_u32";
  case SUB_U32: return "s_sub_u32";
  case ADD_I32: return "s_add_i32";
  case SUB_I32: return "s_sub_i32";
  case ADDC_U32: return "s_addc_u32";
  case SUBB_U32: return "s_subb_u32";
  case MIN_I32: return "s_min_i32";
  case MIN_U32: return "s_min_u32";
  case MAX_I32: return "s_max_i32";
  case MAX_U32: return "s_max_u32";
  case CSELECT_B32: return "s_cselect_b32";
  case CSELECT_B64: return "s_cselect_b64";
  case AND_B32: return "s_and_b32";
  case AND_B64: return "s_and_b64";
  case OR_B32: return "s_or_b32";
  case OR_B64: return "s_or_b64";
  case XOR_B32: return "s_xor_b32";
  case XOR_B64: return "s_xor_b64";
  case ANDN2_B32: return "s_andn2_b32";
  case ANDN2_B64: return "s_andn2_b64";
  case ORN2_B32: return "s_orn2_b32";
  case ORN2_B64: return "s_orn2_b64";
  case NAND_B32: return "s_nand_b32";
  case NAND_B64: return "s_nand_b64";
  case NOR_B32: return "s_nor_b32";
  case NOR_B64: return "s_nor_b64";
  case XNOR_B32: return "s_xnor_b32";
  case XNOR_B64: return "s_xnor_b64";
  case LSHL_B32: return "s_lshl_b32";
  case LSHL_B64: return "s_lshl_b64";
  case LSHR_B32: return "s_lshr_b32";
  case LSHR_B64: return "s_lshr_b64";
  case ASHR_I32: return "s_ashr_i32";
  case ASHR_I64: return "s_ashr_i64";
  case BFM_B32: return "s_bfm_b32";
  case BFM_B64: return "s_bfm_b64";
  case MUL_I32: return "s_mul_i32";
  case BFE_U32: return "s_bfe_u32";
  case BFE_I32: return "s_bfe_i32";
  case BFE_U64: return "s_bfe_u64";
  case BFE_I64: return "s_bfe_i64";
  case CBRANCH_G_FORK: return "s_cbranch_g_fork";
  case ABSDIFF_I32: return "s_absdiff_i32";
  case LSHL1_ADD_U32: return "s_lshl1_add_u32";
  case LSHL2_ADD_U32: return "s_lshl2_add_u32";
  case LSHL3_ADD_U32: return "s_lshl3_add_u32";
  case LSHL4_ADD_U32: return "s_lshl4_add_u32";
  case PACK_LL_B32_B16: return "s_pack_ll_b32_b16";
  case PACK_LH_B32_B16: return "s_pack_lh_b32_b16";
  case PACK_HH_B32_B16: return "s_pack_hh_b32_b16";
  case MUL_HI_U32: return "s_mul_hi_u32";
  case MUL_HI_I32: return "s_mul_hi_i32";
  default: return nullptr;
  }
}
} // namespace shader::ir::sop2

View file

@ -0,0 +1,67 @@
#pragma once
namespace shader::ir::sopc {
// SOPC (scalar compare) opcodes; numeric values are contiguous from zero
// per the GCN encoding.
enum Op {
  CMP_EQ_I32,
  CMP_LG_I32,
  CMP_GT_I32,
  CMP_GE_I32,
  CMP_LT_I32,
  CMP_LE_I32,
  CMP_EQ_U32,
  CMP_LG_U32,
  CMP_GT_U32,
  CMP_GE_U32,
  CMP_LT_U32,
  CMP_LE_U32,
  BITCMP0_B32,
  BITCMP1_B32,
  BITCMP0_B64,
  BITCMP1_B64,
  SETVSKIP,
  ILLEGALD,
  OpCount
};

// Returns the assembly mnemonic for a SOPC opcode, or nullptr when `id`
// is outside the opcode range.
//
// Fix: the bitcmp/setvskip mnemonics previously lacked the scalar "s_"
// prefix that the GCN ISA uses (s_bitcmp0_b32 etc.) and that every other
// entry in this table carries.
inline const char *getInstructionName(unsigned id) {
  switch (id) {
  case CMP_EQ_I32: return "s_cmp_eq_i32";
  case CMP_LG_I32: return "s_cmp_lg_i32";
  case CMP_GT_I32: return "s_cmp_gt_i32";
  case CMP_GE_I32: return "s_cmp_ge_i32";
  case CMP_LT_I32: return "s_cmp_lt_i32";
  case CMP_LE_I32: return "s_cmp_le_i32";
  case CMP_EQ_U32: return "s_cmp_eq_u32";
  case CMP_LG_U32: return "s_cmp_lg_u32";
  case CMP_GT_U32: return "s_cmp_gt_u32";
  case CMP_GE_U32: return "s_cmp_ge_u32";
  case CMP_LT_U32: return "s_cmp_lt_u32";
  case CMP_LE_U32: return "s_cmp_le_u32";
  case BITCMP0_B32: return "s_bitcmp0_b32";
  case BITCMP1_B32: return "s_bitcmp1_b32";
  case BITCMP0_B64: return "s_bitcmp0_b64";
  case BITCMP1_B64: return "s_bitcmp1_b64";
  case SETVSKIP: return "s_setvskip";
  // NOTE(review): "illegald" is not a documented GCN mnemonic; kept
  // as-is pending confirmation against the ISA opcode tables.
  case ILLEGALD: return "illegald";
  default: return nullptr;
  }
}
} // namespace shader::ir::sopc

View file

@ -0,0 +1,73 @@
#pragma once
namespace shader::ir::sopk {
// SOPK (scalar with 16-bit immediate) opcodes; numeric values follow the
// GCN encoding, including the gap at 1.
enum Op {
  MOVK_I32,
  CMOVK_I32 = 2,
  CMPK_EQ_I32,
  CMPK_LG_I32,
  CMPK_GT_I32,
  CMPK_GE_I32,
  CMPK_LT_I32,
  CMPK_LE_I32,
  CMPK_EQ_U32,
  CMPK_LG_U32,
  CMPK_GT_U32,
  CMPK_GE_U32,
  CMPK_LT_U32,
  CMPK_LE_U32,
  ADDK_I32,
  MULK_I32,
  CBRANCH_I_FORK,
  GETREG_B32,
  SETREG_B32,
  SETREG_IMM,
  OpCount
};

// Returns the assembly mnemonic for a SOPK opcode, or nullptr when `id`
// falls into the encoding gap or outside the opcode range.
inline const char *getInstructionName(unsigned id) {
  switch (id) {
  case MOVK_I32: return "s_movk_i32";
  case CMOVK_I32: return "s_cmovk_i32";
  case CMPK_EQ_I32: return "s_cmpk_eq_i32";
  case CMPK_LG_I32: return "s_cmpk_lg_i32";
  case CMPK_GT_I32: return "s_cmpk_gt_i32";
  case CMPK_GE_I32: return "s_cmpk_ge_i32";
  case CMPK_LT_I32: return "s_cmpk_lt_i32";
  case CMPK_LE_I32: return "s_cmpk_le_i32";
  case CMPK_EQ_U32: return "s_cmpk_eq_u32";
  case CMPK_LG_U32: return "s_cmpk_lg_u32";
  case CMPK_GT_U32: return "s_cmpk_gt_u32";
  case CMPK_GE_U32: return "s_cmpk_ge_u32";
  case CMPK_LT_U32: return "s_cmpk_lt_u32";
  case CMPK_LE_U32: return "s_cmpk_le_u32";
  case ADDK_I32: return "s_addk_i32";
  case MULK_I32: return "s_mulk_i32";
  case CBRANCH_I_FORK: return "s_cbranch_i_fork";
  case GETREG_B32: return "s_getreg_b32";
  case SETREG_B32: return "s_setreg_b32";
  case SETREG_IMM: return "s_setreg_imm";
  default: return nullptr;
  }
}
} // namespace shader::ir::sopk

View file

@ -0,0 +1,89 @@
#pragma once
namespace shader::ir::sopp {
// SOPP (scalar program control) opcodes; numeric values follow the GCN
// encoding, including the gaps at 3 and 11.
enum Op {
  NOP,
  ENDPGM,
  BRANCH,
  CBRANCH_SCC0 = 4,
  CBRANCH_SCC1,
  CBRANCH_VCCZ,
  CBRANCH_VCCNZ,
  CBRANCH_EXECZ,
  CBRANCH_EXECNZ,
  BARRIER,
  WAITCNT = 12,
  SETHALT,
  SLEEP,
  SETPRIO,
  SENDMSG,
  SENDMSGHALT,
  TRAP,
  ICACHE_INV,
  INCPERFLEVEL,
  DECPERFLEVEL,
  TTRACEDATA,
  CBRANCH_CDBGSYS = 23,
  CBRANCH_CDBGUSER = 24,
  CBRANCH_CDBGSYS_OR_USER = 25,
  CBRANCH_CDBGSYS_AND_USER = 26,
  OpCount
};

// Returns the assembly mnemonic for a SOPP opcode, or nullptr when `id`
// falls into an encoding gap or outside the opcode range.
inline const char *getInstructionName(unsigned id) {
  switch (id) {
  case NOP: return "s_nop";
  case ENDPGM: return "s_endpgm";
  case BRANCH: return "s_branch";
  case CBRANCH_SCC0: return "s_cbranch_scc0";
  case CBRANCH_SCC1: return "s_cbranch_scc1";
  case CBRANCH_VCCZ: return "s_cbranch_vccz";
  case CBRANCH_VCCNZ: return "s_cbranch_vccnz";
  case CBRANCH_EXECZ: return "s_cbranch_execz";
  case CBRANCH_EXECNZ: return "s_cbranch_execnz";
  case BARRIER: return "s_barrier";
  case WAITCNT: return "s_waitcnt";
  case SETHALT: return "s_sethalt";
  case SLEEP: return "s_sleep";
  case SETPRIO: return "s_setprio";
  case SENDMSG: return "s_sendmsg";
  case SENDMSGHALT: return "s_sendmsghalt";
  case TRAP: return "s_trap";
  case ICACHE_INV: return "s_icache_inv";
  case INCPERFLEVEL: return "s_incperflevel";
  case DECPERFLEVEL: return "s_decperflevel";
  case TTRACEDATA: return "s_ttracedata";
  case CBRANCH_CDBGSYS: return "s_cbranch_cdbgsys";
  case CBRANCH_CDBGUSER: return "s_cbranch_cdbguser";
  case CBRANCH_CDBGSYS_OR_USER: return "s_cbranch_cdbgsys_or_user";
  case CBRANCH_CDBGSYS_AND_USER: return "s_cbranch_cdbgsys_and_user";
  default: return nullptr;
  }
}
} // namespace shader::ir::sopp

View file

@ -0,0 +1,23 @@
#pragma once
namespace shader::ir::vintrp {
// VINTRP (vector parameter interpolation) opcodes; values are contiguous
// from zero per the GCN encoding.
enum Op {
  P1_F32,
  P2_F32,
  MOV_F32,
  OpCount
};

// Returns the assembly mnemonic for a VINTRP opcode, or nullptr for
// out-of-range ids. Opcodes are contiguous, so the table is indexed
// directly.
inline const char *getInstructionName(unsigned id) {
  static constexpr const char *kMnemonics[] = {
      "v_interp_p1_f32",
      "v_interp_p2_f32",
      "v_interp_mov_f32",
  };
  static_assert(sizeof(kMnemonics) / sizeof(kMnemonics[0]) == OpCount,
                "mnemonic table out of sync with Op enum");
  return id < OpCount ? kMnemonics[id] : nullptr;
}
} // namespace shader::ir::vintrp

View file

@ -0,0 +1,259 @@
#pragma once
namespace shader::ir::vop1 {
// VOP1 (vector, one source) opcodes; numeric values follow the GCN
// encoding, including the gaps between groups.
enum Op {
  NOP,
  MOV_B32,
  READFIRSTLANE_B32,
  CVT_I32_F64,
  CVT_F64_I32,
  CVT_F32_I32,
  CVT_F32_U32,
  CVT_U32_F32,
  CVT_I32_F32,
  MOV_FED_B32,
  CVT_F16_F32,
  CVT_F32_F16,
  CVT_RPI_I32_F32,
  CVT_FLR_I32_F32,
  CVT_OFF_F32_I4,
  CVT_F32_F64,
  CVT_F64_F32,
  CVT_F32_UBYTE0,
  CVT_F32_UBYTE1,
  CVT_F32_UBYTE2,
  CVT_F32_UBYTE3,
  CVT_U32_F64,
  CVT_F64_U32,
  FRACT_F32 = 32,
  TRUNC_F32,
  CEIL_F32,
  RNDNE_F32,
  FLOOR_F32,
  EXP_F32,
  LOG_CLAMP_F32,
  LOG_F32,
  RCP_CLAMP_F32,
  RCP_LEGACY_F32,
  RCP_F32,
  RCP_IFLAG_F32,
  RSQ_CLAMP_F32,
  RSQ_LEGACY_F32,
  RSQ_F32,
  RCP_F64,
  RCP_CLAMP_F64,
  RSQ_F64,
  RSQ_CLAMP_F64,
  SQRT_F32,
  SQRT_F64,
  SIN_F32,
  COS_F32,
  NOT_B32,
  BFREV_B32,
  FFBH_U32,
  FFBL_B32,
  FFBH_I32,
  FREXP_EXP_I32_F64,
  FREXP_MANT_F64,
  FRACT_F64,
  FREXP_EXP_I32_F32,
  FREXP_MANT_F32,
  CLREXCP,
  MOVRELD_B32,
  MOVRELS_B32,
  MOVRELSD_B32,
  CVT_F16_U16 = 80,
  CVT_F16_I16,
  CVT_U16_F16,
  CVT_I16_F16,
  RCP_F16,
  SQRT_F16,
  RSQ_F16,
  LOG_F16,
  EXP_F16,
  FREXP_MANT_F16,
  FREXP_EXP_I16_F16,
  FLOOR_F16,
  CEIL_F16,
  TRUNC_F16,
  RNDNE_F16,
  FRACT_F16,
  SIN_F16,
  COS_F16,
  SAT_PK_U8_I16,
  CVT_NORM_I16_F16,
  CVT_NORM_U16_F16,
  SWAP_B32,
  OpCount
};

// Returns the assembly mnemonic for a VOP1 opcode, or nullptr when `id`
// falls into an encoding gap or outside the opcode range.
inline const char *getInstructionName(unsigned id) {
  switch (id) {
  case NOP: return "v_nop";
  case MOV_B32: return "v_mov_b32";
  case READFIRSTLANE_B32: return "v_readfirstlane_b32";
  case CVT_I32_F64: return "v_cvt_i32_f64";
  case CVT_F64_I32: return "v_cvt_f64_i32";
  case CVT_F32_I32: return "v_cvt_f32_i32";
  case CVT_F32_U32: return "v_cvt_f32_u32";
  case CVT_U32_F32: return "v_cvt_u32_f32";
  case CVT_I32_F32: return "v_cvt_i32_f32";
  case MOV_FED_B32: return "v_mov_fed_b32";
  case CVT_F16_F32: return "v_cvt_f16_f32";
  case CVT_F32_F16: return "v_cvt_f32_f16";
  case CVT_RPI_I32_F32: return "v_cvt_rpi_i32_f32";
  case CVT_FLR_I32_F32: return "v_cvt_flr_i32_f32";
  case CVT_OFF_F32_I4: return "v_cvt_off_f32_i4";
  case CVT_F32_F64: return "v_cvt_f32_f64";
  case CVT_F64_F32: return "v_cvt_f64_f32";
  case CVT_F32_UBYTE0: return "v_cvt_f32_ubyte0";
  case CVT_F32_UBYTE1: return "v_cvt_f32_ubyte1";
  case CVT_F32_UBYTE2: return "v_cvt_f32_ubyte2";
  case CVT_F32_UBYTE3: return "v_cvt_f32_ubyte3";
  case CVT_U32_F64: return "v_cvt_u32_f64";
  case CVT_F64_U32: return "v_cvt_f64_u32";
  case FRACT_F32: return "v_fract_f32";
  case TRUNC_F32: return "v_trunc_f32";
  case CEIL_F32: return "v_ceil_f32";
  case RNDNE_F32: return "v_rndne_f32";
  case FLOOR_F32: return "v_floor_f32";
  case EXP_F32: return "v_exp_f32";
  case LOG_CLAMP_F32: return "v_log_clamp_f32";
  case LOG_F32: return "v_log_f32";
  case RCP_CLAMP_F32: return "v_rcp_clamp_f32";
  case RCP_LEGACY_F32: return "v_rcp_legacy_f32";
  case RCP_F32: return "v_rcp_f32";
  case RCP_IFLAG_F32: return "v_rcp_iflag_f32";
  case RSQ_CLAMP_F32: return "v_rsq_clamp_f32";
  case RSQ_LEGACY_F32: return "v_rsq_legacy_f32";
  case RSQ_F32: return "v_rsq_f32";
  case RCP_F64: return "v_rcp_f64";
  case RCP_CLAMP_F64: return "v_rcp_clamp_f64";
  case RSQ_F64: return "v_rsq_f64";
  case RSQ_CLAMP_F64: return "v_rsq_clamp_f64";
  case SQRT_F32: return "v_sqrt_f32";
  case SQRT_F64: return "v_sqrt_f64";
  case SIN_F32: return "v_sin_f32";
  case COS_F32: return "v_cos_f32";
  case NOT_B32: return "v_not_b32";
  case BFREV_B32: return "v_bfrev_b32";
  case FFBH_U32: return "v_ffbh_u32";
  case FFBL_B32: return "v_ffbl_b32";
  case FFBH_I32: return "v_ffbh_i32";
  case FREXP_EXP_I32_F64: return "v_frexp_exp_i32_f64";
  case FREXP_MANT_F64: return "v_frexp_mant_f64";
  case FRACT_F64: return "v_fract_f64";
  case FREXP_EXP_I32_F32: return "v_frexp_exp_i32_f32";
  case FREXP_MANT_F32: return "v_frexp_mant_f32";
  case CLREXCP: return "v_clrexcp";
  case MOVRELD_B32: return "v_movreld_b32";
  case MOVRELS_B32: return "v_movrels_b32";
  case MOVRELSD_B32: return "v_movrelsd_b32";
  case CVT_F16_U16: return "v_cvt_f16_u16";
  case CVT_F16_I16: return "v_cvt_f16_i16";
  case CVT_U16_F16: return "v_cvt_u16_f16";
  case CVT_I16_F16: return "v_cvt_i16_f16";
  case RCP_F16: return "v_rcp_f16";
  case SQRT_F16: return "v_sqrt_f16";
  case RSQ_F16: return "v_rsq_f16";
  case LOG_F16: return "v_log_f16";
  case EXP_F16: return "v_exp_f16";
  case FREXP_MANT_F16: return "v_frexp_mant_f16";
  case FREXP_EXP_I16_F16: return "v_frexp_exp_i16_f16";
  case FLOOR_F16: return "v_floor_f16";
  case CEIL_F16: return "v_ceil_f16";
  case TRUNC_F16: return "v_trunc_f16";
  case RNDNE_F16: return "v_rndne_f16";
  case FRACT_F16: return "v_fract_f16";
  case SIN_F16: return "v_sin_f16";
  case COS_F16: return "v_cos_f16";
  case SAT_PK_U8_I16: return "v_sat_pk_u8_i16";
  case CVT_NORM_I16_F16: return "v_cvt_norm_i16_f16";
  case CVT_NORM_U16_F16: return "v_cvt_norm_u16_f16";
  case SWAP_B32: return "v_swap_b32";
  default: return nullptr;
  }
}
} // namespace shader::ir::vop1

View file

@ -0,0 +1,164 @@
#pragma once
namespace shader::ir::vop2 {
// VOP2 (vector, two sources) opcodes; values are contiguous from zero per
// the GCN encoding.
enum Op {
  CNDMASK_B32,
  READLANE_B32,
  WRITELANE_B32,
  ADD_F32,
  SUB_F32,
  SUBREV_F32,
  MAC_LEGACY_F32,
  MUL_LEGACY_F32,
  MUL_F32,
  MUL_I32_I24,
  MUL_HI_I32_I24,
  MUL_U32_U24,
  MUL_HI_U32_U24,
  MIN_LEGACY_F32,
  MAX_LEGACY_F32,
  MIN_F32,
  MAX_F32,
  MIN_I32,
  MAX_I32,
  MIN_U32,
  MAX_U32,
  LSHR_B32,
  LSHRREV_B32,
  ASHR_I32,
  ASHRREV_I32,
  LSHL_B32,
  LSHLREV_B32,
  AND_B32,
  OR_B32,
  XOR_B32,
  BFM_B32,
  MAC_F32,
  MADMK_F32,
  MADAK_F32,
  BCNT_U32_B32,
  MBCNT_LO_U32_B32,
  MBCNT_HI_U32_B32,
  ADD_I32,
  SUB_I32,
  SUBREV_I32,
  ADDC_U32,
  SUBB_U32,
  SUBBREV_U32,
  LDEXP_F32,
  CVT_PKACCUM_U8_F32,
  CVT_PKNORM_I16_F32,
  CVT_PKNORM_U16_F32,
  CVT_PKRTZ_F16_F32,
  CVT_PK_U16_U32,
  CVT_PK_I16_I32,
  OpCount
};

// Returns the assembly mnemonic for a VOP2 opcode, or nullptr when `id`
// is outside the opcode range.
inline const char *getInstructionName(unsigned id) {
  switch (id) {
  case CNDMASK_B32: return "v_cndmask_b32";
  case READLANE_B32: return "v_readlane_b32";
  case WRITELANE_B32: return "v_writelane_b32";
  case ADD_F32: return "v_add_f32";
  case SUB_F32: return "v_sub_f32";
  case SUBREV_F32: return "v_subrev_f32";
  case MAC_LEGACY_F32: return "v_mac_legacy_f32";
  case MUL_LEGACY_F32: return "v_mul_legacy_f32";
  case MUL_F32: return "v_mul_f32";
  case MUL_I32_I24: return "v_mul_i32_i24";
  case MUL_HI_I32_I24: return "v_mul_hi_i32_i24";
  case MUL_U32_U24: return "v_mul_u32_u24";
  case MUL_HI_U32_U24: return "v_mul_hi_u32_u24";
  case MIN_LEGACY_F32: return "v_min_legacy_f32";
  case MAX_LEGACY_F32: return "v_max_legacy_f32";
  case MIN_F32: return "v_min_f32";
  case MAX_F32: return "v_max_f32";
  case MIN_I32: return "v_min_i32";
  case MAX_I32: return "v_max_i32";
  case MIN_U32: return "v_min_u32";
  case MAX_U32: return "v_max_u32";
  case LSHR_B32: return "v_lshr_b32";
  case LSHRREV_B32: return "v_lshrrev_b32";
  case ASHR_I32: return "v_ashr_i32";
  case ASHRREV_I32: return "v_ashrrev_i32";
  case LSHL_B32: return "v_lshl_b32";
  case LSHLREV_B32: return "v_lshlrev_b32";
  case AND_B32: return "v_and_b32";
  case OR_B32: return "v_or_b32";
  case XOR_B32: return "v_xor_b32";
  case BFM_B32: return "v_bfm_b32";
  case MAC_F32: return "v_mac_f32";
  case MADMK_F32: return "v_madmk_f32";
  case MADAK_F32: return "v_madak_f32";
  case BCNT_U32_B32: return "v_bcnt_u32_b32";
  case MBCNT_LO_U32_B32: return "v_mbcnt_lo_u32_b32";
  case MBCNT_HI_U32_B32: return "v_mbcnt_hi_u32_b32";
  case ADD_I32: return "v_add_i32";
  case SUB_I32: return "v_sub_i32";
  case SUBREV_I32: return "v_subrev_i32";
  case ADDC_U32: return "v_addc_u32";
  case SUBB_U32: return "v_subb_u32";
  case SUBBREV_U32: return "v_subbrev_u32";
  case LDEXP_F32: return "v_ldexp_f32";
  case CVT_PKACCUM_U8_F32: return "v_cvt_pkaccum_u8_f32";
  case CVT_PKNORM_I16_F32: return "v_cvt_pknorm_i16_f32";
  case CVT_PKNORM_U16_F32: return "v_cvt_pknorm_u16_f32";
  case CVT_PKRTZ_F16_F32: return "v_cvt_pkrtz_f16_f32";
  case CVT_PK_U16_U32: return "v_cvt_pk_u16_u32";
  case CVT_PK_I16_I32: return "v_cvt_pk_i16_i32";
  default: return nullptr;
  }
}
} // namespace shader::ir::vop2

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,522 @@
#pragma once
namespace shader::ir::vopc {
// VOPC (vector compare) instruction opcodes.
// Enumerator order mirrors the hardware opcode numbering of the GCN ISA
// encoding tables; the explicit initializers below (176, 192) skip gaps in
// the opcode space. NOTE(review): assumed to match the AMD SI/CI/VI VOPC
// tables — confirm against the ISA manual before reordering anything.
enum Op {
CMP_F_F32,
CMP_LT_F32,
CMP_EQ_F32,
CMP_LE_F32,
CMP_GT_F32,
CMP_LG_F32,
CMP_GE_F32,
CMP_O_F32,
CMP_U_F32,
CMP_NGE_F32,
CMP_NLG_F32,
CMP_NGT_F32,
CMP_NLE_F32,
CMP_NEQ_F32,
CMP_NLT_F32,
CMP_TRU_F32,
CMPX_F_F32,
CMPX_LT_F32,
CMPX_EQ_F32,
CMPX_LE_F32,
CMPX_GT_F32,
CMPX_LG_F32,
CMPX_GE_F32,
CMPX_O_F32,
CMPX_U_F32,
CMPX_NGE_F32,
CMPX_NLG_F32,
CMPX_NGT_F32,
CMPX_NLE_F32,
CMPX_NEQ_F32,
CMPX_NLT_F32,
CMPX_TRU_F32,
CMP_F_F64,
CMP_LT_F64,
CMP_EQ_F64,
CMP_LE_F64,
CMP_GT_F64,
CMP_LG_F64,
CMP_GE_F64,
CMP_O_F64,
CMP_U_F64,
CMP_NGE_F64,
CMP_NLG_F64,
CMP_NGT_F64,
CMP_NLE_F64,
CMP_NEQ_F64,
CMP_NLT_F64,
CMP_TRU_F64,
CMPX_F_F64,
CMPX_LT_F64,
CMPX_EQ_F64,
CMPX_LE_F64,
CMPX_GT_F64,
CMPX_LG_F64,
CMPX_GE_F64,
CMPX_O_F64,
CMPX_U_F64,
CMPX_NGE_F64,
CMPX_NLG_F64,
CMPX_NGT_F64,
CMPX_NLE_F64,
CMPX_NEQ_F64,
CMPX_NLT_F64,
CMPX_TRU_F64,
CMPS_F_F32,
CMPS_LT_F32,
CMPS_EQ_F32,
CMPS_LE_F32,
CMPS_GT_F32,
CMPS_LG_F32,
CMPS_GE_F32,
CMPS_O_F32,
CMPS_U_F32,
CMPS_NGE_F32,
CMPS_NLG_F32,
CMPS_NGT_F32,
CMPS_NLE_F32,
CMPS_NEQ_F32,
CMPS_NLT_F32,
CMPS_TRU_F32,
CMPSX_F_F32,
CMPSX_LT_F32,
CMPSX_EQ_F32,
CMPSX_LE_F32,
CMPSX_GT_F32,
CMPSX_LG_F32,
CMPSX_GE_F32,
CMPSX_O_F32,
CMPSX_U_F32,
CMPSX_NGE_F32,
CMPSX_NLG_F32,
CMPSX_NGT_F32,
CMPSX_NLE_F32,
CMPSX_NEQ_F32,
CMPSX_NLT_F32,
CMPSX_TRU_F32,
CMPS_F_F64,
CMPS_LT_F64,
CMPS_EQ_F64,
CMPS_LE_F64,
CMPS_GT_F64,
CMPS_LG_F64,
CMPS_GE_F64,
CMPS_O_F64,
CMPS_U_F64,
CMPS_NGE_F64,
CMPS_NLG_F64,
CMPS_NGT_F64,
CMPS_NLE_F64,
CMPS_NEQ_F64,
CMPS_NLT_F64,
CMPS_TRU_F64,
CMPSX_F_F64,
CMPSX_LT_F64,
CMPSX_EQ_F64,
CMPSX_LE_F64,
CMPSX_GT_F64,
CMPSX_LG_F64,
CMPSX_GE_F64,
CMPSX_O_F64,
CMPSX_U_F64,
CMPSX_NGE_F64,
CMPSX_NLG_F64,
CMPSX_NGT_F64,
CMPSX_NLE_F64,
CMPSX_NEQ_F64,
CMPSX_NLT_F64,
CMPSX_TRU_F64,
CMP_F_I32,
CMP_LT_I32,
CMP_EQ_I32,
CMP_LE_I32,
CMP_GT_I32,
CMP_NE_I32,
CMP_GE_I32,
CMP_T_I32,
CMP_CLASS_F32,
CMP_LT_I16,
CMP_EQ_I16,
CMP_LE_I16,
CMP_GT_I16,
CMP_NE_I16,
CMP_GE_I16,
CMP_CLASS_F16,
CMPX_F_I32,
CMPX_LT_I32,
CMPX_EQ_I32,
CMPX_LE_I32,
CMPX_GT_I32,
CMPX_NE_I32,
CMPX_GE_I32,
CMPX_T_I32,
CMPX_CLASS_F32,
CMPX_LT_I16,
CMPX_EQ_I16,
CMPX_LE_I16,
CMPX_GT_I16,
CMPX_NE_I16,
CMPX_GE_I16,
CMPX_CLASS_F16,
CMP_F_I64,
CMP_LT_I64,
CMP_EQ_I64,
CMP_LE_I64,
CMP_GT_I64,
CMP_NE_I64,
CMP_GE_I64,
CMP_T_I64,
CMP_CLASS_F64,
CMP_LT_U16,
CMP_EQ_U16,
CMP_LE_U16,
CMP_GT_U16,
CMP_NE_U16,
CMP_GE_U16,
// Explicit value: jump over a gap in the opcode space.
CMPX_F_I64 = 176,
CMPX_LT_I64,
CMPX_EQ_I64,
CMPX_LE_I64,
CMPX_GT_I64,
CMPX_NE_I64,
CMPX_GE_I64,
CMPX_T_I64,
CMPX_CLASS_F64,
CMPX_LT_U16,
CMPX_EQ_U16,
CMPX_LE_U16,
CMPX_GT_U16,
CMPX_NE_U16,
CMPX_GE_U16,
// Explicit value: jump over a gap in the opcode space.
CMP_F_U32 = 192,
CMP_LT_U32,
CMP_EQ_U32,
CMP_LE_U32,
CMP_GT_U32,
CMP_NE_U32,
CMP_GE_U32,
CMP_T_U32,
CMP_F_F16,
CMP_LT_F16,
CMP_EQ_F16,
CMP_LE_F16,
CMP_GT_F16,
CMP_LG_F16,
CMP_GE_F16,
CMP_O_F16,
CMPX_F_U32,
CMPX_LT_U32,
CMPX_EQ_U32,
CMPX_LE_U32,
CMPX_GT_U32,
CMPX_NE_U32,
CMPX_GE_U32,
CMPX_T_U32,
CMPX_F_F16,
CMPX_LT_F16,
CMPX_EQ_F16,
CMPX_LE_F16,
CMPX_GT_F16,
CMPX_LG_F16,
CMPX_GE_F16,
CMPX_O_F16,
CMP_F_U64,
CMP_LT_U64,
CMP_EQ_U64,
CMP_LE_U64,
CMP_GT_U64,
CMP_NE_U64,
CMP_GE_U64,
CMP_T_U64,
CMP_U_F16,
CMP_NGE_F16,
CMP_NLG_F16,
CMP_NGT_F16,
CMP_NLE_F16,
CMP_NEQ_F16,
CMP_NLT_F16,
CMP_TRU_F16,
CMPX_F_U64,
CMPX_LT_U64,
CMPX_EQ_U64,
CMPX_LE_U64,
CMPX_GT_U64,
CMPX_NE_U64,
CMPX_GE_U64,
CMPX_T_U64,
CMPX_U_F16,
CMPX_NGE_F16,
CMPX_NLG_F16,
CMPX_NGT_F16,
CMPX_NLE_F16,
CMPX_NEQ_F16,
CMPX_NLT_F16,
CMPX_TRU_F16,
// One-past-the-last defined opcode (note: NOT the count of enumerators,
// since the explicit initializers above leave unused gaps).
OpCount
};
// Returns the assembler mnemonic ("v_cmp*...") for the given VOPC opcode,
// or nullptr when `id` is not a known Op value (including the gaps left by
// the explicit enum initializers).
inline const char *getInstructionName(unsigned id) {
switch (id) {
case CMP_F_F32: return "v_cmp_f_f32";
case CMP_LT_F32: return "v_cmp_lt_f32";
case CMP_EQ_F32: return "v_cmp_eq_f32";
case CMP_LE_F32: return "v_cmp_le_f32";
case CMP_GT_F32: return "v_cmp_gt_f32";
case CMP_LG_F32: return "v_cmp_lg_f32";
case CMP_GE_F32: return "v_cmp_ge_f32";
case CMP_O_F32: return "v_cmp_o_f32";
case CMP_U_F32: return "v_cmp_u_f32";
case CMP_NGE_F32: return "v_cmp_nge_f32";
case CMP_NLG_F32: return "v_cmp_nlg_f32";
case CMP_NGT_F32: return "v_cmp_ngt_f32";
case CMP_NLE_F32: return "v_cmp_nle_f32";
case CMP_NEQ_F32: return "v_cmp_neq_f32";
case CMP_NLT_F32: return "v_cmp_nlt_f32";
case CMP_TRU_F32: return "v_cmp_tru_f32";
case CMPX_F_F32: return "v_cmpx_f_f32";
case CMPX_LT_F32: return "v_cmpx_lt_f32";
case CMPX_EQ_F32: return "v_cmpx_eq_f32";
case CMPX_LE_F32: return "v_cmpx_le_f32";
case CMPX_GT_F32: return "v_cmpx_gt_f32";
case CMPX_LG_F32: return "v_cmpx_lg_f32";
case CMPX_GE_F32: return "v_cmpx_ge_f32";
case CMPX_O_F32: return "v_cmpx_o_f32";
case CMPX_U_F32: return "v_cmpx_u_f32";
case CMPX_NGE_F32: return "v_cmpx_nge_f32";
case CMPX_NLG_F32: return "v_cmpx_nlg_f32";
case CMPX_NGT_F32: return "v_cmpx_ngt_f32";
case CMPX_NLE_F32: return "v_cmpx_nle_f32";
case CMPX_NEQ_F32: return "v_cmpx_neq_f32";
case CMPX_NLT_F32: return "v_cmpx_nlt_f32";
case CMPX_TRU_F32: return "v_cmpx_tru_f32";
case CMP_F_F64: return "v_cmp_f_f64";
case CMP_LT_F64: return "v_cmp_lt_f64";
case CMP_EQ_F64: return "v_cmp_eq_f64";
case CMP_LE_F64: return "v_cmp_le_f64";
case CMP_GT_F64: return "v_cmp_gt_f64";
case CMP_LG_F64: return "v_cmp_lg_f64";
case CMP_GE_F64: return "v_cmp_ge_f64";
case CMP_O_F64: return "v_cmp_o_f64";
case CMP_U_F64: return "v_cmp_u_f64";
case CMP_NGE_F64: return "v_cmp_nge_f64";
case CMP_NLG_F64: return "v_cmp_nlg_f64";
case CMP_NGT_F64: return "v_cmp_ngt_f64";
case CMP_NLE_F64: return "v_cmp_nle_f64";
case CMP_NEQ_F64: return "v_cmp_neq_f64";
case CMP_NLT_F64: return "v_cmp_nlt_f64";
case CMP_TRU_F64: return "v_cmp_tru_f64";
case CMPX_F_F64: return "v_cmpx_f_f64";
case CMPX_LT_F64: return "v_cmpx_lt_f64";
case CMPX_EQ_F64: return "v_cmpx_eq_f64";
case CMPX_LE_F64: return "v_cmpx_le_f64";
case CMPX_GT_F64: return "v_cmpx_gt_f64";
case CMPX_LG_F64: return "v_cmpx_lg_f64";
case CMPX_GE_F64: return "v_cmpx_ge_f64";
case CMPX_O_F64: return "v_cmpx_o_f64";
case CMPX_U_F64: return "v_cmpx_u_f64";
case CMPX_NGE_F64: return "v_cmpx_nge_f64";
case CMPX_NLG_F64: return "v_cmpx_nlg_f64";
case CMPX_NGT_F64: return "v_cmpx_ngt_f64";
case CMPX_NLE_F64: return "v_cmpx_nle_f64";
case CMPX_NEQ_F64: return "v_cmpx_neq_f64";
case CMPX_NLT_F64: return "v_cmpx_nlt_f64";
case CMPX_TRU_F64: return "v_cmpx_tru_f64";
case CMPS_F_F32: return "v_cmps_f_f32";
case CMPS_LT_F32: return "v_cmps_lt_f32";
case CMPS_EQ_F32: return "v_cmps_eq_f32";
case CMPS_LE_F32: return "v_cmps_le_f32";
case CMPS_GT_F32: return "v_cmps_gt_f32";
case CMPS_LG_F32: return "v_cmps_lg_f32";
case CMPS_GE_F32: return "v_cmps_ge_f32";
case CMPS_O_F32: return "v_cmps_o_f32";
case CMPS_U_F32: return "v_cmps_u_f32";
case CMPS_NGE_F32: return "v_cmps_nge_f32";
case CMPS_NLG_F32: return "v_cmps_nlg_f32";
case CMPS_NGT_F32: return "v_cmps_ngt_f32";
case CMPS_NLE_F32: return "v_cmps_nle_f32";
case CMPS_NEQ_F32: return "v_cmps_neq_f32";
case CMPS_NLT_F32: return "v_cmps_nlt_f32";
case CMPS_TRU_F32: return "v_cmps_tru_f32";
case CMPSX_F_F32: return "v_cmpsx_f_f32";
case CMPSX_LT_F32: return "v_cmpsx_lt_f32";
case CMPSX_EQ_F32: return "v_cmpsx_eq_f32";
case CMPSX_LE_F32: return "v_cmpsx_le_f32";
case CMPSX_GT_F32: return "v_cmpsx_gt_f32";
case CMPSX_LG_F32: return "v_cmpsx_lg_f32";
case CMPSX_GE_F32: return "v_cmpsx_ge_f32";
case CMPSX_O_F32: return "v_cmpsx_o_f32";
case CMPSX_U_F32: return "v_cmpsx_u_f32";
case CMPSX_NGE_F32: return "v_cmpsx_nge_f32";
case CMPSX_NLG_F32: return "v_cmpsx_nlg_f32";
case CMPSX_NGT_F32: return "v_cmpsx_ngt_f32";
case CMPSX_NLE_F32: return "v_cmpsx_nle_f32";
case CMPSX_NEQ_F32: return "v_cmpsx_neq_f32";
case CMPSX_NLT_F32: return "v_cmpsx_nlt_f32";
case CMPSX_TRU_F32: return "v_cmpsx_tru_f32";
case CMPS_F_F64: return "v_cmps_f_f64";
case CMPS_LT_F64: return "v_cmps_lt_f64";
case CMPS_EQ_F64: return "v_cmps_eq_f64";
case CMPS_LE_F64: return "v_cmps_le_f64";
case CMPS_GT_F64: return "v_cmps_gt_f64";
case CMPS_LG_F64: return "v_cmps_lg_f64";
case CMPS_GE_F64: return "v_cmps_ge_f64";
case CMPS_O_F64: return "v_cmps_o_f64";
case CMPS_U_F64: return "v_cmps_u_f64";
case CMPS_NGE_F64: return "v_cmps_nge_f64";
case CMPS_NLG_F64: return "v_cmps_nlg_f64";
case CMPS_NGT_F64: return "v_cmps_ngt_f64";
case CMPS_NLE_F64: return "v_cmps_nle_f64";
case CMPS_NEQ_F64: return "v_cmps_neq_f64";
case CMPS_NLT_F64: return "v_cmps_nlt_f64";
case CMPS_TRU_F64: return "v_cmps_tru_f64";
case CMPSX_F_F64: return "v_cmpsx_f_f64";
case CMPSX_LT_F64: return "v_cmpsx_lt_f64";
case CMPSX_EQ_F64: return "v_cmpsx_eq_f64";
case CMPSX_LE_F64: return "v_cmpsx_le_f64";
case CMPSX_GT_F64: return "v_cmpsx_gt_f64";
case CMPSX_LG_F64: return "v_cmpsx_lg_f64";
case CMPSX_GE_F64: return "v_cmpsx_ge_f64";
case CMPSX_O_F64: return "v_cmpsx_o_f64";
case CMPSX_U_F64: return "v_cmpsx_u_f64";
case CMPSX_NGE_F64: return "v_cmpsx_nge_f64";
case CMPSX_NLG_F64: return "v_cmpsx_nlg_f64";
case CMPSX_NGT_F64: return "v_cmpsx_ngt_f64";
case CMPSX_NLE_F64: return "v_cmpsx_nle_f64";
case CMPSX_NEQ_F64: return "v_cmpsx_neq_f64";
case CMPSX_NLT_F64: return "v_cmpsx_nlt_f64";
case CMPSX_TRU_F64: return "v_cmpsx_tru_f64";
case CMP_F_I32: return "v_cmp_f_i32";
case CMP_LT_I32: return "v_cmp_lt_i32";
case CMP_EQ_I32: return "v_cmp_eq_i32";
case CMP_LE_I32: return "v_cmp_le_i32";
case CMP_GT_I32: return "v_cmp_gt_i32";
case CMP_NE_I32: return "v_cmp_ne_i32";
case CMP_GE_I32: return "v_cmp_ge_i32";
case CMP_T_I32: return "v_cmp_t_i32";
case CMP_CLASS_F32: return "v_cmp_class_f32";
case CMP_LT_I16: return "v_cmp_lt_i16";
case CMP_EQ_I16: return "v_cmp_eq_i16";
case CMP_LE_I16: return "v_cmp_le_i16";
case CMP_GT_I16: return "v_cmp_gt_i16";
case CMP_NE_I16: return "v_cmp_ne_i16";
case CMP_GE_I16: return "v_cmp_ge_i16";
case CMP_CLASS_F16: return "v_cmp_class_f16";
case CMPX_F_I32: return "v_cmpx_f_i32";
case CMPX_LT_I32: return "v_cmpx_lt_i32";
case CMPX_EQ_I32: return "v_cmpx_eq_i32";
case CMPX_LE_I32: return "v_cmpx_le_i32";
case CMPX_GT_I32: return "v_cmpx_gt_i32";
case CMPX_NE_I32: return "v_cmpx_ne_i32";
case CMPX_GE_I32: return "v_cmpx_ge_i32";
case CMPX_T_I32: return "v_cmpx_t_i32";
case CMPX_CLASS_F32: return "v_cmpx_class_f32";
case CMPX_LT_I16: return "v_cmpx_lt_i16";
case CMPX_EQ_I16: return "v_cmpx_eq_i16";
case CMPX_LE_I16: return "v_cmpx_le_i16";
case CMPX_GT_I16: return "v_cmpx_gt_i16";
case CMPX_NE_I16: return "v_cmpx_ne_i16";
case CMPX_GE_I16: return "v_cmpx_ge_i16";
case CMPX_CLASS_F16: return "v_cmpx_class_f16";
case CMP_F_I64: return "v_cmp_f_i64";
case CMP_LT_I64: return "v_cmp_lt_i64";
case CMP_EQ_I64: return "v_cmp_eq_i64";
case CMP_LE_I64: return "v_cmp_le_i64";
case CMP_GT_I64: return "v_cmp_gt_i64";
case CMP_NE_I64: return "v_cmp_ne_i64";
case CMP_GE_I64: return "v_cmp_ge_i64";
case CMP_T_I64: return "v_cmp_t_i64";
case CMP_CLASS_F64: return "v_cmp_class_f64";
case CMP_LT_U16: return "v_cmp_lt_u16";
case CMP_EQ_U16: return "v_cmp_eq_u16";
case CMP_LE_U16: return "v_cmp_le_u16";
case CMP_GT_U16: return "v_cmp_gt_u16";
case CMP_NE_U16: return "v_cmp_ne_u16";
case CMP_GE_U16: return "v_cmp_ge_u16";
case CMPX_F_I64: return "v_cmpx_f_i64";
case CMPX_LT_I64: return "v_cmpx_lt_i64";
case CMPX_EQ_I64: return "v_cmpx_eq_i64";
case CMPX_LE_I64: return "v_cmpx_le_i64";
case CMPX_GT_I64: return "v_cmpx_gt_i64";
case CMPX_NE_I64: return "v_cmpx_ne_i64";
case CMPX_GE_I64: return "v_cmpx_ge_i64";
case CMPX_T_I64: return "v_cmpx_t_i64";
case CMPX_CLASS_F64: return "v_cmpx_class_f64";
case CMPX_LT_U16: return "v_cmpx_lt_u16";
case CMPX_EQ_U16: return "v_cmpx_eq_u16";
case CMPX_LE_U16: return "v_cmpx_le_u16";
case CMPX_GT_U16: return "v_cmpx_gt_u16";
case CMPX_NE_U16: return "v_cmpx_ne_u16";
case CMPX_GE_U16: return "v_cmpx_ge_u16";
case CMP_F_U32: return "v_cmp_f_u32";
case CMP_LT_U32: return "v_cmp_lt_u32";
case CMP_EQ_U32: return "v_cmp_eq_u32";
case CMP_LE_U32: return "v_cmp_le_u32";
case CMP_GT_U32: return "v_cmp_gt_u32";
case CMP_NE_U32: return "v_cmp_ne_u32";
case CMP_GE_U32: return "v_cmp_ge_u32";
case CMP_T_U32: return "v_cmp_t_u32";
case CMP_F_F16: return "v_cmp_f_f16";
case CMP_LT_F16: return "v_cmp_lt_f16";
case CMP_EQ_F16: return "v_cmp_eq_f16";
case CMP_LE_F16: return "v_cmp_le_f16";
case CMP_GT_F16: return "v_cmp_gt_f16";
case CMP_LG_F16: return "v_cmp_lg_f16";
case CMP_GE_F16: return "v_cmp_ge_f16";
case CMP_O_F16: return "v_cmp_o_f16";
case CMPX_F_U32: return "v_cmpx_f_u32";
case CMPX_LT_U32: return "v_cmpx_lt_u32";
case CMPX_EQ_U32: return "v_cmpx_eq_u32";
case CMPX_LE_U32: return "v_cmpx_le_u32";
case CMPX_GT_U32: return "v_cmpx_gt_u32";
case CMPX_NE_U32: return "v_cmpx_ne_u32";
case CMPX_GE_U32: return "v_cmpx_ge_u32";
case CMPX_T_U32: return "v_cmpx_t_u32";
case CMPX_F_F16: return "v_cmpx_f_f16";
case CMPX_LT_F16: return "v_cmpx_lt_f16";
case CMPX_EQ_F16: return "v_cmpx_eq_f16";
case CMPX_LE_F16: return "v_cmpx_le_f16";
case CMPX_GT_F16: return "v_cmpx_gt_f16";
case CMPX_LG_F16: return "v_cmpx_lg_f16";
case CMPX_GE_F16: return "v_cmpx_ge_f16";
case CMPX_O_F16: return "v_cmpx_o_f16";
case CMP_F_U64: return "v_cmp_f_u64";
case CMP_LT_U64: return "v_cmp_lt_u64";
case CMP_EQ_U64: return "v_cmp_eq_u64";
case CMP_LE_U64: return "v_cmp_le_u64";
case CMP_GT_U64: return "v_cmp_gt_u64";
case CMP_NE_U64: return "v_cmp_ne_u64";
case CMP_GE_U64: return "v_cmp_ge_u64";
case CMP_T_U64: return "v_cmp_t_u64";
case CMP_U_F16: return "v_cmp_u_f16";
case CMP_NGE_F16: return "v_cmp_nge_f16";
case CMP_NLG_F16: return "v_cmp_nlg_f16";
case CMP_NGT_F16: return "v_cmp_ngt_f16";
case CMP_NLE_F16: return "v_cmp_nle_f16";
case CMP_NEQ_F16: return "v_cmp_neq_f16";
case CMP_NLT_F16: return "v_cmp_nlt_f16";
case CMP_TRU_F16: return "v_cmp_tru_f16";
case CMPX_F_U64: return "v_cmpx_f_u64";
case CMPX_LT_U64: return "v_cmpx_lt_u64";
case CMPX_EQ_U64: return "v_cmpx_eq_u64";
case CMPX_LE_U64: return "v_cmpx_le_u64";
case CMPX_GT_U64: return "v_cmpx_gt_u64";
case CMPX_NE_U64: return "v_cmpx_ne_u64";
case CMPX_GE_U64: return "v_cmpx_ge_u64";
case CMPX_T_U64: return "v_cmpx_t_u64";
case CMPX_U_F16: return "v_cmpx_u_f16";
case CMPX_NGE_F16: return "v_cmpx_nge_f16";
case CMPX_NLG_F16: return "v_cmpx_nlg_f16";
case CMPX_NGT_F16: return "v_cmpx_ngt_f16";
case CMPX_NLE_F16: return "v_cmpx_nle_f16";
case CMPX_NEQ_F16: return "v_cmpx_neq_f16";
case CMPX_NLT_F16: return "v_cmpx_nlt_f16";
case CMPX_TRU_F16: return "v_cmpx_tru_f16";
}
return nullptr;
}
}

View file

@ -0,0 +1,92 @@
#pragma once
#include "Vector.hpp"
#include "ir/Value.hpp"

#include <array>
#include <cstdint>
#include <optional>
#include <span>
#include <variant>
namespace shader::eval {
// Tagged-union runtime value used by the shader constant evaluator.
// Holds any scalar/vector type the evaluator understands; variant index 0
// (std::nullptr_t) encodes "no value".
struct Value {
using Storage = std::variant<
std::nullptr_t, std::int8_t, std::int16_t, std::int32_t, std::int64_t,
std::uint8_t, std::uint16_t, std::uint32_t, std::uint64_t, float16_t,
float32_t, float64_t, u8vec2, u8vec3, u8vec4, i8vec2, i8vec3, i8vec4,
u16vec2, u16vec3, u16vec4, i16vec2, i16vec3, i16vec4, u32vec2, u32vec3,
u32vec4, i32vec2, i32vec3, i32vec4, u64vec2, u64vec3, u64vec4, i64vec2,
i64vec3, i64vec4, f32vec2, f32vec3, f32vec4, f64vec2, f64vec3, f64vec4,
f16vec2, f16vec3, f16vec4, bool, bvec2, bvec3, bvec4, std::array<uint32_t, 8>>;
// Number of alternatives in Storage (including the empty state).
static constexpr auto StorageSize = std::variant_size_v<Storage>;
Storage storage;
// True when the value is non-empty (opposite of empty()).
explicit operator bool() const { return !empty(); }
// Empty iff the variant currently holds std::nullptr_t (index 0).
bool empty() const { return storage.index() == 0; }
// Default-constructed values are empty.
Value() : storage(nullptr) {}
// Implicit construction from anything Storage can hold.
template <typename T>
Value(T &&value)
requires requires { Storage(std::forward<T>(value)); }
: storage(std::forward<T>(value)) {}
// Builds a composite of the given IR type from per-element values.
static Value compositeConstruct(ir::Value type,
std::span<const Value> constituents);
// Extracts the element selected by `index` from a composite value.
Value compositeExtract(const Value &index) const;
// Value compositeInsert(const Value &object, std::size_t index) const;
// Floating-point classification; results are boolean-valued.
Value isNan() const;
Value isInf() const;
Value isFinite() const;
// Reinterpret integer content with the opposite signedness.
Value makeUnsigned() const;
Value makeSigned() const;
// Boolean reductions over vector components.
Value all() const;
Value any() const;
// Component-wise select: this value acts as the condition.
Value select(const Value &trueValue, const Value &falseValue) const;
// Integer width conversion; `isSigned` picks sign- vs zero-extension.
Value iConvert(ir::Value type, bool isSigned) const;
Value sConvert(ir::Value type) const { return iConvert(type, true); }
Value uConvert(ir::Value type) const { return iConvert(type, false); }
// Floating-point conversion to the given IR type.
Value fConvert(ir::Value type) const;
// Bit-pattern reinterpretation to the given IR type.
Value bitcast(ir::Value type) const;
// Scalar extraction widened to 64 bits; nullopt when not a scalar
// (presumably — TODO confirm against the implementation).
std::optional<std::uint64_t> zExtScalar() const;
std::optional<std::int64_t> sExtScalar() const;
// Unchecked access: throws std::bad_variant_access on type mismatch.
template <typename T>
requires requires { std::get<T>(storage); }
T get() const {
return std::get<T>(storage);
}
// Checked access: nullopt when the stored type is not T.
template <typename T>
requires requires { std::get<T>(storage); }
std::optional<T> as() const {
if (auto result = std::get_if<T>(&storage)) {
return *result;
}
return std::nullopt;
}
// Component-wise arithmetic/logic operators, implemented out of line.
Value operator+(const Value &rhs) const;
Value operator-(const Value &rhs) const;
Value operator*(const Value &rhs) const;
Value operator/(const Value &rhs) const;
Value operator%(const Value &rhs) const;
Value operator&(const Value &rhs) const;
Value operator|(const Value &rhs) const;
Value operator^(const Value &rhs) const;
Value operator>>(const Value &rhs) const;
Value operator<<(const Value &rhs) const;
Value operator&&(const Value &rhs) const;
Value operator||(const Value &rhs) const;
Value operator<(const Value &rhs) const;
Value operator>(const Value &rhs) const;
Value operator<=(const Value &rhs) const;
Value operator>=(const Value &rhs) const;
Value operator==(const Value &rhs) const;
Value operator!=(const Value &rhs) const;
Value operator-() const;
Value operator~() const;
Value operator!() const;
};
} // namespace shader::eval

View file

@ -0,0 +1,125 @@
#pragma once
#include "SemanticInfo.hpp"
#include "SpvConverter.hpp"
#include "analyze.hpp"
#include "rx/MemoryTable.hpp"
#include "spv.hpp"
#include <cstdint>
#include <functional>
namespace shader::gcn {
// Builder flavor used by the GCN frontend: SPIR-V ops plus builtin helpers.
using Builder = ir::Builder<ir::spv::Builder, ir::builtin::Builder>;
// Hardware shader stage of the program being translated. Combined names
// presumably denote the logical/hardware stage pairing (e.g. VsEs = vertex
// shader running on the ES hardware stage) — TODO confirm.
enum class Stage {
Ps,
VsVs,
VsEs,
VsLs,
Cs,
Gs,
GsVs,
Hs,
DsVs,
DsEs,
Invalid,
};
// Import hook that clones nodes from the semantic module into a target
// context (see spv::Import for the cloning protocol).
struct Import : spv::Import {
ir::Node getOrCloneImpl(ir::Context &context, ir::Node node,
bool isOperand) override;
};
// Semantic-module metadata extended with the module's register variables,
// keyed by register id.
struct SemanticModuleInfo : shader::SemanticModuleInfo {
std::map<int, ir::Value> registerVariables;
};
void canonicalizeSemantic(ir::Context &context,
const spv::BinaryLayout &semantic);
void collectSemanticModuleInfo(SemanticModuleInfo &moduleInfo,
const spv::BinaryLayout &layout);
SemanticInfo collectSemanticInfo(const SemanticModuleInfo &moduleInfo);
// Region wrapper that records the first instruction ever inserted into it.
// NOTE(review): `firstInstruction` is dereferenced unconditionally — the
// owner must point it at a valid (initially null) Instruction slot before
// any insertAfter call; confirm all construction sites do this.
struct InstructionRegion : ir::RegionLikeImpl {
ir::RegionLike base;
ir::Instruction *firstInstruction;
void insertAfter(ir::Instruction point, ir::Instruction node) {
if (!*firstInstruction) {
*firstInstruction = node;
}
base.insertAfter(point, node);
}
};
// Identifiers for the GCN machine registers / execution state modeled as
// variables during translation.
enum RegId {
Sgpr,
Vgpr,
M0,
Scc,
Vcc,
Exec,
VccZ,
ExecZ,
LdsDirect,
SgprCount,
VgprCount,
ThreadId,
MemoryTable,
Gds,
};
// Translation context for one GCN shader: tracks the decoded body, the
// guest memory map, register-variable state and per-address instruction
// labels/blocks.
struct Context : spv::Context {
ir::Region body;
rx::MemoryAreaTable<> memoryMap;
std::uint32_t requiredUserSgprs = 0;
std::map<RegId, ir::Value> registerVariables;
// Decoded instructions keyed by guest address.
std::map<std::uint64_t, ir::Instruction> instructions;
AnalysisStorage analysis;
// Returns the label for `address` and whether it was newly created.
std::pair<ir::Value, bool> getOrCreateLabel(ir::Location loc, ir::Region body,
std::uint64_t address);
Builder createBuilder(InstructionRegion &region, ir::Region bodyRegion,
std::uint64_t address);
ir::Value createCast(ir::Location loc, Builder &builder, ir::Value targetType,
ir::Value value);
void setRegisterVariable(RegId id, ir::Value value) {
registerVariables[id] = value;
}
ir::Value getOrCreateRegisterVariable(RegId id);
// Register access helpers; `lane` selects a lane for per-lane registers
// (presumably — TODO confirm).
ir::Value getRegisterRef(ir::Location loc, Builder &builder, RegId id,
const ir::Operand &index, ir::Value lane = nullptr);
ir::Value readReg(ir::Location loc, Builder &builder, ir::Value typeValue,
RegId id, const ir::Operand &index,
ir::Value lane = nullptr);
void writeReg(ir::Location loc, Builder &builder, RegId id,
const ir::Operand &index, ir::Value value,
ir::Value lane = nullptr);
ir::Value createRegisterAccess(Builder &builder, ir::Location loc,
ir::Value reg, const ir::Operand &index,
ir::Value lane = nullptr);
};
// Host/GPU capabilities and initial register state for a translation run.
struct Environment {
std::uint8_t vgprCount;
std::uint8_t sgprCount;
std::span<const std::uint32_t> userSgprs;
bool supportsBarycentric = true;
bool supportsInt8 = false;
bool supportsInt64Atomics = false;
};
// Decodes GCN machine code starting at `base`, reading guest memory one
// 32-bit word at a time through `readMemory`, and returns the resulting IR
// region.
ir::Region deserialize(Context &context, const Environment &environment,
const SemanticInfo &semanticInfo, std::uint64_t base,
std::function<std::uint32_t(std::uint64_t)> readMemory);
} // namespace shader::gcn

View file

@ -0,0 +1,31 @@
#pragma once
#include "ir/Location.hpp"
#include "spv.hpp"
#include <filesystem>
namespace shader::glsl {
// Pipeline stage the GLSL source is compiled as. `Library` presumably marks
// a linkable module rather than a standalone entry point — TODO confirm.
enum class Stage {
Library,
Vertex,
TessControl,
TessEvaluation,
Geometry,
Fragment,
Compute,
RayGen,
Intersect,
AnyHit,
ClosestHit,
Miss,
Callable,
Task,
Mesh,
};
// Compiles a GLSL file / in-memory source of the given stage into a SPIR-V
// binary layout; returns std::nullopt on failure (presumably a parse or
// compile error — TODO confirm).
std::optional<spv::BinaryLayout> parseFile(ir::Context &context, Stage stage,
const std::filesystem::path &path);
std::optional<spv::BinaryLayout> parseSource(ir::Context &context, Stage stage,
std::string_view source,
ir::Location loc = nullptr);
// Converts a SPIR-V word stream back to human-readable source text
// (debugging aid).
std::string decompile(std::span<const std::uint32_t> spv);
} // namespace shader::glsl

View file

@ -0,0 +1,320 @@
#pragma once
#include <map>
#include <vector>
namespace graph {
// Dominator tree over basic blocks identified by pointer-like handles.
// Query helpers (dominatedBy / dominates) rely on DFS interval numbering,
// so updateDFSNumbers() must run after the tree shape is final.
template <typename BasicBlockPtrT> class DomTree {
public:
struct Node {
BasicBlockPtrT block = nullptr;
// Immediate dominator; nullptr only for the root.
Node *immDom = nullptr;
// DFS entry/exit numbers: a node's [in, out] interval contains the
// intervals of everything it dominates.
unsigned dfsNumIn = ~0;
unsigned dfsNumOut = ~0;
// Depth in the dominator tree (root = 0).
unsigned level = 0;
std::vector<Node *> children;
bool isLeaf() const { return children.empty(); }
// Interval containment test; valid only after updateDFSNumbers().
bool dominatedBy(const Node *other) const {
return this->dfsNumIn >= other->dfsNumIn &&
this->dfsNumOut <= other->dfsNumOut;
}
};
private:
std::map<BasicBlockPtrT, Node> bbToNodes;
Node *rootNode = nullptr;
public:
// Returns the node for `bb`, or nullptr if the block is not in the tree.
Node *getNode(BasicBlockPtrT bb) {
auto it = bbToNodes.find(bb);
if (it != bbToNodes.end()) {
return &it->second;
}
return nullptr;
}
// Inserts `bb` as a child of `parent` in the dominator tree.
Node *createChild(BasicBlockPtrT bb, Node *parent) {
auto &child = bbToNodes[bb];
child.block = bb;
child.immDom = parent;
child.level = parent->level + 1;
parent->children.push_back(&child);
return &child;
}
Node *createRoot(BasicBlockPtrT bb) {
auto &root = bbToNodes[bb];
rootNode = &root;
root.block = bb;
return rootNode;
}
Node *getRootNode() { return rootNode; }
// Iterative pre/post-order walk assigning dfsNumIn/dfsNumOut so that
// dominance can be answered by interval containment.
void updateDFSNumbers() {
std::vector<std::pair<Node *, typename std::vector<Node *>::iterator>>
workStack;
auto root = getRootNode();
if (!root)
return;
workStack.push_back({root, root->children.begin()});
unsigned dfsNum = 0;
root->dfsNumIn = dfsNum++;
while (!workStack.empty()) {
auto node = workStack.back().first;
const auto childIt = workStack.back().second;
if (childIt == node->children.end()) {
// All children visited: close this node's interval.
node->dfsNumOut = dfsNum++;
workStack.pop_back();
} else {
auto child = *childIt;
++workStack.back().second;
workStack.push_back({child, child->children.begin()});
child->dfsNumIn = dfsNum++;
}
}
}
// True iff `a` dominates `b` (reflexive: a node dominates itself).
// Fast paths avoid the interval test for trivial relations.
bool dominates(Node *a, Node *b) {
if (a == b || b->immDom == a) {
return true;
}
if (a->immDom == b || a->level >= b->level) {
// A node strictly deeper than (or as deep as) b cannot dominate it.
return false;
}
return b->dominatedBy(a);
}
bool dominates(BasicBlockPtrT a, BasicBlockPtrT b) {
return dominates(getNode(a), getNode(b));
}
// Immediate dominator of `a`, or a default-constructed handle for the root.
BasicBlockPtrT getImmediateDominator(BasicBlockPtrT a) {
auto immDom = getNode(a)->immDom;
if (immDom) {
return immDom->block;
}
return{};
}
bool isImmediateDominator(BasicBlockPtrT block, BasicBlockPtrT immDomBlock) {
if (immDomBlock == nullptr) {
return false;
}
return getImmediateDominator(immDomBlock) == block;
}
// Nearest common dominator: repeatedly climb whichever node is deeper
// until the paths meet.
BasicBlockPtrT findNearestCommonDominator(BasicBlockPtrT a,
BasicBlockPtrT b) {
auto aNode = getNode(a);
auto bNode = getNode(b);
if (aNode == rootNode || bNode == rootNode) {
return rootNode->block;
}
while (aNode != bNode) {
if (aNode->level < bNode->level) {
std::swap(aNode, bNode);
}
aNode = aNode->immDom;
}
return aNode->block;
}
};
// Dominator tree construction following the Semi-NCA scheme (DFS + semi-
// dominators with path-compressing eval, then an NCA fix-up pass); the
// structure mirrors the classic Lengauer–Tarjan/Semi-NCA formulation.
template <typename BasicBlockPtrT> class DomTreeBuilder {
using DomTreeNode = typename DomTree<BasicBlockPtrT>::Node;
// Per-block bookkeeping; all of parent/semi/label are DFS indices or
// forest labels in the sense of the Semi-NCA algorithm.
struct NodeInfo {
unsigned dfsNum = 0;
unsigned parent = 0;
unsigned semi = 0;
BasicBlockPtrT label = nullptr;
BasicBlockPtrT immDom = nullptr;
// CFG predecessors of this block (reverse edges collected during DFS).
std::vector<BasicBlockPtrT> revChildren;
};
// DFS index -> block; index 0 is a sentinel, numbering starts at 1.
std::vector<BasicBlockPtrT> indexToNode = {nullptr};
std::map<BasicBlockPtrT, NodeInfo> nodeToInfo;
// Iterative DFS from `root`; assigns DFS numbers and records reverse
// edges. `walk(bb, cb)` must invoke cb for each CFG successor of bb.
template <typename WalkFn>
void runDFS(BasicBlockPtrT root, const WalkFn &walk) {
std::vector<BasicBlockPtrT> workList;
workList.reserve(10);
workList.push_back(root);
unsigned index = 0;
while (!workList.empty()) {
auto bb = workList.back();
workList.pop_back();
auto &bbInfo = nodeToInfo[bb];
if (bbInfo.dfsNum != 0) {
continue;
}
bbInfo.dfsNum = bbInfo.semi = ++index;
bbInfo.label = bb;
indexToNode.push_back(bb);
walk(bb, [&](BasicBlockPtrT successor) {
auto it = nodeToInfo.find(successor);
if (it != nodeToInfo.end() && it->second.dfsNum != 0) {
// Already numbered: just record the reverse edge (no self loops).
if (successor != bb) {
it->second.revChildren.push_back(bb);
}
return;
}
auto &succInfo = nodeToInfo[successor];
workList.push_back(successor);
succInfo.parent = index;
succInfo.revChildren.push_back(bb);
});
}
}
// Core Semi-NCA passes: compute semi-dominators (reverse DFS order), then
// hoist each tentative idom up to its semi-dominator's depth.
void runSemiNCA() {
const unsigned nextDFS = indexToNode.size();
for (unsigned i = 1; i < nextDFS; ++i) {
const BasicBlockPtrT node = indexToNode[i];
// NOTE(review): this local shadows the NodeInfo type — consider renaming.
auto &NodeInfo = nodeToInfo[node];
// Initial guess: immediate dominator = DFS parent.
NodeInfo.immDom = indexToNode[NodeInfo.parent];
}
std::vector<NodeInfo *> evalStack;
evalStack.reserve(10);
// Semi-dominator pass, bottom-up over DFS numbers (root excluded).
for (unsigned i = nextDFS - 1; i >= 2; --i) {
BasicBlockPtrT node = indexToNode[i];
auto &nodeInfo = nodeToInfo[node];
nodeInfo.semi = nodeInfo.parent;
for (const auto &child : nodeInfo.revChildren) {
// Skip predecessors that are unreachable from the root.
if (!nodeToInfo.contains(child)) {
continue;
}
unsigned childSemi = nodeToInfo[eval(child, i + 1, evalStack)].semi;
if (childSemi < nodeInfo.semi) {
nodeInfo.semi = childSemi;
}
}
}
// NCA pass: walk each tentative idom chain up until it is no deeper
// than the node's semi-dominator.
for (unsigned i = 2; i < nextDFS; ++i) {
const BasicBlockPtrT node = indexToNode[i];
auto &nodeInfo = nodeToInfo[node];
const unsigned sDomNum = nodeToInfo[indexToNode[nodeInfo.semi]].dfsNum;
BasicBlockPtrT immDom = nodeInfo.immDom;
while (nodeToInfo[immDom].dfsNum > sDomNum) {
immDom = nodeToInfo[immDom].immDom;
}
nodeInfo.immDom = immDom;
}
}
// Forest "eval" with path compression: returns the minimum-semi label on
// the path from `block` to its forest root among nodes linked at or after
// `LastLinked`.
BasicBlockPtrT eval(BasicBlockPtrT block, unsigned LastLinked,
std::vector<NodeInfo *> &stack) {
NodeInfo *blockInfo = &nodeToInfo[block];
if (blockInfo->parent < LastLinked)
return blockInfo->label;
// Collect the un-compressed path.
do {
stack.push_back(blockInfo);
blockInfo = &nodeToInfo[indexToNode[blockInfo->parent]];
} while (blockInfo->parent >= LastLinked);
// Compress: propagate the best (smallest-semi) label down the path.
const NodeInfo *pInfo = blockInfo;
const NodeInfo *pLabelInfo = &nodeToInfo[pInfo->label];
do {
blockInfo = stack.back();
stack.pop_back();
blockInfo->parent = pInfo->parent;
const NodeInfo *labelInfo = &nodeToInfo[blockInfo->label];
if (pLabelInfo->semi < labelInfo->semi) {
blockInfo->label = pInfo->label;
} else {
pLabelInfo = labelInfo;
}
pInfo = blockInfo;
} while (!stack.empty());
return blockInfo->label;
}
// Lazily materializes the DomTree node for BB, creating its ancestors
// (via idoms) first.
DomTreeNode *getNodeForBlock(BasicBlockPtrT BB, DomTree<BasicBlockPtrT> &DT) {
if (auto Node = DT.getNode(BB))
return Node;
BasicBlockPtrT IDom = getIDom(BB);
auto IDomNode = getNodeForBlock(IDom, DT);
return DT.createChild(BB, IDomNode);
}
BasicBlockPtrT getIDom(BasicBlockPtrT BB) const {
auto InfoIt = nodeToInfo.find(BB);
if (InfoIt == nodeToInfo.end())
return nullptr;
return InfoIt->second.immDom;
}
public:
// Builds the dominator tree rooted at `root`. `walkSuccessors(bb, cb)`
// must call cb once per CFG successor of bb. DFS numbers on the result
// are already computed.
template <typename WalkFn>
DomTree<BasicBlockPtrT> build(BasicBlockPtrT root,
const WalkFn &walkSuccessors) {
runDFS(root, walkSuccessors);
runSemiNCA();
DomTree<BasicBlockPtrT> domTree;
domTree.createRoot(root);
nodeToInfo[indexToNode[1]].immDom = root;
for (size_t i = 1, e = indexToNode.size(); i != e; ++i) {
BasicBlockPtrT node = indexToNode[i];
if (domTree.getNode(node))
continue;
BasicBlockPtrT immDom = getIDom(node);
auto immDomNode = getNodeForBlock(immDom, domTree);
domTree.createChild(node, immDomNode);
}
domTree.updateDFSNumbers();
return domTree;
}
};
/// Convenience wrapper: constructs a dominator tree rooted at `root` using a
/// throwaway DomTreeBuilder. `walkSuccessors(bb, cb)` must invoke `cb` once
/// per CFG successor of `bb` (enforced by the constraint below).
template <typename BasicBlockPtrT>
DomTree<BasicBlockPtrT> buildDomTree(BasicBlockPtrT root, auto &&walkSuccessors)
  requires requires(void (*cb)(BasicBlockPtrT)) { walkSuccessors(root, cb); }
{
  DomTreeBuilder<BasicBlockPtrT> builder;
  return builder.build(root, walkSuccessors);
}

View file

@ -0,0 +1,14 @@
#pragma once
#include "ir/Context.hpp" // IWYU pragma: export
#include "ir/Instruction.hpp" // IWYU pragma: export
#include "ir/Location.hpp" // IWYU pragma: export
#include "ir/Node.hpp" // IWYU pragma: export
#include "ir/Operand.hpp" // IWYU pragma: export
#include "ir/PointerWrapper.hpp" // IWYU pragma: export
#include "ir/PrintableWrapper.hpp" // IWYU pragma: export
#include "ir/Value.hpp" // IWYU pragma: export
#include "ir/Builder.hpp" // IWYU pragma: export
#include "ir/Region.hpp" // IWYU pragma: export
#include "ir/OperandPrint.hpp" // IWYU pragma: export
#include "ir/Impl.hpp" // IWYU pragma: export

View file

@ -0,0 +1,52 @@
#pragma once
#include "RegionLike.hpp"
#include "RegionLikeImpl.hpp"
#include "ValueImpl.hpp"
namespace shader::ir {
// Wrapper mixin combining region-like behavior with value semantics; the
// base for Block handles below.
template <typename ImplT>
struct BlockWrapper : RegionLikeWrapper<ImplT, ValueWrapper> {
using RegionLikeWrapper<ImplT, ValueWrapper>::RegionLikeWrapper;
using RegionLikeWrapper<ImplT, ValueWrapper>::operator=;
};
struct BlockImpl;
// Non-owning handle to a BlockImpl node.
struct Block : BlockWrapper<BlockImpl> {
using BlockWrapper<BlockImpl>::BlockWrapper;
using BlockWrapper<BlockImpl>::operator=;
};
// A block node: an IR value that also contains child instructions.
struct BlockImpl : ValueImpl, RegionLikeImpl {
BlockImpl(Location loc);
Node clone(Context &context, CloneMap &map) const override;
// Prints "%name = [operands...] { children }"; the operand list is
// omitted when empty.
void print(std::ostream &os, NameStorage &ns) const override {
os << '%' << ns.getNameOf(const_cast<BlockImpl *>(this));
os << " = ";
if (!getOperands().empty()) {
os << '[';
for (bool first = true; auto &operand : getOperands()) {
if (first) {
first = false;
} else {
os << ", ";
}
operand.print(os, ns);
}
os << "] ";
}
os << "{\n";
for (auto child : children()) {
os << "  ";
child.print(os, ns);
os << "\n";
}
os << "}";
}
};
} // namespace shader::ir

View file

@ -0,0 +1,84 @@
#pragma once
#include "Context.hpp"
#include "Node.hpp"
#include "RegionLikeImpl.hpp"
namespace shader::ir {
// CRTP facade mixed into builder interface extensions: forwards context
// access and node creation to the concrete builder type, so opcode-specific
// mixins can define typed create helpers without knowing the builder.
template <typename BuilderT, typename ImplT> struct BuilderFacade {
ImplT &instance() {
return *static_cast<ImplT *>(static_cast<BuilderT *>(this));
}
Context &getContext() { return instance().getContext(); }
// NOTE(review): declared to return Node while Builder::getInsertionStorage
// returns RegionLike — confirm this mismatch is intentional.
Node getInsertionStorage() { return instance().getInsertionStorage(); }
// Forwards to the builder's create<T>; T must wrap a NodeImpl-derived
// implementation constructible from ArgsT.
template <typename T, typename... ArgsT>
requires requires {
typename T::underlying_type;
requires std::is_constructible_v<typename T::underlying_type, ArgsT...>;
requires std::is_base_of_v<NodeImpl, typename T::underlying_type>;
}
T create(ArgsT &&...args) {
return instance().template create<T>(std::forward<ArgsT>(args)...);
}
};
/// IR instruction builder with a configurable insertion cursor.
///
/// `InterfaceTs` are CRTP mixins (see BuilderFacade) that contribute typed
/// creation helpers on top of the generic create<T>() below. The builder
/// tracks an insertion storage (a region-like container) and an insertion
/// point inside it; each created node is inserted right after the point and
/// the point then advances, so consecutive creations append in order.
template <template <typename> typename... InterfaceTs>
class Builder : public InterfaceTs<Builder<InterfaceTs...>>... {
  Context *mContext{};
  RegionLike mInsertionStorage;
  // Node after which the next creation is inserted; null means "at front".
  Instruction mInsertionPoint;

public:
  Builder() = default;
  Builder(Context &context) : mContext(&context) {}

  /// Cursor positioned immediately after `point`.
  static Builder createInsertAfter(Context &context, Instruction point) {
    auto result = Builder(context);
    result.mInsertionStorage = point.getParent();
    result.mInsertionPoint = point;
    return result;
  }

  /// Cursor positioned immediately before `point`: the insertion point is
  /// point's predecessor (null predecessor degenerates to a prepend).
  static Builder createInsertBefore(Context &context, Instruction point) {
    auto result = Builder(context);
    result.mInsertionStorage = point.getParent();
    result.mInsertionPoint = point.getPrev().cast<Instruction>();
    return result;
  }

  /// Cursor positioned after the last instruction of `storage` (append).
  static Builder createAppend(Context &context, RegionLike storage) {
    auto result = Builder(context);
    result.mInsertionStorage = storage;
    result.mInsertionPoint = storage.getLast().cast<Instruction>();
    return result;
  }

  /// Cursor positioned before the first instruction of `storage`: a null
  /// insertion point makes insertAfter place nodes at the front.
  static Builder createPrepend(Context &context, RegionLike storage) {
    auto result = Builder(context);
    result.mInsertionStorage = storage;
    result.mInsertionPoint = nullptr;
    return result;
  }

  Context &getContext() { return *mContext; }
  RegionLike getInsertionStorage() { return mInsertionStorage; }
  Instruction getInsertionPoint() { return mInsertionPoint; }
  void setInsertionPoint(Instruction inst) { mInsertionPoint = inst; }

  /// Creates a node of wrapper type `T` in the context, inserts it at the
  /// cursor, and, when the node converts to an Instruction, advances the
  /// cursor past it so subsequent creations keep program order.
  template <typename T, typename... ArgsT>
    requires requires {
      typename T::underlying_type;
      requires std::is_constructible_v<typename T::underlying_type, ArgsT...>;
      requires std::is_base_of_v<NodeImpl, typename T::underlying_type>;
    }
  T create(ArgsT &&...args) {
    auto result = getContext().template create<T>(std::forward<ArgsT>(args)...);
    // (removed: unused local alias `using InstanceType = typename
    // T::underlying_type;` — dead code left over from an earlier revision)
    getInsertionStorage().insertAfter(getInsertionPoint(), result);
    if constexpr (requires { mInsertionPoint = Instruction(result); }) {
      mInsertionPoint = Instruction(result);
    }
    return result;
  }
};
} // namespace ir

View file

@ -0,0 +1,84 @@
#pragma once
#include "Location.hpp"
#include "NodeImpl.hpp"
#include "Operand.hpp"
#include <forward_list>
#include <memory>
#include <set>
#include <type_traits>
#include <utility>
namespace shader::ir {
// Orders smart pointers by their pointees; used to deduplicate interned
// locations held by unique_ptr.
struct UniqPtrCompare {
  static bool operator()(const auto &lhs, const auto &rhs)
    requires requires { *lhs <=> *rhs; }
  {
    return (*lhs <=> *rhs) < 0;
  }
};
// Owns every IR node and interns source locations.
//
// Nodes allocated through create<>() live for the lifetime of the context.
// Locations are deduplicated: getLocation() keeps exactly one
// implementation per distinct location value (ordered by pointee via
// UniqPtrCompare), so equal locations share a pointer.
class Context {
  std::forward_list<std::unique_ptr<NodeImpl>> mNodes;
  std::set<std::unique_ptr<LocationImpl>, UniqPtrCompare> mLocations;
  std::unique_ptr<UnknownLocationImpl> mUnknownLocation;

public:
  Context() = default;
  Context(const Context &) = delete;
  Context(Context &&) = default;
  Context &operator=(Context &&) = default;

  // Allocates a node whose wrapper type is T; ownership stays with the
  // context, the returned handle is non-owning.
  template <typename T, typename... ArgsT>
    requires requires {
      typename T::underlying_type;
      requires std::is_constructible_v<typename T::underlying_type, ArgsT...>;
      requires std::is_base_of_v<NodeImpl, typename T::underlying_type>;
    }
  T create(ArgsT &&...args) {
    auto result = new typename T::underlying_type(std::forward<ArgsT>(args)...);
    mNodes.emplace_front(std::unique_ptr<NodeImpl>{result});
    return T(result);
  }

  // Returns the interned location equal to the one constructed from `args`;
  // a freshly built duplicate is discarded by the set insert.
  template <typename T, typename... ArgsT>
    requires requires {
      typename T::underlying_type;
      requires std::is_constructible_v<typename T::underlying_type, ArgsT...>;
      requires std::is_base_of_v<LocationImpl, typename T::underlying_type>;
    }
  T getLocation(ArgsT &&...args) {
    auto result = std::make_unique<typename T::underlying_type>(
        std::forward<ArgsT>(args)...);
    auto ptr = mLocations.insert(std::move(result)).first->get();
    return T(static_cast<typename T::underlying_type *>(ptr));
  }

  PathLocation getPathLocation(std::string path) {
    return getLocation<PathLocation>(std::move(path));
  }
  TextFileLocation getTextFileLocation(PathLocation location,
                                       std::uint64_t line,
                                       std::uint64_t column = 0) {
    return getLocation<TextFileLocation>(location, line, column);
  }
  TextFileLocation getTextFileLocation(std::string path, std::uint64_t line,
                                       std::uint64_t column = 0) {
    // `path` is a sink parameter: move it through instead of copying.
    return getLocation<TextFileLocation>(getPathLocation(std::move(path)),
                                         line, column);
  }
  OffsetLocation getOffsetLocation(Location baseLocation,
                                   std::uint64_t offset) {
    return getLocation<OffsetLocation>(baseLocation, offset);
  }
  MemoryLocation getMemoryLocation(std::uint64_t address, std::uint64_t size) {
    return getLocation<MemoryLocation>(address, size);
  }
  // Lazily created singleton "unknown" location.
  UnknownLocation getUnknownLocation() {
    if (mUnknownLocation == nullptr) {
      mUnknownLocation = std::make_unique<UnknownLocationImpl>();
    }
    return mUnknownLocation.get();
  }
};
} // namespace shader::ir

View file

@ -0,0 +1,361 @@
#pragma once
#include "../dialect/builtin.hpp"
#include "../dialect/memssa.hpp"
#include "Block.hpp"
#include "Context.hpp"
#include "InstructionImpl.hpp"
#include "NodeImpl.hpp"
#include "RegionImpl.hpp"
#include "ValueImpl.hpp"
namespace shader::ir {
// Appends an operand; if it refers to a Value, registers this instruction
// as a user of that value at the index the operand will occupy.
inline void InstructionImpl::addOperand(Operand operand) {
if (operand != nullptr) {
if (auto value = operand.getAsValue()) {
// operands.size() is the index after the pending append.
value.get()->addUse(this, operands.size());
}
}
operands.addOperand(std::move(operand));
}
// Replaces the operand at `index`, keeping value use lists consistent, and
// returns the previous operand. Aborts on out-of-range access.
inline Operand InstructionImpl::replaceOperand(int index, Operand operand) {
if (operands.size() <= unsigned(index)) {
std::abort();
}
if (!operands[index].isNull()) {
// Drop this instruction from the old value's use list.
if (auto value = operands[index].getAsValue()) {
value.get()->removeUse(this, index);
}
}
// Register the use of the incoming value, if any.
if (auto value = operand.getAsValue()) {
value.get()->addUse(this, index);
}
return std::exchange(operands[index], std::move(operand));
}
// Removes `count` operands starting at `index` and returns the first
// removed one. Goes through replaceOperand for every slot so value use
// lists stay consistent while the tail is shifted left.
inline Operand InstructionImpl::eraseOperand(int index, int count) {
// Fast path: the erased range is the tail, nothing needs to shift.
if (index + count == operands.size()) {
auto result = replaceOperand(index, nullptr);
for (int i = 1; i < count; ++i) {
replaceOperand(i + index, nullptr);
}
operands.resize(operands.size() - count);
return result;
}
// Save the first erased operand, null out the erased range, then shift the
// remaining tail left by `count`.
auto result = replaceOperand(index, replaceOperand(index + 1, nullptr));
for (int i = 1; i < count; ++i) {
replaceOperand(index + i, nullptr);
}
for (int i = index + 1; i < operands.size() - count; ++i) {
replaceOperand(i, replaceOperand(i + count, nullptr));
}
operands.resize(operands.size() - count);
return result;
}
// Detaches the instruction from the IR entirely: drops all of its operand
// uses, clears the operand list and unlinks it from its parent region.
// Aborts if this instruction is a Value that still has users.
inline void InstructionImpl::remove() {
if (auto value = Instruction(this).cast<Value>()) {
if (!value.isUnused()) {
std::abort();
}
}
for (int index = 0; auto &operand : operands) {
if (auto value = operand.getAsValue()) {
value.get()->removeUse(this, index);
}
index++;
}
operands.clear();
if (parent != nullptr) {
erase();
}
}
// Unlinks the instruction from its parent's intrusive list, fixing up the
// neighbours' prev/next and the parent's first/last pointers. Operand use
// lists are untouched (see remove() for full detachment).
inline void InstructionImpl::erase() {
assert(parent != nullptr);
if (prev != nullptr) {
prev.get()->next = next;
} else {
// This was the first instruction.
parent.get()->first = next;
}
if (next != nullptr) {
next.get()->prev = prev;
} else {
// This was the last instruction.
parent.get()->last = prev;
}
prev = nullptr;
next = nullptr;
parent = nullptr;
}
// Moves every instruction from `other` into this region, preserving order.
template <typename ImplT, template <typename> typename BaseWrapper>
void RegionLikeWrapper<ImplT, BaseWrapper>::appendRegion(RegionLike other) {
for (auto child = other.getFirst(); child != nullptr;) {
// Advance before erasing: erase() clears the node's next pointer.
auto node = child;
child = child.getNext();
node.erase();
this->addChild(node);
}
}
// Inserts detached `node` immediately after `point`; a null `point` means
// "insert at the front". `node` must not already be linked anywhere.
inline void RegionLikeImpl::insertAfter(Instruction point, Instruction node) {
assert(point == nullptr || point.getParent() == this);
assert(node.getParent() == nullptr);
assert(node.getPrev() == nullptr);
assert(node.getNext() == nullptr);
if (point == nullptr) {
prependChild(node);
return;
}
assert(first != nullptr);
assert(last != nullptr);
node.get()->parent = this;
node.get()->prev = point.get();
if (auto pointNext = point.getNext()) {
pointNext.get()->prev = node.get();
node.get()->next = pointNext.get();
} else {
// `point` was the tail; `node` becomes the new tail.
assert(last == point);
last = node.get();
}
point.get()->next = node.get();
}
// Links detached `node` as the first instruction of this region.
inline void RegionLikeImpl::prependChild(Instruction node) {
assert(node.getParent() == nullptr);
assert(node.getPrev() == nullptr);
assert(node.getNext() == nullptr);
node.get()->parent = this;
if (last == nullptr) {
// Region was empty; node is also the tail.
last = node;
} else {
first.get()->prev = node;
node.get()->next = first;
}
first = node;
}
// Links detached `node` as the last instruction of this region.
inline void RegionLikeImpl::addChild(Instruction node) {
assert(node.getParent() == nullptr);
assert(node.getPrev() == nullptr);
assert(node.getNext() == nullptr);
node.get()->parent = this;
if (first == nullptr) {
// Region was empty; node is also the head.
first = node;
} else {
last.get()->next = node;
node.get()->prev = last;
}
last = node;
}
// Prints the region body: each child instruction on its own indented line,
// wrapped in braces.
inline void RegionImpl::print(std::ostream &os, NameStorage &ns) const {
os << "{\n";
for (auto child : children()) {
os << " ";
child.print(os, ns);
os << "\n";
}
os << "}";
}
// Returns the operand as a Value handle, or a null handle when the operand
// holds anything other than a ValueImpl pointer.
inline Value Operand::getAsValue() const {
  auto ptr = std::get_if<ValueImpl *>(&value);
  if (ptr == nullptr) {
    return {};
  }
  return Value(*ptr);
}
// Clones `object` through `map`, which memoizes already-cloned nodes so
// shared subgraphs stay shared in the copy.
template <typename T>
T clone(T object, Context &context, CloneMap &map, bool isOperand = false)
requires requires {
map.getOrClone(context, object, isOperand).template staticCast<T>();
}
{
return map.getOrClone(context, object, isOperand).template staticCast<T>();
}
// Convenience overload: clone with a fresh, empty clone map.
template <typename T>
T clone(T object, Context &context)
requires requires(CloneMap map) { clone(object, context, map); }
{
CloneMap map;
return clone(object, context, map);
}
// Location overload: locations are interned per context, so cloning goes
// through LocationImpl::clone instead of the node clone map.
template <typename T>
T clone(T location, Context &context)
requires requires { Location(location).get()->clone(context); }
{
if (location == nullptr) {
return nullptr;
}
return Location(location).get()->clone(context).staticCast<T>();
}
namespace detail {
// Shared implementation for cloning instruction-like nodes: clones the
// location, constructs a new node of wrapper type T with `args` (typically
// kind and op), then clones every operand through the map.
template <typename T, typename U, typename... ArgsT>
requires(std::is_same_v<typename T::underlying_type, U>)
T cloneInstructionImpl(const U *object, Context &context, CloneMap &map,
ArgsT &&...args) {
auto result = context.create<T>(clone(object->getLocation(), context),
std::forward<ArgsT>(args)...);
for (auto &&operand : object->getOperands()) {
result.addOperand(operand.clone(context, map));
}
return result;
}
} // namespace detail
// Instructions and values clone via the shared instruction path.
inline Node InstructionImpl::clone(Context &context, CloneMap &map) const {
return detail::cloneInstructionImpl<Instruction>(this, context, map, kind,
op);
}
inline Node ValueImpl::clone(Context &context, CloneMap &map) const {
return detail::cloneInstructionImpl<Value>(this, context, map, kind, op);
}
// Regions additionally clone their child instructions recursively.
inline Node RegionImpl::clone(Context &context, CloneMap &map) const {
auto result = context.create<Region>(ir::clone(getLocation(), context));
for (auto &&child : children()) {
result.addChild(ir::clone(child, context, map));
}
return result;
}
// A block is a builtin-dialect value that also owns child instructions;
// cloning copies both the operands and the children.
inline BlockImpl::BlockImpl(Location loc)
: ValueImpl(loc, ir::Kind::Builtin, builtin::BLOCK) {}
inline Node BlockImpl::clone(Context &context, CloneMap &map) const {
auto result = context.create<Block>(ir::clone(getLocation(), context));
for (auto &&operand : getOperands()) {
result.addOperand(operand.clone(context, map));
}
for (auto &&child : children()) {
result.addChild(ir::clone(child, context, map));
}
return result;
}
// Value operands are cloned through the map (flagged as operand uses);
// every other payload is self-contained and copied as-is.
inline Operand Operand::clone(Context &context, CloneMap &map) const {
  auto asValue = getAsValue();
  if (!asValue) {
    return *this;
  }
  return ir::clone(asValue, context, map, true);
}
// memssa node cloning. Phi/Var/Use/Def are plain instruction-like nodes
// identified by (kind, op), so they reuse the generic instruction cloning
// path (clone location, construct with kind/op, clone operands through the
// map) exactly like InstructionImpl/ValueImpl above, instead of duplicating
// that loop four times.
inline Node memssa::PhiImpl::clone(Context &context, CloneMap &map) const {
  return ir::detail::cloneInstructionImpl<Phi>(this, context, map, kind, op);
}
inline Node memssa::VarImpl::clone(Context &context, CloneMap &map) const {
  return ir::detail::cloneInstructionImpl<Var>(this, context, map, kind, op);
}
inline Node memssa::UseImpl::clone(Context &context, CloneMap &map) const {
  return ir::detail::cloneInstructionImpl<Use>(this, context, map, kind, op);
}
inline Node memssa::DefImpl::clone(Context &context, CloneMap &map) const {
  return ir::detail::cloneInstructionImpl<Def>(this, context, map, kind, op);
}
// Scopes carry operands and children, so both are cloned recursively.
inline Node memssa::ScopeImpl::clone(Context &context, CloneMap &map) const {
auto self = Scope(const_cast<ScopeImpl *>(this));
auto result = context.create<Scope>(ir::clone(self.getLocation(), context));
for (auto &&operand : self.getOperands()) {
result.addOperand(operand.clone(context, map));
}
for (auto child : self.children()) {
result.addChild(ir::clone(child, context, map));
}
return result;
}
// Location cloning re-interns the location data in the target context.
inline Location PathLocationImpl::clone(Context &context) const {
return context.getPathLocation(data.path);
}
// NOTE(review): data.file / baseLocation are passed through without being
// re-interned in `context`, so the clone can keep referencing a location
// owned by the source context — confirm this is intentional.
inline Location TextFileLocationImpl::clone(Context &context) const {
return context.getTextFileLocation(data.file, data.line, data.column);
}
inline Location OffsetLocationImpl::clone(Context &context) const {
return context.getOffsetLocation(baseLocation, offset);
}
inline Location MemoryLocationImpl::clone(Context &context) const {
return context.getMemoryLocation(data.address, data.size);
}
inline Location UnknownLocationImpl::clone(Context &context) const {
return context.getUnknownLocation();
}
// Default deep-clone: delegate to the node's virtual clone, then memoize
// the copy so cloning a clone is a no-op.
inline Node CloneMap::getOrCloneImpl(Context &context, Node node, bool) {
  auto cloned = node.get()->clone(context, *this);
  overrides[node] = cloned;
  return cloned;
}
} // namespace shader::ir

View file

@ -0,0 +1,72 @@
#pragma once
#include "Kind.hpp"
#include "Node.hpp"
namespace shader::ir {
enum class InstructionId : std::uint32_t {};
constexpr InstructionId getInstructionId(ir::Kind kind, unsigned op) {
return static_cast<InstructionId>(static_cast<std::uint32_t>(kind) |
static_cast<std::uint32_t>(op) << 5);
}
constexpr ir::Kind getInstructionKind(InstructionId id) {
return static_cast<ir::Kind>(static_cast<std::uint32_t>(id) & 0x1f);
}
constexpr unsigned getInstructionOp(InstructionId id) {
return static_cast<unsigned>(static_cast<std::uint32_t>(id) >> 5);
}
struct Region;
struct InstructionImpl;
struct Instruction;
// Common handle API shared by Instruction and instruction-derived wrappers:
// identity (kind/op), sibling navigation, operand editing and traversal.
template <typename ImplT> struct InstructionWrapper : NodeWrapper<ImplT> {
using NodeWrapper<ImplT>::NodeWrapper;
using NodeWrapper<ImplT>::operator=;
Kind getKind() const { return this->impl->kind; }
unsigned getOp() const { return this->impl->op; }
// Packed (kind, op) identifier; see getInstructionId.
InstructionId getInstId() const {
return getInstructionId(getKind(), getOp());
}
auto getParent() const { return this->impl->parent; };
bool hasParent() const { return this->impl->parent != nullptr; }
auto getNext() const { return Instruction(this->impl->next); }
auto getPrev() const { return Instruction(this->impl->prev); }
void addOperand(Operand operand) const { this->impl->addOperand(operand); }
decltype(auto) replaceOperand(int index, Operand operand) const {
return this->impl->replaceOperand(index, operand);
}
decltype(auto) eraseOperand(int index, int count = 1) const {
return this->impl->eraseOperand(index, count);
}
void insertAfter(Node point, Node node) const {
this->impl->insertAfter(point, node);
}
void erase() const { this->impl->erase(); }
void remove() const { this->impl->remove(); }
template <typename T = Node> auto children() const {
return this->impl->template children<T>();
}
decltype(auto) getOperand(std::size_t i) const { return this->impl->getOperand(i); }
decltype(auto) getOperands() const { return this->impl->getOperands(); }
std::size_t getOperandCount() const { return getOperands().size(); }
// Enums decay to their underlying integer operand.
template <typename T>
requires std::is_enum_v<T>
void addOperand(T enumValue) {
addOperand(std::to_underlying(enumValue));
}
};
// Handle type for generic instructions.
struct Instruction : InstructionWrapper<InstructionImpl> {
using InstructionWrapper<InstructionImpl>::InstructionWrapper;
using InstructionWrapper<InstructionImpl>::operator=;
};
} // namespace shader::ir

View file

@ -0,0 +1,69 @@
#pragma once
#include "Instruction.hpp"
#include "Kind.hpp"
#include "Location.hpp"
#include "NodeImpl.hpp"
#include "PrintableWrapper.hpp"
#include "RegionLike.hpp"
#include <ostream>
#include <span>
namespace shader::ir {
// A node living in an intrusive doubly-linked list inside a region.
//
// An instruction identifies itself by (kind, op), owns its operand list and
// keeps back references to its parent region and list neighbours.
struct InstructionImpl : NodeImpl {
  Kind kind;
  unsigned op;
  RegionLike parent; // owning region; null while detached
  Instruction prev;  // previous sibling; null if first
  Instruction next;  // next sibling; null if last
  OperandList operands;
  InstructionImpl(Location location, Kind kind, unsigned op,
                  std::span<const Operand> operands = {})
      : kind(kind), op(op) {
    setLocation(location);
    // Span elements are const and can only be copied (the original's
    // std::move on them was a no-op); addOperand also registers this
    // instruction in each value operand's use list.
    for (auto &&operand : operands) {
      addOperand(operand);
    }
  }
  // Enums decay to their underlying integer operand.
  template <typename T>
    requires std::is_enum_v<T>
  void addOperand(T enumValue) {
    addOperand(std::to_underlying(enumValue));
  }
  void addOperand(Operand operand);
  Operand replaceOperand(int index, Operand operand);
  Operand eraseOperand(int index, int count);
  void remove();
  void erase();
  decltype(auto) getOperand(std::size_t i) const {
    return operands.getOperand(i);
  }
  decltype(auto) getOperands() const { return std::span(operands); }
  // Prints "<kind>.<name>(op0, op1, ...)"; the operand list is omitted when
  // empty.
  void print(std::ostream &os, NameStorage &ns) const override {
    os << getInstructionName(kind, op);
    if (!operands.empty()) {
      os << "(";
      // Iterate by const reference: Operand holds a variant with a string
      // alternative, so copying each element would allocate.
      for (bool first = true; const auto &operand : operands) {
        if (first) {
          first = false;
        } else {
          os << ", ";
        }
        operand.print(os, ns);
      }
      os << ")";
    }
  }
  Node clone(Context &context, CloneMap &map) const override;
};
} // namespace shader::ir

View file

@ -0,0 +1,205 @@
#pragma once
#include <string>
namespace shader::ir {
// Dialect/encoding families an instruction can belong to. The enumerator
// value is packed into the low 5 bits of InstructionId, so keep the total
// (Count) within 32 and only append before Count.
enum class Kind {
Spv,
Builtin,
AmdGpu,
Vop2,
Sop2,
Sopk,
Smrd,
Vop3,
Mubuf,
Mtbuf,
Mimg,
Ds,
Vintrp,
Exp,
Vop1,
Vopc,
Sop1,
Sopc,
Sopp,
MemSSA,
Count,
};
// Per-dialect opcode-name lookups, defined elsewhere; each returns null for
// an unknown opcode (see getInstructionShortName below).
namespace spv {
const char *getInstructionName(unsigned id);
}
namespace builtin {
const char *getInstructionName(unsigned id);
}
namespace amdgpu {
const char *getInstructionName(unsigned id);
}
namespace vop2 {
const char *getInstructionName(unsigned id);
}
namespace sop2 {
const char *getInstructionName(unsigned id);
}
namespace sopk {
const char *getInstructionName(unsigned id);
}
namespace smrd {
const char *getInstructionName(unsigned id);
}
namespace vop3 {
const char *getInstructionName(unsigned id);
}
namespace mubuf {
const char *getInstructionName(unsigned id);
}
namespace mtbuf {
const char *getInstructionName(unsigned id);
}
namespace mimg {
const char *getInstructionName(unsigned id);
}
namespace ds {
const char *getInstructionName(unsigned id);
}
namespace vintrp {
const char *getInstructionName(unsigned id);
}
namespace exp {
const char *getInstructionName(unsigned id);
}
namespace vop1 {
const char *getInstructionName(unsigned id);
}
namespace vopc {
const char *getInstructionName(unsigned id);
}
namespace sop1 {
const char *getInstructionName(unsigned id);
}
namespace sopc {
const char *getInstructionName(unsigned id);
}
namespace sopp {
const char *getInstructionName(unsigned id);
}
namespace memssa {
const char *getInstructionName(unsigned id);
}
// Maps a dialect kind to its lower-case printable name. The table is
// indexed by the enumerator value, which runs contiguously from Spv (0) to
// MemSSA; anything outside that range (including Kind::Count) yields
// "<invalid>".
inline const char *getKindName(Kind kind) {
  constexpr const char *kNames[] = {
      "spv",  "builtin", "amdgpu", "vop2",   "sop2", "sopk",
      "smrd", "vop3",    "mubuf",  "mtbuf",  "mimg", "ds",
      "vintrp", "exp",   "vop1",   "vopc",   "sop1", "sopc",
      "sopp", "memssa",
  };
  auto index = static_cast<unsigned>(kind);
  if (index < sizeof(kNames) / sizeof(kNames[0])) {
    return kNames[index];
  }
  return "<invalid>";
}
// Returns the opcode's dialect-local name by dispatching to the dialect's
// lookup, or null when the opcode is unknown or the kind is invalid.
inline const char *getInstructionShortName(Kind kind, unsigned op) {
switch (kind) {
case Kind::Spv:
return spv::getInstructionName(op);
case Kind::Builtin:
return builtin::getInstructionName(op);
case Kind::AmdGpu:
return amdgpu::getInstructionName(op);
case Kind::Vop2:
return vop2::getInstructionName(op);
case Kind::Sop2:
return sop2::getInstructionName(op);
case Kind::Sopk:
return sopk::getInstructionName(op);
case Kind::Smrd:
return smrd::getInstructionName(op);
case Kind::Vop3:
return vop3::getInstructionName(op);
case Kind::Mubuf:
return mubuf::getInstructionName(op);
case Kind::Mtbuf:
return mtbuf::getInstructionName(op);
case Kind::Mimg:
return mimg::getInstructionName(op);
case Kind::Ds:
return ds::getInstructionName(op);
case Kind::Vintrp:
return vintrp::getInstructionName(op);
case Kind::Exp:
return exp::getInstructionName(op);
case Kind::Vop1:
return vop1::getInstructionName(op);
case Kind::Vopc:
return vopc::getInstructionName(op);
case Kind::Sop1:
return sop1::getInstructionName(op);
case Kind::Sopc:
return sopc::getInstructionName(op);
case Kind::Sopp:
return sopp::getInstructionName(op);
case Kind::MemSSA:
return memssa::getInstructionName(op);
case Kind::Count:
break;
}
return nullptr;
}
// Builds the fully qualified "<kind>.<opname>" printable name; opcodes the
// dialect does not know are rendered as "<kind>.<invalid N>".
inline std::string getInstructionName(Kind kind, unsigned op) {
  std::string result = getKindName(kind);
  result += '.';
  auto shortName = getInstructionShortName(kind, op);
  if (shortName == nullptr) {
    result += "<invalid ";
    result += std::to_string(op);
    result += '>';
  } else {
    result += shortName;
  }
  return result;
}
} // namespace shader::ir

View file

@ -0,0 +1,200 @@
#pragma once
#include "PrintableWrapper.hpp"
#include <cstdint>
#include <string>
namespace shader::ir {
struct LocationImpl;
struct CloneMap;
class Context;
// Thin non-owning handle over a LocationImpl-derived object.
template <typename ImplT> struct LocationWrapper : PrintableWrapper<ImplT> {
using PrintableWrapper<ImplT>::PrintableWrapper;
using PrintableWrapper<ImplT>::operator=;
};
using Location = LocationWrapper<LocationImpl>;
// Base class for interned source locations. Heterogeneous location types
// get a total order through the virtual compare(), which Context's
// location set relies on for deduplication.
struct LocationImpl {
virtual ~LocationImpl() {}
virtual void print(std::ostream &os) = 0;
virtual std::strong_ordering compare(const LocationImpl &other) const = 0;
// Re-interns this location into another context.
virtual Location clone(Context &context) const = 0;
auto operator<=>(const LocationImpl &other) const { return compare(other); }
};
// Location identified by a file path.
struct PathLocationImpl final : LocationImpl {
struct Data {
std::string path;
auto operator<=>(const Data &other) const = default;
} data;
PathLocationImpl(std::string path) : data{.path = std::move(path)} {}
void print(std::ostream &os) override { os << data.path; }
std::strong_ordering compare(const LocationImpl &other) const override {
if (this == &other) {
return std::strong_ordering::equal;
}
if (auto p = dynamic_cast<const PathLocationImpl *>(&other)) {
return this->data <=> p->data;
}
// Different location types: fall back to ordering by object address,
// stable within a run but otherwise arbitrary.
return this <=> &other;
}
Location clone(Context &context) const override;
};
// Handle for path locations.
struct PathLocation : LocationWrapper<PathLocationImpl> {
using LocationWrapper::LocationWrapper;
using LocationWrapper::operator=;
const std::string &getPath() const { return impl->data.path; }
};
// Location inside a text file: path + line + column, printed as
// "path:line:column".
struct TextFileLocationImpl final : LocationImpl {
struct Data {
// The file handle compares by pointer identity, which is stable because
// path locations are interned per context.
PathLocation file;
std::uint64_t line;
std::uint64_t column;
auto operator<=>(const Data &other) const = default;
} data;
TextFileLocationImpl(PathLocation file, std::uint64_t line,
std::uint64_t column)
: data{.file = file, .line = line, .column = column} {}
void print(std::ostream &os) override {
data.file.print(os);
os << ':' << data.line << ':' << data.column;
}
auto operator<=>(const TextFileLocationImpl &other) const = default;
std::strong_ordering compare(const LocationImpl &other) const override {
if (this == &other) {
return std::strong_ordering::equal;
}
if (auto p = dynamic_cast<const TextFileLocationImpl *>(&other)) {
return *this <=> *p;
}
// Different location types: fall back to address ordering.
return this <=> &other;
}
Location clone(Context &context) const override;
};
// Handle for text-file locations.
struct TextFileLocation : LocationWrapper<TextFileLocationImpl> {
using LocationWrapper::LocationWrapper;
using LocationWrapper::operator=;
PathLocation getFile() const { return impl->data.file; }
std::uint64_t getLine() const { return impl->data.line; }
std::uint64_t getColumn() const { return impl->data.column; }
};
// Payload of an offset location: a base location plus a byte offset. The
// base compares by pointer identity (locations are interned per context).
struct OffsetLocationData {
Location baseLocation;
std::uint64_t offset;
OffsetLocationData(Location baseLocation, std::uint64_t offset)
: baseLocation(baseLocation), offset(offset) {}
auto operator<=>(const OffsetLocationData &other) const = default;
};
// Location expressed as "base+offset".
struct OffsetLocationImpl final : OffsetLocationData, LocationImpl {
OffsetLocationImpl(Location file, std::uint64_t offset)
: OffsetLocationData(file, offset) {}
void print(std::ostream &os) override {
baseLocation.print(os);
os << '+' << offset;
}
std::strong_ordering compare(const LocationImpl &other) const override {
if (this == &other) {
return std::strong_ordering::equal;
}
// Cross-cast through the non-polymorphic data base; it succeeds exactly
// for OffsetLocationImpl instances.
if (auto p = dynamic_cast<const OffsetLocationData *>(&other)) {
return static_cast<const OffsetLocationData &>(*this) <=> *p;
}
return this <=> &other;
}
Location clone(Context &context) const override;
};
// Handle for offset locations.
struct OffsetLocation : LocationWrapper<OffsetLocationImpl> {
using LocationWrapper::LocationWrapper;
using LocationWrapper::operator=;
Location getBaseLocation() const { return impl->baseLocation; }
std::uint64_t getOffset() const { return impl->offset; }
};
// Location describing a raw memory range, printed as "(address - size)".
struct MemoryLocationImpl final : LocationImpl {
struct Data {
std::uint64_t address;
std::uint64_t size;
auto operator<=>(const Data &other) const = default;
} data;
MemoryLocationImpl(std::uint64_t address, std::uint64_t size)
: data{.address = address, .size = size} {}
void print(std::ostream &os) override {
os << '(' << data.address << " - " << data.size << ')';
}
std::strong_ordering compare(const LocationImpl &other) const override {
if (this == &other) {
return std::strong_ordering::equal;
}
if (auto p = dynamic_cast<const MemoryLocationImpl *>(&other)) {
return data <=> p->data;
}
// Different location types: fall back to address ordering.
return this <=> &other;
}
Location clone(Context &context) const override;
};
// Handle for memory-range locations.
struct MemoryLocation : LocationWrapper<MemoryLocationImpl> {
using LocationWrapper::LocationWrapper;
using LocationWrapper::operator=;
std::uint64_t getAddress() const { return impl->data.address; }
std::uint64_t getSize() const { return impl->data.size; }
};
// Placeholder location for IR whose origin is unknown.
struct UnknownLocationImpl final : LocationImpl {
  void print(std::ostream &os) override { os << "unknown"; }
  std::strong_ordering compare(const LocationImpl &other) const override {
    if (this == &other) {
      return std::strong_ordering::equal;
    }
    // Any two unknown locations are equivalent. This previously tested for
    // MemoryLocationImpl (copy/paste error), which made an unknown location
    // compare equal to *every* memory location and broke the strict
    // ordering Context's interned-location set depends on.
    if (dynamic_cast<const UnknownLocationImpl *>(&other)) {
      return std::strong_ordering::equal;
    }
    // Different location types: fall back to address ordering.
    return this <=> &other;
  }
  Location clone(Context &context) const override;
};
// Handle for the context's singleton unknown location.
struct UnknownLocation : LocationWrapper<UnknownLocationImpl> {
using LocationWrapper::LocationWrapper;
using LocationWrapper::operator=;
};
} // namespace shader::ir

View file

@ -0,0 +1,90 @@
#pragma once
#include "Node.hpp"
#include <set>
#include <string>
#include <unordered_map>
namespace shader::ir {
// Assigns stable printable names to IR nodes for dumps.
//
// Names live in a std::set, whose elements never move on insertion, so the
// node-to-name map can safely hold pointers into it.
class NameStorage {
  std::set<std::string> mNames;
  std::unordered_map<const NodeImpl *, const std::string *> mNodeToName;

public:
  // Names `node` exactly `name` when it is free; otherwise probes
  // "name_1", "name_2", ... until an unused name is found.
  void setUniqueNameOf(Node node, std::string name) {
    auto [nodeIt, nodeInserted] = mNodeToName.try_emplace(node.impl, nullptr);
    if (!nodeInserted && *nodeIt->second == name) {
      return; // node already carries the requested name
    }
    auto [nameIt, nameInserted] = mNames.insert(name);
    if (!nameInserted) {
      // Requested name is taken; probe numbered suffixes.
      std::size_t i = 1;
      while (true) {
        auto newName = name + "_" + std::to_string(i);
        auto [newNameIt, newNameInserted] = mNames.insert(std::move(newName));
        if (!newNameInserted) {
          ++i;
          continue;
        }
        nameIt = newNameIt;
        break;
      }
    }
    nodeIt->second = &*nameIt;
  }

  // Names `node` `name`; unlike setUniqueNameOf, several nodes may share
  // the same name.
  void setNameOf(Node node, std::string name) {
    auto [nodeIt, nodeInserted] = mNodeToName.try_emplace(node.impl, nullptr);
    if (!nodeInserted && *nodeIt->second == name) {
      return;
    }
    // `name` is no longer read past this point, so move it into the set
    // instead of copying (the original copied here).
    auto nameIt = mNames.insert(std::move(name)).first;
    nodeIt->second = &*nameIt;
  }

  // Returns the node's name, or an empty view when it has none.
  std::string_view tryGetNameOf(Node node) const {
    auto it = mNodeToName.find(node.impl);
    if (it == mNodeToName.end()) {
      return {};
    }
    return *it->second;
  }

  // Returns the node's name, inventing a numeric one (starting from the
  // current name count + 1) on first request.
  const std::string &getNameOf(Node node) {
    auto [it, inserted] = mNodeToName.emplace(node.impl, nullptr);
    if (inserted) {
      std::size_t i = mNames.size() + 1;
      while (true) {
        auto newName = std::to_string(i);
        auto [newNameIt, newNameInserted] = mNames.insert(std::move(newName));
        if (!newNameInserted) {
          ++i;
          continue;
        }
        it->second = &*newNameIt;
        break;
      }
    }
    return *it->second;
  }

  // Drops all names and node associations.
  void clear() {
    mNames.clear();
    mNodeToName.clear();
  }
};
} // namespace shader::ir

View file

@ -0,0 +1,17 @@
#pragma once
#include "Operand.hpp"
#include "PrintableWrapper.hpp"
namespace shader::ir {
template <typename ImplT> struct NodeWrapper;
using Node = NodeWrapper<NodeImpl>;
// Handle over a NodeImpl that adds location access on top of printing.
template <typename ImplT> struct NodeWrapper : PrintableWrapper<ImplT> {
using PrintableWrapper<ImplT>::PrintableWrapper;
using PrintableWrapper<ImplT>::operator=;
auto getLocation() const { return this->impl->getLocation(); }
};
} // namespace shader::ir

View file

@ -0,0 +1,65 @@
#pragma once
#include "Location.hpp"
#include "Node.hpp"
#include "Operand.hpp"
#include <cassert>
#include <map>
namespace shader::ir {
struct NodeImpl;
struct CloneMap;
class NameStorage;
class Context;
// namespace debug {
// [[gnu::used, gnu::noinline]] void dump(Node object);
// [[gnu::used, gnu::noinline]] void dump(NodeImpl *object);
// } // namespace debug
// Memoization table used while deep-cloning IR graphs. It maps visited
// nodes to their copies so shared subgraphs and cycles clone correctly, and
// callers can pre-seed substitutions via setOverride().
struct CloneMap {
virtual ~CloneMap() = default;
std::map<Node, Node> overrides;
void setOverride(Node from, Node to) { overrides[from] = to; }
// Returns the recorded mapping for `from`, or a null node when absent.
Node getOverride(Node from) {
if (auto it = overrides.find(from); it != overrides.end()) {
return it->second;
}
return {};
}
// Returns the mapped clone of `node`, cloning it on first sight. A null
// node maps to itself.
virtual Node getOrClone(Context &context, Node node, bool isOperand) {
if (node == nullptr) {
return node;
}
auto [it, inserted] = overrides.insert({node, nullptr});
if (inserted) {
it->second = getOrCloneImpl(context, node, isOperand);
// Map the clone onto itself so cloning an already-cloned node is a
// no-op. std::map iterators stay valid across these insertions.
overrides[it->second] = it->second;
}
return it->second;
}
virtual Node getOrCloneImpl(Context &context, Node node, bool isOperand);
};
// Root of every IR object: carries a source location plus the virtual
// print/clone interface.
struct NodeImpl {
Location location;
virtual ~NodeImpl() = default;
void setLocation(Location newLocation) { location = newLocation; }
Location getLocation() const { return location; }
virtual void print(std::ostream &os, NameStorage &ns) const = 0;
virtual Node clone(Context &context, CloneMap &map) const = 0;
};
} // namespace shader::ir

View file

@ -0,0 +1,152 @@
#pragma once
#include "../Vector.hpp"
#include <bit>
#include <compare>
#include <cstddef>
#include <cstdint>
#include <span>
#include <string>
#include <type_traits>
#include <utility>
#include <variant>
#include <vector>
namespace shader::ir {
class NameStorage;
class Context;
struct ValueImpl;
struct Value;
struct NodeImpl;
struct CloneMap;
template <typename ImplT> struct NodeWrapper;
using Node = NodeWrapper<NodeImpl>;
// A single instruction operand: null, a reference to a Value, or an
// immediate (integer, float, bool or string).
struct Operand {
using UnderlyingT =
std::variant<std::nullptr_t, ValueImpl *, std::int64_t, std::int32_t,
double, float, bool, std::string>;
UnderlyingT value{nullptr};
// Non-integral payloads (and bool) are stored as-is.
template <typename T>
requires(!std::is_integral_v<std::remove_cvref_t<T>> ||
std::is_same_v<bool, std::remove_cvref_t<T>>)
Operand(T &&value)
requires requires { UnderlyingT{std::forward<T>(value)}; }
: value(std::forward<T>(value)) {}
// Integers up to 32 bits normalize to int32, 64-bit integers to int64, so
// comparisons are independent of the source integer type.
template <typename T>
Operand(T value)
requires requires {
requires(std::is_integral_v<std::remove_cvref_t<T>> &&
!std::is_same_v<bool, T> && sizeof(T) <= sizeof(std::int32_t));
UnderlyingT{static_cast<std::int32_t>(value)};
}
: value(static_cast<std::int32_t>(value)) {}
template <typename T>
Operand(T value)
requires requires {
requires(std::is_integral_v<std::remove_cvref_t<T>> &&
sizeof(T) == sizeof(std::int64_t));
UnderlyingT{static_cast<std::int64_t>(value)};
}
: value(static_cast<std::int64_t>(value)) {}
// Enums decay to their underlying integer.
template <typename T>
requires(std::is_enum_v<std::remove_cvref_t<T>>)
Operand(T value) : Operand(std::to_underlying(value)) {}
// Node handles (anything exposing .impl) become value references; passing
// a null handle is a programming error and aborts.
template <typename T>
Operand(T &&value)
requires requires { Operand(value.impl); }
: Operand(value.impl) {
if (value.impl == nullptr) {
std::abort();
}
}
Operand() = default;
Operand(const Operand &) = default;
Operand(Operand &&) = default;
Operand &operator=(const Operand &) = default;
Operand &operator=(Operand &&) = default;
template <typename T>
Operand &operator=(T &&other)
requires requires { value = std::forward<T>(other); }
{
value = std::forward<T>(other);
return *this;
}
// Typed accessors return null when the operand holds another payload.
template <typename T> const T *getAs() const {
if (auto node = std::get_if<T>(&value)) {
return node;
}
return {};
}
Value getAsValue() const;
const std::string *getAsString() const { return getAs<std::string>(); }
const std::int32_t *getAsInt32() const { return getAs<std::int32_t>(); }
const std::int64_t *getAsInt64() const { return getAs<std::int64_t>(); }
const double *getAsDouble() const { return getAs<double>(); }
const float *getAsFloat() const { return getAs<float>(); }
const bool *getAsBool() const { return getAs<bool>(); }
bool isNull() const { return std::get_if<std::nullptr_t>(&value) != nullptr; }
explicit operator bool() const { return !isNull(); }
void print(std::ostream &os, NameStorage &ns) const;
Operand clone(Context &context, CloneMap &map) const;
// Total order: first by payload type index, then by payload. Floats
// compare by bit pattern so NaNs still order deterministically.
std::partial_ordering operator<=>(const Operand &other) const {
auto result = value.index() <=> other.value.index();
if (result != 0) {
return result;
}
return std::visit(
[](auto &&lhs, auto &&rhs) -> std::partial_ordering {
using lhs_type = std::remove_cvref_t<decltype(lhs)>;
using rhs_type = std::remove_cvref_t<decltype(rhs)>;
if constexpr (std::is_same_v<lhs_type, rhs_type>) {
if constexpr (std::is_same_v<lhs_type, std::nullptr_t>) {
return std::strong_ordering::equal;
} else if constexpr (std::is_same_v<lhs_type, float>) {
return std::bit_cast<std::uint32_t>(lhs) <=>
std::bit_cast<std::uint32_t>(rhs);
} else if constexpr (std::is_same_v<lhs_type, double>) {
return std::bit_cast<std::uint64_t>(lhs) <=>
std::bit_cast<std::uint64_t>(rhs);
} else {
return lhs <=> rhs;
}
}
// Unreachable: both variants hold the same index at this point.
throw;
},
value, other.value);
}
bool operator==(const Operand &) const = default;
};
// Operand container; addOperand mirrors the enum convenience overload used
// by InstructionImpl.
struct OperandList : std::vector<Operand> {
using std::vector<Operand>::vector;
using std::vector<Operand>::operator=;
template <typename T>
requires std::is_enum_v<T>
void addOperand(T enumValue) {
addOperand(std::to_underlying(enumValue));
}
void addOperand(Operand operand) { push_back(std::move(operand)); }
const Operand &getOperand(std::size_t i) const { return at(i); }
};
} // namespace shader::ir

View file

@ -0,0 +1,43 @@
#pragma once
#include "NameStorage.hpp"
#include "Operand.hpp"
#include "ValueImpl.hpp" // IWYU pragma: keep
namespace shader::ir {
/// Render this operand to \p os; SSA values are named through \p ns.
/// Exactly one branch fires: the checks cover every variant alternative,
/// which are mutually exclusive.
inline void Operand::print(std::ostream &os, NameStorage &ns) const {
  if (auto val = getAsValue()) {
    os << '%' << ns.getNameOf(val);
  } else if (auto str = getAsString()) {
    os << '"' << *str << '"';
  } else if (auto i32 = getAsInt32()) {
    os << *i32 << "i32";
  } else if (auto i64 = getAsInt64()) {
    os << *i64 << "i64";
  } else if (auto f32 = getAsFloat()) {
    os << *f32 << 'f';
  } else if (auto f64 = getAsDouble()) {
    os << *f64 << 'd';
  } else if (auto b = getAsBool()) {
    os << (*b ? "true" : "false");
  } else if (isNull()) {
    os << "null";
  } else {
    // Should be unreachable: every alternative is handled above.
    os << "<invalid operand " << value.index() << ">";
  }
}
} // namespace shader::ir

View file

@ -0,0 +1,88 @@
#pragma once
#include <cassert>
#include <functional>
#include <type_traits>
namespace shader::ir {
/// Non-owning typed handle around a raw IR node implementation pointer.
/// Derived wrappers layer a typed API on top of ImplT*; copying a wrapper
/// never copies or owns the pointee.
template <typename ImplT> struct PointerWrapper {
  using underlying_type = ImplT;
  ImplT *impl = nullptr;
  PointerWrapper() = default;
  PointerWrapper(ImplT *impl) : impl(impl) {}
  // Implicit upcast: a wrapper of a derived impl converts to a wrapper of
  // its base impl.
  template <typename OtherT>
    requires std::is_base_of_v<ImplT, OtherT>
  PointerWrapper(PointerWrapper<OtherT> node) : impl(node.impl) {}
  explicit operator bool() const { return impl != nullptr; }
  bool operator==(std::nullptr_t) const { return impl == nullptr; }
  bool operator==(ImplT *other) const { return impl == other; }
  // Deducing-this assignment: assigning through a derived wrapper yields
  // the derived wrapper type, so chains keep their static type.
  template <typename Self> Self &operator=(this Self &self, ImplT *other) {
    self.impl = other;
    return self;
  }
  template <typename Self, typename OtherT>
    requires std::is_base_of_v<ImplT, OtherT>
  Self &operator=(this Self &self, PointerWrapper<OtherT> other) {
    self.impl = other.get();
    return self;
  }
  // ImplT *operator->() const { return impl; }
  ImplT *get() const { return impl; }
  // Ordering/equality compare the raw pointers only.
  auto operator<=>(const PointerWrapper &) const = default;
  bool operator==(const PointerWrapper &) const = default;
  /// Checked downcast: yields a null wrapper when the dynamic type does not
  /// match T's underlying impl type.
  template <typename T>
  T cast() const
    requires requires { static_cast<typename T::underlying_type *>(impl); }
  {
    return T(dynamic_cast<typename T::underlying_type *>(impl));
  }
  /// Unchecked downcast; asserts (in debug builds) that the checked cast
  /// would have succeeded.
  template <typename T>
  T staticCast() const
    requires requires { static_cast<typename T::underlying_type *>(impl); }
  {
    assert(impl == nullptr || cast<T>() != nullptr);
    return T(static_cast<typename T::underlying_type *>(impl));
  }
  /// True when the (non-null) pointee's dynamic type matches T's impl type.
  template <typename T> bool isa() const {
    if (impl == nullptr) {
      return false;
    }
    if constexpr (std::is_same_v<std::remove_cvref_t<T>,
                                 std::remove_cvref_t<ImplT>>) {
      return true;
    } else if constexpr (!requires { cast<T>() != nullptr; }) {
      // T's impl type is unrelated to ImplT: can never match.
      return false;
    } else {
      return cast<T>() != nullptr;
    }
  }
  /// Multi-type isa: true when any of the listed types matches.
  template <typename... T>
    requires(sizeof...(T) > 1)
  bool isa() const {
    return (isa<T>() || ...);
  }
};
} // namespace shader::ir
namespace std {
// Allow every PointerWrapper-derived handle to be used as a key in
// unordered containers by hashing the underlying raw pointer.
template <typename T>
  requires std::is_base_of_v<
      shader::ir::PointerWrapper<typename T::underlying_type>, T>
struct hash<T> {
  constexpr std::size_t operator()(const T &pointer) const noexcept {
    return hash<typename T::underlying_type *>{}(pointer.impl);
  }
};
} // namespace std

View file

@ -0,0 +1,136 @@
#pragma once
#include "InstructionImpl.hpp" // IWYU pragma: keep
namespace shader::ir {
/// Forward iteration over a sibling chain of instructions, filtered to
/// nodes of type T. The successor is prefetched *before* an element is
/// yielded, so the loop body may unlink or destroy the visited instruction
/// without breaking the iteration.
template <typename T> struct PreincNodeIterable {
  struct EndIterator {};
  struct Iterator {
    Instruction nextElem;    // prefetched successor of currentElem
    Instruction currentElem; // element currently pointed at
    Instruction endElem;     // exclusive end of the range (may be null)
    Iterator() = default;
    Iterator(Instruction elem, Instruction end)
        : currentElem(elem), endElem(end) {
      nextElem = currentElem ? currentElem.getNext() : nullptr;
      // Skip leading elements that are not of the requested type.
      if constexpr (!std::is_same_v<Instruction, T>) {
        while (currentElem != endElem && !currentElem.isa<T>()) {
          advance();
        }
      }
    }
    T operator*() const { return currentElem.staticCast<T>(); }
    Iterator &operator++() {
      advance();
      // Keep advancing until the next element of type T (or the end).
      if constexpr (!std::is_same_v<Instruction, T>) {
        while (currentElem != endElem && !currentElem.isa<T>()) {
          advance();
        }
      }
      return *this;
    }
    bool operator==(const Iterator &) const = default;
    bool operator==(const EndIterator &) const {
      return currentElem == endElem;
    }
    // Step to the prefetched element and prefetch the one after it.
    void advance() {
      currentElem = nextElem;
      if (nextElem) {
        nextElem = nextElem.getNext();
      }
    }
  };
  PreincNodeIterable(Instruction beginIt, Instruction endIt)
      : mBeginIt(beginIt), mEndIt(endIt) {}
  Iterator begin() const { return Iterator(mBeginIt, mEndIt); }
  EndIterator end() const { return EndIterator{}; }

private:
  Instruction mBeginIt;
  Instruction mEndIt;
};
/// Backward counterpart of PreincNodeIterable: walks the sibling chain via
/// getPrev() with the same prefetch scheme, so the visited instruction may
/// be unlinked or destroyed by the loop body.
template <typename T> struct RevPreincNodeIterable {
  struct EndIterator {};
  struct Iterator {
    Instruction nextElem;    // prefetched predecessor of currentElem
    Instruction currentElem; // element currently pointed at
    Instruction endElem;     // exclusive end of the range (may be null)
    Iterator() = default;
    Iterator(Instruction elem, Instruction end)
        : currentElem(elem), endElem(end) {
      nextElem = currentElem ? currentElem.getPrev() : nullptr;
      // Skip leading elements that are not of the requested type.
      if constexpr (!std::is_same_v<Instruction, T>) {
        while (currentElem != endElem && !currentElem.isa<T>()) {
          advance();
        }
      }
    }
    T operator*() const { return currentElem.staticCast<T>(); }
    Iterator &operator++() {
      advance();
      // Keep advancing until the next element of type T (or the end).
      if constexpr (!std::is_same_v<Instruction, T>) {
        while (currentElem != endElem && !currentElem.isa<T>()) {
          advance();
        }
      }
      return *this;
    }
    bool operator==(const Iterator &) const = default;
    bool operator==(const EndIterator &) const {
      return currentElem == endElem;
    }
    // Step to the prefetched element and prefetch the one before it.
    void advance() {
      currentElem = nextElem;
      if (nextElem) {
        nextElem = nextElem.getPrev();
      }
    }
  };
  RevPreincNodeIterable(Instruction beginIt, Instruction endIt)
      : mBeginIt(beginIt), mEndIt(endIt) {}
  Iterator begin() const { return Iterator(mBeginIt, mEndIt); }
  EndIterator end() const { return EndIterator{}; }

private:
  Instruction mBeginIt;
  Instruction mEndIt;
};
/// Build a forward, mutation-safe iterable over [begin, end), optionally
/// filtered to instructions of type T.
template <typename T = Instruction>
inline PreincNodeIterable<T> range(Instruction begin,
                                   Instruction end = nullptr) {
  return PreincNodeIterable<T>(begin, end);
}
/// Build a backward, mutation-safe iterable over [begin, end), optionally
/// filtered to instructions of type T.
template <typename T = Instruction>
inline RevPreincNodeIterable<T> revRange(Instruction begin,
                                         Instruction end = nullptr) {
  return RevPreincNodeIterable<T>(begin, end);
}
} // namespace shader::ir

View file

@ -0,0 +1,26 @@
#pragma once
#include "PointerWrapper.hpp"
#include <ostream>
namespace shader::ir {
class NameStorage;
/// Wrapper mixin that forwards printing to the implementation object.
/// Prefers the two-argument impl->print(os, ns) overload when the impl
/// provides one; otherwise falls back to impl->print(os).
template <typename T> struct PrintableWrapper : PointerWrapper<T> {
  using PointerWrapper<T>::PointerWrapper;
  using PointerWrapper<T>::operator=;
  void print(std::ostream &os, NameStorage &ns) const {
    if constexpr (requires { this->impl->print(os, ns); }) {
      this->impl->print(os, ns);
    } else {
      this->impl->print(os);
    }
  }
  // Only available when the impl can print without a NameStorage.
  void print(std::ostream &os) const
    requires requires { this->impl->print(os); }
  {
    this->impl->print(os);
  }
};
} // namespace shader::ir

View file

@ -0,0 +1,19 @@
#pragma once
#include "Node.hpp"
#include "RegionLike.hpp"
namespace shader::ir {
/// Wrapper for region nodes: a plain Node that also behaves as a container
/// of child instructions (via RegionLikeWrapper).
template <typename ImplT>
struct RegionWrapper : RegionLikeWrapper<ImplT, NodeWrapper> {
  using RegionLikeWrapper<ImplT, NodeWrapper>::RegionLikeWrapper;
  using RegionLikeWrapper<ImplT, NodeWrapper>::operator=;
};
struct RegionImpl;
/// Non-owning handle to a RegionImpl.
struct Region : RegionWrapper<RegionImpl> {
  using RegionWrapper<RegionImpl>::RegionWrapper;
  using RegionWrapper<RegionImpl>::operator=;
};
} // namespace shader::ir

View file

@ -0,0 +1,15 @@
#pragma once
#include "NameStorage.hpp"
#include "NodeImpl.hpp"
#include "Region.hpp"
#include "RegionLikeImpl.hpp"
#include <ostream>
namespace shader::ir {
/// Implementation node for Region: a Node that owns an intrusive list of
/// child instructions (RegionLikeImpl).
struct RegionImpl : NodeImpl, RegionLikeImpl {
  RegionImpl(Location loc) { setLocation(loc); }
  void print(std::ostream &os, NameStorage &ns) const override;
  Node clone(Context &context, CloneMap &map) const override;
};
} // namespace shader::ir

View file

@ -0,0 +1,38 @@
#pragma once
#include "Instruction.hpp"
namespace shader::ir {
struct RegionLike;
/// Mixin exposing the child-list API of a RegionLikeImpl through an
/// arbitrary base wrapper (NodeWrapper, PointerWrapper, ...).
template <typename ImplT, template <typename> typename BaseWrapper>
struct RegionLikeWrapper : BaseWrapper<ImplT> {
  using BaseWrapper<ImplT>::BaseWrapper;
  using BaseWrapper<ImplT>::operator=;
  // Append the children of \p other to this region (defined out of line).
  void appendRegion(RegionLike other);
  auto getFirst() { return this->impl->first; }
  auto getLast() { return this->impl->last; }
  bool empty() { return this->impl->first == nullptr; }
  void insertAfter(Instruction point, Instruction node) {
    this->impl->insertAfter(point, node);
  }
  void prependChild(Instruction node) { this->impl->prependChild(node); }
  void addChild(Instruction node) { this->impl->addChild(node); }
  // Mutation-safe iteration over children, optionally filtered by type.
  template <typename T = Instruction> auto children() {
    return this->impl->template children<T>();
  }
  template <typename T = Instruction> auto revChildren() {
    return this->impl->template revChildren<T>();
  }
};
struct RegionLikeImpl;
/// Type-erased handle to any region-like implementation.
struct RegionLike : RegionLikeWrapper<RegionLikeImpl, PointerWrapper> {
  using RegionLikeWrapper::RegionLikeWrapper;
  using RegionLikeWrapper::operator=;
};
} // namespace shader::ir

View file

@ -0,0 +1,25 @@
#pragma once
#include "PreincNodeIterable.hpp"
#include "RegionLike.hpp"
namespace shader::ir {
/// Shared state for all region-like nodes: an intrusive, doubly-linked
/// list of child instructions.
struct RegionLikeImpl {
  Instruction first = nullptr; // head of the child list
  Instruction last = nullptr;  // tail of the child list
  virtual ~RegionLikeImpl() = default;
  /// Forward iteration over children of type T; safe against unlinking the
  /// currently visited instruction.
  template <typename T = Instruction> auto children() const {
    return PreincNodeIterable<T>{first, nullptr};
  }
  /// Backward iteration over children of type T.
  template <typename T = Instruction> auto revChildren() const {
    return RevPreincNodeIterable<T>{last, nullptr};
  }
  virtual void insertAfter(Instruction point, Instruction node);
  virtual void prependChild(Instruction node);
  virtual void addChild(Instruction node);
};
} // namespace shader::ir

View file

@ -0,0 +1,36 @@
#pragma once
#include "Instruction.hpp"
#include "Operand.hpp"
namespace shader::ir {
struct Value;
/// Wrapper for instructions that produce a result and can therefore appear
/// as operands; adds use-list bookkeeping on top of InstructionWrapper.
template <typename T> struct ValueWrapper : InstructionWrapper<T> {
  using InstructionWrapper<T>::InstructionWrapper;
  using InstructionWrapper<T>::operator=;
  // Deduplicated set of instructions that reference this value.
  decltype(auto) getUserList() const { return this->impl->getUserList(); }
  // Raw use records (user instruction, operand index).
  auto & getUseList() const { return this->impl->uses; }
  void replaceAllUsesWith(Value other) const;
  bool isUnused() const { return this->impl->uses.empty(); }
};
struct ValueImpl;
struct Value : ValueWrapper<ValueImpl> {
  using ValueWrapper::ValueWrapper;
  using ValueWrapper::operator=;
};
// Defined out of line because it needs the complete Value type.
template <typename T>
void ValueWrapper<T>::replaceAllUsesWith(Value other) const {
  this->impl->replaceAllUsesWith(other);
}
/// One reference to a Value: \p user reads \p node through its operand at
/// \p operandIndex.
struct ValueUse {
  Instruction user;
  Value node;
  int operandIndex;
  auto operator<=>(const ValueUse &) const = default;
};
} // namespace shader::ir

View file

@ -0,0 +1,55 @@
#pragma once
#include "InstructionImpl.hpp"
#include "NameStorage.hpp"
#include "Node.hpp"
#include "Value.hpp"
namespace shader::ir {
/// Implementation of a result-producing instruction; maintains the ordered
/// set of uses so the value can be queried for users or replaced.
struct ValueImpl : InstructionImpl {
  std::set<ValueUse> uses;
  ValueImpl(Location location, Kind kind, unsigned op,
            std::span<const Operand> operands = {})
      : InstructionImpl(location, kind, op, operands) {}
  // Called by instructions when they start/stop referencing this value.
  void addUse(Instruction user, int operandIndex) {
    uses.insert({user, this, operandIndex});
  }
  void removeUse(Instruction user, int operandIndex) {
    uses.erase({user, this, operandIndex});
  }
  /// Deduplicated set of instructions that use this value.
  std::set<Node> getUserList() const {
    std::set<Node> list;
    for (auto use : uses) {
      list.insert(use.user);
    }
    return list;
  }
  /// Rewrite every use of this value to reference \p other (or a null
  /// operand when \p other is null). Each replaceOperand call is expected
  /// to remove the corresponding entry from `uses`, which is what makes
  /// the loop terminate.
  void replaceAllUsesWith(Value other) {
    if (other == this) {
      std::abort(); // replacing a value with itself would never terminate
    }
    while (!uses.empty()) {
      auto use = *uses.begin();
      if (other == nullptr) {
        use.user.replaceOperand(use.operandIndex, nullptr);
      } else {
        use.user.replaceOperand(use.operandIndex, other);
      }
    }
  }
  // Prints "%name = <instruction>".
  void print(std::ostream &os, NameStorage &ns) const override {
    os << '%' << ns.getNameOf(const_cast<ValueImpl *>(this));
    os << " = ";
    InstructionImpl::print(os, ns);
  }
  Node clone(Context &context, CloneMap &map) const override;
};
} // namespace shader::ir

View file

@ -0,0 +1,7 @@
#pragma once
#include "ir/Context.hpp"
#include "ir/Region.hpp"
namespace shader {
/// Optimize the instructions of \p region in place.
/// NOTE(review): the meaning of the bool result is not visible here -
/// presumably "something was changed"; confirm against the implementation.
bool optimize(ir::Context &context, ir::Region region);
}

View file

@ -0,0 +1,173 @@
#pragma once
#include "ir/Context.hpp"
#include "ir/Region.hpp"
#include "ir/RegionImpl.hpp"
#include <optional>
#include <span>
#include <spirv-tools/optimizer.hpp>
namespace shader::spv {
/// Logical sections of a SPIR-V module, kept in the order the SPIR-V
/// specification requires them to be serialized. Each section is an
/// optional IR region created on demand.
struct BinaryLayout {
  enum {
    kCapabilities,
    kExtensions,
    kExtInstImports,
    kMemoryModels,
    kEntryPoints,
    kExecutionModes,
    kDebugs,
    kAnnotations,
    kGlobals,
    kFunctionDeclarations,
    kFunctions,
    kRegionCount
  };
  ir::Region regions[kRegionCount];
  /// Lazily create the region for section \p index.
  ir::Region getOrCreateRegion(ir::Context &context, int index) {
    if (regions[index] == nullptr) {
      regions[index] = context.create<ir::Region>(context.getUnknownLocation());
    }
    return regions[index];
  }
  // Per-section convenience wrappers around getOrCreateRegion().
  ir::Region getOrCreateCapabilities(ir::Context &context) {
    return getOrCreateRegion(context, kCapabilities);
  }
  ir::Region getOrCreateExtensions(ir::Context &context) {
    return getOrCreateRegion(context, kExtensions);
  }
  ir::Region getOrCreateExtInstImports(ir::Context &context) {
    return getOrCreateRegion(context, kExtInstImports);
  }
  ir::Region getOrCreateMemoryModels(ir::Context &context) {
    return getOrCreateRegion(context, kMemoryModels);
  }
  ir::Region getOrCreateEntryPoints(ir::Context &context) {
    return getOrCreateRegion(context, kEntryPoints);
  }
  ir::Region getOrCreateExecutionModes(ir::Context &context) {
    return getOrCreateRegion(context, kExecutionModes);
  }
  ir::Region getOrCreateDebugs(ir::Context &context) {
    return getOrCreateRegion(context, kDebugs);
  }
  ir::Region getOrCreateAnnotations(ir::Context &context) {
    return getOrCreateRegion(context, kAnnotations);
  }
  ir::Region getOrCreateGlobals(ir::Context &context) {
    return getOrCreateRegion(context, kGlobals);
  }
  ir::Region getOrCreateFunctionDeclarations(ir::Context &context) {
    return getOrCreateRegion(context, kFunctionDeclarations);
  }
  ir::Region getOrCreateFunctions(ir::Context &context) {
    return getOrCreateRegion(context, kFunctions);
  }
  ///
  /// \brief Merge all regions into a single one.
  ///
  /// After calling this function, all regions in the object
  /// become empty.
  ///
  ir::Region merge(ir::Context &context) {
    auto result = context.create<ir::Region>(context.getUnknownLocation());
    for (auto &region : regions) {
      if (region == nullptr) {
        continue;
      }
      // Region is a pointer wrapper, so std::move is effectively a copy;
      // the explicit reset below is what empties the slot.
      result.appendRegion(std::move(region));
      region = {};
    }
    return result;
  }
};
///
/// Deserialize a SPIR-V binary into an intermediate representation.
///
/// \param context context to attach the IR to
/// \param spv SPIR-V binary
/// \param loc location to use for error reporting
/// \returns the deserialized IR, or std::nullopt if deserialization failed
///
std::optional<BinaryLayout> deserialize(ir::Context &context,
std::span<const std::uint32_t> spv,
ir::Location loc);
///
/// \brief Serialize SPIR-V from an IR region.
///
/// This function generates a SPIR-V binary from an IR region.
/// The SPIR-V binary is stored in the returned vector.
///
/// \returns A vector of u32 values representing the SPIR-V binary.
///
std::vector<std::uint32_t> serialize(ir::Region body);
inline std::vector<std::uint32_t> serialize(ir::Context &context,
BinaryLayout &&layout) {
return serialize(layout.merge(context));
}
///
/// \brief Returns true if the instruction is a terminator.
///
bool isTerminatorInst(ir::InstructionId inst);
///
/// \brief Disassemble a SPIR-V binary into text and print result to stderr.
///
/// \param spv The SPIR-V binary to disassemble.
/// \param pretty If true, emit friendly names for functions, variables, and
/// other values. If false, emit the SPIR-V ID for each value.
///
/// \note The SPIR-V binary is not validated or checked for errors. If the
/// input is invalid, the output is undefined.
void dump(std::span<const std::uint32_t> spv, bool pretty = false);
///
/// \brief Disassemble a SPIR-V binary into text.
///
/// \param spv The SPIR-V binary to disassemble.
/// \param pretty If true, emit friendly names for functions, variables, and
/// other values. If false, emit the SPIR-V ID for each value.
/// \return the assembly text
///
/// \note The SPIR-V binary is not validated or checked for errors. If the
/// input is invalid, the output is undefined.
std::string disassembly(std::span<const std::uint32_t> spv, bool pretty = false);
///
/// \brief Validates a given SPIR-V binary against the SPIR-V spec
///
/// \param spv the SPIR-V binary to validate
/// \return whether the SPIR-V binary is valid
///
/// This functions uses the SPIR-V Tools validator to check the given SPIR-V
/// binary against the SPIR-V spec. If the SPIR-V is invalid, the function
/// will print out the validation error messages and return false. If the
/// SPIR-V is valid, the function simply returns true.
bool validate(std::span<const std::uint32_t> spv);
///
/// \brief Optimize a SPIR-V module.
///
/// \param spv the SPIR-V binary to optimize
/// \return the optimized SPIR-V binary or an empty optional if binary is
/// invalid
///
/// This function takes a SPIR-V module and runs a series of optimization passes
/// on it using SPIR-V Tools opt. If the optimization is successful, the
/// optimized module is returned. Otherwise, an empty optional is returned.
///
std::optional<std::vector<std::uint32_t>>
optimize(std::span<const std::uint32_t> spv);
} // namespace shader::spv

View file

@ -0,0 +1,8 @@
#pragma once
#include "SpvConverter.hpp"
#include "ir.hpp"
namespace shader {
/// Rewrite the control flow of \p region into structured form suitable for
/// SPIR-V structured control-flow rules.
/// NOTE(review): the role of \p exitLabel is inferred from its name
/// (the label control transfers to on exit) - confirm against the
/// implementation.
void structurizeCfg(spv::Context &context, ir::RegionLike region,
                    ir::Value exitLabel);
}

View file

@ -0,0 +1,19 @@
# Generates rdna-semantic-spirv.hpp: a header embedding the SPIR-V binary
# compiled from rdna.glsl by shader-tool, exported through an INTERFACE
# library so consumers only pick up the include path and the dependency.
set(OUTPUT_FILENAME rdna-semantic-spirv.hpp)
set(INCLUDE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include)
set(OUTPUT_DIRECTORY ${INCLUDE_DIRECTORY}/shaders)
set(OUTPUT_FILE ${OUTPUT_DIRECTORY}/${OUTPUT_FILENAME})
set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/rdna.glsl)
file(MAKE_DIRECTORY ${OUTPUT_DIRECTORY})
# Re-run shader-tool whenever the tool itself or the GLSL source changes.
add_custom_command(
  OUTPUT ${OUTPUT_FILE}
  COMMAND $<TARGET_FILE:shader-tool> --output-type spirv-header --output-var-name g_rdna_semantic_spirv -i ${INPUT_FILE} -o ${OUTPUT_FILE}
  DEPENDS shader-tool ${INPUT_FILE}
  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
  COMMENT "Generating ${OUTPUT_FILE}..."
)
add_custom_target(rdna-semantic-spirv-gen DEPENDS ${OUTPUT_FILE})
add_library(rdna-semantic-spirv INTERFACE)
add_dependencies(rdna-semantic-spirv rdna-semantic-spirv-gen)
target_include_directories(rdna-semantic-spirv INTERFACE ${INCLUDE_DIRECTORY})

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,274 @@
#include "Evaluator.hpp"
#include "dialect.hpp"
#include "ir.hpp"
using namespace shader;
/// Evaluate a single operand to a constant Value.
///
/// \param op   operand to fold: either an SSA value (evaluated recursively
///             and memoized in `values`) or an immediate literal.
/// \param type optional type instruction whose operands are
///             [0] = bit width, [1] = signedness flag (for ints); used to
///             give the literal its exact width/signedness. When null, the
///             literal is returned as-is.
/// \returns the folded value, or an empty Value when folding fails.
eval::Value eval::Evaluator::eval(const ir::Operand &op, ir::Value type) {
  if (auto val = op.getAsValue()) {
    // Memoize per-value results; the placeholder inserted by try_emplace
    // also terminates evaluation of self-referential IR.
    auto [it, inserted] = values.try_emplace(val, Value{});
    if (inserted) {
      it->second = eval(val);
    }
    return it->second;
  }
  if (auto result = op.getAsInt32()) {
    if (type != nullptr) {
      bool isSigned = *type.getOperand(1).getAsInt32() != 0;
      switch (*type.getOperand(0).getAsInt32()) {
      case 8:
        if (isSigned) {
          return static_cast<std::int8_t>(*result);
        }
        return static_cast<std::uint8_t>(*result);
      case 16:
        if (isSigned) {
          return static_cast<std::int16_t>(*result);
        }
        return static_cast<std::uint16_t>(*result);
      case 32:
        if (isSigned) {
          return static_cast<std::int32_t>(*result);
        }
        return static_cast<std::uint32_t>(*result);
      }
      // Unsupported width for a 32-bit literal.
      return {};
    }
    return *result;
  }
  if (auto result = op.getAsInt64()) {
    if (type != nullptr) {
      bool isSigned = *type.getOperand(1).getAsInt32() != 0;
      if (isSigned) {
        return static_cast<std::int64_t>(*result);
      }
      return static_cast<std::uint64_t>(*result);
    }
    return *result;
  }
  if (auto result = op.getAsBool()) {
    return *result;
  }
  if (auto result = op.getAsFloat()) {
    if (type != nullptr) {
      switch (*type.getOperand(0).getAsInt32()) {
      case 16:
        return static_cast<float16_t>(*result);
      case 64:
        // FIX: this previously returned static_cast<std::uint64_t>(*result),
        // silently turning a float constant into an integer value.
        return static_cast<double>(*result);
      }
      return *result;
    }
    return *result;
  }
  if (auto result = op.getAsDouble()) {
    return *result;
  }
  return {};
}
/// Constant-fold one instruction.
///
/// Operand layout follows the IR convention for value instructions:
/// operands[0] is the result type, the actual inputs start at operands[1].
/// Signed/unsigned/float opcode variants share one operator because the
/// evaluated Value carries its own type (the original merges them too).
/// \returns the folded value, or an empty Value for unsupported opcodes or
/// non-constant inputs.
eval::Value eval::Evaluator::eval(ir::InstructionId instId,
                                  std::span<const ir::Operand> operands) {
  if (instId == ir::spv::OpConstant) {
    return eval(operands[1], operands[0].getAsValue());
  }
  if (instId == ir::spv::OpBitcast) {
    return eval(operands[1]).bitcast(operands[0].getAsValue());
  }
  if (instId == ir::spv::OpSConvert || instId == ir::spv::OpUConvert) {
    if (auto rhs = eval(operands[1])) {
      return rhs.iConvert(operands[0].getAsValue(),
                          instId == ir::spv::OpSConvert);
    }
    return {};
  }
  if (instId == ir::spv::OpSelect) {
    return eval(operands[1]).select(eval(operands[2]), eval(operands[3]));
  }
  if (instId == ir::spv::OpIAdd || instId == ir::spv::OpFAdd) {
    return eval(operands[1]) + eval(operands[2]);
  }
  if (instId == ir::spv::OpISub || instId == ir::spv::OpFSub) {
    return eval(operands[1]) - eval(operands[2]);
  }
  if (instId == ir::spv::OpSDiv || instId == ir::spv::OpUDiv ||
      instId == ir::spv::OpFDiv) {
    return eval(operands[1]) / eval(operands[2]);
  }
  // Mod and Rem variants all map onto Value's operator% (merged identical
  // branches of the original).
  if (instId == ir::spv::OpSMod || instId == ir::spv::OpUMod ||
      instId == ir::spv::OpFMod || instId == ir::spv::OpSRem ||
      instId == ir::spv::OpFRem) {
    return eval(operands[1]) % eval(operands[2]);
  }
  if (instId == ir::spv::OpSNegate || instId == ir::spv::OpFNegate) {
    // FIX: operand 0 is the result type; the value to negate is operand 1.
    // The original evaluated operands[0], so negation never folded.
    return -eval(operands[1]);
  }
  if (instId == ir::spv::OpNot) {
    return ~eval(operands[1]);
  }
  if (instId == ir::spv::OpLogicalNot) {
    return !eval(operands[1]);
  }
  if (instId == ir::spv::OpLogicalEqual || instId == ir::spv::OpIEqual) {
    return eval(operands[1]) == eval(operands[2]);
  }
  if (instId == ir::spv::OpLogicalNotEqual || instId == ir::spv::OpINotEqual) {
    return eval(operands[1]) != eval(operands[2]);
  }
  if (instId == ir::spv::OpLogicalOr) {
    return eval(operands[1]) || eval(operands[2]);
  }
  if (instId == ir::spv::OpLogicalAnd) {
    return eval(operands[1]) && eval(operands[2]);
  }
  if (instId == ir::spv::OpUGreaterThan || instId == ir::spv::OpSGreaterThan) {
    return eval(operands[1]) > eval(operands[2]);
  }
  if (instId == ir::spv::OpUGreaterThanEqual ||
      instId == ir::spv::OpSGreaterThanEqual) {
    return eval(operands[1]) >= eval(operands[2]);
  }
  if (instId == ir::spv::OpULessThan || instId == ir::spv::OpSLessThan) {
    return eval(operands[1]) < eval(operands[2]);
  }
  if (instId == ir::spv::OpULessThanEqual ||
      instId == ir::spv::OpSLessThanEqual) {
    return eval(operands[1]) <= eval(operands[2]);
  }
  // Float comparisons: "ordered" forms are false when either input is NaN,
  // "unordered" forms are true when either input is NaN.
  if (instId == ir::spv::OpFOrdEqual) {
    return !eval(operands[1]).isNan() && !eval(operands[2]).isNan() &&
           eval(operands[1]) == eval(operands[2]);
  }
  if (instId == ir::spv::OpFUnordEqual) {
    return eval(operands[1]).isNan() || eval(operands[2]).isNan() ||
           eval(operands[1]) == eval(operands[2]);
  }
  if (instId == ir::spv::OpFOrdNotEqual) {
    return !eval(operands[1]).isNan() && !eval(operands[2]).isNan() &&
           eval(operands[1]) != eval(operands[2]);
  }
  if (instId == ir::spv::OpFUnordNotEqual) {
    return eval(operands[1]).isNan() || eval(operands[2]).isNan() ||
           eval(operands[1]) != eval(operands[2]);
  }
  if (instId == ir::spv::OpFOrdLessThan) {
    return !eval(operands[1]).isNan() && !eval(operands[2]).isNan() &&
           eval(operands[1]) < eval(operands[2]);
  }
  if (instId == ir::spv::OpFUnordLessThan) {
    return eval(operands[1]).isNan() || eval(operands[2]).isNan() ||
           eval(operands[1]) < eval(operands[2]);
  }
  if (instId == ir::spv::OpFOrdGreaterThan) {
    return !eval(operands[1]).isNan() && !eval(operands[2]).isNan() &&
           eval(operands[1]) > eval(operands[2]);
  }
  if (instId == ir::spv::OpFUnordGreaterThan) {
    return eval(operands[1]).isNan() || eval(operands[2]).isNan() ||
           eval(operands[1]) > eval(operands[2]);
  }
  if (instId == ir::spv::OpFOrdLessThanEqual) {
    return !eval(operands[1]).isNan() && !eval(operands[2]).isNan() &&
           eval(operands[1]) <= eval(operands[2]);
  }
  if (instId == ir::spv::OpFUnordLessThanEqual) {
    return eval(operands[1]).isNan() || eval(operands[2]).isNan() ||
           eval(operands[1]) <= eval(operands[2]);
  }
  if (instId == ir::spv::OpFOrdGreaterThanEqual) {
    return !eval(operands[1]).isNan() && !eval(operands[2]).isNan() &&
           eval(operands[1]) >= eval(operands[2]);
  }
  if (instId == ir::spv::OpFUnordGreaterThanEqual) {
    return eval(operands[1]).isNan() || eval(operands[2]).isNan() ||
           eval(operands[1]) >= eval(operands[2]);
  }
  // Both right-shift flavors use operator>>; the Value's own signedness
  // selects the logical/arithmetic behavior.
  if (instId == ir::spv::OpShiftRightLogical ||
      instId == ir::spv::OpShiftRightArithmetic) {
    return eval(operands[1]) >> eval(operands[2]);
  }
  if (instId == ir::spv::OpShiftLeftLogical) {
    return eval(operands[1]) << eval(operands[2]);
  }
  if (instId == ir::spv::OpBitwiseOr) {
    return eval(operands[1]) | eval(operands[2]);
  }
  if (instId == ir::spv::OpBitwiseXor) {
    return eval(operands[1]) ^ eval(operands[2]);
  }
  if (instId == ir::spv::OpBitwiseAnd) {
    return eval(operands[1]) & eval(operands[2]);
  }
  if (instId == ir::spv::OpIsNan) {
    return eval(operands[1]).isNan();
  }
  if (instId == ir::spv::OpIsInf) {
    return eval(operands[1]).isInf();
  }
  if (instId == ir::spv::OpIsFinite) {
    return eval(operands[1]).isFinite();
  }
  if (instId == ir::spv::OpCompositeConstruct) {
    std::vector<Value> constituents;
    constituents.reserve(operands.size() - 1);
    for (auto &op : operands.subspan(1)) {
      constituents.push_back(eval(op));
    }
    return Value::compositeConstruct(operands[0].getAsValue(), constituents);
  }
  if (instId == ir::spv::OpCompositeExtract) {
    auto composite = eval(operands[1].getAsValue());
    if (composite.empty()) {
      return {};
    }
    std::vector<Value> indexes;
    indexes.reserve(operands.size() - 2);
    for (auto &op : operands.subspan(2)) {
      indexes.push_back(eval(op));
    }
    // Only single-level extraction is supported for now.
    if (indexes.size() != 1) {
      return {};
    }
    return composite.compositeExtract(indexes[0]);
  }
  return {};
}
/// Fold a value-producing instruction by dispatching on its opcode and
/// operand list.
eval::Value eval::Evaluator::eval(ir::Value op) {
  return eval(op.getInstId(), op.getOperands());
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,106 @@
#include "ModuleInfo.hpp"
#include "analyze.hpp"
#include "dialect.hpp"
#include "ir.hpp"
/// Collect per-function information (parameter list, referenced non-local
/// variables, and their read/write access) for \p function, memoized in
/// \p moduleInfo. Callee information is computed recursively on
/// OpFunctionCall and folded into the caller.
shader::ModuleInfo::Function &
shader::collectFunctionInfo(ModuleInfo &moduleInfo, ir::Value function) {
  auto [fnIt, fnInserted] =
      moduleInfo.functions.try_emplace(function, ModuleInfo::Function{});
  if (!fnInserted) {
    // Already analyzed (or analysis in progress): return the memoized record.
    return fnIt->second;
  }
  auto &result = fnIt->second;
  std::map<ir::Value, int> params; // parameter value -> parameter index
  result.returnType = function.getOperand(0).getAsValue();
  // Record that \p pointer is accessed with \p access, attributing the
  // access either to one of our parameters or to a non-local variable.
  auto trackAccess = [&](ir::Value pointer, Access access) {
    pointer = unwrapPointer(pointer);
    if (auto it = params.find(pointer); it != params.end()) {
      result.parameters[it->second].access |= access;
      return;
    }
    if (pointer == ir::spv::OpVariable) {
      // Variable operand 1 holds the storage class.
      auto storagePtr = pointer.getOperand(1).getAsInt32();
      if (!storagePtr) {
        return;
      }
      auto storage = ir::spv::StorageClass(*storagePtr);
      // Function-local variables are invisible to callers; skip them.
      if (storage != ir::spv::StorageClass::Function) {
        result.variables[pointer] = access;
      }
    }
  };
  // Walk the function body up to its OpFunctionEnd.
  for (auto inst : ir::range(function.getNext())) {
    if (inst == ir::spv::OpFunctionEnd) {
      break;
    }
    if (inst == ir::spv::OpFunctionParameter) {
      auto type = inst.getOperand(0).getAsValue();
      params[inst.staticCast<ir::Value>()] = result.parameters.size();
      result.parameters.push_back({.type = type, .access = Access::None});
      continue;
    }
    if (inst == ir::spv::OpFunctionCall) {
      // Fold the callee's parameter and global accesses into this function.
      auto callee = inst.getOperand(1).getAsValue();
      auto &calleeInfo = collectFunctionInfo(moduleInfo, callee);
      auto args = inst.getOperands().subspan(2);
      for (std::size_t index = 0; auto &[_, access] : calleeInfo.parameters) {
        trackAccess(args[index++].getAsValue(), access);
      }
      for (auto &[global, access] : calleeInfo.variables) {
        trackAccess(global, access);
      }
      continue;
    }
    if (inst == ir::spv::OpLoad || inst == ir::spv::OpAtomicLoad) {
      trackAccess(inst.getOperand(1).getAsValue(), Access::Read);
      continue;
    }
    if (inst == ir::spv::OpStore || inst == ir::spv::OpAtomicStore) {
      trackAccess(inst.getOperand(0).getAsValue(), Access::Write);
      continue;
    }
    // Read-modify-write atomics touch their pointer both ways.
    if (inst == ir::spv::OpAtomicExchange ||
        inst == ir::spv::OpAtomicCompareExchange ||
        inst == ir::spv::OpAtomicCompareExchangeWeak ||
        inst == ir::spv::OpAtomicIIncrement ||
        inst == ir::spv::OpAtomicIDecrement || inst == ir::spv::OpAtomicIAdd ||
        inst == ir::spv::OpAtomicISub || inst == ir::spv::OpAtomicSMin ||
        inst == ir::spv::OpAtomicUMin || inst == ir::spv::OpAtomicSMax ||
        inst == ir::spv::OpAtomicUMax || inst == ir::spv::OpAtomicAnd ||
        inst == ir::spv::OpAtomicOr || inst == ir::spv::OpAtomicXor) {
      trackAccess(inst.getOperand(1).getAsValue(), Access::ReadWrite);
    }
  }
  return result;
}
/// Analyze every OpFunction in the module's functions region and record its
/// information in \p moduleInfo.
void shader::collectModuleInfo(ModuleInfo &moduleInfo,
                               const spv::BinaryLayout &layout) {
  auto functionsRegion = layout.regions[spv::BinaryLayout::kFunctions];
  if (functionsRegion == nullptr) {
    // Module has no functions section; nothing to analyze.
    return;
  }
  for (auto inst : functionsRegion.children<ir::Value>()) {
    if (inst != ir::spv::OpFunction) {
      continue;
    }
    collectFunctionInfo(moduleInfo, inst);
  }
}

View file

@ -0,0 +1,149 @@
#include "SemanticInfo.hpp"
#include "dialect.hpp"
using namespace shader;
/// Number of opcodes defined for an instruction kind. Returns 0 for kinds
/// that are deliberately excluded from semantic-name matching (Spv,
/// Builtin, MemSSA) and for Kind::Count.
static std::size_t getOpCount(ir::Kind kind) {
  switch (kind) {
  case ir::Kind::Spv:
  case ir::Kind::Builtin:
  case ir::Kind::MemSSA:
    break;
  case ir::Kind::AmdGpu:
    return ir::amdgpu::OpCount;
  case ir::Kind::Vop2:
    return ir::vop2::OpCount;
  case ir::Kind::Sop2:
    return ir::sop2::OpCount;
  case ir::Kind::Sopk:
    return ir::sopk::OpCount;
  case ir::Kind::Smrd:
    return ir::smrd::OpCount;
  case ir::Kind::Vop3:
    return ir::vop3::OpCount;
  case ir::Kind::Mubuf:
    return ir::mubuf::OpCount;
  case ir::Kind::Mtbuf:
    return ir::mtbuf::OpCount;
  case ir::Kind::Mimg:
    return ir::mimg::OpCount;
  case ir::Kind::Ds:
    return ir::ds::OpCount;
  case ir::Kind::Vintrp:
    return ir::vintrp::OpCount;
  case ir::Kind::Exp:
    // Export is a single instruction, not an enumerated opcode set.
    return 1;
  case ir::Kind::Vop1:
    return ir::vop1::OpCount;
  case ir::Kind::Vopc:
    return ir::vopc::OpCount;
  case ir::Kind::Sop1:
    return ir::sop1::OpCount;
  case ir::Kind::Sopc:
    return ir::sopc::OpCount;
  case ir::Kind::Sopp:
    return ir::sopp::OpCount;
  case ir::Kind::Count:
    break;
  }
  return 0;
}
/// Bind semantic functions in \p layout to hardware instruction ids via
/// their OpName debug names, filling moduleInfo.semantics.
///
/// Two name forms are recognized: the short form ("<inst>") and the wide
/// form prefixed with the kind name ("<kind>_<inst>"); the wide form takes
/// priority when both match.
void shader::collectSemanticModuleInfo(SemanticModuleInfo &moduleInfo,
                                       const spv::BinaryLayout &layout) {
  // Lazily built lookup: short instruction name -> matching ids (a short
  // name may exist in several kinds).
  static auto instNameToIds = [] {
    std::map<std::string, std::vector<ir::InstructionId>, std::less<>> result;
    for (std::size_t kind = 0; kind < std::size_t(ir::Kind::Count); ++kind) {
      auto opCount = getOpCount(ir::Kind(kind));
      for (unsigned op = 0; op < opCount; ++op) {
        auto name = getInstructionShortName(ir::Kind(kind), op);
        if (name == nullptr) {
          continue;
        }
        result[name].push_back(ir::getInstructionId(ir::Kind(kind), op));
      }
    }
    return result;
  }();
  collectModuleInfo(moduleInfo, layout);
  // Lazily built lookup: "<kind>_<name>" -> ids, disambiguated by kind.
  static auto wideInstNameToIds = [] {
    std::map<std::string, std::vector<ir::InstructionId>, std::less<>> result;
    for (std::size_t kind = 0; kind < std::size_t(ir::Kind::Count); ++kind) {
      auto opCount = getOpCount(ir::Kind(kind));
      if (opCount == 0) {
        continue;
      }
      for (unsigned op = 0; op < opCount; ++op) {
        auto name = getInstructionShortName(ir::Kind(kind), op);
        if (name == nullptr) {
          continue;
        }
        std::string wideName = getKindName(ir::Kind(kind));
        wideName += '_';
        wideName += name;
        result[std::move(wideName)].push_back(
            ir::getInstructionId(ir::Kind(kind), op));
      }
    }
    return result;
  }();
  for (auto &[fn, info] : moduleInfo.functions) {
    // Find the OpName debug instruction attached to this function.
    for (auto &use : fn.getUseList()) {
      if (use.user != ir::spv::OpName) {
        continue;
      }
      auto mangledNameString = use.user.getOperand(1).getAsString();
      if (mangledNameString == nullptr) {
        break;
      }
      auto mangledName = std::string_view(*mangledNameString);
      std::string_view name;
      // Strip the argument list from names like "v_add_f32(...)"; names
      // without parentheses are not semantic functions.
      if (auto pos = mangledName.find('('); pos != std::string_view::npos) {
        name = mangledName.substr(0, pos);
      } else {
        break;
      }
      std::vector<ir::InstructionId> *ids = nullptr;
      std::vector<ir::InstructionId> *wideIds = nullptr;
      if (auto it = wideInstNameToIds.find(name);
          it != wideInstNameToIds.end()) {
        wideIds = &it->second;
      }
      if (auto it = instNameToIds.find(name); it != instNameToIds.end()) {
        ids = &it->second;
      }
      if (ids == nullptr && wideIds == nullptr) {
        break;
      }
      if (wideIds != nullptr) {
        // Wide names overwrite any previous binding.
        for (auto id : *wideIds) {
          moduleInfo.semantics[id] = fn;
        }
      } else {
        // Short names only fill ids that are still unbound.
        // NOTE(review): the []= vs emplace asymmetry looks deliberate (wide
        // names take priority over short ones) - confirm.
        for (auto id : *ids) {
          moduleInfo.semantics.emplace(id, fn);
        }
      }
      break;
    }
  }
}

View file

@ -0,0 +1,641 @@
#include "SpvConverter.hpp"
#include "dialect.hpp"
#include "dialect/spv.hpp"
#include <string>
using namespace shader;
using Builder = ir::Builder<ir::spv::Builder, ir::builtin::Builder>;
static std::string getTypeName(ir::Value type);
/// Build a human-readable debug name for a constant instruction, or an
/// empty string when the constant's type is not handled.
static std::string getConstantName(ir::Value constant) {
  if (constant == ir::spv::OpConstant) {
    auto typeValue = constant.getOperand(0).getAsValue();
    auto value = constant.getOperand(1);
    if (typeValue == ir::spv::OpTypeInt) {
      auto width = *typeValue.getOperand(0).getAsInt32();
      if (width <= 32) {
        if (value.getAsInt32() == nullptr) {
          std::abort(); // literal width does not match the declared type
        }
        // NOTE(review): narrow ints use a bare "_" prefix while all other
        // cases use "c_" - confirm whether this asymmetry is intended.
        return "_" + std::to_string(*value.getAsInt32());
      }
      if (value.getAsInt64() == nullptr) {
        std::abort();
      }
      return "c_" + std::to_string(*value.getAsInt64());
    }
    if (typeValue == ir::spv::OpTypeFloat) {
      auto width = *typeValue.getOperand(0).getAsInt32();
      if (width == 32) {
        if (value.getAsFloat() == nullptr) {
          std::abort();
        }
        return "c_" + std::to_string(*value.getAsFloat());
      }
      if (value.getAsDouble() == nullptr) {
        std::abort();
      }
      return "c_" + std::to_string(*value.getAsDouble());
    }
    return {};
  }
  if (constant == ir::spv::OpConstantTrue) {
    return "true";
  }
  if (constant == ir::spv::OpConstantFalse) {
    return "false";
  }
  if (constant == ir::spv::OpConstantNull) {
    return "null_" + getTypeName(constant.getOperand(0).getAsValue());
  }
  return {};
}
// Builds a short human-readable debug name for a SPIR-V type, or an empty
// string for type opcodes this helper does not know about.
static std::string getTypeName(ir::Value type) {
  if (type == ir::spv::OpTypeVoid) {
    return "void";
  }
  if (type == ir::spv::OpTypeBool) {
    return "bool";
  }
  if (type == ir::spv::OpTypeSampler) {
    return "sampler";
  }
  if (type == ir::spv::OpTypeInt) {
    // Operand 0 is the bit width, operand 1 the signedness flag.
    auto width = std::to_string(*type.getOperand(0).getAsInt32());
    return (type.getOperand(1) != 0 ? "s" : "u") + width;
  }
  if (type == ir::spv::OpTypeFloat) {
    return "f" + std::to_string(*type.getOperand(0).getAsInt32());
  }
  if (type == ir::spv::OpTypeVector) {
    auto element = getTypeName(type.getOperand(0).getAsValue());
    return element + 'x' + std::to_string(*type.getOperand(1).getAsInt32());
  }
  if (type == ir::spv::OpTypeArray) {
    auto element = getTypeName(type.getOperand(0).getAsValue());
    // Print the element count only when it is a literal OpConstant.
    if (auto count = type.getOperand(1).getAsValue();
        count == ir::spv::OpConstant) {
      if (auto value = count.getOperand(1).getAsInt32()) {
        return element + '[' + std::to_string(*value) + ']';
      }
    }
    return element + "[N]";
  }
  if (type == ir::spv::OpTypeRuntimeArray) {
    return getTypeName(type.getOperand(0).getAsValue()) + "[]";
  }
  if (type == ir::spv::OpTypeStruct) {
    std::string result = "struct{";
    bool needSeparator = false;
    for (auto &member : type.getOperands()) {
      if (needSeparator) {
        result += ", ";
      }
      needSeparator = true;
      result += getTypeName(member.getAsValue());
    }
    result += "}";
    return result;
  }
  if (type == ir::spv::OpTypePointer) {
    // Operand 0 is the storage class, operand 1 the pointee type.
    return getTypeName(type.getOperand(1).getAsValue()) + "*";
  }
  return {};
}
// Sets up the per-function scratch regions: a region that will collect
// function-local OpVariable declarations and a labelled epilogue region.
spv::Context::Context() {
  localVariables = create<ir::Region>(getUnknownLocation());
  auto epilogueLabel = createRegionWithLabel(getUnknownLocation());
  epilogue = epilogueLabel.getParent();
}
// Imports `node` from a foreign SPIR-V module into this context's module.
//
// Layout-managed instructions are redirected into their proper sections:
// OpExtInstImport into the ext-inst-import section, types and constants into
// the globals section (deduplicated where the dialect allows it), non-local
// OpVariables into the globals section, and OpFunction together with its body
// into the functions section. Decorations and debug names attached to a
// cloned value are cloned alongside it. Anything else falls back to plain
// CloneMap cloning.
ir::Node spv::Import::getOrCloneImpl(ir::Context &context, ir::Node node,
                                     bool isOperand) {
  auto inst = node.cast<ir::Instruction>();
  if (inst == nullptr) {
    return CloneMap::getOrCloneImpl(context, node, isOperand);
  }

  auto &spvContext = static_cast<spv::Context &>(context);

  // Register the replacement so future lookups of `node` resolve to it.
  auto redefine = [&](ir::Node newNode) {
    setOverride(node, newNode);
    return newNode;
  };

  // Clones OpDecorate*/OpName* users of `inst` (defaults to `node`) into the
  // annotations/debugs sections and registers the demangled OpName string as
  // the value's name.
  auto cloneDecorationsAndDebugs = [&](ir::Node inst = nullptr) {
    if (inst == nullptr) {
      inst = node;
    }
    auto annotations = spvContext.layout.getOrCreateAnnotations(context);
    auto debugs = spvContext.layout.getOrCreateDebugs(context);
    auto value = inst.cast<ir::Value>();
    if (value == nullptr) {
      return;
    }
    for (auto &use : value.getUseList()) {
      if (use.user == ir::spv::OpDecorate ||
          use.user == ir::spv::OpMemberDecorate ||
          use.user == ir::spv::OpDecorationGroup ||
          use.user == ir::spv::OpGroupDecorate ||
          use.user == ir::spv::OpGroupMemberDecorate ||
          use.user == ir::spv::OpDecorateId) {
        annotations.addChild(ir::clone(use.user, context, *this));
      }
      if (use.user == ir::spv::OpName || use.user == ir::spv::OpMemberName) {
        auto cloned = ir::clone(use.user, context, *this);
        debugs.addChild(cloned);
        if (use.user == ir::spv::OpName) {
          // Strip a trailing "(...)" signature from mangled names.
          auto demangled =
              std::string_view(*cloned.getOperand(1).getAsString());
          if (auto pos = demangled.find('('); pos != std::string::npos) {
            demangled = demangled.substr(0, pos);
          }
          spvContext.setName(cloned.getOperand(0).getAsValue(),
                             std::string(demangled));
        }
      }
    }
  };

  auto hasDecoration = [&] {
    for (auto use : node.staticCast<ir::Value>().getUseList()) {
      if (use.user == ir::spv::OpDecorate ||
          use.user == ir::spv::OpMemberDecorate) {
        return true;
      }
    }
    return false;
  };

  if (inst.getKind() == ir::Kind::Spv) {
    if (inst.getOp() == ir::spv::OpExtInstImport) {
      auto extensions = spvContext.layout.getOrCreateExtInstImports(context);
      auto result = CloneMap::getOrCloneImpl(context, node, isOperand);
      extensions.addChild(result.staticCast<ir::Value>());
      return redefine(result);
    }

    if (ir::spv::isTypeOp(inst.getOp())) {
      std::vector<ir::Operand> operands;
      for (auto &op : inst.getOperands()) {
        operands.push_back(op.clone(context, *this));
      }

      auto typeOp = static_cast<ir::spv::Op>(inst.getOp());
      // Decorated arrays, runtime arrays, structs and non-function-storage
      // pointers must stay distinct; only the remaining kinds deduplicate.
      if ((inst != ir::spv::OpTypeArray || !hasDecoration()) &&
          inst != ir::spv::OpTypeRuntimeArray &&
          inst != ir::spv::OpTypeStruct) {
        if (inst != ir::spv::OpTypePointer ||
            inst.getOperand(0) == ir::spv::StorageClass::Function) {
          if (auto result = spvContext.findGlobal(typeOp, operands)) {
            return redefine(result);
          }
        }
      }

      auto result = spvContext.createGlobal(
          static_cast<ir::spv::Op>(inst.getOp()), operands);
      redefine(result);
      cloneDecorationsAndDebugs();
      return result;
    }
  }

  if (inst == ir::spv::OpConstant || inst == ir::spv::OpConstantComposite ||
      inst == ir::spv::OpConstantTrue || inst == ir::spv::OpConstantFalse ||
      inst == ir::spv::OpConstantNull || inst == ir::spv::OpConstantSampler ||
      inst == ir::spv::OpSpecConstantTrue ||
      inst == ir::spv::OpSpecConstantFalse || inst == ir::spv::OpSpecConstant ||
      inst == ir::spv::OpSpecConstantComposite) {
    std::vector<ir::Operand> operands;
    for (auto &op : inst.getOperands()) {
      operands.push_back(op.clone(context, *this));
    }

    auto result = spvContext.getOrCreateGlobal(
        static_cast<ir::spv::Op>(inst.getOp()), operands);
    return redefine(result);
  }

  if (isOperand && inst == ir::spv::OpVariable) {
    // Function-local variables are cloned in place; everything else becomes
    // a module-level global. (A redundant nested OpVariable re-check was
    // removed here.)
    auto storage = inst.getOperand(1).getAsInt32();
    if (*storage == int(ir::spv::StorageClass::Function)) {
      return CloneMap::getOrCloneImpl(context, node, isOperand);
    }

    auto globals = spvContext.layout.getOrCreateGlobals(context);
    auto result = CloneMap::getOrCloneImpl(context, node, isOperand);
    globals.addChild(result.staticCast<ir::Instruction>());
    cloneDecorationsAndDebugs();
    return result;
  }

  // Note: a dedicated OpConstant branch used to live here, but it was
  // unreachable — OpConstant is fully handled by the constant branch above,
  // which always returns.

  if (inst == ir::spv::OpFunction) {
    auto functions = spvContext.layout.getOrCreateFunctions(context);
    auto result = CloneMap::getOrCloneImpl(context, node, isOperand)
                      .staticCast<ir::Value>();
    functions.insertAfter(nullptr, result);
    redefine(result);
    cloneDecorationsAndDebugs();

    // Clone the function body up to and including OpFunctionEnd.
    ir::Instruction insertPoint = result;
    for (auto child : ir::range(inst.getNext())) {
      auto cloned = ir::clone(child, context, *this);
      functions.insertAfter(insertPoint, cloned);
      insertPoint = cloned;
      cloneDecorationsAndDebugs(child);

      if (child == ir::spv::OpFunctionEnd) {
        break;
      }
    }

    return result;
  }

  return CloneMap::getOrCloneImpl(context, node, isOperand);
}
// Creates a fresh region and appends an OpLabel to it; returns that label.
ir::Value spv::Context::createRegionWithLabel(ir::Location loc) {
  auto region = create<ir::Region>(loc);
  auto builder = Builder::createAppend(*this, region);
  return builder.createSpvLabel(loc);
}
// Records `name` for `inst` both in the local namespace and as an OpName
// entry in the module's debug section.
void spv::Context::setName(ir::spv::IdRef inst, std::string name) {
  ns.setNameOf(inst, name);
  Builder::createAppend(*this, layout.getOrCreateDebugs(*this))
      .createSpvName(getUnknownLocation(), inst, std::move(name));
}
// Assigns a derived debug name to `constant` when one can be produced.
void spv::Context::setConstantName(ir::Value constant) {
  if (auto name = getConstantName(constant); !name.empty()) {
    ns.setNameOf(constant, std::move(name));
  }
}
// Returns a deduplicated constant of `typeValue` with the given literal.
ir::Value spv::Context::getOrCreateConstant(ir::Value typeValue,
                                            const ir::Operand &value) {
  if (typeValue != getTypeBool()) {
    return getOrCreateGlobal(ir::spv::OpConstant, {{typeValue, value}});
  }
  // Booleans use the dedicated OpConstantTrue/OpConstantFalse singletons.
  return *value.getAsBool() ? getTrue() : getFalse();
}
// Returns the cached scalar type for the given base opcode; aborts for
// opcodes that are not scalar types.
ir::Value spv::Context::getType(ir::spv::Op baseType, int width,
                                bool isSigned) {
  if (baseType == ir::spv::OpTypeInt) {
    return getTypeInt(width, isSigned);
  }
  if (baseType == ir::spv::OpTypeFloat) {
    return getTypeFloat(width);
  }
  if (baseType == ir::spv::OpTypeBool) {
    return getTypeBool();
  }
  if (baseType == ir::spv::OpTypeVoid) {
    return getTypeVoid();
  }
  std::abort();
}
// Materializes the type described by `info`: scalars directly, vectors and
// arrays from their scalar component type; aborts on unsupported bases.
ir::Value spv::Context::getType(const TypeInfo &info) {
  if (info.baseType == ir::spv::OpTypeVector) {
    auto scalar =
        getType(info.componentType, info.componentWidth, info.isSigned);
    return getTypeVector(scalar, info.componentsCount);
  }
  if (info.baseType == ir::spv::OpTypeArray) {
    auto element =
        getType(info.componentType, info.componentWidth, info.isSigned);
    return getTypeArray(element, imm32(info.componentsCount));
  }
  switch (info.baseType) {
  case ir::spv::OpTypeInt:
  case ir::spv::OpTypeFloat:
  case ir::spv::OpTypeBool:
  case ir::spv::OpTypeVoid:
    return getType(info.baseType, info.componentWidth, info.isSigned);
  default:
    std::abort();
  }
}
// Assigns a derived debug name to `type` when one can be produced.
void spv::Context::setTypeName(ir::Value type) {
  if (auto name = getTypeName(type); !name.empty()) {
    ns.setNameOf(type, std::move(name));
  }
}
// Looks up an already-created global (type/constant) with the given opcode
// and exact operand list; returns nullptr when absent.
ir::Value
spv::Context::findGlobal(ir::spv::Op op,
                         std::span<const ir::Operand> operands) const {
  auto it = globals.find(ir::getInstructionId(ir::Kind::Spv, op));
  if (it == globals.end()) {
    return nullptr;
  }
  for (auto candidate : it->second) {
    if (candidate.getOperandCount() != operands.size()) {
      continue;
    }
    std::size_t index = 0;
    bool allEqual = true;
    for (auto &operand : candidate.getOperands()) {
      if (operands[index++] != operand) {
        allEqual = false;
        break;
      }
    }
    if (allEqual) {
      return candidate;
    }
  }
  return nullptr;
}
// Appends a new global (type or constant) instruction to the globals section,
// registers it in the lookup cache and assigns a debug name.
ir::Value
spv::Context::createGlobal(ir::spv::Op op,
                           std::span<const ir::Operand> operands) {
  auto builder = Builder::createAppend(*this, layout.getOrCreateGlobals(*this));
  auto result =
      builder.createValue(getUnknownLocation(), ir::Kind::Spv, op, operands);
  // Use the same explicit (Kind::Spv, op) key form as findGlobal so lookups
  // are guaranteed to see entries created here.
  globals[ir::getInstructionId(ir::Kind::Spv, op)].push_back(result);
  if (ir::spv::isTypeOp(op)) {
    setTypeName(result);
  } else {
    setConstantName(result);
  }
  return result;
}
// Returns an existing matching global or creates a fresh one.
ir::Value spv::Context::getOrCreateGlobal(
    ir::spv::Op op, std::span<const ir::Operand> operands) {
  auto existing = findGlobal(op, operands);
  return existing ? existing : createGlobal(op, operands);
}
// Resolves an operand to an ir::Value: pass-through for value operands,
// otherwise a constant is created. An explicit `type` overrides the default
// type inferred from the literal kind; aborts on unsupported operand kinds.
ir::Value spv::Context::getOperandValue(const ir::Operand &op,
                                        ir::Value type) {
  if (auto direct = op.getAsValue()) {
    return direct;
  }
  auto makeConstant = [&](auto literal, ir::Value defaultType) {
    return getOrCreateConstant(type ? type : defaultType, literal);
  };
  if (auto i32 = op.getAsInt32()) {
    return makeConstant(*i32, getTypeSInt32());
  }
  if (auto i64 = op.getAsInt64()) {
    return makeConstant(*i64, getTypeSInt64());
  }
  if (auto f32 = op.getAsFloat()) {
    return makeConstant(*f32, getTypeFloat32());
  }
  if (auto f64 = op.getAsDouble()) {
    return makeConstant(*f64, getTypeFloat64());
  }
  if (auto b = op.getAsBool()) {
    return makeConstant(*b, getTypeBool());
  }
  std::abort();
}
void spv::Context::createPerVertex() {
if (perVertex != nullptr) {
return;
}
auto loc = rootLocation;
auto float32 = getTypeFloat32();
auto arr1Float = getTypeArray(float32, getIndex(1));
auto float32x4 = getTypeVector(float32, 4);
auto gl_PerVertexStructT =
getTypeStruct(float32x4, float32, arr1Float, arr1Float);
auto gl_PerVertexPtrT =
getTypePointer(ir::spv::StorageClass::Output, gl_PerVertexStructT);
auto annotations =
Builder::createAppend(*this, layout.getOrCreateAnnotations(*this));
annotations.createSpvDecorate(loc, gl_PerVertexStructT,
ir::spv::Decoration::Block());
annotations.createSpvMemberDecorate(
loc, gl_PerVertexStructT, 0,
ir::spv::Decoration::BuiltIn(ir::spv::BuiltIn::Position));
annotations.createSpvMemberDecorate(
loc, gl_PerVertexStructT, 1,
ir::spv::Decoration::BuiltIn(ir::spv::BuiltIn::PointSize));
annotations.createSpvMemberDecorate(
loc, gl_PerVertexStructT, 2,
ir::spv::Decoration::BuiltIn(ir::spv::BuiltIn::ClipDistance));
annotations.createSpvMemberDecorate(
loc, gl_PerVertexStructT, 3,
ir::spv::Decoration::BuiltIn(ir::spv::BuiltIn::CullDistance));
auto globals = Builder::createAppend(*this, layout.getOrCreateGlobals(*this));
perVertex = globals.createSpvVariable(loc, gl_PerVertexPtrT,
ir::spv::StorageClass::Output);
}
// Creates a StorageBuffer-class variable of `structType` bound to the given
// descriptor set/binding and returns it.
ir::Value spv::Context::createUniformBuffer(int descriptorSet, int binding,
                                            ir::Value structType) {
  auto loc = getUnknownLocation();
  auto globals = Builder::createAppend(*this, layout.getOrCreateGlobals(*this));
  auto annotations =
      Builder::createAppend(*this, layout.getOrCreateAnnotations(*this));

  auto storageClass = ir::spv::StorageClass::StorageBuffer;
  auto pointerType = globals.createSpvTypePointer(loc, storageClass, structType);
  auto variable = globals.createSpvVariable(loc, pointerType, storageClass);

  annotations.createSpvDecorate(
      loc, variable, ir::spv::Decoration::DescriptorSet(descriptorSet));
  annotations.createSpvDecorate(loc, variable,
                                ir::spv::Decoration::Binding(binding));
  annotations.createSpvDecorate(loc, variable,
                                ir::spv::Decoration::Uniform());
  return variable;
}
// Creates a buffer variable whose content is a single runtime array of
// `elementType` wrapped in a Block-decorated struct, bound to the given
// descriptor set/binding.
ir::Value spv::Context::createRuntimeArrayUniformBuffer(
    int descriptorSet, int binding, ir::Value elementType) {
  auto loc = getUnknownLocation();
  auto globals = Builder::createAppend(*this, layout.getOrCreateGlobals(*this));
  auto annotations =
      Builder::createAppend(*this, layout.getOrCreateAnnotations(*this));

  auto runtimeArray = globals.createSpvTypeRuntimeArray(loc, elementType);
  // ArrayStride is expressed in bytes; width() reports bits.
  auto strideBytes = shader::spv::getTypeInfo(elementType).width() / 8;
  annotations.createSpvDecorate(
      loc, runtimeArray, ir::spv::Decoration::ArrayStride(strideBytes));

  auto wrapperStruct = globals.createSpvTypeStruct(loc, {{runtimeArray}});
  annotations.createSpvDecorate(loc, wrapperStruct,
                                ir::spv::Decoration::Block());
  annotations.createSpvMemberDecorate(loc, wrapperStruct, 0,
                                      ir::spv::Decoration::Offset(0));
  return createUniformBuffer(descriptorSet, binding, wrapperStruct);
}
// Returns the lazily-created vec4 Output variable decorated with
// Location `index`; subsequent calls return the cached variable.
ir::Value spv::Context::createOutput(ir::Location loc, int index) {
  auto &result = outputs[index];
  if (result == nullptr) {
    auto floatType = getTypeFloat32();
    auto float32x4Type = getTypeVector(floatType, 4);
    auto variableType =
        getTypePointer(ir::spv::StorageClass::Output, float32x4Type);
    auto globals =
        Builder::createAppend(*this, layout.getOrCreateGlobals(*this));
    auto annotations =
        Builder::createAppend(*this, layout.getOrCreateAnnotations(*this));
    auto variable = globals.createSpvVariable(loc, variableType,
                                              ir::spv::StorageClass::Output);
    annotations.createSpvDecorate(loc, variable,
                                  ir::spv::Decoration::Location(index));
    // setName emits the OpName into the debugs section, so the previously
    // unused local debugs builder was removed.
    setName(variable, "output" + std::to_string(index));
    result = variable;
  }
  return result;
}
// Returns the lazily-created vec4 Input variable decorated with
// Location `index`; subsequent calls return the cached variable.
ir::Value spv::Context::createInput(ir::Location loc, int index) {
  auto &result = inputs[index];
  if (result == nullptr) {
    auto floatType = getTypeFloat32();
    auto float32x4Type = getTypeVector(floatType, 4);
    auto variableType =
        getTypePointer(ir::spv::StorageClass::Input, float32x4Type);
    auto globals =
        Builder::createAppend(*this, layout.getOrCreateGlobals(*this));
    auto annotations =
        Builder::createAppend(*this, layout.getOrCreateAnnotations(*this));
    auto variable = globals.createSpvVariable(loc, variableType,
                                              ir::spv::StorageClass::Input);
    annotations.createSpvDecorate(loc, variable,
                                  ir::spv::Decoration::Location(index));
    // setName emits the OpName into the debugs section, so the previously
    // unused local debugs builder was removed.
    setName(variable, "input" + std::to_string(index));
    result = variable;
  }
  return result;
}
// Returns the lazily-created vec4 Input variable for interpolation attribute
// `attrId`, decorated with Location(attrId) and, depending on the flags,
// PerVertexKHR (array of 3 vec4, one per provoking vertex) or Flat.
//
// NOTE(review): this caches into the same `inputs` map as createInput, so an
// index used by both will collide — confirm callers keep the spaces disjoint.
// NOTE(review): the `perVertex` parameter shadows the `perVertex` member set
// by createPerVertex — confirm the shadowing is intentional.
ir::Value spv::Context::createAttr(ir::Location loc, int attrId,
                                   bool perVertex, bool flat) {
  auto &result = inputs[attrId];
  if (result == nullptr) {
    auto floatType = getTypeFloat32();
    auto float32x4Type = getTypeVector(floatType, 4);
    auto attrArrayType = getTypeArray(float32x4Type, imm32(3));
    auto variableType =
        getTypePointer(ir::spv::StorageClass::Input,
                       perVertex ? attrArrayType : float32x4Type);
    auto globals =
        Builder::createAppend(*this, layout.getOrCreateGlobals(*this));
    auto annotations =
        Builder::createAppend(*this, layout.getOrCreateAnnotations(*this));
    auto variable = globals.createSpvVariable(loc, variableType,
                                              ir::spv::StorageClass::Input);
    annotations.createSpvDecorate(loc, variable,
                                  ir::spv::Decoration::Location(attrId));
    if (perVertex) {
      annotations.createSpvDecorate(loc, variable,
                                    ir::spv::Decoration::PerVertexKHR());
    } else if (flat) {
      annotations.createSpvDecorate(loc, variable, ir::spv::Decoration::Flat());
    }
    // setName emits the OpName into the debugs section, so the previously
    // unused local debugs builder was removed.
    setName(variable, "attr" + std::to_string(attrId));
    result = variable;
  }
  return result;
}

View file

@ -0,0 +1,71 @@
#include "SpvTypeInfo.hpp"
#include "dialect.hpp"
using namespace shader;
// Computes a flattened TypeInfo descriptor (base opcode, component opcode,
// component bit width, component count, signedness) for a SPIR-V type value.
// Unrecognized type opcodes yield a descriptor with zero width and count.
shader::spv::TypeInfo shader::spv::getTypeInfo(ir::Value type) {
  if (type == ir::spv::OpTypeBool) {
    // Booleans are reported as 1-bit scalars.
    return {
        .baseType = ir::spv::OpTypeBool,
        .componentWidth = 1,
        .componentsCount = 1,
    };
  }
  if (type == ir::spv::OpTypeInt) {
    // Operand 0 is the bit width, operand 1 the signedness flag.
    return {
        .baseType = ir::spv::OpTypeInt,
        .componentWidth = *type.getOperand(0).getAsInt32(),
        .componentsCount = 1,
        .isSigned = *type.getOperand(1).getAsInt32() ? true : false,
    };
  }
  if (type == ir::spv::OpTypeFloat) {
    return {
        .baseType = ir::spv::OpTypeFloat,
        .componentWidth = *type.getOperand(0).getAsInt32(),
        .componentsCount = 1,
    };
  }
  if (type == ir::spv::OpTypeVector) {
    // Components inherit the scalar element's base type and total width.
    auto componentInfo = getTypeInfo(type.getOperand(0).getAsValue());
    return {
        .baseType = ir::spv::OpTypeVector,
        .componentType = componentInfo.baseType,
        .componentWidth = componentInfo.width(),
        .componentsCount = *type.getOperand(1).getAsInt32(),
    };
  }
  if (type == ir::spv::OpTypeArray) {
    auto elementInfo = getTypeInfo(type.getOperand(0).getAsValue());
    // NOTE(review): assumes the length operand is an OpConstant whose
    // operand 1 holds an int32 literal — a spec-constant length would
    // dereference null here; confirm callers guarantee this.
    auto countOfElements = type.getOperand(1).getAsValue();
    return {
        .baseType = ir::spv::OpTypeArray,
        .componentType = elementInfo.baseType,
        .componentWidth = elementInfo.width(),
        .componentsCount = *countOfElements.getOperand(1).getAsInt32(),
    };
  }
  if (type == ir::spv::OpTypeRuntimeArray) {
    // Runtime arrays have no static length; report a single element.
    auto elementInfo = getTypeInfo(type.getOperand(0).getAsValue());
    return {
        .baseType = ir::spv::OpTypeRuntimeArray,
        .componentType = elementInfo.baseType,
        .componentWidth = elementInfo.width(),
        .componentsCount = 1,
    };
  }
  // Fallback for unhandled type opcodes: keep the opcode, no size info.
  return {
      .baseType = static_cast<ir::spv::Op>(type.getOp()),
      .componentWidth = 0,
      .componentsCount = 0,
  };
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,688 @@
#include "eval.hpp"
#include "dialect.hpp"
#include "ir.hpp"
#include <cmath>
#include <concepts>
using namespace shader;
// Evaluates the optional filter `Cond` against a set of (decayed) argument
// types; a void Cond accepts everything. Cond is a stateless lambda type
// invoked with default-constructed prototypes of the argument types.
template <typename Cond, typename... Args> consteval bool testVisitCond() {
  if constexpr (std::is_same_v<Cond, void>) {
    return true;
  } else {
    return Cond{}(std::remove_cvref_t<Args>{}...);
  }
};
// Evaluates `Cond` against the U-th alternative of eval::Value's storage
// variant; indices past the end are rejected, a void Cond accepts any
// in-range alternative.
template <typename Cond, std::size_t U> consteval bool testVisitCond() {
  if constexpr (U >= eval::Value::StorageSize) {
    return false;
  } else if constexpr (std::is_same_v<Cond, void>) {
    return true;
  } else {
    return Cond{}(std::variant_alternative_t<U, eval::Value::Storage>{});
  }
};
// Invokes `fn` with the active alternative of `variant`'s storage, but only
// when that alternative satisfies `Cond`; returns an empty Value otherwise.
// Each instantiation dispatches a window of 64 alternatives and recurses for
// any remainder. Fix: the recursive tail call previously passed its
// arguments in (fn, variant) order, swapped relative to the (variant, fn)
// signature — latent until StorageSize exceeds 64.
template <typename Cond = void, size_t I = 0>
constexpr eval::Value visitImpl(const eval::Value &variant, auto &&fn) {
#define DEFINE_CASE(N) \
  case I + N: \
    if constexpr (testVisitCond<Cond, I + N>()) { \
      return std::forward<decltype(fn)>(fn)(std::get<I + N>(variant.storage)); \
    } else { \
      return {}; \
    }
  switch (variant.storage.index()) {
    DEFINE_CASE(0);
    DEFINE_CASE(1);
    DEFINE_CASE(2);
    DEFINE_CASE(3);
    DEFINE_CASE(4);
    DEFINE_CASE(5);
    DEFINE_CASE(6);
    DEFINE_CASE(7);
    DEFINE_CASE(8);
    DEFINE_CASE(9);
    DEFINE_CASE(10);
    DEFINE_CASE(11);
    DEFINE_CASE(12);
    DEFINE_CASE(13);
    DEFINE_CASE(14);
    DEFINE_CASE(15);
    DEFINE_CASE(16);
    DEFINE_CASE(17);
    DEFINE_CASE(18);
    DEFINE_CASE(19);
    DEFINE_CASE(20);
    DEFINE_CASE(21);
    DEFINE_CASE(22);
    DEFINE_CASE(23);
    DEFINE_CASE(24);
    DEFINE_CASE(25);
    DEFINE_CASE(26);
    DEFINE_CASE(27);
    DEFINE_CASE(28);
    DEFINE_CASE(29);
    DEFINE_CASE(30);
    DEFINE_CASE(31);
    DEFINE_CASE(32);
    DEFINE_CASE(33);
    DEFINE_CASE(34);
    DEFINE_CASE(35);
    DEFINE_CASE(36);
    DEFINE_CASE(37);
    DEFINE_CASE(38);
    DEFINE_CASE(39);
    DEFINE_CASE(40);
    DEFINE_CASE(41);
    DEFINE_CASE(42);
    DEFINE_CASE(43);
    DEFINE_CASE(44);
    DEFINE_CASE(45);
    DEFINE_CASE(46);
    DEFINE_CASE(47);
    DEFINE_CASE(48);
    DEFINE_CASE(49);
    DEFINE_CASE(50);
    DEFINE_CASE(51);
    DEFINE_CASE(52);
    DEFINE_CASE(53);
    DEFINE_CASE(54);
    DEFINE_CASE(55);
    DEFINE_CASE(56);
    DEFINE_CASE(57);
    DEFINE_CASE(58);
    DEFINE_CASE(59);
    DEFINE_CASE(60);
    DEFINE_CASE(61);
    DEFINE_CASE(62);
    DEFINE_CASE(63);
  }
#undef DEFINE_CASE

  constexpr auto NextIndex = I + 64;
  if constexpr (NextIndex < eval::Value::StorageSize) {
    // Arguments must stay in (variant, fn) order to match the signature.
    return visitImpl<Cond, NextIndex>(std::forward<decltype(variant)>(variant),
                                      std::forward<decltype(fn)>(fn));
  }
  return {};
}
// Invokes `cb` with a default-constructed prototype of the C++ scalar type
// corresponding to a SPIR-V scalar type (bool, s/u int 8-64, float 16-64),
// subject to `Cond`; returns an empty Value for unsupported types/widths.
template <typename Cond = void, typename Cb>
constexpr eval::Value visitScalarType(ir::Value type, Cb &&cb)
  requires requires {
    { std::forward<Cb>(cb)(int{}) } -> std::same_as<eval::Value>;
  }
{
  auto dispatch = [&](auto prototype) -> eval::Value {
    if constexpr (testVisitCond<Cond,
                                std::remove_cvref_t<decltype(prototype)>>()) {
      return std::forward<Cb>(cb)(prototype);
    }
    return {};
  };

  if (type == ir::spv::OpTypeBool) {
    return dispatch(bool{});
  }

  if (type == ir::spv::OpTypeInt) {
    bool isSigned = *type.getOperand(1).getAsInt32() != 0;
    switch (*type.getOperand(0).getAsInt32()) {
    case 8:
      return isSigned ? dispatch(std::int8_t{}) : dispatch(std::uint8_t{});
    case 16:
      return isSigned ? dispatch(std::int16_t{}) : dispatch(std::uint16_t{});
    case 32:
      return isSigned ? dispatch(std::int32_t{}) : dispatch(std::uint32_t{});
    case 64:
      return isSigned ? dispatch(std::int64_t{}) : dispatch(std::uint64_t{});
    default:
      return {};
    }
  }

  if (type == ir::spv::OpTypeFloat) {
    switch (*type.getOperand(0).getAsInt32()) {
    case 16:
      return dispatch(shader::float16_t{});
    case 32:
      return dispatch(shader::float32_t{});
    case 64:
      return dispatch(shader::float64_t{});
    default:
      return {};
    }
  }

  return {};
}
// Like visitScalarType, but additionally handles 2/3/4-component vector
// types by invoking `cb` with a shader::Vector prototype; other composite
// types yield an empty Value.
template <typename Cond = void, typename Cb>
constexpr eval::Value visitType(ir::Value type, Cb &&cb)
  requires requires {
    { std::forward<Cb>(cb)(int{}) } -> std::same_as<eval::Value>;
  }
{
  // Scalars are handled by the dedicated scalar visitor.
  if (type == ir::spv::OpTypeBool || type == ir::spv::OpTypeInt ||
      type == ir::spv::OpTypeFloat) {
    return visitScalarType<Cond>(type, cb);
  }

  auto dispatch = [&](auto prototype) -> eval::Value {
    if constexpr (testVisitCond<Cond,
                                std::remove_cvref_t<decltype(prototype)>>()) {
      return std::forward<Cb>(cb)(prototype);
    } else {
      return {};
    }
  };

  if (type != ir::spv::OpTypeVector) {
    return {};
  }

  auto scalarType = type.getOperand(0).getAsValue();
  switch (*type.getOperand(1).getAsInt32()) {
  case 2:
    return visitScalarType(scalarType, [&]<typename T>(T) {
      return dispatch(shader::Vector<T, 2>{});
    });
  case 3:
    return visitScalarType(scalarType, [&]<typename T>(T) {
      return dispatch(shader::Vector<T, 3>{});
    });
  case 4:
    return visitScalarType(scalarType, [&]<typename T>(T) {
      return dispatch(shader::Vector<T, 4>{});
    });
  default:
    return {};
  }
}
// Visits the active alternative of `value`, filtering out the empty
// (nullptr_t) state and any alternative rejected by `Cond`.
template <typename Cond = void, typename Cb>
eval::Value visit(const eval::Value &value, Cb &&cb) {
  // Wrap the user condition so the empty state never matches.
  using VisitCond = decltype([](auto &&storage) {
    using T = std::remove_cvref_t<decltype(storage)>;
    if constexpr (std::is_same_v<T, std::nullptr_t>) {
      return false;
    } else {
      return testVisitCond<Cond, T>();
    }
  });
  return visitImpl<VisitCond>(value, std::forward<Cb>(cb));
}
// Visits `value` only when the compile-time predicate `cond` holds.
// NOTE(review): `cond` must be constexpr-invocable with no arguments, and no
// caller is visible in this chunk — confirm this overload is still needed.
template <typename Cb>
eval::Value visit2(auto &&cond, const eval::Value &value, Cb &&cb) {
  if constexpr (cond()) {
    return visitImpl(value, std::forward<Cb>(cb));
  } else {
    return {};
  }
}
// Visits `value` and `type` together: first the stored value (filtered by
// ValueCond, never the empty state), then the SPIR-V type (filtered by
// TypeVisitCond), finally invoking `cb(type, value)` when the (type, value)
// pair passes TypeValueVisitCond.
template <typename ValueCond = void, typename TypeVisitCond = void,
          typename TypeValueVisitCond = void, typename Cb>
eval::Value visitWithType(const eval::Value &value, ir::Value type, Cb &&cb) {
  // Reject the empty (nullptr_t) state before applying ValueCond.
  using ValueVisitCond = decltype([](auto storage) {
    if constexpr (std::is_same_v<decltype(storage), std::nullptr_t>) {
      return false;
    } else {
      return testVisitCond<ValueCond, decltype(storage)>();
    }
  });
  return visitImpl<ValueVisitCond>(value, [&](auto &&value) -> eval::Value {
    return visitType<TypeVisitCond>(type, [&](auto type) -> eval::Value {
      if constexpr (testVisitCond<TypeValueVisitCond, decltype(type),
                                  decltype(value)>()) {
        return std::forward<Cb>(cb)(type, value);
      } else {
        return {};
      }
    });
  });
}
namespace {
// Maps a scalar, Vector, or std::array type to its element type.
template <typename T> struct ComponentTypeImpl {
  using type = T;
};
template <typename T, std::size_t N> struct ComponentTypeImpl<Vector<T, N>> {
  using type = T;
};
template <typename T, std::size_t N>
struct ComponentTypeImpl<std::array<T, N>> {
  using type = T;
};
// Signed counterpart of an integral type, distributing over Vector.
template <typename T> struct MakeSignedImpl {
  using type = std::make_signed_t<T>;
};
template <typename T, std::size_t N> struct MakeSignedImpl<Vector<T, N>> {
  using type = Vector<std::make_signed_t<T>, N>;
};
// Unsigned counterpart of an integral type, distributing over Vector.
template <typename T> struct MakeUnsignedImpl {
  using type = std::make_unsigned_t<T>;
};
template <typename T, std::size_t N> struct MakeUnsignedImpl<Vector<T, N>> {
  using type = Vector<std::make_unsigned_t<T>, N>;
};
} // namespace
// Convenience aliases over the trait structs above.
template <typename T> using ComponentType = typename ComponentTypeImpl<T>::type;
template <typename T> using MakeSigned = typename MakeSignedImpl<T>::type;
template <typename T> using MakeUnsigned = typename MakeUnsignedImpl<T>::type;
// Component count: 1 for scalars, N for Vector<T, N> / std::array<T, N>.
template <typename> constexpr std::size_t Components = 1;
template <typename T, std::size_t N>
constexpr std::size_t Components<Vector<T, N>> = N;
template <typename T, std::size_t N>
constexpr std::size_t Components<std::array<T, N>> = N;
// True only for std::array; used to exclude arrays from scalar/vector ops.
template <typename> constexpr bool IsArray = false;
template <typename T, std::size_t N>
constexpr bool IsArray<std::array<T, N>> = true;
// Builds a composite value of `type` from per-component constituents; fails
// (empty Value) when the count mismatches or a constituent has the wrong
// component type.
eval::Value
eval::Value::compositeConstruct(ir::Value type,
                                std::span<const eval::Value> constituents) {
  // Only multi-component types are eligible targets.
  using Cond =
      decltype([](auto type) { return Components<decltype(type)> > 1; });
  return visitType<Cond>(type, [&](auto prototype) -> Value {
    using ResultType = decltype(prototype);
    constexpr std::size_t N = Components<ResultType>;
    if (constituents.size() != N) {
      return {};
    }
    ResultType composite;
    for (std::size_t slot = 0; slot < N; ++slot) {
      auto element = constituents[slot].as<ComponentType<ResultType>>();
      if (!element) {
        return {};
      }
      composite[slot] = *element;
    }
    return composite;
  });
}
// Extracts the component at `index` from a multi-component value; fails on
// non-composites, non-integral indices, or out-of-range indices.
eval::Value eval::Value::compositeExtract(const Value &index) const {
  using Cond =
      decltype([](auto type) { return Components<decltype(type)> > 1; });
  auto extractIndex = index.zExtScalar();
  if (!extractIndex) {
    return {};
  }
  return visit<Cond>(*this, [&](auto &&composite) -> Value {
    constexpr std::size_t N =
        Components<std::remove_cvref_t<decltype(composite)>>;
    if (*extractIndex >= N) {
      return {};
    }
    return composite[*extractIndex];
  });
}
// Component-wise NaN test for non-array floating-point values; scalar in,
// bool out; vector in, bool-vector out.
eval::Value eval::Value::isNan() const {
  using Cond = decltype([](auto type) {
    return std::is_floating_point_v<ComponentType<decltype(type)>> &&
           !IsArray<decltype(type)>;
  });
  return visit<Cond>(*this, [](auto &&value) -> Value {
    using ValueType = std::remove_cvref_t<decltype(value)>;
    constexpr std::size_t N = Components<ValueType>;
    if constexpr (N == 1) {
      return std::isnan(value);
    } else {
      Vector<bool, N> flags;
      for (std::size_t lane = 0; lane < N; ++lane) {
        flags[lane] = std::isnan(value[lane]);
      }
      return flags;
    }
  });
}
// Component-wise infinity test for non-array floating-point values.
eval::Value eval::Value::isInf() const {
  using Cond = decltype([](auto type) {
    return std::is_floating_point_v<ComponentType<decltype(type)>> &&
           !IsArray<decltype(type)>;
  });
  return visit<Cond>(*this, [](auto &&value) -> Value {
    using ValueType = std::remove_cvref_t<decltype(value)>;
    constexpr std::size_t N = Components<ValueType>;
    if constexpr (N == 1) {
      return std::isinf(value);
    } else {
      Vector<bool, N> flags;
      for (std::size_t lane = 0; lane < N; ++lane) {
        flags[lane] = std::isinf(value[lane]);
      }
      return flags;
    }
  });
}
// Component-wise finiteness test for non-array floating-point values.
// Fix: added the !IsArray guard that the sibling isNan()/isInf() predicates
// already apply, so std::array alternatives are rejected consistently.
eval::Value eval::Value::isFinite() const {
  using Cond = decltype([](auto type) {
    return std::is_floating_point_v<ComponentType<decltype(type)>> &&
           !IsArray<decltype(type)>;
  });
  return visit<Cond>(*this, [](auto &&value) -> Value {
    constexpr std::size_t N = Components<std::remove_cvref_t<decltype(value)>>;
    if constexpr (N == 1) {
      return std::isfinite(value);
    } else {
      Vector<bool, N> result;
      for (std::size_t i = 0; i < N; ++i) {
        result[i] = std::isfinite(value[i]);
      }
      return result;
    }
  });
}
// Reinterprets a non-bool integral scalar/vector as its unsigned
// counterpart via component-wise static_cast.
eval::Value eval::Value::makeUnsigned() const {
  using Cond = decltype([](auto type) {
    return std::is_integral_v<ComponentType<decltype(type)>> &&
           !std::is_same_v<ComponentType<decltype(type)>, bool> &&
           !IsArray<decltype(type)>;
  });
  return visit<Cond>(*this, [](auto &&value) -> Value {
    using ValueType = std::remove_cvref_t<decltype(value)>;
    using Element = std::make_unsigned_t<ComponentType<ValueType>>;
    constexpr std::size_t N = Components<ValueType>;
    if constexpr (N == 1) {
      return static_cast<Element>(value);
    } else {
      Vector<Element, N> converted;
      for (std::size_t lane = 0; lane < N; ++lane) {
        converted[lane] = static_cast<Element>(value[lane]);
      }
      return converted;
    }
  });
}
// Reinterprets a non-bool integral scalar/vector as its signed counterpart
// via component-wise static_cast.
eval::Value eval::Value::makeSigned() const {
  using Cond = decltype([](auto type) {
    return std::is_integral_v<ComponentType<decltype(type)>> &&
           !std::is_same_v<ComponentType<decltype(type)>, bool> &&
           !IsArray<decltype(type)>;
  });
  return visit<Cond>(*this, [](auto &&value) -> Value {
    using ValueType = std::remove_cvref_t<decltype(value)>;
    using Element = std::make_signed_t<ComponentType<ValueType>>;
    constexpr std::size_t N = Components<ValueType>;
    if constexpr (N == 1) {
      return static_cast<Element>(value);
    } else {
      Vector<Element, N> converted;
      for (std::size_t lane = 0; lane < N; ++lane) {
        converted[lane] = static_cast<Element>(value[lane]);
      }
      return converted;
    }
  });
}
// OpAll: true iff every lane of a boolean vector is true; defined only for
// bool vectors with more than one component.
eval::Value eval::Value::all() const {
  using Cond = decltype([](auto type) {
    return std::is_same_v<ComponentType<decltype(type)>, bool> &&
           (Components<decltype(type)> > 1) && !IsArray<decltype(type)>;
  });
  return visit<Cond>(*this, [](auto &&flags) {
    constexpr std::size_t N = Components<std::remove_cvref_t<decltype(flags)>>;
    bool conjunction = true;
    for (std::size_t lane = 0; lane < N && conjunction; ++lane) {
      conjunction = flags[lane];
    }
    return conjunction;
  });
}
// OpAny: true iff at least one lane of a boolean vector is true; defined
// only for bool vectors with more than one component.
eval::Value eval::Value::any() const {
  using Cond = decltype([](auto type) {
    return std::is_same_v<ComponentType<decltype(type)>, bool> &&
           (Components<decltype(type)> > 1) && !IsArray<decltype(type)>;
  });
  return visit<Cond>(*this, [](auto &&flags) {
    constexpr std::size_t N = Components<std::remove_cvref_t<decltype(flags)>>;
    bool disjunction = false;
    for (std::size_t lane = 0; lane < N && !disjunction; ++lane) {
      disjunction = flags[lane];
    }
    return disjunction;
  });
}
// OpSelect: *this is the (scalar or vector) bool condition; returns the
// matching components of trueValue/falseValue. Both operands must have the
// condition's component count and identical types.
// Fixes: `FalseCond` was defined but never passed to `visit`; the vector
// result was declared `Vector<bool, N>` instead of the operand type, which
// truncated non-bool vector selects to booleans.
eval::Value eval::Value::select(const Value &trueValue,
                                const Value &falseValue) const {
  using Cond = decltype([](auto type) consteval {
    return std::is_same_v<ComponentType<decltype(type)>, bool> &&
           !IsArray<decltype(type)>;
  });
  return visit<Cond>(*this, [&](auto &&cond) -> Value {
    using CondType = std::remove_cvref_t<decltype(cond)>;
    using TrueCond = decltype([](auto type) consteval {
      return Components<decltype(type)> == Components<CondType>;
    });
    return visit<TrueCond>(trueValue, [&](auto &&trueValue) {
      using TrueValue = std::remove_cvref_t<decltype(trueValue)>;
      // Restrict the false-value visit to the true value's exact type.
      using FalseCond = decltype([](auto type) {
        return std::is_same_v<TrueValue, std::remove_cvref_t<decltype(type)>>;
      });
      return visit<FalseCond>(falseValue, [&](auto &&falseValue) -> Value {
        if constexpr (std::is_same_v<TrueValue, std::remove_cvref_t<
                                                    decltype(falseValue)>>) {
          constexpr std::size_t N = Components<CondType>;
          if constexpr (N == 1) {
            return cond ? trueValue : falseValue;
          } else {
            // The result has the operands' type, selected lane by lane.
            TrueValue result;
            for (std::size_t i = 0; i < N; ++i) {
              result[i] = cond[i] ? trueValue[i] : falseValue[i];
            }
            return result;
          }
        } else {
          return {};
        }
      });
    });
  });
}
// OpSConvert/OpUConvert: integer width conversion. Both the source value and
// the destination type must be non-bool, non-array integral scalars/vectors;
// the two types must differ and have matching component counts. `isSigned`
// selects sign- vs zero-extension by routing the cast through the signed or
// unsigned counterpart of the source type.
eval::Value eval::Value::iConvert(ir::Value type, bool isSigned) const {
  using Cond = decltype([](auto type) {
    using Type = std::remove_cvref_t<decltype(type)>;
    return std::is_integral_v<ComponentType<Type>> &&
           !std::is_same_v<bool, ComponentType<Type>> && !IsArray<decltype(type)>;
  });
  using PairCond = decltype([](auto lhs, auto rhs) {
    using Lhs = decltype(lhs);
    using Rhs = decltype(rhs);
    return !std::is_same_v<Lhs, Rhs> && Components<Lhs> == Components<Rhs>;
  });
  return visitWithType<Cond, Cond, PairCond>(
      *this, type, [&](auto type, auto &&value) -> Value {
        using Type = std::remove_cvref_t<decltype(type)>;
        using ValueType = std::remove_cvref_t<decltype(value)>;
        if (isSigned) {
          return static_cast<Type>(static_cast<MakeSigned<ValueType>>(value));
        } else {
          return static_cast<Type>(static_cast<MakeUnsigned<ValueType>>(value));
        }
      });
}
// OpFConvert: floating-point width conversion. Fix: `Cond` (float-only,
// non-array filter) was defined but never passed to visitWithType
// (`<void, void, PairCond>`), unlike the analogous iConvert, so non-float
// operands slipped through; it is now applied to both value and type.
eval::Value eval::Value::fConvert(ir::Value type) const {
  using Cond = decltype([](auto type) {
    return std::is_floating_point_v<ComponentType<decltype(type)>> &&
           !IsArray<decltype(type)>;
  });
  using PairCond = decltype([](auto lhs, auto rhs) {
    using Lhs = decltype(lhs);
    using Rhs = decltype(rhs);
    return !std::is_same_v<Lhs, Rhs> && Components<Lhs> == Components<Rhs>;
  });
  return visitWithType<Cond, Cond, PairCond>(
      *this, type, [&](auto type, auto &&value) -> Value {
        using Type = std::remove_cvref_t<decltype(type)>;
        return static_cast<Type>(value);
      });
}
// OpBitcast: reinterprets the stored value's bits as the destination type;
// only size-matching (type, value) pairs are considered. Fix: removed the
// unused `using Type` alias inside the condition lambda.
eval::Value eval::Value::bitcast(ir::Value type) const {
  using Cond = decltype([](auto type, auto value) {
    return sizeof(type) == sizeof(value);
  });
  return visitWithType<void, void, Cond>(
      *this, type, [](auto type, auto &&value) -> Value {
        return std::bit_cast<decltype(type)>(value);
      });
}
std::optional<std::uint64_t> eval::Value::zExtScalar() const {
using Cond = decltype([](auto type) {
return std::is_integral_v<ComponentType<decltype(type)>> &&
!std::is_same_v<ComponentType<decltype(type)>, bool> &&
Components<decltype(type)> == 1 && !IsArray<decltype(type)>;
});
auto result = visit<Cond>(*this, [&](auto value) -> Value {
return static_cast<std::uint64_t>(
static_cast<MakeUnsigned<decltype(value)>>(value));
});
if (result) {
return result.as<std::uint64_t>();
}
return {};
}
std::optional<std::int64_t> eval::Value::sExtScalar() const {
using Cond = decltype([](auto type) {
return std::is_integral_v<ComponentType<decltype(type)>> &&
!std::is_same_v<ComponentType<decltype(type)>, bool> &&
Components<decltype(type)> == 1 && !IsArray<decltype(type)>;
});
auto result = visit<Cond>(*this, [&](auto value) -> Value {
return static_cast<std::int64_t>(
static_cast<MakeSigned<decltype(value)>>(value));
});
if (result) {
return result.as<std::int64_t>();
}
return {};
}
// Defines eval::Value::operator OP(const Value &rhs). Both operands' runtime
// payloads are visited; the `requires` clauses filter instantiation down to
// the lhs/rhs type combinations for which `lhs OP rhs` is well formed and
// convertible to Value, so ill-formed pairings never compile.
#define DEFINE_BINARY_OP(OP) \
  eval::Value eval::Value::operator OP(const Value & rhs) const { \
    using LhsCond = decltype([](auto &&lhs) { \
      return requires { static_cast<Value>(lhs OP rhs); }; \
    }); \
    return visit<LhsCond>(*this, [&]<typename Lhs>(Lhs &&lhs) -> Value { \
      using RhsCond = decltype([](auto &&rhs) { \
        return requires(Lhs lhs) { static_cast<Value>(lhs OP rhs); }; \
      }); \
      return visit<RhsCond>(rhs, [&](auto &&rhs) -> Value { \
        return static_cast<Value>(lhs OP rhs); \
      }); \
    }); \
  }
// Defines eval::Value::operator OP() (unary). The visited payload type is
// accepted only when `OP rhs` is well formed and convertible to Value.
#define DEFINE_UNARY_OP(OP) \
  eval::Value eval::Value::operator OP() const { \
    using Cond = decltype([](auto rhs) { \
      return requires { static_cast<Value>(OP rhs); }; \
    }); \
    return visit<Cond>(*this, [&](auto &&rhs) -> Value { \
      return static_cast<Value>(OP rhs); \
    }); \
  }
// Instantiate the operator definitions. The trailing `;` after each
// expansion is a harmless empty declaration at namespace scope.
// Arithmetic.
DEFINE_BINARY_OP(+);
DEFINE_BINARY_OP(-);
DEFINE_BINARY_OP(*);
DEFINE_BINARY_OP(/);
DEFINE_BINARY_OP(%);
// Bitwise and shifts.
DEFINE_BINARY_OP(&);
DEFINE_BINARY_OP(|);
DEFINE_BINARY_OP(^);
DEFINE_BINARY_OP(>>);
DEFINE_BINARY_OP(<<);
// Logical.
DEFINE_BINARY_OP(&&);
DEFINE_BINARY_OP(||);
// Comparisons.
DEFINE_BINARY_OP(<);
DEFINE_BINARY_OP(>);
DEFINE_BINARY_OP(<=);
DEFINE_BINARY_OP(>=);
DEFINE_BINARY_OP(==);
DEFINE_BINARY_OP(!=);
// Unary.
DEFINE_UNARY_OP(-);
DEFINE_UNARY_OP(~);
DEFINE_UNARY_OP(!);

Some files were not shown because too many files have changed in this diff Show more