diff --git a/CMakeLists.txt b/CMakeLists.txt index 0ef228523..517fd2e0c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,3 +7,4 @@ set(CMAKE_CXX_STANDARD 23) add_subdirectory(3rdparty/crypto) add_subdirectory(orbis-kernel) add_subdirectory(rpcsx-os) +add_subdirectory(hw/amdgpu) diff --git a/hw/amdgpu/CMakeLists.txt b/hw/amdgpu/CMakeLists.txt new file mode 100644 index 000000000..9bcd11691 --- /dev/null +++ b/hw/amdgpu/CMakeLists.txt @@ -0,0 +1,17 @@ +cmake_minimum_required(VERSION 3.10) + +set(CMAKE_CXX_STANDARD 23) +set(CMAKE_CXX_EXTENSIONS off) + +add_subdirectory(bridge) +add_subdirectory(device) +add_subdirectory(shader) +add_subdirectory(lib/libspirv) + +project(amdgpu) + +add_library(${PROJECT_NAME} INTERFACE) +target_include_directories(${PROJECT_NAME} INTERFACE include) + +add_library(amdgpu::base ALIAS ${PROJECT_NAME}) + diff --git a/hw/amdgpu/bridge/CMakeLists.txt b/hw/amdgpu/bridge/CMakeLists.txt new file mode 100644 index 000000000..f6762e22f --- /dev/null +++ b/hw/amdgpu/bridge/CMakeLists.txt @@ -0,0 +1,16 @@ +project(libamdgpu-bridge) +set(PROJECT_PATH amdgpu/bridge) + +set(INCLUDE + include/${PROJECT_PATH}/bridge.hpp +) + +set(SRC + src/bridge.cpp +) + +add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC}) +target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH}) +set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "") +add_library(amdgpu::bridge ALIAS ${PROJECT_NAME}) +set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON) diff --git a/hw/amdgpu/bridge/include/amdgpu/bridge/bridge.hpp b/hw/amdgpu/bridge/include/amdgpu/bridge/bridge.hpp new file mode 100644 index 000000000..1f017374b --- /dev/null +++ b/hw/amdgpu/bridge/include/amdgpu/bridge/bridge.hpp @@ -0,0 +1,256 @@ +#pragma once + +#include +#include +#include + +namespace amdgpu::bridge { +enum class CommandId : std::uint32_t { + Nop, + SetUpSharedMemory, + ProtectMemory, + CommandBuffer, + Flip, + DoFlip, + SetBuffer +}; + +struct CmdMemoryProt { + std::uint64_t address; + std::uint64_t size; + std::uint32_t prot; +}; + +struct CmdCommandBuffer { + std::uint64_t queue; + std::uint64_t address; + std::uint64_t size; +}; + +struct CmdBuffer { + std::uint32_t bufferIndex; + std::uint32_t width; + std::uint32_t height; + std::uint32_t pitch; + std::uint64_t address; + std::uint32_t pixelFormat; + std::uint32_t tilingMode; +}; + +struct CmdFlip { + std::uint32_t bufferIndex; + std::uint64_t arg; +}; + +struct BridgeHeader { + std::uint64_t size; + std::uint64_t info; + std::uint32_t pullerPid; + std::uint32_t pusherPid; + volatile std::uint64_t flags; + std::uint64_t vmAddress; + std::uint64_t vmSize; + char vmName[32]; + volatile std::uint32_t flipBuffer; + volatile std::uint64_t flipArg; + volatile std::uint64_t flipCount; + std::uint32_t memoryAreaCount; + std::uint32_t commandBufferCount; + std::uint32_t bufferCount; + CmdMemoryProt memoryAreas[128]; + CmdCommandBuffer commandBuffers[32]; + CmdBuffer buffers[8]; + + volatile std::uint64_t pull; + volatile std::uint64_t push; + std::uint64_t commands[]; +}; + +struct Command { + CommandId id; + + union { + CmdMemoryProt memoryProt; + CmdCommandBuffer commandBuffer; + CmdBuffer buffer; + CmdFlip flip; + }; +}; + +enum class BridgeFlags { + VmConfigured = 1 << 0, + PushLock = 1 << 1, + PullLock = 1 << 2, +}; + +class BridgePusher { + BridgeHeader *buffer = nullptr; + +public: + BridgePusher() = default; + BridgePusher(BridgeHeader *buffer) : buffer(buffer) {} + + void setVm(std::uint64_t address, std::uint64_t size, const char *name) { + buffer->vmAddress = address; + buffer->vmSize = size; + std::strncpy(buffer->vmName, name, sizeof(buffer->vmName)); + buffer->flags |= static_cast(BridgeFlags::VmConfigured); + } + + void sendMemoryProtect(std::uint64_t address, std::uint64_t size, + std::uint32_t prot) { + sendCommand(CommandId::ProtectMemory, {address, size, prot}); + } + + void sendCommandBuffer(std::uint64_t queue, std::uint64_t address, + std::uint64_t size) { + sendCommand(CommandId::CommandBuffer, {queue, address, size}); + } + + void sendSetBuffer(std::uint32_t bufferIndex, std::uint64_t address, + std::uint32_t width, std::uint32_t height, + std::uint32_t pitch, std::uint32_t pixelFormat, + std::uint32_t tilingMode) { + sendCommand(CommandId::SetBuffer, + {static_cast(bufferIndex) << 32 | tilingMode, + address, static_cast(width) << 32 | height, + static_cast(pitch) << 32 | pixelFormat}); + } + + void sendFlip(std::uint32_t bufferIndex, std::uint64_t arg) { + sendCommand(CommandId::Flip, {bufferIndex, arg}); + } + + void sendDoFlip() { sendCommand(CommandId::DoFlip, {}); } + + void wait() { + while (buffer->pull != buffer->push) + ; + } + +private: + static std::uint64_t makeCommandHeader(CommandId id, std::size_t cmdSize) { + return static_cast(id) | + (static_cast(cmdSize - 1) << 32); + } + + void sendCommand(CommandId id, std::initializer_list args) { + std::size_t cmdSize = args.size() + 1; + std::uint64_t pos = getPushPosition(cmdSize); + + buffer->commands[pos++] = makeCommandHeader(CommandId::Flip, cmdSize); + for (auto arg : args) { + buffer->commands[pos++] = arg; + } + buffer->push = pos; + } + + std::uint64_t getPushPosition(std::uint64_t cmdSize) { + std::uint64_t position = buffer->push; + + if (position + cmdSize > buffer->size) { + if (position < buffer->size) { + buffer->commands[position] = + static_cast(CommandId::Nop) | + ((buffer->size - position - 1) << 32); + } + + position = 0; + waitPuller(cmdSize); + } + + return position; + } + void waitPuller(std::uint64_t pullValue) { + while (buffer->pull < pullValue) { + ; + } + } +}; + +class BridgePuller { + BridgeHeader *buffer = nullptr; + +public: + BridgePuller() = default; + BridgePuller(BridgeHeader *buffer) : buffer(buffer) {} + + std::size_t pullCommands(Command *commands, std::size_t maxCount) { + std::size_t processed = 0; + + while (processed < maxCount) { + if (buffer->pull == buffer->push) { + break; + } + + auto pos = buffer->pull; + auto cmd = buffer->commands[pos]; + CommandId cmdId = static_cast(cmd); + std::uint32_t argsCount = cmd >> 32; + + if (cmdId != CommandId::Nop) { + commands[processed++] = + unpackCommand(cmdId, buffer->commands + pos + 1, argsCount); + } + + auto newPull = pos + argsCount + 1; + + if (newPull >= buffer->size) { + newPull = 0; + } + + buffer->pull = newPull; + } + + return processed; + } + +private: + Command unpackCommand(CommandId command, const std::uint64_t *args, + std::uint32_t argsCount) { + Command result; + result.id = command; + + switch (command) { + case CommandId::Nop: + case CommandId::SetUpSharedMemory: + case CommandId::DoFlip: + return result; + + case CommandId::ProtectMemory: + result.memoryProt.address = args[0]; + result.memoryProt.size = args[1]; + result.memoryProt.prot = args[2]; + return result; + + case CommandId::CommandBuffer: + result.commandBuffer.queue = args[0]; + result.commandBuffer.address = args[1]; + result.commandBuffer.size = args[2]; + return result; + + case CommandId::Flip: + result.flip.bufferIndex = args[0]; + result.flip.arg = args[1]; + return result; + + case CommandId::SetBuffer: + result.buffer.bufferIndex = static_cast(args[0] >> 32); + result.buffer.address = args[1]; + result.buffer.width = static_cast(args[2] >> 32); + result.buffer.height = static_cast(args[2]); + result.buffer.pitch = static_cast(args[3] >> 32); + result.buffer.pixelFormat = static_cast(args[3]); + result.buffer.tilingMode = static_cast(args[0]); + return result; + } + + __builtin_trap(); + } +}; + +BridgeHeader *createShmCommandBuffer(const char *name); +BridgeHeader *openShmCommandBuffer(const char *name); +void destroyShmCommandBuffer(BridgeHeader *buffer); +void unlinkShm(const char *name); +} // namespace amdgpu::bridge diff --git a/hw/amdgpu/bridge/src/bridge.cpp b/hw/amdgpu/bridge/src/bridge.cpp new file mode 100644 index 000000000..d32c940af --- /dev/null +++ b/hw/amdgpu/bridge/src/bridge.cpp @@ -0,0 +1,81 @@ +#include "bridge.hpp" + +#include +#include +#include +#include + +static int gShmFd = -1; +static constexpr std::size_t kShmSize = sizeof(amdgpu::bridge::BridgeHeader) + + (sizeof(std::uint64_t) * (1024 * 1024)); +amdgpu::bridge::BridgeHeader * +amdgpu::bridge::createShmCommandBuffer(const char *name) { + if (gShmFd != -1) { + return nullptr; + } + + unlinkShm(name); + + int fd = ::shm_open(name, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR); + + if (fd == -1) { + return nullptr; + } + + if (ftruncate(fd, kShmSize) < 0) { + ::close(fd); + return nullptr; + } + + void *memory = + ::mmap(nullptr, kShmSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + if (memory == MAP_FAILED) { + ::close(fd); + return nullptr; + } + + gShmFd = fd; + auto result = new (memory) amdgpu::bridge::BridgeHeader(); + result->size = (kShmSize - sizeof(amdgpu::bridge::BridgeHeader)) / + sizeof(std::uint64_t); + return result; +} + +amdgpu::bridge::BridgeHeader * +amdgpu::bridge::openShmCommandBuffer(const char *name) { + if (gShmFd != -1) { + return nullptr; + } + + int fd = ::shm_open(name, O_RDWR, S_IRUSR | S_IWUSR); + + if (fd == -1) { + return nullptr; + } + + void *memory = + ::mmap(nullptr, kShmSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + if (memory == MAP_FAILED) { + ::close(fd); + return nullptr; + } + + gShmFd = fd; + return new (memory) amdgpu::bridge::BridgeHeader; +} + +void amdgpu::bridge::destroyShmCommandBuffer( + amdgpu::bridge::BridgeHeader *buffer) { + if (gShmFd == -1) { + __builtin_trap(); + } + + buffer->~BridgeHeader(); + ::close(gShmFd); + gShmFd = -1; + ::munmap(buffer, kShmSize); +} + +void amdgpu::bridge::unlinkShm(const char *name) { ::shm_unlink(name); } diff --git a/hw/amdgpu/device/CMakeLists.txt b/hw/amdgpu/device/CMakeLists.txt new file mode 100644 index 000000000..83db308a9 --- /dev/null +++ b/hw/amdgpu/device/CMakeLists.txt @@ -0,0 +1,66 @@ +project(libamdgpu-device) +set(PROJECT_PATH amdgpu/device) + +set(SRC + src/device.cpp +) + +function(add_precompiled_vulkan_spirv target) + add_library(${target} INTERFACE) + set(SPIRV_GEN_ROOT_DIR "spirv-gen/include/") + set(SPIRV_GEN_DIR "${SPIRV_GEN_ROOT_DIR}/shaders") + + cmake_path(ABSOLUTE_PATH SPIRV_GEN_ROOT_DIR BASE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} NORMALIZE OUTPUT_VARIABLE outputrootdir) + cmake_path(ABSOLUTE_PATH SPIRV_GEN_DIR BASE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} NORMALIZE OUTPUT_VARIABLE outputdir) + file(MAKE_DIRECTORY ${outputrootdir}) + file(MAKE_DIRECTORY ${outputdir}) + target_include_directories(${target} INTERFACE ${outputrootdir}) + + foreach(input IN LISTS ARGN) + cmake_path(GET input FILENAME inputname) + cmake_path(REPLACE_EXTENSION inputname LAST_ONLY .h OUTPUT_VARIABLE outputname) + cmake_path(APPEND outputdir ${outputname} OUTPUT_VARIABLE outputpath) + cmake_path(REMOVE_EXTENSION inputname LAST_ONLY OUTPUT_VARIABLE varname) + + string(REPLACE "." "_" varname ${varname}) + string(PREPEND varname "spirv_") + + add_custom_command( + OUTPUT ${outputpath} + COMMAND glslangValidator -V --vn "${varname}" -o "${outputpath}" "${CMAKE_CURRENT_SOURCE_DIR}/${input}" + DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${input}" + COMMENT "Generating ${outputname}..." + ) + + set(subtarget ".${target}-subtarget-${outputname}") + add_custom_target(${subtarget} DEPENDS ${outputpath}) + add_dependencies(${target} ${subtarget}) + endforeach() +endfunction() + +add_precompiled_vulkan_spirv(${PROJECT_NAME}-shaders + src/rect_list.geom.glsl +) + +find_package(SPIRV-Tools REQUIRED CONFIG) +find_package(SPIRV-Tools-opt REQUIRED CONFIG) + +add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC}) +target_link_libraries(${PROJECT_NAME} +PUBLIC + spirv + amdgpu::base + amdgpu::bridge + amdgpu::shader + util + SPIRV-Tools + SPIRV-Tools-opt + +PRIVATE + ${PROJECT_NAME}-shaders +) + +target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH}) +set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "") +add_library(amdgpu::device ALIAS ${PROJECT_NAME}) +set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON) diff --git a/hw/amdgpu/device/include/amdgpu/device/device.hpp b/hw/amdgpu/device/include/amdgpu/device/device.hpp new file mode 100644 index 000000000..42baaa20a --- /dev/null +++ b/hw/amdgpu/device/include/amdgpu/device/device.hpp @@ -0,0 +1,1542 @@ +#pragma once + +#include "amdgpu/RemoteMemory.hpp" +#include "amdgpu/bridge/bridge.hpp" +#include "amdgpu/shader/Converter.hpp" +#include "amdgpu/shader/Instruction.hpp" +#include "util/Verify.hpp" + +#include +#include +#include +#include +#include + +namespace amdgpu::device { +inline constexpr std::uint32_t getBits(std::uint32_t value, int end, + int begin) { + return (value >> begin) & ((1u << (end - begin + 1)) - 1); +} + +inline constexpr std::uint32_t getBit(std::uint32_t value, int bit) { + return (value >> bit) & 1; +} + +inline constexpr std::uint32_t genMask(std::uint32_t offset, + std::uint32_t bitCount) { + return ((1u << bitCount) - 1u) << offset; +} + +inline constexpr std::uint32_t getMaskEnd(std::uint32_t mask) { + return 32 - std::countl_zero(mask); +} + +inline constexpr std::uint32_t fetchMaskedValue(std::uint32_t hex, + std::uint32_t mask) { + return (hex & mask) >> std::countr_zero(mask); +} + +template >> +inline std::size_t calcStringLen(T value, unsigned base = 10) { + std::size_t n = 1; + std::size_t base2 = base * base; + std::size_t base3 = base2 * base; + std::size_t base4 = base3 * base; + + while (true) { + if (value < base) { + return n; + } + + if (value < base2) { + return n + 1; + } + + if (value < base3) { + return n + 2; + } + + if (value < base4) { + return n + 3; + } + + value /= base4; + n += 4; + } +} + +template >> +inline void toHexString(char *dst, std::size_t len, T value) { + while (len > 0) { + char digit = value % 16; + value /= 16; + + dst[--len] = digit < 10 ? '0' + digit : 'a' + digit - 10; + } +} + +inline std::string toHexString(unsigned value) { + auto len = calcStringLen(value, 16); + + std::string result(len, '\0'); + toHexString(result.data(), len, value); + return result; +} + +inline std::string toHexString(int value) { + bool isNeg = value < 0; + unsigned uval = isNeg ? static_cast(~value) + 1 : value; + auto len = calcStringLen(uval, 16); + + std::string result(len + (isNeg ? 1 : 0), '-'); + toHexString(result.data(), len, uval); + return result; +} + +enum Registers { + SPI_SHADER_PGM_LO_PS = 0x2c08, + SPI_SHADER_PGM_HI_PS = 0x2c09, + SPI_SHADER_PGM_RSRC1_PS = 0x2c0a, + SPI_SHADER_PGM_RSRC2_PS = 0x2c0b, + SPI_SHADER_USER_DATA_PS_0 = 0x2c0c, + SPI_SHADER_USER_DATA_PS_1, + SPI_SHADER_USER_DATA_PS_2, + SPI_SHADER_USER_DATA_PS_3, + SPI_SHADER_USER_DATA_PS_4, + SPI_SHADER_USER_DATA_PS_5, + SPI_SHADER_USER_DATA_PS_6, + SPI_SHADER_USER_DATA_PS_7, + SPI_SHADER_USER_DATA_PS_8, + SPI_SHADER_USER_DATA_PS_9, + SPI_SHADER_USER_DATA_PS_10, + SPI_SHADER_USER_DATA_PS_11, + SPI_SHADER_USER_DATA_PS_12, + SPI_SHADER_USER_DATA_PS_13, + SPI_SHADER_USER_DATA_PS_14, + SPI_SHADER_USER_DATA_PS_15, + + SPI_SHADER_PGM_LO_VS = 0x2c48, + SPI_SHADER_PGM_HI_VS = 0x2c49, + SPI_SHADER_PGM_RSRC1_VS = 0x2c4a, + SPI_SHADER_PGM_RSRC2_VS = 0x2c4b, + SPI_SHADER_USER_DATA_VS_0 = 0x2c4c, + SPI_SHADER_USER_DATA_VS_1 = 0x2c4d, + SPI_SHADER_USER_DATA_VS_2 = 0x2c4e, + SPI_SHADER_USER_DATA_VS_3 = 0x2c4f, + SPI_SHADER_USER_DATA_VS_4, + SPI_SHADER_USER_DATA_VS_5, + SPI_SHADER_USER_DATA_VS_6, + SPI_SHADER_USER_DATA_VS_7, + SPI_SHADER_USER_DATA_VS_8, + SPI_SHADER_USER_DATA_VS_9, + SPI_SHADER_USER_DATA_VS_10, + SPI_SHADER_USER_DATA_VS_11, + SPI_SHADER_USER_DATA_VS_12, + SPI_SHADER_USER_DATA_VS_13, + SPI_SHADER_USER_DATA_VS_14, + SPI_SHADER_USER_DATA_VS_15, + + COMPUTE_NUM_THREAD_X = 0x2e07, + COMPUTE_NUM_THREAD_Y, + COMPUTE_NUM_THREAD_Z, + COMPUTE_PGM_LO = 0x2e0c, + COMPUTE_PGM_HI, + COMPUTE_PGM_RSRC1 = 0x2e12, + COMPUTE_PGM_RSRC2, + COMPUTE_USER_DATA_0 = 0x2e40, + COMPUTE_USER_DATA_1, + COMPUTE_USER_DATA_2, + COMPUTE_USER_DATA_3, + COMPUTE_USER_DATA_4, + COMPUTE_USER_DATA_5, + COMPUTE_USER_DATA_6, + COMPUTE_USER_DATA_7, + COMPUTE_USER_DATA_8, + COMPUTE_USER_DATA_9, + COMPUTE_USER_DATA_10, + COMPUTE_USER_DATA_11, + COMPUTE_USER_DATA_12, + COMPUTE_USER_DATA_13, + COMPUTE_USER_DATA_14, + COMPUTE_USER_DATA_15, + + DB_RENDER_CONTROL = 0xa000, + DB_DEPTH_VIEW = 0xA002, + DB_HTILE_DATA_BASE = 0xA005, + DB_DEPTH_CLEAR = 0xA00B, + PA_SC_SCREEN_SCISSOR_TL = 0xa00c, + PA_SC_SCREEN_SCISSOR_BR = 0xa00d, + DB_DEPTH_INFO = 0xA00F, + DB_Z_INFO = 0xA010, + DB_STENCIL_INFO = 0xA011, + DB_Z_READ_BASE = 0xA012, + DB_STENCIL_READ_BASE = 0xA013, + DB_Z_WRITE_BASE = 0xA014, + DB_STENCIL_WRITE_BASE = 0xA015, + DB_DEPTH_SIZE = 0xA016, + DB_DEPTH_SLICE = 0xA017, + PA_SU_HARDWARE_SCREEN_OFFSET = 0xa08d, + CB_TARGET_MASK = 0xA08e, + CB_SHADER_MASK = 0xa08f, + PA_SC_VPORT_ZMIN_0 = 0xA0b4, + PA_SC_VPORT_ZMAX_0 = 0xA0b5, + PA_CL_VPORT_XSCALE = 0xa10f, + PA_CL_VPORT_XOFFSET, + PA_CL_VPORT_YSCALE, + PA_CL_VPORT_YOFFSET, + PA_CL_VPORT_ZSCALE, + PA_CL_VPORT_ZOFFSET, + SPI_PS_INPUT_CNTL_0 = 0xa191, + SPI_VS_OUT_CONFIG = 0xa1b1, + SPI_PS_INPUT_ENA = 0xa1b3, + SPI_PS_INPUT_ADDR = 0xa1b4, + SPI_PS_IN_CONTROL = 0xa1b6, + SPI_BARYC_CNTL = 0xa1b8, + SPI_SHADER_POS_FORMAT = 0xa1c3, + SPI_SHADER_Z_FORMAT = 0xa1c4, + SPI_SHADER_COL_FORMAT = 0xa1c5, + DB_DEPTH_CONTROL = 0xa200, + CB_COLOR_CONTROL = 0xa202, + DB_SHADER_CONTROL = 0xa203, + PA_CL_CLIP_CNTL = 0xa204, + PA_SU_SC_MODE_CNTL = 0xa205, + PA_CL_VTE_CNTL = 0xa206, + PA_CL_VS_OUT_CNTL = 0xa207, + DB_HTILE_SURFACE = 0xA2AF, + VGT_SHADER_STAGES_EN = 0xa2d5, + PA_CL_GB_VERT_CLIP_ADJ = 0xa2fa, + PA_CL_GB_VERT_DISC_ADJ, + PA_CL_GB_HORZ_CLIP_ADJ, + PA_CL_GB_HORZ_DISC_ADJ, + + CB_COLOR0_BASE = 0xA318, + CB_COLOR0_PITCH, + CB_COLOR0_SLICE, + CB_COLOR0_VIEW, + CB_COLOR0_INFO, + CB_COLOR0_ATTRIB, + CB_COLOR0_DCC_CONTROL, + CB_COLOR0_CMASK, + CB_COLOR0_CMASK_SLICE, + CB_COLOR0_FMASK, + CB_COLOR0_FMASK_SLICE, + CB_COLOR0_CLEAR_WORD0, + CB_COLOR0_CLEAR_WORD1, + CB_COLOR0_DCC_BASE, + CB_COLOR0_UNK0, + + CB_COLOR1_BASE, + CB_COLOR1_PITCH, + CB_COLOR1_SLICE, + CB_COLOR1_VIEW, + CB_COLOR1_INFO, + CB_COLOR1_ATTRIB, + CB_COLOR1_DCC_CONTROL, + CB_COLOR1_CMASK, + CB_COLOR1_CMASK_SLICE, + CB_COLOR1_FMASK, + CB_COLOR1_FMASK_SLICE, + CB_COLOR1_CLEAR_WORD0, + CB_COLOR1_CLEAR_WORD1, + CB_COLOR1_DCC_BASE, + CB_COLOR1_UNK0, + + CB_COLOR2_BASE, + CB_COLOR2_PITCH, + CB_COLOR2_SLICE, + CB_COLOR2_VIEW, + CB_COLOR2_INFO, + CB_COLOR2_ATTRIB, + CB_COLOR2_DCC_CONTROL, + CB_COLOR2_CMASK, + CB_COLOR2_CMASK_SLICE, + CB_COLOR2_FMASK, + CB_COLOR2_FMASK_SLICE, + CB_COLOR2_CLEAR_WORD0, + CB_COLOR2_CLEAR_WORD1, + CB_COLOR2_DCC_BASE, + CB_COLOR2_UNK0, + + CB_COLOR3_BASE, + CB_COLOR3_PITCH, + CB_COLOR3_SLICE, + CB_COLOR3_VIEW, + CB_COLOR3_INFO, + CB_COLOR3_ATTRIB, + CB_COLOR3_DCC_CONTROL, + CB_COLOR3_CMASK, + CB_COLOR3_CMASK_SLICE, + CB_COLOR3_FMASK, + CB_COLOR3_FMASK_SLICE, + CB_COLOR3_CLEAR_WORD0, + CB_COLOR3_CLEAR_WORD1, + CB_COLOR3_DCC_BASE, + CB_COLOR3_UNK0, + + CB_COLOR4_BASE, + CB_COLOR4_PITCH, + CB_COLOR4_SLICE, + CB_COLOR4_VIEW, + CB_COLOR4_INFO, + CB_COLOR4_ATTRIB, + CB_COLOR4_DCC_CONTROL, + CB_COLOR4_CMASK, + CB_COLOR4_CMASK_SLICE, + CB_COLOR4_FMASK, + CB_COLOR4_FMASK_SLICE, + CB_COLOR4_CLEAR_WORD0, + CB_COLOR4_CLEAR_WORD1, + CB_COLOR4_DCC_BASE, + CB_COLOR4_UNK0, + + CB_COLOR5_BASE, + CB_COLOR5_PITCH, + CB_COLOR5_SLICE, + CB_COLOR5_VIEW, + CB_COLOR5_INFO, + CB_COLOR5_ATTRIB, + CB_COLOR5_DCC_CONTROL, + CB_COLOR5_CMASK, + CB_COLOR5_CMASK_SLICE, + CB_COLOR5_FMASK, + CB_COLOR5_FMASK_SLICE, + CB_COLOR5_CLEAR_WORD0, + CB_COLOR5_CLEAR_WORD1, + CB_COLOR5_DCC_BASE, + CB_COLOR5_UNK0, + + CB_COLOR6_BASE, + CB_COLOR6_PITCH, + CB_COLOR6_SLICE, + CB_COLOR6_VIEW, + CB_COLOR6_INFO, + CB_COLOR6_ATTRIB, + CB_COLOR6_DCC_CONTROL, + CB_COLOR6_CMASK, + CB_COLOR6_CMASK_SLICE, + CB_COLOR6_FMASK, + CB_COLOR6_FMASK_SLICE, + CB_COLOR6_CLEAR_WORD0, + CB_COLOR6_CLEAR_WORD1, + CB_COLOR6_DCC_BASE, + CB_COLOR6_UNK0, + + CB_BLEND0_CONTROL = 0xa1e0, + + VGT_PRIMITIVE_TYPE = 0xc242, +}; + +inline std::string registerToString(int reg) { + switch (reg) { + case SPI_SHADER_PGM_LO_PS: + return "SPI_SHADER_PGM_LO_PS"; + case SPI_SHADER_PGM_HI_PS: + return "SPI_SHADER_PGM_HI_PS"; + case SPI_SHADER_PGM_RSRC1_PS: + return "SPI_SHADER_PGM_RSRC1_PS"; + case SPI_SHADER_PGM_RSRC2_PS: + return "SPI_SHADER_PGM_RSRC2_PS"; + case SPI_SHADER_USER_DATA_PS_0: + return "SPI_SHADER_USER_DATA_PS_0"; + case SPI_SHADER_USER_DATA_PS_1: + return "SPI_SHADER_USER_DATA_PS_1"; + case SPI_SHADER_USER_DATA_PS_2: + return "SPI_SHADER_USER_DATA_PS_2"; + case SPI_SHADER_USER_DATA_PS_3: + return "SPI_SHADER_USER_DATA_PS_3"; + case SPI_SHADER_USER_DATA_PS_4: + return "SPI_SHADER_USER_DATA_PS_4"; + case SPI_SHADER_USER_DATA_PS_5: + return "SPI_SHADER_USER_DATA_PS_5"; + case SPI_SHADER_USER_DATA_PS_6: + return "SPI_SHADER_USER_DATA_PS_6"; + case SPI_SHADER_USER_DATA_PS_7: + return "SPI_SHADER_USER_DATA_PS_7"; + case SPI_SHADER_USER_DATA_PS_8: + return "SPI_SHADER_USER_DATA_PS_8"; + case SPI_SHADER_USER_DATA_PS_9: + return "SPI_SHADER_USER_DATA_PS_9"; + case SPI_SHADER_USER_DATA_PS_10: + return "SPI_SHADER_USER_DATA_PS_10"; + case SPI_SHADER_USER_DATA_PS_11: + return "SPI_SHADER_USER_DATA_PS_11"; + case SPI_SHADER_USER_DATA_PS_12: + return "SPI_SHADER_USER_DATA_PS_12"; + case SPI_SHADER_USER_DATA_PS_13: + return "SPI_SHADER_USER_DATA_PS_13"; + case SPI_SHADER_USER_DATA_PS_14: + return "SPI_SHADER_USER_DATA_PS_14"; + case SPI_SHADER_USER_DATA_PS_15: + return "SPI_SHADER_USER_DATA_PS_15"; + case SPI_SHADER_PGM_LO_VS: + return "SPI_SHADER_PGM_LO_VS"; + case SPI_SHADER_PGM_HI_VS: + return "SPI_SHADER_PGM_HI_VS"; + case SPI_SHADER_PGM_RSRC1_VS: + return "SPI_SHADER_PGM_RSRC1_VS"; + case SPI_SHADER_PGM_RSRC2_VS: + return "SPI_SHADER_PGM_RSRC2_VS"; + case SPI_SHADER_USER_DATA_VS_0: + return "SPI_SHADER_USER_DATA_VS_0"; + case SPI_SHADER_USER_DATA_VS_1: + return "SPI_SHADER_USER_DATA_VS_1"; + case SPI_SHADER_USER_DATA_VS_2: + return "SPI_SHADER_USER_DATA_VS_2"; + case SPI_SHADER_USER_DATA_VS_3: + return "SPI_SHADER_USER_DATA_VS_3"; + case SPI_SHADER_USER_DATA_VS_4: + return "SPI_SHADER_USER_DATA_VS_4"; + case SPI_SHADER_USER_DATA_VS_5: + return "SPI_SHADER_USER_DATA_VS_5"; + case SPI_SHADER_USER_DATA_VS_6: + return "SPI_SHADER_USER_DATA_VS_6"; + case SPI_SHADER_USER_DATA_VS_7: + return "SPI_SHADER_USER_DATA_VS_7"; + case SPI_SHADER_USER_DATA_VS_8: + return "SPI_SHADER_USER_DATA_VS_8"; + case SPI_SHADER_USER_DATA_VS_9: + return "SPI_SHADER_USER_DATA_VS_9"; + case SPI_SHADER_USER_DATA_VS_10: + return "SPI_SHADER_USER_DATA_VS_10"; + case SPI_SHADER_USER_DATA_VS_11: + return "SPI_SHADER_USER_DATA_VS_11"; + case SPI_SHADER_USER_DATA_VS_12: + return "SPI_SHADER_USER_DATA_VS_12"; + case SPI_SHADER_USER_DATA_VS_13: + return "SPI_SHADER_USER_DATA_VS_13"; + case SPI_SHADER_USER_DATA_VS_14: + return "SPI_SHADER_USER_DATA_VS_14"; + case SPI_SHADER_USER_DATA_VS_15: + return "SPI_SHADER_USER_DATA_VS_15"; + case COMPUTE_NUM_THREAD_X: + return "COMPUTE_NUM_THREAD_X"; + case COMPUTE_NUM_THREAD_Y: + return "COMPUTE_NUM_THREAD_Y"; + case COMPUTE_NUM_THREAD_Z: + return "COMPUTE_NUM_THREAD_Z"; + case COMPUTE_PGM_LO: + return "COMPUTE_PGM_LO"; + case COMPUTE_PGM_HI: + return "COMPUTE_PGM_HI"; + case COMPUTE_PGM_RSRC1: + return "COMPUTE_PGM_RSRC1"; + case COMPUTE_PGM_RSRC2: + return "COMPUTE_PGM_RSRC2"; + case COMPUTE_USER_DATA_0: + return "COMPUTE_USER_DATA_0"; + case COMPUTE_USER_DATA_1: + return "COMPUTE_USER_DATA_1"; + case COMPUTE_USER_DATA_2: + return "COMPUTE_USER_DATA_2"; + case COMPUTE_USER_DATA_3: + return "COMPUTE_USER_DATA_3"; + case COMPUTE_USER_DATA_4: + return "COMPUTE_USER_DATA_4"; + case COMPUTE_USER_DATA_5: + return "COMPUTE_USER_DATA_5"; + case COMPUTE_USER_DATA_6: + return "COMPUTE_USER_DATA_6"; + case COMPUTE_USER_DATA_7: + return "COMPUTE_USER_DATA_7"; + case COMPUTE_USER_DATA_8: + return "COMPUTE_USER_DATA_8"; + case COMPUTE_USER_DATA_9: + return "COMPUTE_USER_DATA_9"; + case COMPUTE_USER_DATA_10: + return "COMPUTE_USER_DATA_10"; + case COMPUTE_USER_DATA_11: + return "COMPUTE_USER_DATA_11"; + case COMPUTE_USER_DATA_12: + return "COMPUTE_USER_DATA_12"; + case COMPUTE_USER_DATA_13: + return "COMPUTE_USER_DATA_13"; + case COMPUTE_USER_DATA_14: + return "COMPUTE_USER_DATA_14"; + case COMPUTE_USER_DATA_15: + return "COMPUTE_USER_DATA_15"; + case DB_DEPTH_CLEAR: + return "DB_DEPTH_CLEAR"; + case DB_RENDER_CONTROL: + return "DB_RENDER_CONTROL"; + case DB_DEPTH_VIEW: + return "DB_DEPTH_VIEW"; + case DB_HTILE_DATA_BASE: + return "DB_HTILE_DATA_BASE"; + case PA_SC_SCREEN_SCISSOR_TL: + return "PA_SC_SCREEN_SCISSOR_TL"; + case PA_SC_SCREEN_SCISSOR_BR: + return "PA_SC_SCREEN_SCISSOR_BR"; + case DB_DEPTH_INFO: + return "DB_DEPTH_INFO"; + case DB_Z_INFO: + return "DB_Z_INFO"; + case DB_STENCIL_INFO: + return "DB_STENCIL_INFO"; + case DB_Z_READ_BASE: + return "DB_Z_READ_BASE"; + case DB_STENCIL_READ_BASE: + return "DB_STENCIL_READ_BASE"; + case DB_Z_WRITE_BASE: + return "DB_Z_WRITE_BASE"; + case DB_STENCIL_WRITE_BASE: + return "DB_STENCIL_WRITE_BASE"; + case DB_DEPTH_SIZE: + return "DB_DEPTH_SIZE"; + case DB_DEPTH_SLICE: + return "DB_DEPTH_SLICE"; + case PA_SU_HARDWARE_SCREEN_OFFSET: + return "PA_SU_HARDWARE_SCREEN_OFFSET"; + case CB_TARGET_MASK: + return "CB_TARGET_MASK"; + case CB_SHADER_MASK: + return "CB_SHADER_MASK"; + case PA_SC_VPORT_ZMIN_0: + return "PA_SC_VPORT_ZMIN_0"; + case PA_SC_VPORT_ZMAX_0: + return "PA_SC_VPORT_ZMAX_0"; + case PA_CL_VPORT_XSCALE: + return "PA_CL_VPORT_XSCALE"; + case PA_CL_VPORT_XOFFSET: + return "PA_CL_VPORT_XOFFSET"; + case PA_CL_VPORT_YSCALE: + return "PA_CL_VPORT_YSCALE"; + case PA_CL_VPORT_YOFFSET: + return "PA_CL_VPORT_YOFFSET"; + case PA_CL_VPORT_ZSCALE: + return "PA_CL_VPORT_ZSCALE"; + case PA_CL_VPORT_ZOFFSET: + return "PA_CL_VPORT_ZOFFSET"; + case SPI_PS_INPUT_CNTL_0: + return "SPI_PS_INPUT_CNTL_0"; + case SPI_VS_OUT_CONFIG: + return "SPI_VS_OUT_CONFIG"; + case SPI_PS_INPUT_ENA: + return "SPI_PS_INPUT_ENA"; + case SPI_PS_INPUT_ADDR: + return "SPI_PS_INPUT_ADDR"; + case SPI_PS_IN_CONTROL: + return "SPI_PS_IN_CONTROL"; + case SPI_BARYC_CNTL: + return "SPI_BARYC_CNTL"; + case SPI_SHADER_POS_FORMAT: + return "SPI_SHADER_POS_FORMAT"; + case SPI_SHADER_Z_FORMAT: + return "SPI_SHADER_Z_FORMAT"; + case SPI_SHADER_COL_FORMAT: + return "SPI_SHADER_COL_FORMAT"; + case DB_DEPTH_CONTROL: + return "DB_DEPTH_CONTROL"; + case CB_COLOR_CONTROL: + return "DB_COLOR_CONTROL"; + case DB_SHADER_CONTROL: + return "DB_SHADER_CONTROL"; + case PA_CL_CLIP_CNTL: + return "PA_CL_CLIP_CNTL"; + case PA_SU_SC_MODE_CNTL: + return "PA_SU_SC_MODE_CNTL"; + case PA_CL_VTE_CNTL: + return "PA_CL_VTE_CNTL"; + case PA_CL_VS_OUT_CNTL: + return "PA_CL_VS_OUT_CNTL"; + case DB_HTILE_SURFACE: + return "DB_HTILE_SURFACE"; + case VGT_SHADER_STAGES_EN: + return "VGT_SHADER_STAGES_EN"; + case PA_CL_GB_VERT_CLIP_ADJ: + return "PA_CL_GB_VERT_CLIP_ADJ"; + case PA_CL_GB_VERT_DISC_ADJ: + return "PA_CL_GB_VERT_DISC_ADJ"; + case PA_CL_GB_HORZ_CLIP_ADJ: + return "PA_CL_GB_HORZ_CLIP_ADJ"; + case PA_CL_GB_HORZ_DISC_ADJ: + return "PA_CL_GB_HORZ_DISC_ADJ"; + case CB_COLOR0_BASE: + return "CB_COLOR0_BASE"; + case CB_COLOR0_PITCH: + return "CB_COLOR0_PITCH"; + case CB_COLOR0_SLICE: + return "CB_COLOR0_SLICE"; + case CB_COLOR0_VIEW: + return "CB_COLOR0_VIEW"; + case CB_COLOR0_INFO: + return "CB_COLOR0_INFO"; + case CB_COLOR0_ATTRIB: + return "CB_COLOR0_ATTRIB"; + case CB_COLOR0_DCC_CONTROL: + return "CB_COLOR0_DCC_CONTROL"; + case CB_COLOR0_CMASK: + return "CB_COLOR0_CMASK"; + case CB_COLOR0_CMASK_SLICE: + return "CB_COLOR0_CMASK_SLICE"; + case CB_COLOR0_FMASK: + return "CB_COLOR0_FMASK"; + case CB_COLOR0_FMASK_SLICE: + return "CB_COLOR0_FMASK_SLICE"; + case CB_COLOR0_CLEAR_WORD0: + return "CB_COLOR0_CLEAR_WORD0"; + case CB_COLOR0_CLEAR_WORD1: + return "CB_COLOR0_CLEAR_WORD1"; + case CB_COLOR0_DCC_BASE: + return "CB_COLOR0_DCC_BASE"; + case CB_COLOR1_BASE: + return "CB_COLOR1_BASE"; + case CB_COLOR1_PITCH: + return "CB_COLOR1_PITCH"; + case CB_COLOR1_SLICE: + return "CB_COLOR1_SLICE"; + case CB_COLOR1_VIEW: + return "CB_COLOR1_VIEW"; + case CB_COLOR1_INFO: + return "CB_COLOR1_INFO"; + case CB_COLOR1_ATTRIB: + return "CB_COLOR1_ATTRIB"; + case CB_COLOR1_DCC_CONTROL: + return "CB_COLOR1_DCC_CONTROL"; + case CB_COLOR1_CMASK: + return "CB_COLOR1_CMASK"; + case CB_COLOR1_CMASK_SLICE: + return "CB_COLOR1_CMASK_SLICE"; + case CB_COLOR1_FMASK: + return "CB_COLOR1_FMASK"; + case CB_COLOR1_FMASK_SLICE: + return "CB_COLOR1_FMASK_SLICE"; + case CB_COLOR1_CLEAR_WORD0: + return "CB_COLOR1_CLEAR_WORD0"; + case CB_COLOR1_CLEAR_WORD1: + return "CB_COLOR1_CLEAR_WORD1"; + case CB_COLOR1_DCC_BASE: + return "CB_COLOR1_DCC_BASE"; + case CB_COLOR2_BASE: + return "CB_COLOR2_BASE"; + case CB_COLOR2_PITCH: + return "CB_COLOR2_PITCH"; + case CB_COLOR2_SLICE: + return "CB_COLOR2_SLICE"; + case CB_COLOR2_VIEW: + return "CB_COLOR2_VIEW"; + case CB_COLOR2_INFO: + return "CB_COLOR2_INFO"; + case CB_COLOR2_ATTRIB: + return "CB_COLOR2_ATTRIB"; + case CB_COLOR2_DCC_CONTROL: + return "CB_COLOR2_DCC_CONTROL"; + case CB_COLOR2_CMASK: + return "CB_COLOR2_CMASK"; + case CB_COLOR2_CMASK_SLICE: + return "CB_COLOR2_CMASK_SLICE"; + case CB_COLOR2_FMASK: + return "CB_COLOR2_FMASK"; + case CB_COLOR2_FMASK_SLICE: + return "CB_COLOR2_FMASK_SLICE"; + case CB_COLOR2_CLEAR_WORD0: + return "CB_COLOR2_CLEAR_WORD0"; + case CB_COLOR2_CLEAR_WORD1: + return "CB_COLOR2_CLEAR_WORD1"; + case CB_COLOR2_DCC_BASE: + return "CB_COLOR2_DCC_BASE"; + case CB_COLOR3_BASE: + return "CB_COLOR3_BASE"; + case CB_COLOR3_PITCH: + return "CB_COLOR3_PITCH"; + case CB_COLOR3_SLICE: + return "CB_COLOR3_SLICE"; + case CB_COLOR3_VIEW: + return "CB_COLOR3_VIEW"; + case CB_COLOR3_INFO: + return "CB_COLOR3_INFO"; + case CB_COLOR3_ATTRIB: + return "CB_COLOR3_ATTRIB"; + case CB_COLOR3_DCC_CONTROL: + return "CB_COLOR3_DCC_CONTROL"; + case CB_COLOR3_CMASK: + return "CB_COLOR3_CMASK"; + case CB_COLOR3_CMASK_SLICE: + return "CB_COLOR3_CMASK_SLICE"; + case CB_COLOR3_FMASK: + return "CB_COLOR3_FMASK"; + case CB_COLOR3_FMASK_SLICE: + return "CB_COLOR3_FMASK_SLICE"; + case CB_COLOR3_CLEAR_WORD0: + return "CB_COLOR3_CLEAR_WORD0"; + case CB_COLOR3_CLEAR_WORD1: + return "CB_COLOR3_CLEAR_WORD1"; + case CB_COLOR3_DCC_BASE: + return "CB_COLOR3_DCC_BASE"; + case CB_COLOR4_BASE: + return "CB_COLOR4_BASE"; + case CB_COLOR4_PITCH: + return "CB_COLOR4_PITCH"; + case CB_COLOR4_SLICE: + return "CB_COLOR4_SLICE"; + case CB_COLOR4_VIEW: + return "CB_COLOR4_VIEW"; + case CB_COLOR4_INFO: + return "CB_COLOR4_INFO"; + case CB_COLOR4_ATTRIB: + return "CB_COLOR4_ATTRIB"; + case CB_COLOR4_DCC_CONTROL: + return "CB_COLOR4_DCC_CONTROL"; + case CB_COLOR4_CMASK: + return "CB_COLOR4_CMASK"; + case CB_COLOR4_CMASK_SLICE: + return "CB_COLOR4_CMASK_SLICE"; + case CB_COLOR4_FMASK: + return "CB_COLOR4_FMASK"; + case CB_COLOR4_FMASK_SLICE: + return "CB_COLOR4_FMASK_SLICE"; + case CB_COLOR4_CLEAR_WORD0: + return "CB_COLOR4_CLEAR_WORD0"; + case CB_COLOR4_CLEAR_WORD1: + return "CB_COLOR4_CLEAR_WORD1"; + case CB_COLOR4_DCC_BASE: + return "CB_COLOR4_DCC_BASE"; + case CB_COLOR5_BASE: + return "CB_COLOR5_BASE"; + case CB_COLOR5_PITCH: + return "CB_COLOR5_PITCH"; + case CB_COLOR5_SLICE: + return "CB_COLOR5_SLICE"; + case CB_COLOR5_VIEW: + return "CB_COLOR5_VIEW"; + case CB_COLOR5_INFO: + return "CB_COLOR5_INFO"; + case CB_COLOR5_ATTRIB: + return "CB_COLOR5_ATTRIB"; + case CB_COLOR5_DCC_CONTROL: + return "CB_COLOR5_DCC_CONTROL"; + case CB_COLOR5_CMASK: + return "CB_COLOR5_CMASK"; + case CB_COLOR5_CMASK_SLICE: + return "CB_COLOR5_CMASK_SLICE"; + case CB_COLOR5_FMASK: + return "CB_COLOR5_FMASK"; + case CB_COLOR5_FMASK_SLICE: + return "CB_COLOR5_FMASK_SLICE"; + case CB_COLOR5_CLEAR_WORD0: + return "CB_COLOR5_CLEAR_WORD0"; + case CB_COLOR5_CLEAR_WORD1: + return "CB_COLOR5_CLEAR_WORD1"; + case CB_COLOR5_DCC_BASE: + return "CB_COLOR5_DCC_BASE"; + case CB_COLOR6_BASE: + return "CB_COLOR6_BASE"; + case CB_COLOR6_PITCH: + return "CB_COLOR6_PITCH"; + case CB_COLOR6_SLICE: + return "CB_COLOR6_SLICE"; + case CB_COLOR6_VIEW: + return "CB_COLOR6_VIEW"; + case CB_COLOR6_INFO: + return "CB_COLOR6_INFO"; + case CB_COLOR6_ATTRIB: + return "CB_COLOR6_ATTRIB"; + case CB_COLOR6_DCC_CONTROL: + return "CB_COLOR6_DCC_CONTROL"; + case CB_COLOR6_CMASK: + return "CB_COLOR6_CMASK"; + case CB_COLOR6_CMASK_SLICE: + return "CB_COLOR6_CMASK_SLICE"; + case CB_COLOR6_FMASK: + return "CB_COLOR6_FMASK"; + case CB_COLOR6_FMASK_SLICE: + return "CB_COLOR6_FMASK_SLICE"; + case CB_COLOR6_CLEAR_WORD0: + return "CB_COLOR6_CLEAR_WORD0"; + case CB_COLOR6_CLEAR_WORD1: + return "CB_COLOR6_CLEAR_WORD1"; + case CB_COLOR6_DCC_BASE: + return "CB_COLOR6_DCC_BASE"; + case CB_BLEND0_CONTROL: + return "CB_BLEND0_CONTROL"; + + case VGT_PRIMITIVE_TYPE: + return "VGT_PRIMITIVE_TYPE"; + } + + return ""; +} + +enum Opcodes { + kOpcodeNOP = 0x10, + kOpcodeSET_BASE = 0x11, + kOpcodeCLEAR_STATE = 0x12, + kOpcodeINDEX_BUFFER_SIZE = 0x13, + kOpcodeDISPATCH_DIRECT = 0x15, + kOpcodeDISPATCH_INDIRECT = 0x16, + kOpcodeINDIRECT_BUFFER_END = 0x17, + MODE_CONTROL = 0x18, + kOpcodeATOMIC_GDS = 0x1D, + kOpcodeATOMIC_MEM = 0x1E, + kOpcodeOCCLUSION_QUERY = 0x1F, + kOpcodeSET_PREDICATION = 0x20, + kOpcodeREG_RMW = 0x21, + kOpcodeCOND_EXEC = 0x22, + kOpcodePRED_EXEC = 0x23, + kOpcodeDRAW_INDIRECT = 0x24, + kOpcodeDRAW_INDEX_INDIRECT = 0x25, + kOpcodeINDEX_BASE = 0x26, + kOpcodeDRAW_INDEX_2 = 0x27, + kOpcodeCONTEXT_CONTROL = 0x28, + DRAW_INDEX_OFFSET = 0x29, + kOpcodeINDEX_TYPE = 0x2A, + kOpcodeDRAW_INDEX = 0x2B, + kOpcodeDRAW_INDIRECT_MULTI = 0x2C, + kOpcodeDRAW_INDEX_AUTO = 0x2D, + kOpcodeDRAW_INDEX_IMMD = 0x2E, + kOpcodeNUM_INSTANCES = 0x2F, + kOpcodeDRAW_INDEX_MULTI_AUTO = 0x30, + kOpcodeINDIRECT_BUFFER_32 = 0x32, + kOpcodeINDIRECT_BUFFER_CONST = 0x33, + kOpcodeSTRMOUT_BUFFER_UPDATE = 0x34, + kOpcodeDRAW_INDEX_OFFSET_2 = 0x35, + kOpcodeDRAW_PREAMBLE = 0x36, + kOpcodeWRITE_DATA = 0x37, + kOpcodeDRAW_INDEX_INDIRECT_MULTI = 0x38, + kOpcodeMEM_SEMAPHORE = 0x39, + kOpcodeMPEG_INDEX = 0x3A, + kOpcodeCOPY_DW = 0x3B, + kOpcodeWAIT_REG_MEM = 0x3C, + kOpcodeMEM_WRITE = 0x3D, + kOpcodeINDIRECT_BUFFER_3F = 0x3F, + kOpcodeCOPY_DATA = 0x40, + kOpcodeCP_DMA = 0x41, + kOpcodePFP_SYNC_ME = 0x42, + kOpcodeSURFACE_SYNC = 0x43, + kOpcodeME_INITIALIZE = 0x44, + kOpcodeCOND_WRITE = 0x45, + kOpcodeEVENT_WRITE = 0x46, + kOpcodeEVENT_WRITE_EOP = 0x47, + kOpcodeEVENT_WRITE_EOS = 0x48, + kOpcodeRELEASE_MEM = 0x49, + kOpcodePREAMBLE_CNTL = 0x4A, + RB_OFFSET = 0x4B, + ALU_PS_CONST_BUFFER_COPY = 0x4C, + ALU_VS_CONST_BUFFER_COPY = 0x4D, + ALU_PS_CONST_UPDATE = 0x4E, + ALU_VS_CONST_UPDATE = 0x4F, + kOpcodeDMA_DATA = 0x50, + kOpcodeONE_REG_WRITE = 0x57, + kOpcodeAQUIRE_MEM = 0x58, + kOpcodeREWIND = 0x59, + kOpcodeLOAD_UCONFIG_REG = 0x5E, + kOpcodeLOAD_SH_REG = 0x5F, + kOpcodeLOAD_CONFIG_REG = 0x60, + kOpcodeLOAD_CONTEXT_REG = 0x61, + kOpcodeSET_CONFIG_REG = 0x68, + kOpcodeSET_CONTEXT_REG = 0x69, + kOpcodeSET_ALU_CONST = 0x6A, + kOpcodeSET_BOOL_CONST = 0x6B, + kOpcodeSET_LOOP_CONST = 0x6C, + kOpcodeSET_RESOURCE = 0x6D, + kOpcodeSET_SAMPLER = 0x6E, + kOpcodeSET_CTL_CONST = 0x6F, + SET_RESOURCE_OFFSET = 0x70, + SET_ALU_CONST_VS = 0x71, + SET_ALU_CONST_DI = 0x72, + kOpcodeSET_CONTEXT_REG_INDIRECT = 0x73, + SET_RESOURCE_INDIRECT = 0x74, + SET_APPEND_CNT = 0x75, + kOpcodeSET_SH_REG = 0x76, + kOpcodeSET_SH_REG_OFFSET = 0x77, + kOpcodeSET_QUEUE_REG = 0x78, + kOpcodeSET_UCONFIG_REG = 0x79, + kOpcodeSCRATCH_RAM_WRITE = 0x7D, + kOpcodeSCRATCH_RAM_READ = 0x7E, + kOpcodeLOAD_CONST_RAM = 0x80, + kOpcodeWRITE_CONST_RAM = 0x81, + kOpcodeDUMP_CONST_RAM = 0x83, + kOpcodeINCREMENT_CE_COUNTER = 0x84, + kOpcodeINCREMENT_DE_COUNTER = 0x85, + kOpcodeWAIT_ON_CE_COUNTER = 0x86, + kOpcodeWAIT_ON_DE_COUNTER_DIFF = 0x88, + kOpcodeSWITCH_BUFFER = 0x8B, +}; + +inline const std::string opcodeToString(int op) { + switch (op) { + case kOpcodeNOP: + return "IT_NOP"; + case kOpcodeSET_BASE: + return "IT_SET_BASE"; + case kOpcodeCLEAR_STATE: + return "IT_CLEAR_STATE"; + case kOpcodeINDEX_BUFFER_SIZE: + return "IT_INDEX_BUFFER_SIZE"; + case kOpcodeDISPATCH_DIRECT: + return "IT_DISPATCH_DIRECT"; + case kOpcodeDISPATCH_INDIRECT: + return "IT_DISPATCH_INDIRECT"; + case kOpcodeINDIRECT_BUFFER_END: + return "IT_INDIRECT_BUFFER_END"; + case kOpcodeATOMIC_GDS: + return "IT_ATOMIC_GDS"; + case kOpcodeATOMIC_MEM: + return "IT_ATOMIC_MEM"; + case kOpcodeOCCLUSION_QUERY: + return "IT_OCCLUSION_QUERY"; + case kOpcodeSET_PREDICATION: + return "IT_SET_PREDICATION"; + case kOpcodeREG_RMW: + return "IT_REG_RMW"; + case kOpcodeCOND_EXEC: + return "IT_COND_EXEC"; + case kOpcodePRED_EXEC: + return "IT_PRED_EXEC"; + case kOpcodeDRAW_INDIRECT: + return "IT_DRAW_INDIRECT"; + case kOpcodeDRAW_INDEX_INDIRECT: + return "IT_DRAW_INDEX_INDIRECT"; + case kOpcodeINDEX_BASE: + return "IT_INDEX_BASE"; + case kOpcodeDRAW_INDEX_2: + return "IT_DRAW_INDEX_2"; + case kOpcodeCONTEXT_CONTROL: + return "IT_CONTEXT_CONTROL"; + case kOpcodeINDEX_TYPE: + return "IT_INDEX_TYPE"; + case kOpcodeDRAW_INDEX: + return "IT_DRAW_INDEX"; + case kOpcodeDRAW_INDIRECT_MULTI: + return "IT_DRAW_INDIRECT_MULTI"; + case kOpcodeDRAW_INDEX_AUTO: + return "IT_DRAW_INDEX_AUTO"; + case kOpcodeDRAW_INDEX_IMMD: + return "IT_DRAW_INDEX_IMMD"; + case kOpcodeNUM_INSTANCES: + return "IT_NUM_INSTANCES"; + case kOpcodeDRAW_INDEX_MULTI_AUTO: + return "IT_DRAW_INDEX_MULTI_AUTO"; + case kOpcodeINDIRECT_BUFFER_32: + return "IT_INDIRECT_BUFFER_32"; + case kOpcodeINDIRECT_BUFFER_CONST: + return "IT_INDIRECT_BUFFER_CONST"; + case kOpcodeSTRMOUT_BUFFER_UPDATE: + return "IT_STRMOUT_BUFFER_UPDATE"; + case kOpcodeDRAW_INDEX_OFFSET_2: + return "IT_DRAW_INDEX_OFFSET_2"; + case kOpcodeDRAW_PREAMBLE: + return "IT_DRAW_PREAMBLE"; + case kOpcodeWRITE_DATA: + return "IT_WRITE_DATA"; + case kOpcodeDRAW_INDEX_INDIRECT_MULTI: + return "IT_DRAW_INDEX_INDIRECT_MULTI"; + case kOpcodeMEM_SEMAPHORE: + return "IT_MEM_SEMAPHORE"; + case kOpcodeMPEG_INDEX: + return "IT_MPEG_INDEX"; + case kOpcodeCOPY_DW: + return "IT_COPY_DW"; + case kOpcodeWAIT_REG_MEM: + return "IT_WAIT_REG_MEM"; + case kOpcodeMEM_WRITE: + return "IT_MEM_WRITE"; + case kOpcodeINDIRECT_BUFFER_3F: + return "IT_INDIRECT_BUFFER_3F"; + case kOpcodeCOPY_DATA: + return "IT_COPY_DATA"; + case kOpcodeCP_DMA: + return "IT_CP_DMA"; + case kOpcodePFP_SYNC_ME: + return "IT_PFP_SYNC_ME"; + case kOpcodeSURFACE_SYNC: + return "IT_SURFACE_SYNC"; + case kOpcodeME_INITIALIZE: + return "IT_ME_INITIALIZE"; + case kOpcodeCOND_WRITE: + return "IT_COND_WRITE"; + case kOpcodeEVENT_WRITE: + return "IT_EVENT_WRITE"; + case kOpcodeEVENT_WRITE_EOP: + return "IT_EVENT_WRITE_EOP"; + case kOpcodeEVENT_WRITE_EOS: + return "IT_EVENT_WRITE_EOS"; + case kOpcodeRELEASE_MEM: + return "IT_RELEASE_MEM"; + case kOpcodePREAMBLE_CNTL: + return "IT_PREAMBLE_CNTL"; + case kOpcodeDMA_DATA: + return "IT_DMA_DATA"; + case kOpcodeONE_REG_WRITE: + return "IT_ONE_REG_WRITE"; + case kOpcodeAQUIRE_MEM: + return "IT_AQUIRE_MEM"; + case kOpcodeREWIND: + return "IT_REWIND"; + case kOpcodeLOAD_UCONFIG_REG: + return "IT_LOAD_UCONFIG_REG"; + case kOpcodeLOAD_SH_REG: + return "IT_LOAD_SH_REG"; + case kOpcodeLOAD_CONFIG_REG: + return "IT_LOAD_CONFIG_REG"; + case kOpcodeLOAD_CONTEXT_REG: + return "IT_LOAD_CONTEXT_REG"; + case kOpcodeSET_CONFIG_REG: + return "IT_SET_CONFIG_REG"; + case kOpcodeSET_CONTEXT_REG: + return "IT_SET_CONTEXT_REG"; + case kOpcodeSET_ALU_CONST: + return "IT_SET_ALU_CONST"; + case kOpcodeSET_BOOL_CONST: + return "IT_SET_BOOL_CONST"; + case kOpcodeSET_LOOP_CONST: + return "IT_SET_LOOP_CONST"; + case kOpcodeSET_RESOURCE: + return "IT_SET_RESOURCE"; + case kOpcodeSET_SAMPLER: + return "IT_SET_SAMPLER"; + case kOpcodeSET_CTL_CONST: + return "IT_SET_CTL_CONST"; + case kOpcodeSET_CONTEXT_REG_INDIRECT: + return "IT_SET_CONTEXT_REG_INDIRECT"; + case kOpcodeSET_SH_REG: + return "IT_SET_SH_REG"; + case kOpcodeSET_SH_REG_OFFSET: + return "IT_SET_SH_REG_OFFSET"; + case kOpcodeSET_QUEUE_REG: + return "IT_SET_QUEUE_REG"; + case kOpcodeSET_UCONFIG_REG: + return "IT_SET_UCONFIG_REG"; + case kOpcodeSCRATCH_RAM_WRITE: + return "IT_SCRATCH_RAM_WRITE"; + case kOpcodeSCRATCH_RAM_READ: + return "IT_SCRATCH_RAM_READ"; + case kOpcodeLOAD_CONST_RAM: + return "IT_LOAD_CONST_RAM"; + case kOpcodeWRITE_CONST_RAM: + return "IT_WRITE_CONST_RAM"; + case kOpcodeDUMP_CONST_RAM: + return "IT_DUMP_CONST_RAM"; + case kOpcodeINCREMENT_CE_COUNTER: + return "IT_INCREMENT_CE_COUNTER"; + case kOpcodeINCREMENT_DE_COUNTER: + return "IT_INCREMENT_DE_COUNTER"; + case kOpcodeWAIT_ON_CE_COUNTER: + return "IT_WAIT_ON_CE_COUNTER"; + case kOpcodeWAIT_ON_DE_COUNTER_DIFF: + return "IT_WAIT_ON_DE_COUNTER_DIFF"; + case kOpcodeSWITCH_BUFFER: + return "IT_SWITCH_BUFFER"; + } + + return ""; +} + +inline void dumpShader(const std::uint32_t *data) { + int hackExit = 0; + + while (true) { + auto instHex = *data; + bool isEnd = instHex == 0xBF810000 || instHex == 0xBE802000; + + shader::Instruction inst(data); + + for (int i = 0; i < inst.size(); ++i) { + std::printf("%08X ", data[i]); + } + + inst.dump(); + printf("\n"); + data += inst.size(); + + if (isEnd) { + break; + } + } +} + +enum BlendMultiplier { + kBlendMultiplierZero = 0x00000000, + kBlendMultiplierOne = 0x00000001, + kBlendMultiplierSrcColor = 0x00000002, + kBlendMultiplierOneMinusSrcColor = 0x00000003, + kBlendMultiplierSrcAlpha = 0x00000004, + kBlendMultiplierOneMinusSrcAlpha = 0x00000005, + kBlendMultiplierDestAlpha = 0x00000006, + kBlendMultiplierOneMinusDestAlpha = 0x00000007, + kBlendMultiplierDestColor = 0x00000008, + kBlendMultiplierOneMinusDestColor = 0x00000009, + kBlendMultiplierSrcAlphaSaturate = 0x0000000a, + kBlendMultiplierConstantColor = 0x0000000d, + kBlendMultiplierOneMinusConstantColor = 0x0000000e, + kBlendMultiplierSrc1Color = 0x0000000f, + kBlendMultiplierInverseSrc1Color = 0x00000010, + kBlendMultiplierSrc1Alpha = 0x00000011, + kBlendMultiplierInverseSrc1Alpha = 0x00000012, + kBlendMultiplierConstantAlpha = 0x00000013, + kBlendMultiplierOneMinusConstantAlpha = 0x00000014, +}; + +enum BlendFunc { + kBlendFuncAdd = 0x00000000, + kBlendFuncSubtract = 0x00000001, + kBlendFuncMin = 0x00000002, + kBlendFuncMax = 0x00000003, + kBlendFuncReverseSubtract = 0x00000004, +}; + +enum PrimitiveType { + kPrimitiveTypeNone = 0x00000000, + kPrimitiveTypePointList = 0x00000001, + kPrimitiveTypeLineList = 0x00000002, + kPrimitiveTypeLineStrip = 0x00000003, + kPrimitiveTypeTriList = 0x00000004, + kPrimitiveTypeTriFan = 0x00000005, + kPrimitiveTypeTriStrip = 0x00000006, + kPrimitiveTypePatch = 0x00000009, + kPrimitiveTypeLineListAdjacency = 0x0000000a, + kPrimitiveTypeLineStripAdjacency = 0x0000000b, + kPrimitiveTypeTriListAdjacency = 0x0000000c, + kPrimitiveTypeTriStripAdjacency = 0x0000000d, + kPrimitiveTypeRectList = 0x00000011, + kPrimitiveTypeLineLoop = 0x00000012, + kPrimitiveTypeQuadList = 0x00000013, + kPrimitiveTypeQuadStrip = 0x00000014, + kPrimitiveTypePolygon = 0x00000015 +}; + +enum SurfaceFormat { + kSurfaceFormatInvalid = 0x00000000, + kSurfaceFormat8 = 0x00000001, + kSurfaceFormat16 = 0x00000002, + kSurfaceFormat8_8 = 0x00000003, + kSurfaceFormat32 = 0x00000004, + kSurfaceFormat16_16 = 0x00000005, + kSurfaceFormat10_11_11 = 0x00000006, + kSurfaceFormat11_11_10 = 0x00000007, + kSurfaceFormat10_10_10_2 = 0x00000008, + kSurfaceFormat2_10_10_10 = 0x00000009, + kSurfaceFormat8_8_8_8 = 0x0000000a, + kSurfaceFormat32_32 = 0x0000000b, + kSurfaceFormat16_16_16_16 = 0x0000000c, + kSurfaceFormat32_32_32 = 0x0000000d, + kSurfaceFormat32_32_32_32 = 0x0000000e, + kSurfaceFormat5_6_5 = 0x00000010, + kSurfaceFormat1_5_5_5 = 0x00000011, + kSurfaceFormat5_5_5_1 = 0x00000012, + kSurfaceFormat4_4_4_4 = 0x00000013, + kSurfaceFormat8_24 = 0x00000014, + kSurfaceFormat24_8 = 0x00000015, + kSurfaceFormatX24_8_32 = 0x00000016, + kSurfaceFormatGB_GR = 0x00000020, + kSurfaceFormatBG_RG = 0x00000021, + kSurfaceFormat5_9_9_9 = 0x00000022, + kSurfaceFormatBc1 = 0x00000023, + kSurfaceFormatBc2 = 0x00000024, + kSurfaceFormatBc3 = 0x00000025, + kSurfaceFormatBc4 = 0x00000026, + kSurfaceFormatBc5 = 0x00000027, + kSurfaceFormatBc6 = 0x00000028, + kSurfaceFormatBc7 = 0x00000029, + kSurfaceFormatFmask8_S2_F1 = 0x0000002C, + kSurfaceFormatFmask8_S4_F1 = 0x0000002D, + kSurfaceFormatFmask8_S8_F1 = 0x0000002E, + kSurfaceFormatFmask8_S2_F2 = 0x0000002F, + kSurfaceFormatFmask8_S4_F2 = 0x00000030, + kSurfaceFormatFmask8_S4_F4 = 0x00000031, + kSurfaceFormatFmask16_S16_F1 = 0x00000032, + kSurfaceFormatFmask16_S8_F2 = 0x00000033, + kSurfaceFormatFmask32_S16_F2 = 0x00000034, + kSurfaceFormatFmask32_S8_F4 = 0x00000035, + kSurfaceFormatFmask32_S8_F8 = 0x00000036, + kSurfaceFormatFmask64_S16_F4 = 0x00000037, + kSurfaceFormatFmask64_S16_F8 = 0x00000038, + kSurfaceFormat4_4 = 0x00000039, + kSurfaceFormat6_5_5 = 0x0000003A, + kSurfaceFormat1 = 0x0000003B, + kSurfaceFormat1Reversed = 0x0000003C, +}; + +enum TextureChannelType { + kTextureChannelTypeUNorm = 0x00000000, + kTextureChannelTypeSNorm = 0x00000001, + kTextureChannelTypeUScaled = 0x00000002, + kTextureChannelTypeSScaled = 0x00000003, + kTextureChannelTypeUInt = 0x00000004, + kTextureChannelTypeSInt = 0x00000005, + kTextureChannelTypeSNormNoZero = 0x00000006, + kTextureChannelTypeFloat = 0x00000007, + kTextureChannelTypeSrgb = 0x00000009, + kTextureChannelTypeUBNorm = 0x0000000A, + kTextureChannelTypeUBNormNoZero = 0x0000000B, + kTextureChannelTypeUBInt = 0x0000000C, + kTextureChannelTypeUBScaled = 0x0000000D, +}; + +struct GnmVBuffer { + uint64_t base : 44; + uint64_t mtype_L1s : 2; + uint64_t mtype_L2 : 2; + uint64_t stride : 14; + uint64_t cache_swizzle : 1; + uint64_t swizzle_en : 1; + + uint32_t num_records; + + uint32_t dst_sel_x : 3; + uint32_t dst_sel_y : 3; + uint32_t dst_sel_z : 3; + uint32_t dst_sel_w : 3; + + uint32_t nfmt : 3; + uint32_t dfmt : 4; + uint32_t element_size : 2; + uint32_t index_stride : 2; + uint32_t addtid_en : 1; + uint32_t reserved0 : 1; + uint32_t hash_en : 1; + uint32_t reserved1 : 1; + uint32_t mtype : 3; + uint32_t type : 2; + + std::uint64_t getAddress() const { return base; } + + uint32_t getStride() const { return stride; } + + uint32_t getSize() const { + uint32_t stride = getStride(); + uint32_t numElements = getNumRecords(); + return stride ? numElements * stride : numElements; + } + + uint32_t getNumRecords() const { return num_records; } + uint32_t getElementSize() const { return element_size; } + uint32_t getIndexStrideSize() const { return index_stride; } + SurfaceFormat getSurfaceFormat() const { return (SurfaceFormat)dfmt; } + TextureChannelType getChannelType() const { return (TextureChannelType)nfmt; } +}; + +static_assert(sizeof(GnmVBuffer) == sizeof(std::uint64_t) * 2); + +enum class TextureType { + Dim1D = 8, + Dim2D, + Dim3D, + Cube, + Array1D, + Array2D, + Msaa2D, + MsaaArray2D, +}; + +struct GnmTBuffer { + uint64_t baseaddr256 : 38; + uint64_t mtype_L2 : 2; + uint64_t min_lod : 12; + SurfaceFormat dfmt : 6; + TextureChannelType nfmt : 4; + uint64_t mtype01 : 2; + + uint64_t width : 14; + uint64_t height : 14; + uint64_t perfMod : 3; + uint64_t interlaced : 1; + uint64_t dst_sel_x : 3; + uint64_t dst_sel_y : 3; + uint64_t dst_sel_z : 3; + uint64_t dst_sel_w : 3; + uint64_t base_level : 4; + uint64_t last_level : 4; + uint64_t tiling_idx : 5; + uint64_t pow2pad : 1; + uint64_t mtype2 : 1; + uint64_t : 1; // reserved + TextureType type : 4; + + uint64_t depth : 13; + uint64_t pitch : 14; + uint64_t : 5; // reserved + uint64_t base_array : 13; + uint64_t last_array : 13; + uint64_t : 6; // reserved + + uint64_t min_lod_warn : 12; // fixed point 4.8 + uint64_t counter_bank_id : 8; + uint64_t LOD_hdw_cnt_en : 1; + uint64_t : 42; // reserved + + std::uint64_t getAddress() const { + return static_cast(static_cast(baseaddr256)) + << 8; + } +}; + +static_assert(sizeof(GnmTBuffer) == sizeof(std::uint64_t) * 4); + +struct ShaderModule { + VkPipeline pipeline; + VkPipelineLayout pipelineLayout; + VkDescriptorSetLayout descriptorSetLayout; + VkDescriptorPool descriptorPool; + + void destroy() const; +}; + +constexpr auto kPageSize = 0x4000; + +struct ZoneInfo { + std::uint64_t beginAddress; + std::uint64_t endAddress; +}; + +struct NoInvalidationHandle { + void handleInvalidation(std::uint64_t address) {} +}; + +struct StdSetInvalidationHandle { + std::set> invalidatedZones; + + void handleInvalidation(std::uint64_t address) { + invalidatedZones.insert(address); + } +}; + +template +class MemoryZoneTable : public InvalidationHandleT { + enum class Kind { U, X }; + std::map mAreas; + +public: + class iterator { + using map_iterator = typename std::map::iterator; + map_iterator it; + + public: + iterator() = default; + iterator(map_iterator it) : it(it) {} + + ZoneInfo operator *() const { + return { + it->first, + std::next(it)->first + }; + } + + iterator &operator++() const { + ++it; + ++it; + return *this; + } + + iterator &operator--() const { + --it; + --it; + return *this; + } + + bool operator==(iterator other) const { + return it == other.it; + } + + bool operator!=(iterator other) const { + return it != other.it; + } + }; + + iterator begin() { + return iterator(mAreas.begin()); + } + iterator end() { + return iterator(mAreas.end()); + } + + void clear() { mAreas.clear(); } + + ZoneInfo queryZone(std::uint64_t address) const { + auto it = mAreas.lower_bound(address); + assert(it != mAreas.end()); + std::uint64_t endAddress = 0; + if (it->first != address) { + assert(it->second == Kind::U); + endAddress = it->first; + --it; + } else { + assert(it->second == Kind::X); + endAddress = std::next(it)->first; + } + + auto startAddress = std::uint64_t(it->first); + + return {startAddress, endAddress}; + } + + void map(std::uint64_t beginAddress, std::uint64_t endAddress) { + auto [beginIt, beginInserted] = mAreas.emplace(beginAddress, Kind::X); + auto [endIt, endInserted] = mAreas.emplace(endAddress, Kind::U); + + if (!beginInserted) { + if (beginIt->second == Kind::U) { + // it was close, extend to open + assert(beginIt != mAreas.begin()); + --beginIt; + } + } else if (beginIt != mAreas.begin()) { + auto prevRangePointIt = std::prev(beginIt); + + if (prevRangePointIt->second == Kind::X) { + // we found range start before inserted one, remove insertion and extend + // begin + this->handleInvalidation(beginIt->first); + mAreas.erase(beginIt); + beginIt = prevRangePointIt; + } + } + + if (!endInserted) { + if (endIt->second == Kind::X) { + // it was open, extend to close + assert(endIt != mAreas.end()); + ++endIt; + } + } else { + auto nextRangePointIt = std::next(endIt); + + if (nextRangePointIt != mAreas.end() && + nextRangePointIt->second == Kind::U) { + // we found range end after inserted one, remove insertion and extend + // end + this->handleInvalidation(std::prev(endIt)->first); + mAreas.erase(endIt); + endIt = nextRangePointIt; + } + } + + // eat everything in middle of the range + ++beginIt; + while (beginIt != endIt) { + this->handleInvalidation(std::prev(endIt)->first); + beginIt = mAreas.erase(beginIt); + } + } + + void unmap(std::uint64_t beginAddres, std::uint64_t endAddress) { + auto beginIt = mAreas.lower_bound(beginAddres); + + if (beginIt == mAreas.end() || beginIt->first >= endAddress) { + return; + } + if (beginIt->first > beginAddres && beginIt->second == Kind::U) { + // we have found end after unmap begin, need to insert new end + this->handleInvalidation(std::prev(beginIt)->first); + auto newBeginIt = mAreas.emplace_hint(beginIt, beginAddres, Kind::U); + mAreas.erase(beginIt); + + if (newBeginIt == mAreas.end()) { + return; + } + + beginIt = std::next(newBeginIt); + } else if (beginIt->second == Kind::U) { + beginIt = ++beginIt; + } + + Kind lastKind = Kind::U; + while (beginIt != mAreas.end() && beginIt->first <= endAddress) { + lastKind = beginIt->second; + if (lastKind == Kind::X) { + this->handleInvalidation(std::prev(beginIt)->first); + } + beginIt = mAreas.erase(beginIt); + } + + if (lastKind != Kind::X) { + return; + } + + // Last removed was range open, need to insert new one at unmap end + mAreas.emplace_hint(beginIt, endAddress, Kind::X); + } + + std::size_t totalMemory() const { + std::size_t result = 0; + + for (auto it = mAreas.begin(), end = mAreas.end(); it != end; ++it) { + auto rangeBegin = it; + auto rangeEnd = ++it; + + result += rangeEnd->first - rangeBegin->first; + } + + return result; + } +}; + +extern MemoryZoneTable memoryZoneTable; + +struct DrawContext { + VkPipelineCache pipelineCache; + VkQueue queue; + VkCommandPool commandPool; + std::vector loadedShaderModules; + + ~DrawContext(); +}; + +void draw(RemoteMemory memory, DrawContext &ctxt, std::uint32_t count, + std::uint64_t indeciesAddress, std::uint32_t indexCount); +void dispatch(RemoteMemory memory, DrawContext &ctxt, std::size_t dimX, + std::size_t dimY, std::size_t dimZ); +void handleCommandBuffer(RemoteMemory memory, DrawContext &ctxt, + std::uint32_t *cmds, std::uint32_t count); +void setVkDevice(VkDevice device, VkPhysicalDeviceMemoryProperties properties); + +class AmdgpuDevice final { + amdgpu::bridge::BridgePuller mBridge; + amdgpu::bridge::BridgeHeader *mBridgeCommandBuffer = nullptr; + amdgpu::device::DrawContext mDc; + char *internalMemory = nullptr; + RemoteMemory memory; + std::uint64_t memorySize = 0; + int memoryFd = -1; + std::uint32_t currentBuffer = -1; + std::uint64_t flipArg = 0; + std::uint64_t flipCount = 0; + // amdgpu::bridge::FlipStatus *flipStatus = nullptr; + + struct RenderBuffer { + void *memory; + std::uint32_t pitch; + std::uint32_t width; + std::uint32_t height; + std::uint32_t pixelFormat; + std::uint32_t tilingMode; + + VkImage vkImage; + void *vkDataPoiner; + VkImageView vkImageView; + VkDeviceMemory vkImageMemory; + + VkDescriptorSet vkDescriptorSet; + const ShaderModule *shader; + }; + + RenderBuffer renderBuffers[8]{}; + + bool flipWasRequested = false; + + void handleSetUpSharedMemory(std::uint64_t address, std::uint64_t size, + std::uint64_t internalSize, + const char *name); + void handleSetFlipStatus(std::uint64_t shmOffset); + void handleProtectMemory(std::uint64_t address, std::uint64_t size, + std::uint32_t prot); + void handleCommandBuffer(std::uint64_t address, std::uint64_t size); + void handleFlip(std::uint32_t bufferIndex, std::uint64_t arg); + void handleSetBuffer(std::uint32_t bufferIndex, std::uint64_t address, + std::uint32_t width, std::uint32_t height, + std::uint32_t pitch, std::uint32_t pixelFormat, + std::uint32_t tilingMode); + void handleDoFlip(); + + void updateFlipStatus(); + +public: + AmdgpuDevice(amdgpu::device::DrawContext dc); + ~AmdgpuDevice(); + + void handleCommands() { + // while (mBridge.processCommand(this) && !flipWasRequested) { + // ; + // } + } +}; +} // namespace amdgpu::device diff --git a/hw/amdgpu/device/include/amdgpu/device/pm4.hpp b/hw/amdgpu/device/include/amdgpu/device/pm4.hpp new file mode 100644 index 000000000..f40cb8672 --- /dev/null +++ b/hw/amdgpu/device/include/amdgpu/device/pm4.hpp @@ -0,0 +1,102 @@ +#pragma once + +namespace amdgpu { +enum PM4Opcodes { + NOP = 0x10, + SET_BASE = 0x11, + CLEAR_STATE = 0x12, + INDEX_BUFFER_SIZE = 0x13, + DISPATCH_DIRECT = 0x15, + DISPATCH_INDIRECT = 0x16, + INDIRECT_BUFFER_END = 0x17, + MODE_CONTROL = 0x18, + ATOMIC_GDS = 0x1D, + ATOMIC_MEM = 0x1E, + OCCLUSION_QUERY = 0x1F, + SET_PREDICATION = 0x20, + REG_RMW = 0x21, + COND_EXEC = 0x22, + PRED_EXEC = 0x23, + DRAW_INDIRECT = 0x24, + DRAW_INDEX_INDIRECT = 0x25, + INDEX_BASE = 0x26, + DRAW_INDEX_2 = 0x27, + CONTEXT_CONTROL = 0x28, + DRAW_INDEX_OFFSET = 0x29, + INDEX_TYPE = 0x2A, + DRAW_INDEX = 0x2B, + DRAW_INDIRECT_MULTI = 0x2C, + DRAW_INDEX_AUTO = 0x2D, + DRAW_INDEX_IMMD = 0x2E, + NUM_INSTANCES = 0x2F, + DRAW_INDEX_MULTI_AUTO = 0x30, + INDIRECT_BUFFER_32 = 0x32, + INDIRECT_BUFFER_CONST = 0x33, + STRMOUT_BUFFER_UPDATE = 0x34, + DRAW_INDEX_OFFSET_2 = 0x35, + DRAW_PREAMBLE = 0x36, + WRITE_DATA = 0x37, + DRAW_INDEX_INDIRECT_MULTI = 0x38, + MEM_SEMAPHORE = 0x39, + MPEG_INDEX = 0x3A, + COPY_DW = 0x3B, + WAIT_REG_MEM = 0x3C, + MEM_WRITE = 0x3D, + INDIRECT_BUFFER_3F = 0x3F, + COPY_DATA = 0x40, + CP_DMA = 0x41, + PFP_SYNC_ME = 0x42, + SURFACE_SYNC = 0x43, + ME_INITIALIZE = 0x44, + COND_WRITE = 0x45, + EVENT_WRITE = 0x46, + EVENT_WRITE_EOP = 0x47, + EVENT_WRITE_EOS = 0x48, + RELEASE_MEM = 0x49, + PREAMBLE_CNTL = 0x4A, + RB_OFFSET = 0x4B, + ALU_PS_CONST_BUFFER_COPY = 0x4C, + ALU_VS_CONST_BUFFER_COPY = 0x4D, + ALU_PS_CONST_UPDATE = 0x4E, + ALU_VS_CONST_UPDATE = 0x4F, + DMA_DATA = 0x50, + ONE_REG_WRITE = 0x57, + AQUIRE_MEM = 0x58, + REWIND = 0x59, + LOAD_UCONFIG_REG = 0x5E, + LOAD_SH_REG = 0x5F, + LOAD_CONFIG_REG = 0x60, + LOAD_CONTEXT_REG = 0x61, + SET_CONFIG_REG = 0x68, + SET_CONTEXT_REG = 0x69, + SET_ALU_CONST = 0x6A, + SET_BOOL_CONST = 0x6B, + SET_LOOP_CONST = 0x6C, + SET_RESOURCE = 0x6D, + SET_SAMPLER = 0x6E, + SET_CTL_CONST = 0x6F, + SET_RESOURCE_OFFSET = 0x70, + SET_ALU_CONST_VS = 0x71, + SET_ALU_CONST_DI = 0x72, + SET_CONTEXT_REG_INDIRECT = 0x73, + SET_RESOURCE_INDIRECT = 0x74, + SET_APPEND_CNT = 0x75, + SET_SH_REG = 0x76, + SET_SH_REG_OFFSET = 0x77, + SET_QUEUE_REG = 0x78, + SET_UCONFIG_REG = 0x79, + SCRATCH_RAM_WRITE = 0x7D, + SCRATCH_RAM_READ = 0x7E, + LOAD_CONST_RAM = 0x80, + WRITE_CONST_RAM = 0x81, + DUMP_CONST_RAM = 0x83, + INCREMENT_CE_COUNTER = 0x84, + INCREMENT_DE_COUNTER = 0x85, + WAIT_ON_CE_COUNTER = 0x86, + WAIT_ON_DE_COUNTER_DIFF = 0x88, + SWITCH_BUFFER = 0x8B, +}; + +const char *pm4OpcodeToString(int opcode); +} // namespace amdgpu::device + diff --git a/hw/amdgpu/device/include/amdgpu/device/tiler.hpp b/hw/amdgpu/device/include/amdgpu/device/tiler.hpp new file mode 100644 index 000000000..aab34faac --- /dev/null +++ b/hw/amdgpu/device/include/amdgpu/device/tiler.hpp @@ -0,0 +1,681 @@ +#pragma once +#include "device.hpp" +#include +#include +#include + +namespace amdgpu::device { +namespace Gnm { +enum GpuMode { kGpuModeBase = 0, kGpuModeNeo = 1 }; +enum TileMode { + kTileModeDepth_2dThin_64 = 0x00000000, + kTileModeDepth_2dThin_128 = 0x00000001, + kTileModeDepth_2dThin_256 = 0x00000002, + kTileModeDepth_2dThin_512 = 0x00000003, + kTileModeDepth_2dThin_1K = 0x00000004, + kTileModeDepth_2dThinPrt_256 = 0x00000006, + + kTileModeDisplay_LinearAligned = 0x00000008, + kTileModeDisplay_2dThin = 0x0000000A, + kTileModeDisplay_ThinPrt = 0x0000000B, + kTileModeDisplay_2dThinPrt = 0x0000000C, + + kTileModeThin_1dThin = 0x0000000D, + kTileModeThin_2dThin = 0x0000000E, + kTileModeThin_ThinPrt = 0x00000010, + kTileModeThin_2dThinPrt = 0x00000011, + kTileModeThin_3dThinPrt = 0x00000012, + + kTileModeThick_1dThick = 0x00000013, + kTileModeThick_2dThick = 0x00000014, + kTileModeThick_ThickPrt = 0x00000016, + kTileModeThick_2dThickPrt = 0x00000017, + kTileModeThick_3dThickPrt = 0x00000018, + kTileModeThick_2dXThick = 0x00000019, +}; + +enum MicroTileMode { + kMicroTileModeDisplay = 0x00000000, + kMicroTileModeThin = 0x00000001, + kMicroTileModeDepth = 0x00000002, + kMicroTileModeRotated = 0x00000003, + kMicroTileModeThick = 0x00000004, +}; + +enum ArrayMode { + kArrayModeLinearGeneral = 0x00000000, + kArrayModeLinearAligned = 0x00000001, + kArrayMode1dTiledThin = 0x00000002, + kArrayMode1dTiledThick = 0x00000003, + kArrayMode2dTiledThin = 0x00000004, + kArrayModeTiledThinPrt = 0x00000005, + kArrayMode2dTiledThinPrt = 0x00000006, + kArrayMode2dTiledThick = 0x00000007, + kArrayMode2dTiledXThick = 0x00000008, + kArrayModeTiledThickPrt = 0x00000009, + kArrayMode2dTiledThickPrt = 0x0000000a, + kArrayMode3dTiledThinPrt = 0x0000000b, + kArrayMode3dTiledThin = 0x0000000c, + kArrayMode3dTiledThick = 0x0000000d, + kArrayMode3dTiledXThick = 0x0000000e, + kArrayMode3dTiledThickPrt = 0x0000000f, +}; + +enum PipeConfig { + kPipeConfigP8_32x32_8x16 = 0x0000000a, + kPipeConfigP8_32x32_16x16 = 0x0000000c, + kPipeConfigP16 = 0x00000012, +}; +} // namespace Gnm + +#define GNM_ERROR(msg, ...) \ + //std::fprintf(stderr, msg, __VA_ARGS__); \ + //std::abort() \ + __builtin_trap(); + +static constexpr uint32_t kMicroTileWidth = 8; +static constexpr uint32_t kMicroTileHeight = 8; + +static constexpr uint32_t getElementIndex(uint32_t x, uint32_t y, uint32_t z, + uint32_t bitsPerElement, + Gnm::MicroTileMode microTileMode, + Gnm::ArrayMode arrayMode) { + uint32_t elem = 0; + + if (microTileMode == Gnm::kMicroTileModeDisplay) { + switch (bitsPerElement) { + case 8: + elem |= ((x >> 0) & 0x1) << 0; + elem |= ((x >> 1) & 0x1) << 1; + elem |= ((x >> 2) & 0x1) << 2; + elem |= ((y >> 1) & 0x1) << 3; + elem |= ((y >> 0) & 0x1) << 4; + elem |= ((y >> 2) & 0x1) << 5; + break; + case 16: + elem |= ((x >> 0) & 0x1) << 0; + elem |= ((x >> 1) & 0x1) << 1; + elem |= ((x >> 2) & 0x1) << 2; + elem |= ((y >> 0) & 0x1) << 3; + elem |= ((y >> 1) & 0x1) << 4; + elem |= ((y >> 2) & 0x1) << 5; + break; + case 32: + elem |= ((x >> 0) & 0x1) << 0; + elem |= ((x >> 1) & 0x1) << 1; + elem |= ((y >> 0) & 0x1) << 2; + elem |= ((x >> 2) & 0x1) << 3; + elem |= ((y >> 1) & 0x1) << 4; + elem |= ((y >> 2) & 0x1) << 5; + break; + case 64: + elem |= ((x >> 0) & 0x1) << 0; + elem |= ((y >> 0) & 0x1) << 1; + elem |= ((x >> 1) & 0x1) << 2; + elem |= ((x >> 2) & 0x1) << 3; + elem |= ((y >> 1) & 0x1) << 4; + elem |= ((y >> 2) & 0x1) << 5; + break; + default: + GNM_ERROR("Unsupported bitsPerElement (%u) for displayable surface.", + bitsPerElement); + } + } else if (microTileMode == Gnm::kMicroTileModeThin || + microTileMode == Gnm::kMicroTileModeDepth) { + elem |= ((x >> 0) & 0x1) << 0; + elem |= ((y >> 0) & 0x1) << 1; + elem |= ((x >> 1) & 0x1) << 2; + elem |= ((y >> 1) & 0x1) << 3; + elem |= ((x >> 2) & 0x1) << 4; + elem |= ((y >> 2) & 0x1) << 5; + // Use Z too, if the array mode is Thick/XThick + switch (arrayMode) { + case Gnm::kArrayMode2dTiledXThick: + case Gnm::kArrayMode3dTiledXThick: + elem |= ((z >> 2) & 0x1) << 8; + // Intentional fall-through + case Gnm::kArrayMode1dTiledThick: + case Gnm::kArrayMode2dTiledThick: + case Gnm::kArrayMode3dTiledThick: + case Gnm::kArrayModeTiledThickPrt: + case Gnm::kArrayMode2dTiledThickPrt: + case Gnm::kArrayMode3dTiledThickPrt: + elem |= ((z >> 0) & 0x1) << 6; + elem |= ((z >> 1) & 0x1) << 7; + default: + break; // no other thick modes + } + } else if (microTileMode == Gnm::kMicroTileModeThick) // thick/xthick + { + switch (arrayMode) { + case Gnm::kArrayMode2dTiledXThick: + case Gnm::kArrayMode3dTiledXThick: + elem |= ((z >> 2) & 0x1) << 8; + // intentional fall-through + case Gnm::kArrayMode1dTiledThick: + case Gnm::kArrayMode2dTiledThick: + case Gnm::kArrayMode3dTiledThick: + case Gnm::kArrayModeTiledThickPrt: + case Gnm::kArrayMode2dTiledThickPrt: + case Gnm::kArrayMode3dTiledThickPrt: + if (bitsPerElement == 8 || bitsPerElement == 16) { + elem |= ((x >> 0) & 0x1) << 0; + elem |= ((y >> 0) & 0x1) << 1; + elem |= ((x >> 1) & 0x1) << 2; + elem |= ((y >> 1) & 0x1) << 3; + elem |= ((z >> 0) & 0x1) << 4; + elem |= ((z >> 1) & 0x1) << 5; + elem |= ((x >> 2) & 0x1) << 6; + elem |= ((y >> 2) & 0x1) << 7; + } else if (bitsPerElement == 32) { + elem |= ((x >> 0) & 0x1) << 0; + elem |= ((y >> 0) & 0x1) << 1; + elem |= ((x >> 1) & 0x1) << 2; + elem |= ((z >> 0) & 0x1) << 3; + elem |= ((y >> 1) & 0x1) << 4; + elem |= ((z >> 1) & 0x1) << 5; + elem |= ((x >> 2) & 0x1) << 6; + elem |= ((y >> 2) & 0x1) << 7; + } else if (bitsPerElement == 64 || bitsPerElement == 128) { + elem |= ((x >> 0) & 0x1) << 0; + elem |= ((y >> 0) & 0x1) << 1; + elem |= ((z >> 0) & 0x1) << 2; + elem |= ((x >> 1) & 0x1) << 3; + elem |= ((y >> 1) & 0x1) << 4; + elem |= ((z >> 1) & 0x1) << 5; + elem |= ((x >> 2) & 0x1) << 6; + elem |= ((y >> 2) & 0x1) << 7; + } else { + GNM_ERROR("Invalid bitsPerElement (%u) for " + "microTileMode=kMicroTileModeThick.", + bitsPerElement); + } + break; + default: + GNM_ERROR("Invalid arrayMode (0x%02X) for thick/xthick " + "microTileMode=kMicroTileModeThick.", + arrayMode); + } + } + // TODO: rotated + + return elem; +} +static constexpr uint32_t getPipeIndex(uint32_t x, uint32_t y, + Gnm::PipeConfig pipeCfg) { + uint32_t pipe = 0; + switch (pipeCfg) { + case Gnm::kPipeConfigP8_32x32_8x16: + pipe |= (((x >> 4) ^ (y >> 3) ^ (x >> 5)) & 0x1) << 0; + pipe |= (((x >> 3) ^ (y >> 4)) & 0x1) << 1; + pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2; + break; + case Gnm::kPipeConfigP8_32x32_16x16: + pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0; + pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1; + pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2; + break; + case Gnm::kPipeConfigP16: + pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0; + pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1; + pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2; + pipe |= (((x >> 6) ^ (y >> 5)) & 0x1) << 3; + break; + default: + GNM_ERROR("Unsupported pipeCfg (0x%02X).", pipeCfg); + } + return pipe; +} + +inline constexpr uint32_t fastIntLog2(uint32_t i) { + return 31 - __builtin_clz(i | 1); +} + +static constexpr uint32_t getBankIndex(uint32_t x, uint32_t y, + uint32_t bank_width, + uint32_t bank_height, uint32_t num_banks, + uint32_t num_pipes) { + + // bank_width=1, bank_height=1, num_banks = 16, num_pipes=8 + const uint32_t x_shift_offset = fastIntLog2(bank_width * num_pipes); + const uint32_t y_shift_offset = fastIntLog2(bank_height); + const uint32_t xs = x >> x_shift_offset; + const uint32_t ys = y >> y_shift_offset; + + uint32_t bank = 0; + switch (num_banks) { + case 2: + bank |= (((xs >> 3) ^ (ys >> 3)) & 0x1) << 0; + break; + case 4: + bank |= (((xs >> 3) ^ (ys >> 4)) & 0x1) << 0; + bank |= (((xs >> 4) ^ (ys >> 3)) & 0x1) << 1; + break; + case 8: + bank |= (((xs >> 3) ^ (ys >> 5)) & 0x1) << 0; + bank |= (((xs >> 4) ^ (ys >> 4) ^ (ys >> 5)) & 0x1) << 1; + bank |= (((xs >> 5) ^ (ys >> 3)) & 0x1) << 2; + break; + case 16: + bank |= (((xs >> 3) ^ (ys >> 6)) & 0x1) << 0; + bank |= (((xs >> 4) ^ (ys >> 5) ^ (ys >> 6)) & 0x1) << 1; + bank |= (((xs >> 5) ^ (ys >> 4)) & 0x1) << 2; + bank |= (((xs >> 6) ^ (ys >> 3)) & 0x1) << 3; + break; + default: + GNM_ERROR("invalid num_banks (%u) -- must be 2, 4, 8, or 16.", num_banks); + } + + return bank; +} + +inline std::uint32_t getTexelsPerElement(SurfaceFormat format) { + if (format >= kSurfaceFormatBc1 && format <= kSurfaceFormatBc7) { + return 16; + } + + if (format >= kSurfaceFormat1) { + return 8; + } + + return 1; +} + +inline std::uint32_t getBitsPerElement(SurfaceFormat format) { + static constexpr int bitsPerElement[] = { + 0, 8, 16, 16, 32, 32, 32, 32, 32, 32, 32, 64, 64, 96, 128, -1, + 16, 16, 16, 16, 32, 32, 64, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 16, 16, 32, 4, 8, 8, 4, 8, 8, 8, -1, -1, 8, 8, 8, 8, + 8, 8, 16, 16, 32, 32, 32, 64, 64, 8, 16, 1, 1}; + + auto rawFormat = static_cast(format); + + if (rawFormat >= sizeof(bitsPerElement)) { + return 0; + } + + return bitsPerElement[rawFormat]; +} + +struct Tiler1d { + Gnm::ArrayMode m_arrayMode; + uint32_t m_bitsPerElement; + + Gnm::MicroTileMode m_microTileMode; + uint32_t m_tileThickness; + uint32_t m_tileBytes; + uint32_t m_tilesPerRow; + uint32_t m_tilesPerSlice; + + Tiler1d(const GnmTBuffer *texture) { + /* + m_arrayMode = Gnm::ArrayMode::kArrayMode1dTiledThin; + m_bitsPerElement = 128;// getBitsPerElement(texture->dfmt); + m_microTileMode = Gnm::MicroTileMode::kMicroTileModeThin; + m_tileThickness = (m_arrayMode == Gnm::kArrayMode1dTiledThick) ? 4 : 1; + m_tileBytes = (kMicroTileWidth * kMicroTileHeight * m_tileThickness * m_bitsPerElement + 7) / 8; + + auto width = texture->width + 1; + auto height = texture->height + 1; + width = (width + 3) / 4; + height = (height + 3) / 4; + m_tilesPerRow = width / kMicroTileWidth; + m_tilesPerSlice = std::max(m_tilesPerRow * (height / kMicroTileHeight), 1U); + */ + + m_arrayMode = (Gnm::ArrayMode)2; + m_bitsPerElement = 128; + m_microTileMode = (Gnm::MicroTileMode)1; + m_tileThickness= 1; + m_tileBytes= 1024; + m_tilesPerRow = 16; + m_tilesPerSlice = 256; + } + + uint64_t getTiledElementBitOffset(uint32_t x, uint32_t y, uint32_t z) const { + uint64_t element_index = getElementIndex(x, y, z, m_bitsPerElement, + m_microTileMode, m_arrayMode); + + uint64_t slice_offset = + (z / m_tileThickness) * m_tilesPerSlice * m_tileBytes; + + uint64_t tile_row_index = y / kMicroTileHeight; + uint64_t tile_column_index = x / kMicroTileWidth; + uint64_t tile_offset = + ((tile_row_index * m_tilesPerRow) + tile_column_index) * m_tileBytes; + + uint64_t element_offset = element_index * m_bitsPerElement; + + return (slice_offset + tile_offset) * 8 + element_offset; + } + + int32_t getTiledElementByteOffset(uint32_t x, uint32_t y, uint32_t z) const { + return getTiledElementBitOffset(x, y, z) / 8; + } +}; + +struct Tiler2d { + static constexpr int m_bitsPerElement = 32; + static constexpr Gnm::MicroTileMode m_microTileMode = + Gnm::kMicroTileModeDisplay; + static constexpr Gnm::ArrayMode m_arrayMode = Gnm::kArrayMode2dTiledThin; + static constexpr uint32_t m_macroTileWidth = 128; + static constexpr uint32_t m_macroTileHeight = 64; + static constexpr Gnm::PipeConfig m_pipeConfig = + Gnm::kPipeConfigP8_32x32_16x16; + static constexpr uint32_t m_bankWidth = 1; + static constexpr uint32_t m_bankHeight = 1; + static constexpr uint32_t m_numBanks = 16; + static constexpr uint32_t m_numPipes = 8; + static constexpr uint32_t m_tileThickness = 1; + static constexpr uint32_t m_numFragmentsPerPixel = 1; + static constexpr uint32_t m_tileSplitBytes = 512; + static constexpr uint32_t m_pipeInterleaveBytes = 256; + static constexpr uint32_t m_macroTileAspect = 2; + static constexpr uint32_t m_paddedWidth = 1280; + static constexpr uint32_t m_paddedHeight = 768; + static constexpr uint32_t m_arraySlice = 0; + static constexpr uint64_t m_bankSwizzleMask = 0; + static constexpr uint64_t m_pipeSwizzleMask = 0; + static constexpr uint64_t m_pipeInterleaveMask = 255; + static constexpr uint64_t m_pipeInterleaveBits = 8; + static constexpr uint64_t m_pipeBits = 3; + static constexpr uint64_t m_bankBits = 4; + + static constexpr uint32_t kDramRowSize = 0x400; + static constexpr uint32_t kNumLogicalBanks = 16; + static constexpr uint32_t kPipeInterleaveBytes = 256; + static constexpr uint32_t kBankInterleave = 1; + static constexpr uint32_t kMicroTileWidth = 8; + static constexpr uint32_t kMicroTileHeight = 8; + static constexpr uint32_t kNumMicroTilePixels = + kMicroTileWidth * kMicroTileHeight; + static constexpr uint32_t kCmaskCacheBits = 0x400; + static constexpr uint32_t kHtileCacheBits = 0x4000; + + int32_t getTiledElementBitOffset(uint64_t *outTiledBitOffset, uint32_t x, + uint32_t y, uint32_t z, + uint32_t fragmentIndex, bool log = false); + + int32_t getTiledElementByteOffset(uint64_t *outTiledByteOffset, uint32_t x, + uint32_t y, uint32_t z, + uint32_t fragmentIndex, bool log = false) { + uint64_t bitOffset = 0; + int32_t status = + getTiledElementBitOffset(&bitOffset, x, y, z, fragmentIndex, log); + *outTiledByteOffset = bitOffset / 8; + return status; + } +}; + +inline int32_t Tiler2d::getTiledElementBitOffset(uint64_t *outTiledBitOffset, + uint32_t x, uint32_t y, + uint32_t z, + uint32_t fragmentIndex, + bool log) { + uint64_t element_index = + getElementIndex(x, y, z, m_bitsPerElement, m_microTileMode, m_arrayMode); + + uint32_t xh = x, yh = y; + if (m_arrayMode == Gnm::kArrayModeTiledThinPrt || + m_arrayMode == Gnm::kArrayModeTiledThickPrt) { + xh %= m_macroTileWidth; + yh %= m_macroTileHeight; + } + uint64_t pipe = getPipeIndex(xh, yh, m_pipeConfig); + uint64_t bank = + getBankIndex(xh, yh, m_bankWidth, m_bankHeight, m_numBanks, m_numPipes); + + constexpr uint32_t tile_bytes = + (kMicroTileWidth * kMicroTileHeight * m_tileThickness * m_bitsPerElement * + m_numFragmentsPerPixel + + 7) / + 8; + + uint64_t element_offset = 0; + if (m_microTileMode == Gnm::kMicroTileModeDepth) { + uint64_t pixel_offset = + element_index * m_bitsPerElement * m_numFragmentsPerPixel; + element_offset = pixel_offset + (fragmentIndex * m_bitsPerElement); + } else { + uint64_t fragment_offset = + fragmentIndex * (tile_bytes / m_numFragmentsPerPixel) * 8; + element_offset = fragment_offset + (element_index * m_bitsPerElement); + } + + uint64_t slices_per_tile = 1; + uint64_t tile_split_slice = 0; + + uint64_t macro_tile_bytes = (m_macroTileWidth / kMicroTileWidth) * + (m_macroTileHeight / kMicroTileHeight) * + tile_bytes / (m_numPipes * m_numBanks); + uint64_t macro_tiles_per_row = m_paddedWidth / m_macroTileWidth; + uint64_t macro_tile_row_index = y / m_macroTileHeight; + uint64_t macro_tile_column_index = x / m_macroTileWidth; + uint64_t macro_tile_index = + (macro_tile_row_index * macro_tiles_per_row) + macro_tile_column_index; + uint64_t macro_tile_offset = macro_tile_index * macro_tile_bytes; + uint64_t macro_tiles_per_slice = + macro_tiles_per_row * (m_paddedHeight / m_macroTileHeight); + uint64_t slice_bytes = macro_tiles_per_slice * macro_tile_bytes; + + uint32_t slice = z; + + uint64_t slice_offset = + (tile_split_slice + slices_per_tile * slice / m_tileThickness) * + slice_bytes; + if (m_arraySlice != 0) { + slice = m_arraySlice; + } + + uint64_t tile_row_index = (y / kMicroTileHeight) % m_bankHeight; + uint64_t tile_column_index = + ((x / kMicroTileWidth) / m_numPipes) % m_bankWidth; + uint64_t tile_index = (tile_row_index * m_bankWidth) + tile_column_index; + uint64_t tile_offset = tile_index * tile_bytes; + + // Bank and pipe rotation/swizzling. + uint64_t bank_swizzle = m_bankSwizzleMask; + uint64_t pipe_swizzle = m_pipeSwizzleMask; + + uint64_t pipe_slice_rotation = 0; + switch (m_arrayMode) { + case Gnm::kArrayMode3dTiledThin: + case Gnm::kArrayMode3dTiledThick: + case Gnm::kArrayMode3dTiledXThick: + pipe_slice_rotation = + std::max(1UL, (m_numPipes / 2UL) - 1UL) * (slice / m_tileThickness); + break; + default: + break; + } + pipe_swizzle += pipe_slice_rotation; + pipe_swizzle &= (m_numPipes - 1); + pipe = pipe ^ pipe_swizzle; + + uint32_t slice_rotation = 0; + switch (m_arrayMode) { + case Gnm::kArrayMode2dTiledThin: + case Gnm::kArrayMode2dTiledThick: + case Gnm::kArrayMode2dTiledXThick: + slice_rotation = ((m_numBanks / 2) - 1) * (slice / m_tileThickness); + break; + case Gnm::kArrayMode3dTiledThin: + case Gnm::kArrayMode3dTiledThick: + case Gnm::kArrayMode3dTiledXThick: + slice_rotation = std::max(1UL, (m_numPipes / 2UL) - 1UL) * + (slice / m_tileThickness) / m_numPipes; + break; + default: + break; + } + uint64_t tile_split_slice_rotation = 0; + switch (m_arrayMode) { + case Gnm::kArrayMode2dTiledThin: + case Gnm::kArrayMode3dTiledThin: + case Gnm::kArrayMode2dTiledThinPrt: + case Gnm::kArrayMode3dTiledThinPrt: + tile_split_slice_rotation = ((m_numBanks / 2) + 1) * tile_split_slice; + break; + default: + break; + } + + bank ^= bank_swizzle + slice_rotation; + bank ^= tile_split_slice_rotation; + bank &= (m_numBanks - 1); + + uint64_t total_offset = + (slice_offset + macro_tile_offset + tile_offset) * 8 + element_offset; + uint64_t bitOffset = total_offset & 0x7; + total_offset /= 8; + + uint64_t pipe_interleave_offset = total_offset & m_pipeInterleaveMask; + uint64_t offset = total_offset >> m_pipeInterleaveBits; + + uint64_t finalByteOffset = + pipe_interleave_offset | (pipe << (m_pipeInterleaveBits)) | + (bank << (m_pipeInterleaveBits + m_pipeBits)) | + (offset << (m_pipeInterleaveBits + m_pipeBits + m_bankBits)); + *outTiledBitOffset = (finalByteOffset << 3) | bitOffset; + return 0; +} + +namespace surfaceTiler { +constexpr std::uint32_t getElementIndex(std::uint32_t x, std::uint32_t y) { + std::uint32_t elem = 0; + + elem |= ((x >> 0) & 0x1) << 0; + elem |= ((x >> 1) & 0x1) << 1; + elem |= ((y >> 0) & 0x1) << 2; + elem |= ((x >> 2) & 0x1) << 3; + elem |= ((y >> 1) & 0x1) << 4; + elem |= ((y >> 2) & 0x1) << 5; + + return elem; +} + +constexpr std::uint32_t getPipeIndex(std::uint32_t x, std::uint32_t y) { + std::uint32_t pipe = 0; + + pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0; + pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1; + pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2; + + return pipe; +} + +constexpr std::uint32_t getBankIndex(std::uint32_t x, std::uint32_t y) { + std::uint32_t bank = 0; + + bank |= (((x >> 6) ^ (y >> 6)) & 0x1) << 0; + bank |= (((x >> 7) ^ (y >> 5) ^ (y >> 6)) & 0x1) << 1; + bank |= (((x >> 8) ^ (y >> 4)) & 0x1) << 2; + bank |= (((x >> 9) ^ (y >> 3)) & 0x1) << 3; + + return bank; +} + +inline std::uint64_t getTiledElementByteOffsetImpl(std::uint32_t x, + std::uint32_t y, + std::uint32_t width) { + std::uint32_t elementIndex = getElementIndex(x, y); + std::uint32_t pipe = getPipeIndex(x, y); + std::uint32_t bank = getBankIndex(x, y); + + uint64_t macroTileIndex = + (static_cast(y / 64) * (width / 128)) + x / 128; + uint64_t macroTileOffset = macroTileIndex * 256; + + std::uint64_t totalOffset = macroTileOffset + elementIndex * 4; + + std::uint64_t pipeInterleaveOffset = totalOffset & 255; + std::uint64_t offset = totalOffset >> 8; + + return pipeInterleaveOffset | (pipe << 8) | (bank << 11) | (offset << 15); +} + +static constexpr std::uint32_t kMaxPrecalculatedCount = 8; +static constexpr std::uint32_t kMaxPrecalculatedWidth = 2048; +static constexpr std::uint32_t kMaxPrecalculatedHeight = 2048; + +static std::uint64_t gPrecalculatedTiledOffsets[kMaxPrecalculatedCount] + [kMaxPrecalculatedWidth * + kMaxPrecalculatedHeight]; + +struct PrecalculatedTiler { + std::uint32_t width; + std::uint32_t height; + std::uint32_t stride; + int index; +}; + +static PrecalculatedTiler gPrecalculatedTilers[kMaxPrecalculatedCount]; +static int gPrecalculatedCount; + +static int findPrecalculatedTile(std::uint32_t width, std::uint32_t height) { + for (int i = 0; i < gPrecalculatedCount; ++i) { + if (gPrecalculatedTilers[i].width == width && + gPrecalculatedTilers[i].height == height) { + return i; + } + } + + return -1; +} + +inline int precalculateTiles(std::uint32_t width, std::uint32_t height) { + int index = findPrecalculatedTile(width, height); + if (index >= 0) { + if (index >= kMaxPrecalculatedCount / 2 && + gPrecalculatedCount > kMaxPrecalculatedCount / 2) { + auto tmp = gPrecalculatedTilers[index]; + + for (int i = index; i > 0; --i) { + gPrecalculatedTilers[i] = gPrecalculatedTilers[i - 1]; + } + + gPrecalculatedTilers[0] = tmp; + return 0; + } + + return index; + } + + PrecalculatedTiler tiler; + tiler.width = width; + tiler.height = height; + tiler.stride = std::min(width, kMaxPrecalculatedWidth); + + if (gPrecalculatedCount >= kMaxPrecalculatedCount) { + // TODO: insert in the middle? + tiler.index = gPrecalculatedTilers[kMaxPrecalculatedCount - 1].index; + index = kMaxPrecalculatedCount - 1; + } else { + tiler.index = gPrecalculatedCount++; + index = tiler.index; + } + + gPrecalculatedTilers[index - 1] = tiler; + + for (std::uint32_t y = 0; y < height; ++y) { + for (std::uint32_t x = 0; x < width; ++x) { + gPrecalculatedTiledOffsets[index][y * tiler.stride + x] = + getTiledElementByteOffsetImpl(x, y, tiler.width); + } + } + + return index; +} + +inline std::uint64_t getTiledElementByteOffset(int index, std::uint32_t x, + std::uint32_t y) { + auto tiler = gPrecalculatedTilers[index]; + if (x < kMaxPrecalculatedWidth && y < kMaxPrecalculatedHeight) [[likely]] { + return gPrecalculatedTiledOffsets[index][x + y * tiler.stride]; + } + + return getTiledElementByteOffsetImpl(x, y, tiler.width); +} +} // namespace surfaceTiler +} // namespace amdgpu::device diff --git a/hw/amdgpu/device/src/device.cpp b/hw/amdgpu/device/src/device.cpp new file mode 100644 index 000000000..ec5a40796 --- /dev/null +++ b/hw/amdgpu/device/src/device.cpp @@ -0,0 +1,4283 @@ +#include "device.hpp" +#include "amdgpu/bridge/bridge.hpp" +#include "tiler.hpp" + +#include "spirv-tools/libspirv.hpp" +#include "spirv-tools/optimizer.hpp" +#include "util/unreachable.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +// #include +#include +#include +#include + +void *g_rwMemory; +std::size_t g_memorySize; +std::uint64_t g_memoryBase; + +namespace amdgpu::device { +MemoryZoneTable memoryZoneTable; + +inline Verify operator<<(Verify lhs, VkResult result) { + if (result != VK_SUCCESS) { + auto location = lhs.location(); + util::unreachable("Verification failed at %s: %s:%u:%u(res = %d)", + location.function_name(), location.file_name(), + location.line(), location.column(), result); + } + + return lhs; +} + +inline VkBlendFactor blendMultiplierToVkBlendFactor(BlendMultiplier mul) { + switch (mul) { + case kBlendMultiplierZero: + return VK_BLEND_FACTOR_ZERO; + case kBlendMultiplierOne: + return VK_BLEND_FACTOR_ONE; + case kBlendMultiplierSrcColor: + return VK_BLEND_FACTOR_SRC_COLOR; + case kBlendMultiplierOneMinusSrcColor: + return VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR; + case kBlendMultiplierSrcAlpha: + return VK_BLEND_FACTOR_SRC_ALPHA; + case kBlendMultiplierOneMinusSrcAlpha: + return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + case kBlendMultiplierDestAlpha: + return VK_BLEND_FACTOR_DST_ALPHA; + case kBlendMultiplierOneMinusDestAlpha: + return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; + case kBlendMultiplierDestColor: + return VK_BLEND_FACTOR_DST_COLOR; + case kBlendMultiplierOneMinusDestColor: + return VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR; + case kBlendMultiplierSrcAlphaSaturate: + return VK_BLEND_FACTOR_SRC_ALPHA_SATURATE; + case kBlendMultiplierConstantColor: + return VK_BLEND_FACTOR_CONSTANT_COLOR; + case kBlendMultiplierOneMinusConstantColor: + return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR; + case kBlendMultiplierSrc1Color: + return VK_BLEND_FACTOR_SRC1_COLOR; + case kBlendMultiplierInverseSrc1Color: + return VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR; + case kBlendMultiplierSrc1Alpha: + return VK_BLEND_FACTOR_SRC1_ALPHA; + case kBlendMultiplierInverseSrc1Alpha: + return VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA; + case kBlendMultiplierConstantAlpha: + return VK_BLEND_FACTOR_CONSTANT_ALPHA; + case kBlendMultiplierOneMinusConstantAlpha: + return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA; + } + + util::unreachable(); +} + +inline VkBlendOp blendFuncToVkBlendOp(BlendFunc func) { + switch (func) { + case kBlendFuncAdd: + return VK_BLEND_OP_ADD; + case kBlendFuncSubtract: + return VK_BLEND_OP_SUBTRACT; + case kBlendFuncMin: + return VK_BLEND_OP_MIN; + case kBlendFuncMax: + return VK_BLEND_OP_MAX; + case kBlendFuncReverseSubtract: + return VK_BLEND_OP_REVERSE_SUBTRACT; + } + + util::unreachable(); +} + +static VkDevice g_vkDevice = VK_NULL_HANDLE; +static VkAllocationCallbacks *g_vkAllocator = nullptr; +static VkPhysicalDeviceMemoryProperties g_physicalMemoryProperties; + +static std::uint32_t +findPhysicalMemoryTypeIndex(std::uint32_t typeBits, + VkMemoryPropertyFlags properties) { + typeBits &= (1 << g_physicalMemoryProperties.memoryTypeCount) - 1; + + while (typeBits != 0) { + auto typeIndex = std::countr_zero(typeBits); + + if ((g_physicalMemoryProperties.memoryTypes[typeIndex].propertyFlags & + properties) == properties) { + return typeIndex; + } + + typeBits &= ~(1 << typeIndex); + } + + util::unreachable("Failed to find memory type with properties %x", + properties); +} + +void setVkDevice(VkDevice device, VkPhysicalDeviceMemoryProperties properties) { + g_vkDevice = device; + g_physicalMemoryProperties = properties; +} + +#define GNM_GET_FIELD(src, registername, field) \ + (((src) & (GNM_##registername##__##field##__MASK)) >> \ + (GNM_##registername##__##field##__SHIFT)) + +#define mmSQ_BUF_RSRC_WORD0 0x23C0 +#define GNM_SQ_BUF_RSRC_WORD0__BASE_ADDRESS__MASK 0xffffffffL // size:32 +#define GNM_SQ_BUF_RSRC_WORD0__BASE_ADDRESS__SHIFT 0 + +#define mmSQ_BUF_RSRC_WORD1 0x23C1 +#define GNM_SQ_BUF_RSRC_WORD1__BASE_ADDRESS_HI__MASK 0x00000fffL // size:12 +#define GNM_SQ_BUF_RSRC_WORD1__STRIDE__MASK 0x3fff0000L // size:14 +#define GNM_SQ_BUF_RSRC_WORD1__SWIZZLE_ENABLE__MASK 0x80000000L // size: 1 +#define GNM_SQ_BUF_RSRC_WORD1__BASE_ADDRESS_HI__SHIFT 0 +#define GNM_SQ_BUF_RSRC_WORD1__STRIDE__SHIFT 16 +#define GNM_SQ_BUF_RSRC_WORD1__SWIZZLE_ENABLE__SHIFT 31 + +#define mmSQ_BUF_RSRC_WORD2 0x23C2 +#define GNM_SQ_BUF_RSRC_WORD2__NUM_RECORDS__MASK 0xffffffffL // size:32 +#define GNM_SQ_BUF_RSRC_WORD2__NUM_RECORDS__SHIFT 0 + +#define mmSQ_BUF_RSRC_WORD3 0x23C3 +#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_X__MASK 0x00000007L // size: 3 +#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_Y__MASK 0x00000038L // size: 3 +#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_Z__MASK 0x000001c0L // size: 3 +#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_W__MASK 0x00000e00L // size: 3 +#define GNM_SQ_BUF_RSRC_WORD3__ELEMENT_SIZE__MASK 0x00180000L // size: 2 +#define GNM_SQ_BUF_RSRC_WORD3__INDEX_STRIDE__MASK 0x00600000L // size: 2 +#define GNM_SQ_BUF_RSRC_WORD3__TYPE__MASK 0xc0000000L // size: 2 +#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_X__SHIFT 0 +#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_Y__SHIFT 3 +#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_Z__SHIFT 6 +#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_W__SHIFT 9 +#define GNM_SQ_BUF_RSRC_WORD3__ELEMENT_SIZE__SHIFT 19 +#define GNM_SQ_BUF_RSRC_WORD3__INDEX_STRIDE__SHIFT 21 +#define GNM_SQ_BUF_RSRC_WORD3__TYPE__SHIFT 30 + +#define mmCB_COLOR0_PITCH 0xA319 +#define GNM_CB_COLOR0_PITCH__TILE_MAX__MASK 0x000007ffL // size:11 +#define GNM_CB_COLOR0_PITCH__FMASK_TILE_MAX__MASK 0x7ff00000L // size:11 +#define GNM_CB_COLOR0_PITCH__TILE_MAX__SHIFT 0 +#define GNM_CB_COLOR0_PITCH__FMASK_TILE_MAX__SHIFT 20 + +#define mmCB_COLOR0_SLICE 0xA31A +#define GNM_CB_COLOR0_SLICE__TILE_MAX__MASK 0x003fffffL // size:22 +#define GNM_CB_COLOR0_SLICE__TILE_MAX__SHIFT 0 + +#define mmCB_COLOR0_VIEW 0xA31B +#define GNM_CB_COLOR0_VIEW__SLICE_START__MASK 0x000007ffL // size:11 +#define GNM_CB_COLOR0_VIEW__SLICE_MAX__MASK 0x00ffe000L // size:11 +#define GNM_CB_COLOR0_VIEW__SLICE_START__SHIFT 0 +#define GNM_CB_COLOR0_VIEW__SLICE_MAX__SHIFT 13 + +#define mmCB_COLOR0_INFO 0xA31C +#define GNM_CB_COLOR0_INFO__FAST_CLEAR__MASK 0x00002000L // size: 1 +#define GNM_CB_COLOR0_INFO__COMPRESSION__MASK 0x00004000L // size: 1 +#define GNM_CB_COLOR0_INFO__CMASK_IS_LINEAR__MASK 0x00080000L // size: 1 +#define GNM_CB_COLOR0_INFO__FMASK_COMPRESSION_MODE__MASK 0x0C000000L // size: 2 +#define GNM_CB_COLOR0_INFO__DCC_ENABLE__MASK 0x10000000L // size: 1 +#define GNM_CB_COLOR0_INFO__CMASK_ADDR_TYPE__MASK 0x60000000L // size: 2 +#define GNM_CB_COLOR0_INFO__ALT_TILE_MODE__MASK 0x80000000L // size: 1 +#define GNM_CB_COLOR0_INFO__FAST_CLEAR__SHIFT 13 +#define GNM_CB_COLOR0_INFO__COMPRESSION__SHIFT 14 +#define GNM_CB_COLOR0_INFO__CMASK_IS_LINEAR__SHIFT 19 +#define GNM_CB_COLOR0_INFO__FMASK_COMPRESSION_MODE__SHIFT 26 +#define GNM_CB_COLOR0_INFO__DCC_ENABLE__SHIFT 28 +#define GNM_CB_COLOR0_INFO__CMASK_ADDR_TYPE__SHIFT 29 +#define GNM_CB_COLOR0_INFO__ALT_TILE_MODE__SHIFT 31 +#define GNM_CB_COLOR0_INFO__FORMAT__MASK 0x3f << 2 +#define GNM_CB_COLOR0_INFO__FORMAT__SHIFT 2 + +#define GNM_CB_COLOR0_INFO__ARRAY_MODE__MASK 0x0f << 8 +#define GNM_CB_COLOR0_INFO__ARRAY_MODE__SHIFT 8 + +enum { + ARRAY_LINEAR_GENERAL = 0x00, // Unaligned linear array + ARRAY_LINEAR_ALIGNED = 0x01, // Aligned linear array +}; + +#define GNM_CB_COLOR0_INFO__NUMBER_TYPE__MASK 0x07 << 12 +#define GNM_CB_COLOR0_INFO__NUMBER_TYPE__SHIFT 12 + +enum { + NUMBER_UNORM = 0x00, // unsigned repeating fraction (urf): range [0..1], scale + // factor (2^n)-1 + NUMBER_SNORM = 0x01, // Microsoft-style signed rf: range [-1..1], scale factor + // (2^(n-1))-1 + NUMBER_USCALED = 0x02, // unsigned integer, converted to float in shader: + // range [0..(2^n)-1] + NUMBER_SSCALED = 0x03, // signed integer, converted to float in shader: range + // [-2^(n-1)..2^(n-1)-1] + NUMBER_UINT = 0x04, // zero-extended bit field, int in shader: not blendable + // or filterable + NUMBER_SINT = 0x05, // sign-extended bit field, int in shader: not blendable + // or filterable + NUMBER_SRGB = 0x06, // gamma corrected, range [0..1] (only suported for 8-bit + // components (always rounds color channels) + NUMBER_FLOAT = + 0x07, // floating point, depends on component size: 32-bit: IEEE float, + // SE8M23, bias 127, range (- 2^129..2^129) 24-bit: Depth float, + // E4M20, bias 15, range [0..1] 16-bit: Short float SE5M10, bias 15, + // range (-2^17..2^17) 11-bit: Packed float, E5M6 bias 15, range + // [0..2^17) 10-bit: Packed float, E5M5 bias 15, range [0..2^17) all + // other component sizes are treated as UINT +}; + +#define GNM_CB_COLOR0_INFO__READ_SIZE__MASK 1 << 15 +#define GNM_CB_COLOR0_INFO__READ_SIZE__SHIFT 15 + +// Specifies how to map the red, green, blue, and alpha components from the +// shader to the components in the frame buffer pixel format. There are four +// choices for each number of components. With one component, the four modes +// select any one component. With 2-4 components, SWAP_STD selects the low order +// shader components in little-endian order; SWAP_ALT selects an alternate order +// (for 4 compoents) or inclusion of alpha (for 2 or 3 components); and the +// other two reverse the component orders for use on big-endian machines. The +// following table specifies the exact component mappings: +// +// 1 comp std alt std_rev alt_rev +// ----------- ------- ------- ------- ------- +// comp 0: red green blue alpha +// +// 3 comps std alt std_rev alt_rev +// ----------- ------- ------- ------- ------- +// comp 0: red red green alpha +// comp 1: green alpha red red +// +// 3 comps std alt std_rev alt_rev +// ----------- ------- ------- ------- ------- +// comp 0: red red blue alpha +// comp 1: green green green green +// comp 2: blue alpha red red +// +// 4 comps std alt std_rev alt_rev +// ----------- ------- ------- ------- ------- +// comp 0: red blue alpha alpha +// comp 1: green green blue red +// comp 2: blue red green green +// comp 3: alpha alpha red blue +// +#define GNM_CB_COLOR0_INFO__COMP_SWAP__MASK 0x03 << 16 +#define GNM_CB_COLOR0_INFO__COMP_SWAP__SHIFT 16 +enum { + SWAP_STD = 0x00, // standard little-endian comp order + SWAP_ALT = 0x01, // alternate components or order + SWAP_STD_REV = 0x02, // reverses SWAP_STD order + SWAP_ALT_REV = 0x03, // reverses SWAP_ALT order +}; + +// Specifies whether to clamp source data to the render target range prior to +// blending, in addition to the post- blend clamp. This bit must be zero for +// uscaled, sscaled and float number types and when blend_bypass is set. +#define GNM_CB_COLOR0_INFO__BLEND_CLAMP__MASK 1 << 20 +#define GNM_CB_COLOR0_INFO__BLEND_CLAMP__SHIFT 20 + +// If false, use RGB=0.0 and A=1.0 (0x3f800000) to expand fast-cleared tiles. If +// true, use the CB_CLEAR register values to expand fast-cleared tiles. +#define GNM_CB_COLOR0_INFO__CLEAR_COLOR__MASK 1 << 21 +#define GNM_CB_COLOR0_INFO__CLEAR_COLOR__SHIFT 21 + +// If false, blending occurs normaly as specified in CB_BLEND#_CONTROL. If true, +// blending (but not fog) is disabled. This must be set for the 24_8 and 8_24 +// formats and when the number type is uint or sint. It should also be set for +// number types that are required to ignore the blend state in a specific +// aplication interface. +#define GNM_CB_COLOR0_INFO__BLEND_BYPASS__MASK 1 << 22 +#define GNM_CB_COLOR0_INFO__BLEND_BYPASS__SHIFT 22 + +// If true, use 32-bit float precision for source colors, else truncate to +// 12-bit mantissa precision. This applies even if blending is disabled so that +// a null blend and blend disable produce the same result. This field is ignored +// for NUMBER_UINT and NUMBER_SINT. It must be one for floating point components +// larger than 16-bits or non- floating components larger than 12-bits, +// otherwise it must be 0. +#define GNM_CB_COLOR0_INFO__BLEND_FLOAT32__MASK 1 << 23 +#define GNM_CB_COLOR0_INFO__BLEND_FLOAT32__SHIFT 23 + +// If false, floating point processing follows full IEEE rules for INF, NaN, and +// -0. If true, 0*anything produces 0 and no operation produces -0. +#define GNM_CB_COLOR0_INFO__SIMPLE_FLOAT__MASK 1 << 24 +#define GNM_CB_COLOR0_INFO__SIMPLE_FLOAT__SHIFT 24 + +// This field selects between truncating (standard for floats) and rounding +// (standard for most other cases) to convert blender results to frame buffer +// components. The ROUND_BY_HALF setting can be over-riden by the DITHER_ENABLE +// field in CB_COLOR_CONTROL. +#define GNM_CB_COLOR0_INFO__ROUND_MODE__MASK 1 << 25 +#define GNM_CB_COLOR0_INFO__ROUND_MODE__SHIFT 25 + +// This field indicates the allowed format for color data being exported from +// the pixel shader into the output merge block. This field may only be set to +// EXPORT_NORM if BLEND_CLAMP is enabled, BLEND_FLOAT32 is disabled, and the +// render target has only 11-bit or smaller UNORM or SNORM components. Selecting +// EXPORT_NORM flushes to zero values with exponent less than 0x70 (values less +// than 2^-15). +#define GNM_CB_COLOR0_INFO__SOURCE_FORMAT__MASK 1 << 27 +#define GNM_CB_COLOR0_INFO__SOURCE_FORMAT__SHIFT 27 + +#define mmCB_COLOR0_ATTRIB 0xA31D +#define GNM_CB_COLOR0_ATTRIB__TILE_MODE_INDEX__MASK 0x0000001fL // size: 5 +#define GNM_CB_COLOR0_ATTRIB__FMASK_TILE_MODE_INDEX__MASK 0x000003e0L // size: 5 +#define GNM_CB_COLOR0_ATTRIB__NUM_SAMPLES__MASK 0x00007000L // size: 3 +#define GNM_CB_COLOR0_ATTRIB__NUM_FRAGMENTS__MASK 0x00018000L // size: 2 +#define GNM_CB_COLOR0_ATTRIB__FORCE_DST_ALPHA_1__MASK 0x00020000L // size: 1 +#define GNM_CB_COLOR0_ATTRIB__TILE_MODE_INDEX__SHIFT 0 +#define GNM_CB_COLOR0_ATTRIB__FMASK_TILE_MODE_INDEX__SHIFT 5 +#define GNM_CB_COLOR0_ATTRIB__NUM_SAMPLES__SHIFT 12 +#define GNM_CB_COLOR0_ATTRIB__NUM_FRAGMENTS__SHIFT 15 +#define GNM_CB_COLOR0_ATTRIB__FORCE_DST_ALPHA_1__SHIFT 17 + +#define mmCB_COLOR0_DCC_CONTROL 0xA31E +#define GNM_CB_COLOR0_DCC_CONTROL__OVERWRITE_COMBINER_DISABLE__MASK \ + 0x00000001L // size: 1 +#define GNM_CB_COLOR0_DCC_CONTROL__MAX_UNCOMPRESSED_BLOCK_SIZE__MASK \ + 0x0000000cL // size: 2 +#define GNM_CB_COLOR0_DCC_CONTROL__MIN_COMPRESSED_BLOCK_SIZE__MASK \ + 0x00000010L // size: 1 +#define GNM_CB_COLOR0_DCC_CONTROL__MAX_COMPRESSED_BLOCK_SIZE__MASK \ + 0x00000060L // size: 2 +#define GNM_CB_COLOR0_DCC_CONTROL__COLOR_TRANSFORM__MASK 0x00000180L // size: 2 +#define GNM_CB_COLOR0_DCC_CONTROL__INDEPENDENT_64B_BLOCKS__MASK \ + 0x00000200L // size: 1 +#define GNM_CB_COLOR0_DCC_CONTROL__OVERWRITE_COMBINER_DISABLE__SHIFT 0 +#define GNM_CB_COLOR0_DCC_CONTROL__MAX_UNCOMPRESSED_BLOCK_SIZE__SHIFT 2 +#define GNM_CB_COLOR0_DCC_CONTROL__MIN_COMPRESSED_BLOCK_SIZE__SHIFT 4 +#define GNM_CB_COLOR0_DCC_CONTROL__MAX_COMPRESSED_BLOCK_SIZE__SHIFT 5 +#define GNM_CB_COLOR0_DCC_CONTROL__COLOR_TRANSFORM__SHIFT 7 +#define GNM_CB_COLOR0_DCC_CONTROL__INDEPENDENT_64B_BLOCKS__SHIFT 9 + +#define mmCB_COLOR0_CMASK 0xA31F +#define GNM_CB_COLOR0_CMASK__BASE_256B__MASK 0xffffffffL // size:32 +#define GNM_CB_COLOR0_CMASK__BASE_256B__SHIFT 0 + +#define mmCB_COLOR0_CMASK_SLICE 0xA320 +#define GNM_CB_COLOR0_CMASK_SLICE__TILE_MAX__MASK 0x00003fffL // size:14 +#define GNM_CB_COLOR0_CMASK_SLICE__TILE_MAX__SHIFT 0 + +#define mmCB_COLOR0_FMASK 0xA321 +#define GNM_CB_COLOR0_FMASK__BASE_256B__MASK 0xffffffffL // size:32 +#define GNM_CB_COLOR0_FMASK__BASE_256B__SHIFT 0 + +#define mmCB_COLOR0_FMASK_SLICE 0xA322 +#define GNM_CB_COLOR0_FMASK_SLICE__TILE_MAX__MASK 0x003fffffL // size:22 +#define GNM_CB_COLOR0_FMASK_SLICE__TILE_MAX__SHIFT 0 + +#define mmCB_COLOR0_CLEAR_WORD0 0xA323 +#define GNM_CB_COLOR0_CLEAR_WORD0__CLEAR_WORD0__MASK 0xffffffffL // size:32 +#define GNM_CB_COLOR0_CLEAR_WORD0__CLEAR_WORD0__SHIFT 0 + +#define mmCB_COLOR0_CLEAR_WORD1 0xA324 +#define GNM_CB_COLOR0_CLEAR_WORD1__CLEAR_WORD1__MASK 0xffffffffL // size:32 +#define GNM_CB_COLOR0_CLEAR_WORD1__CLEAR_WORD1__SHIFT 0 + +#define mmCB_COLOR0_DCC_BASE 0xA325 +#define GNM_CB_COLOR0_DCC_BASE__BASE_256B__MASK 0xffffffffL // size:32 +#define GNM_CB_COLOR0_DCC_BASE__BASE_256B__SHIFT 0 + +static constexpr auto CB_BLEND0_CONTROL_COLOR_SRCBLEND_MASK = genMask(0, 5); +static constexpr auto CB_BLEND0_CONTROL_COLOR_COMB_FCN_MASK = + genMask(getMaskEnd(CB_BLEND0_CONTROL_COLOR_SRCBLEND_MASK), 3); +static constexpr auto CB_BLEND0_CONTROL_COLOR_DESTBLEND_MASK = + genMask(getMaskEnd(CB_BLEND0_CONTROL_COLOR_COMB_FCN_MASK), 5); +static constexpr auto CB_BLEND0_CONTROL_OPACITY_WEIGHT_MASK = + genMask(getMaskEnd(CB_BLEND0_CONTROL_COLOR_DESTBLEND_MASK), 1); +static constexpr auto CB_BLEND0_CONTROL_ALPHA_SRCBLEND_MASK = + genMask(getMaskEnd(CB_BLEND0_CONTROL_OPACITY_WEIGHT_MASK) + 2, 5); +static constexpr auto CB_BLEND0_CONTROL_ALPHA_COMB_FCN_MASK = + genMask(getMaskEnd(CB_BLEND0_CONTROL_ALPHA_SRCBLEND_MASK), 3); +static constexpr auto CB_BLEND0_CONTROL_ALPHA_DESTBLEND_MASK = + genMask(getMaskEnd(CB_BLEND0_CONTROL_ALPHA_COMB_FCN_MASK), 5); +static constexpr auto CB_BLEND0_CONTROL_SEPARATE_ALPHA_BLEND_MASK = + genMask(getMaskEnd(CB_BLEND0_CONTROL_ALPHA_DESTBLEND_MASK), 1); +static constexpr auto CB_BLEND0_CONTROL_BLEND_ENABLE_MASK = + genMask(getMaskEnd(CB_BLEND0_CONTROL_SEPARATE_ALPHA_BLEND_MASK), 1); + +static std::uint64_t pgmPsAddress = 0; +static std::uint64_t pgmVsAddress = 0; +static std::uint64_t pgmComputeAddress = 0; +static std::uint32_t userVsData[16]; +static std::uint32_t userPsData[16]; +static std::uint32_t userComputeData[16]; +static std::uint32_t computeNumThreadX = 1; +static std::uint32_t computeNumThreadY = 1; +static std::uint32_t computeNumThreadZ = 1; +static std::uint8_t psUserSpgrs; +static std::uint8_t vsUserSpgrs; +static std::uint8_t computeUserSpgrs; + +struct ColorBuffer { + std::uint64_t base; + std::uint8_t format; + std::uint8_t tileModeIndex; + + void setRegister(unsigned index, std::uint32_t value) { + switch (index) { + case CB_COLOR0_BASE - CB_COLOR0_BASE: + base = static_cast(value) << 8; + std::printf(" * base = %lx\n", base); + break; + + case CB_COLOR0_PITCH - CB_COLOR0_BASE: { + auto pitchTileMax = GNM_GET_FIELD(value, CB_COLOR0_PITCH, TILE_MAX); + auto pitchFmaskTileMax = + GNM_GET_FIELD(value, CB_COLOR0_PITCH, FMASK_TILE_MAX); + std::printf(" * TILE_MAX = %lx\n", pitchTileMax); + std::printf(" * FMASK_TILE_MAX = %lx\n", pitchFmaskTileMax); + break; + } + case CB_COLOR0_SLICE - CB_COLOR0_BASE: { // SLICE + auto sliceTileMax = GNM_GET_FIELD(value, CB_COLOR0_SLICE, TILE_MAX); + std::printf(" * TILE_MAX = %lx\n", sliceTileMax); + break; + } + case CB_COLOR0_VIEW - CB_COLOR0_BASE: { // VIEW + auto viewSliceStart = + GNM_GET_FIELD(value, CB_COLOR0_VIEW, SLICE_START); + auto viewSliceMax = GNM_GET_FIELD(value, CB_COLOR0_VIEW, SLICE_MAX); + + std::printf(" * SLICE_START = %lx\n", viewSliceStart); + std::printf(" * SLICE_MAX = %lx\n", viewSliceMax); + break; + } + case CB_COLOR0_INFO - CB_COLOR0_BASE: { // INFO + auto fastClear = GNM_GET_FIELD(value, CB_COLOR0_INFO, FAST_CLEAR); + auto compression = GNM_GET_FIELD(value, CB_COLOR0_INFO, COMPRESSION); + auto cmaskIsLinear = + GNM_GET_FIELD(value, CB_COLOR0_INFO, CMASK_IS_LINEAR); + auto fmaskCompressionMode = + GNM_GET_FIELD(value, CB_COLOR0_INFO, FMASK_COMPRESSION_MODE); + auto dccEnable = GNM_GET_FIELD(value, CB_COLOR0_INFO, DCC_ENABLE); + auto cmaskAddrType = + GNM_GET_FIELD(value, CB_COLOR0_INFO, CMASK_ADDR_TYPE); + auto altTileMode = + GNM_GET_FIELD(value, CB_COLOR0_INFO, ALT_TILE_MODE); + format = GNM_GET_FIELD(value, CB_COLOR0_INFO, FORMAT); + auto arrayMode = GNM_GET_FIELD(value, CB_COLOR0_INFO, ARRAY_MODE); + auto numberType = GNM_GET_FIELD(value, CB_COLOR0_INFO, NUMBER_TYPE); + auto readSize = GNM_GET_FIELD(value, CB_COLOR0_INFO, READ_SIZE); + auto compSwap = GNM_GET_FIELD(value, CB_COLOR0_INFO, COMP_SWAP); + auto blendClamp = GNM_GET_FIELD(value, CB_COLOR0_INFO, BLEND_CLAMP); + auto clearColor = GNM_GET_FIELD(value, CB_COLOR0_INFO, CLEAR_COLOR); + auto blendBypass = GNM_GET_FIELD(value, CB_COLOR0_INFO, BLEND_BYPASS); + auto blendFloat32 = + GNM_GET_FIELD(value, CB_COLOR0_INFO, BLEND_FLOAT32); + auto simpleFloat = GNM_GET_FIELD(value, CB_COLOR0_INFO, SIMPLE_FLOAT); + auto roundMode = GNM_GET_FIELD(value, CB_COLOR0_INFO, ROUND_MODE); + auto sourceFormat = + GNM_GET_FIELD(value, CB_COLOR0_INFO, SOURCE_FORMAT); + + std::printf(" * FAST_CLEAR = %lu\n", fastClear); + std::printf(" * COMPRESSION = %lu\n", compression); + std::printf(" * CMASK_IS_LINEAR = %lu\n", cmaskIsLinear); + std::printf(" * FMASK_COMPRESSION_MODE = %lu\n", fmaskCompressionMode); + std::printf(" * DCC_ENABLE = %lu\n", dccEnable); + std::printf(" * CMASK_ADDR_TYPE = %lu\n", cmaskAddrType); + std::printf(" * ALT_TILE_MODE = %lu\n", altTileMode); + std::printf(" * FORMAT = %x\n", format); + std::printf(" * ARRAY_MODE = %u\n", arrayMode); + std::printf(" * NUMBER_TYPE = %u\n", numberType); + std::printf(" * READ_SIZE = %u\n", readSize); + std::printf(" * COMP_SWAP = %u\n", compSwap); + std::printf(" * BLEND_CLAMP = %u\n", blendClamp); + std::printf(" * CLEAR_COLOR = %u\n", clearColor); + std::printf(" * BLEND_BYPASS = %u\n", blendBypass); + std::printf(" * BLEND_FLOAT32 = %u\n", blendFloat32); + std::printf(" * SIMPLE_FLOAT = %u\n", simpleFloat); + std::printf(" * ROUND_MODE = %u\n", roundMode); + std::printf(" * SOURCE_FORMAT = %u\n", sourceFormat); + break; + } + + case CB_COLOR0_ATTRIB - CB_COLOR0_BASE: { // ATTRIB + tileModeIndex = + GNM_GET_FIELD(value, CB_COLOR0_ATTRIB, TILE_MODE_INDEX); + auto fmask_tile_mode_index = + GNM_GET_FIELD(value, CB_COLOR0_ATTRIB, FMASK_TILE_MODE_INDEX); + auto num_samples = + GNM_GET_FIELD(value, CB_COLOR0_ATTRIB, NUM_SAMPLES); + auto num_fragments = + GNM_GET_FIELD(value, CB_COLOR0_ATTRIB, NUM_FRAGMENTS); + auto force_dst_alpha_1 = + GNM_GET_FIELD(value, CB_COLOR0_ATTRIB, FORCE_DST_ALPHA_1); + + std::printf(" * TILE_MODE_INDEX = %u\n", tileModeIndex); + std::printf(" * FMASK_TILE_MODE_INDEX = %lu\n", fmask_tile_mode_index); + std::printf(" * NUM_SAMPLES = %lu\n", num_samples); + std::printf(" * NUM_FRAGMENTS = %lu\n", num_fragments); + std::printf(" * FORCE_DST_ALPHA_1 = %lu\n", force_dst_alpha_1); + break; + } + case CB_COLOR0_CMASK - CB_COLOR0_BASE: { // CMASK + auto cmaskBase = GNM_GET_FIELD(value, CB_COLOR0_CMASK, BASE_256B) + << 8; + std::printf(" * cmaskBase = %lx\n", cmaskBase); + break; + } + case CB_COLOR0_CMASK_SLICE - CB_COLOR0_BASE: { // CMASK_SLICE + auto cmaskSliceTileMax = + GNM_GET_FIELD(value, CB_COLOR0_CMASK_SLICE, TILE_MAX); + std::printf(" * cmaskSliceTileMax = %lx\n", cmaskSliceTileMax); + break; + } + case CB_COLOR0_FMASK - CB_COLOR0_BASE: { // FMASK + auto fmaskBase = GNM_GET_FIELD(value, CB_COLOR0_FMASK, BASE_256B) + << 8; + std::printf(" * fmaskBase = %lx\n", fmaskBase); + break; + } + case CB_COLOR0_FMASK_SLICE - CB_COLOR0_BASE: { // FMASK_SLICE + auto fmaskSliceTileMax = + GNM_GET_FIELD(value, CB_COLOR0_FMASK_SLICE, TILE_MAX); + std::printf(" * fmaskSliceTileMax = %lx\n", fmaskSliceTileMax); + break; + } + case CB_COLOR0_CLEAR_WORD0 - CB_COLOR0_BASE: // CLEAR_WORD0 + break; + case CB_COLOR1_CLEAR_WORD0 - CB_COLOR0_BASE: // CLEAR_WORD1 + break; + } + } +}; + +static constexpr std::size_t colorBuffersCount = 6; + +static ColorBuffer colorBuffers[colorBuffersCount]; + +static std::uint32_t indexType; + +static std::uint32_t screenScissorX = 0; +static std::uint32_t screenScissorY = 0; +static std::uint32_t screenScissorW = 0; +static std::uint32_t screenScissorH = 0; + +enum class CbColorFormat { + /* + 00 - CB_DISABLE: Disables drawing to color + buffer. Causes DB to not send tiles/quads to CB. CB + itself ignores this field. + 01 - CB_NORMAL: Normal rendering mode. DB + should send tiles and quads for pixel exports or just + quads for compute exports. + 02 - CB_ELIMINATE_FAST_CLEAR: Fill fast + cleared color surface locations with clear color. DB + should send only tiles. + 03 - CB_RESOLVE: Read from MRT0, average all + samples, and write to MRT1, which is one-sample. DB + should send only tiles. + 04 - CB_DECOMPRESS: Decompress MRT0 to a + */ + Disable, + Normal, + EliminateFastClear, + Resolve, +}; + +enum class CbRasterOp { + Blackness = 0x00, + Nor = 0x05, // ~(src | dst) + AndInverted = 0x0a, // ~src & dst + CopyInverted = 0x0f, // ~src + NotSrcErase = 0x11, // ~src & ~dst + SrcErase = 0x44, // src & ~dst + DstInvert = 0x55, // ~dst + Xor = 0x5a, // src ^ dst + Nand = 0x5f, // ~(src & dst) + And = 0x88, // src & dst + Equiv = 0x99, // ~(src ^ dst) + Noop = 0xaa, // dst + OrInverted = 0xaf, // ~src | dst + Copy = 0xcc, // src + OrReverse = 0xdd, // src | ~dst + Or = 0xEE, // src | dst + Whiteness = 0xff, +}; + +static CbColorFormat cbColorFormat = CbColorFormat::Normal; + +static CbRasterOp cbRasterOp = CbRasterOp::Copy; + +static std::uint32_t vgtPrimitiveType = 0; +static bool stencilEnable = false; +static bool depthEnable = false; +static bool depthWriteEnable = false; +static bool depthBoundsEnable = false; +static int zFunc = 0; +static bool backFaceEnable = false; +static int stencilFunc = 0; +static int stencilFuncBackFace = 0; + +static float depthClear = 1.f; + +static bool cullFront = false; +static bool cullBack = false; +static int face = 0; // 0 - CCW, 1 - CW +static bool polyMode = false; +static int polyModeFrontPType = 0; +static int polyModeBackPType = 0; +static bool polyOffsetFrontEnable = false; +static bool polyOffsetBackEnable = false; +static bool polyOffsetParaEnable = false; +static bool vtxWindowOffsetEnable = false; +static bool provokingVtxLast = false; +static bool erspCorrDis = false; +static bool multiPrimIbEna = false; + +static bool depthClearEnable = false; +static bool stencilClearEnable = false; +static bool depthCopy = false; +static bool stencilCopy = false; +static bool resummarizeEnable = false; +static bool stencilCompressDisable = false; +static bool depthCompressDisable = false; +static bool copyCentroid = false; +static int copySample = 0; +static bool zpassIncrementDisable = false; + +static std::uint64_t zReadBase = 0; +static std::uint64_t zWriteBase = 0; + +static BlendMultiplier blendColorSrc = {}; +static BlendFunc blendColorFn = {}; +static BlendMultiplier blendColorDst = {}; +static BlendMultiplier blendAlphaSrc = {}; +static BlendFunc blendAlphaFn = {}; +static BlendMultiplier blendAlphaDst = {}; +static bool blendSeparateAlpha = false; +static bool blendEnable = false; +static std::uint32_t cbRenderTargetMask = 0; + +static void setRegister(std::uint32_t regId, std::uint32_t value) { + switch (regId) { + case SPI_SHADER_PGM_LO_PS: + pgmPsAddress &= ~((1ull << 40) - 1); + pgmPsAddress |= static_cast(value) << 8; + break; + case SPI_SHADER_PGM_HI_PS: + pgmPsAddress &= (1ull << 40) - 1; + pgmPsAddress |= static_cast(value) << 40; + break; + case SPI_SHADER_PGM_LO_VS: + pgmVsAddress &= ~((1ull << 40) - 1); + pgmVsAddress |= static_cast(value) << 8; + break; + case SPI_SHADER_PGM_HI_VS: + pgmVsAddress &= (1ull << 40) - 1; + pgmVsAddress |= static_cast(value) << 40; + break; + + case SPI_SHADER_USER_DATA_VS_0: + case SPI_SHADER_USER_DATA_VS_1: + case SPI_SHADER_USER_DATA_VS_2: + case SPI_SHADER_USER_DATA_VS_3: + case SPI_SHADER_USER_DATA_VS_4: + case SPI_SHADER_USER_DATA_VS_5: + case SPI_SHADER_USER_DATA_VS_6: + case SPI_SHADER_USER_DATA_VS_7: + case SPI_SHADER_USER_DATA_VS_8: + case SPI_SHADER_USER_DATA_VS_9: + case SPI_SHADER_USER_DATA_VS_10: + case SPI_SHADER_USER_DATA_VS_11: + case SPI_SHADER_USER_DATA_VS_12: + case SPI_SHADER_USER_DATA_VS_13: + case SPI_SHADER_USER_DATA_VS_14: + case SPI_SHADER_USER_DATA_VS_15: + userVsData[regId - SPI_SHADER_USER_DATA_VS_0] = value; + break; + + case SPI_SHADER_USER_DATA_PS_0: + case SPI_SHADER_USER_DATA_PS_1: + case SPI_SHADER_USER_DATA_PS_2: + case SPI_SHADER_USER_DATA_PS_3: + case SPI_SHADER_USER_DATA_PS_4: + case SPI_SHADER_USER_DATA_PS_5: + case SPI_SHADER_USER_DATA_PS_6: + case SPI_SHADER_USER_DATA_PS_7: + case SPI_SHADER_USER_DATA_PS_8: + case SPI_SHADER_USER_DATA_PS_9: + case SPI_SHADER_USER_DATA_PS_10: + case SPI_SHADER_USER_DATA_PS_11: + case SPI_SHADER_USER_DATA_PS_12: + case SPI_SHADER_USER_DATA_PS_13: + case SPI_SHADER_USER_DATA_PS_14: + case SPI_SHADER_USER_DATA_PS_15: + userPsData[regId - SPI_SHADER_USER_DATA_PS_0] = value; + break; + + case SPI_SHADER_PGM_RSRC2_PS: + psUserSpgrs = (value >> 1) & 0x1f; + break; + + case SPI_SHADER_PGM_RSRC2_VS: + vsUserSpgrs = (value >> 1) & 0x1f; + break; + + case CB_COLOR0_BASE ... CB_COLOR6_DCC_BASE: { + auto buffer = (regId - CB_COLOR0_BASE) / (CB_COLOR1_BASE - CB_COLOR0_BASE); + auto index = (regId - CB_COLOR0_BASE) % (CB_COLOR1_BASE - CB_COLOR0_BASE); + colorBuffers[buffer].setRegister(index, value); + break; + } + + case DB_RENDER_CONTROL: + depthClearEnable = getBit(value, 0); + stencilClearEnable = getBit(value, 1); + depthCopy = getBit(value, 2); + stencilCopy = getBit(value, 3); + resummarizeEnable = getBit(value, 4); + stencilCompressDisable = getBit(value, 5); + depthCompressDisable = getBit(value, 6); + copyCentroid = getBit(value, 7); + copySample = getBits(value, 10, 8); + zpassIncrementDisable = getBit(value, 11); + break; + + case DB_Z_READ_BASE: + zReadBase = static_cast(value) << 8; + break; + + case DB_Z_WRITE_BASE: + zWriteBase = static_cast(value) << 8; + break; + + case DB_DEPTH_CLEAR: + depthClear = std::bit_cast(value); + break; + + case DB_DEPTH_CONTROL: + stencilEnable = getBit(value, 0) != 0; + depthEnable = getBit(value, 1) != 0; + depthWriteEnable = getBit(value, 2) != 0; + depthBoundsEnable = getBit(value, 3) != 0; + zFunc = getBits(value, 6, 4); + backFaceEnable = getBit(value, 7); + stencilFunc = getBits(value, 11, 8); + stencilFuncBackFace = getBits(value, 23, 20); + + std::printf("stencilEnable=%u, depthEnable=%u, depthWriteEnable=%u, " + "depthBoundsEnable=%u, zFunc=%u, backFaceEnable=%u, " + "stencilFunc=%u, stencilFuncBackFace=%u\n", + stencilEnable, depthEnable, depthWriteEnable, depthBoundsEnable, + zFunc, backFaceEnable, stencilFunc, stencilFuncBackFace); + break; + + case CB_TARGET_MASK: { + cbRenderTargetMask = value; + break; + } + + case CB_COLOR_CONTROL: { + /* + If true, then each UNORM format COLOR_8_8_8_8 + MRT is treated as an SRGB format instead. This affects + both normal draw and resolve. This bit exists for + compatibility with older architectures that did not have + an SRGB number type. + */ + auto degammaEnable = getBits(value, 3, 0); + + /* + This field selects standard color processing or one of + several major operation modes. + + POSSIBLE VALUES: + 00 - CB_DISABLE: Disables drawing to color + buffer. Causes DB to not send tiles/quads to CB. CB + itself ignores this field. + 01 - CB_NORMAL: Normal rendering mode. DB + should send tiles and quads for pixel exports or just + quads for compute exports. + 02 - CB_ELIMINATE_FAST_CLEAR: Fill fast + cleared color surface locations with clear color. DB + should send only tiles. + 03 - CB_RESOLVE: Read from MRT0, average all + samples, and write to MRT1, which is one-sample. DB + should send only tiles. + 04 - CB_DECOMPRESS: Decompress MRT0 to a + uncompressed color format. This is required before a + multisampled surface is accessed by the CPU, or used as + a texture. This also decompresses the FMASK buffer. A + CB_ELIMINATE_FAST_CLEAR pass before this is + unnecessary. DB should send tiles and quads. + 05 - CB_FMASK_DECOMPRESS: Decompress the + FMASK buffer into a texture readable format. A + CB_ELIMINATE_FAST_CLEAR pass before this is + unnecessary. DB should send only tiles. + */ + auto mode = getBits(value, 6, 4); + + /* + This field supports the 28 boolean ops that combine + either source and dest or brush and dest, with brush + provided by the shader in place of source. The code + 0xCC (11001100) copies the source to the destination, + which disables the ROP function. ROP must be disabled + if any MRT enables blending. + + POSSIBLE VALUES: + 00 - 0x00: BLACKNESS + 05 - 0x05 + 10 - 0x0A + 15 - 0x0F + 17 - 0x11: NOTSRCERASE + 34 - 0x22 + 51 - 0x33: NOTSRCCOPY + 68 - 0x44: SRCERASE + 80 - 0x50 + 85 - 0x55: DSTINVERT + 90 - 0x5A: PATINVERT + 95 - 0x5F + 102 - 0x66: SRCINVERT + 119 - 0x77 + 136 - 0x88: SRCAND + 153 - 0x99 + 160 - 0xA0 + 165 - 0xA5 + 170 - 0xAA + 175 - 0xAF + 187 - 0xBB: MERGEPAINT + 204 - 0xCC: SRCCOPY + 221 - 0xDD + 238 - 0xEE: SRCPAINT + 240 - 0xF0: PATCOPY + 245 - 0xF5 + 250 - 0xFA + 255 - 0xFF: WHITENESS + */ + auto rop3 = getBits(value, 23, 16); + + std::printf(" * degammaEnable = %x\n", degammaEnable); + std::printf(" * mode = %x\n", mode); + std::printf(" * rop3 = %x\n", rop3); + + cbColorFormat = static_cast(mode); + cbRasterOp = static_cast(rop3); + break; + } + + case PA_CL_CLIP_CNTL: + cullFront = getBit(value, 0); + cullBack = getBit(value, 1); + face = getBit(value, 2); + polyMode = getBits(value, 4, 3); + polyModeFrontPType = getBits(value, 7, 5); + polyModeBackPType = getBits(value, 10, 8); + polyOffsetFrontEnable = getBit(value, 11); + polyOffsetBackEnable = getBit(value, 12); + polyOffsetParaEnable = getBit(value, 13); + vtxWindowOffsetEnable = getBit(value, 16); + provokingVtxLast = getBit(value, 19); + erspCorrDis = getBit(value, 20); + multiPrimIbEna = getBit(value, 21); + break; + + case PA_SC_SCREEN_SCISSOR_TL: + screenScissorX = static_cast(value); + screenScissorY = static_cast(value >> 16); + break; + + case PA_SC_SCREEN_SCISSOR_BR: + screenScissorW = static_cast(value) - screenScissorX; + screenScissorH = static_cast(value >> 16) - screenScissorY; + break; + + case VGT_PRIMITIVE_TYPE: + vgtPrimitiveType = value; + break; + + case COMPUTE_NUM_THREAD_X: + computeNumThreadX = value; + break; + + case COMPUTE_NUM_THREAD_Y: + computeNumThreadY = value; + break; + + case COMPUTE_NUM_THREAD_Z: + computeNumThreadZ = value; + break; + + case COMPUTE_PGM_LO: + pgmComputeAddress &= ~((1ull << 40) - 1); + pgmComputeAddress |= static_cast(value) << 8; + break; + + case COMPUTE_PGM_HI: + pgmComputeAddress &= (1ull << 40) - 1; + pgmComputeAddress |= static_cast(value) << 40; + break; + + case COMPUTE_PGM_RSRC1: + break; + case COMPUTE_PGM_RSRC2: + computeUserSpgrs = (value >> 1) & 0x1f; + break; + + case COMPUTE_USER_DATA_0: + case COMPUTE_USER_DATA_1: + case COMPUTE_USER_DATA_2: + case COMPUTE_USER_DATA_3: + case COMPUTE_USER_DATA_4: + case COMPUTE_USER_DATA_5: + case COMPUTE_USER_DATA_6: + case COMPUTE_USER_DATA_7: + case COMPUTE_USER_DATA_8: + case COMPUTE_USER_DATA_9: + case COMPUTE_USER_DATA_10: + case COMPUTE_USER_DATA_11: + case COMPUTE_USER_DATA_12: + case COMPUTE_USER_DATA_13: + case COMPUTE_USER_DATA_14: + case COMPUTE_USER_DATA_15: + userComputeData[regId - COMPUTE_USER_DATA_0] = value; + break; + + case CB_BLEND0_CONTROL: { + blendColorSrc = (BlendMultiplier)fetchMaskedValue( + value, CB_BLEND0_CONTROL_COLOR_SRCBLEND_MASK); + blendColorFn = (BlendFunc)fetchMaskedValue( + value, CB_BLEND0_CONTROL_COLOR_COMB_FCN_MASK); + blendColorDst = (BlendMultiplier)fetchMaskedValue( + value, CB_BLEND0_CONTROL_COLOR_DESTBLEND_MASK); + auto opacity_weight = + fetchMaskedValue(value, CB_BLEND0_CONTROL_OPACITY_WEIGHT_MASK); + blendAlphaSrc = (BlendMultiplier)fetchMaskedValue( + value, CB_BLEND0_CONTROL_ALPHA_SRCBLEND_MASK); + blendAlphaFn = (BlendFunc)fetchMaskedValue( + value, CB_BLEND0_CONTROL_ALPHA_COMB_FCN_MASK); + blendAlphaDst = (BlendMultiplier)fetchMaskedValue( + value, CB_BLEND0_CONTROL_ALPHA_DESTBLEND_MASK); + blendSeparateAlpha = + fetchMaskedValue(value, CB_BLEND0_CONTROL_SEPARATE_ALPHA_BLEND_MASK) != + 0; + blendEnable = + fetchMaskedValue(value, CB_BLEND0_CONTROL_BLEND_ENABLE_MASK) != 0; + + std::printf(" * COLOR_SRCBLEND = %x\n", blendColorSrc); + std::printf(" * COLOR_COMB_FCN = %x\n", blendColorFn); + std::printf(" * COLOR_DESTBLEND = %x\n", blendColorDst); + std::printf(" * OPACITY_WEIGHT = %x\n", opacity_weight); + std::printf(" * ALPHA_SRCBLEND = %x\n", blendAlphaSrc); + std::printf(" * ALPHA_COMB_FCN = %x\n", blendAlphaFn); + std::printf(" * ALPHA_DESTBLEND = %x\n", blendAlphaDst); + std::printf(" * SEPARATE_ALPHA_BLEND = %x\n", blendSeparateAlpha); + std::printf(" * BLEND_ENABLE = %x\n", blendEnable); + break; + } + } +} + +void ShaderModule::destroy() const { + if (descriptorPool) { + vkDestroyDescriptorPool(g_vkDevice, descriptorPool, nullptr); + } + + vkDestroyPipeline(g_vkDevice, pipeline, nullptr); + vkDestroyPipelineLayout(g_vkDevice, pipelineLayout, nullptr); + vkDestroyDescriptorSetLayout(g_vkDevice, descriptorSetLayout, nullptr); +} + +DrawContext::~DrawContext() { + for (auto shader : loadedShaderModules) { + vkDestroyShaderModule(g_vkDevice, shader, nullptr); + } +} + +VkShaderModule createShaderModule(std::span shaderCode) { + VkShaderModuleCreateInfo moduleCreateInfo{}; + moduleCreateInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + moduleCreateInfo.codeSize = shaderCode.size() * sizeof(std::uint32_t); + moduleCreateInfo.pCode = shaderCode.data(); + + VkShaderModule shaderModule; + + Verify() << vkCreateShaderModule(g_vkDevice, &moduleCreateInfo, nullptr, + &shaderModule); + return shaderModule; +} + +VkPipelineShaderStageCreateInfo +createPipelineShaderStage(DrawContext &dc, + std::span shaderCode, + VkShaderStageFlagBits stage) { + VkPipelineShaderStageCreateInfo shaderStage = {}; + shaderStage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStage.stage = stage; + shaderStage.module = createShaderModule(shaderCode); + shaderStage.pName = "main"; + Verify() << (shaderStage.module != VK_NULL_HANDLE); + dc.loadedShaderModules.push_back(shaderStage.module); + return shaderStage; +} + +void transitionImageLayout(VkCommandPool commandPool, VkQueue queue, + VkImage image, VkImageAspectFlags aspectFlags, + VkImageLayout oldLayout, VkImageLayout newLayout) { + VkCommandBufferAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + allocInfo.commandPool = commandPool; + allocInfo.commandBufferCount = 1; + + VkCommandBuffer commandBuffer; + vkAllocateCommandBuffers(g_vkDevice, &allocInfo, &commandBuffer); + + VkCommandBufferBeginInfo beginInfo{}; + beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + vkBeginCommandBuffer(commandBuffer, &beginInfo); + + VkImageMemoryBarrier barrier{}; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.oldLayout = oldLayout; + barrier.newLayout = newLayout; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + barrier.subresourceRange.aspectMask = aspectFlags; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = 1; + + auto layoutToStageAccess = [](VkImageLayout layout) + -> std::pair { + switch (layout) { + case VK_IMAGE_LAYOUT_UNDEFINED: + return {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0}; + + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT}; + + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT}; + + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + return {VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT}; + + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + return {VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT}; + + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + return {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT}; + + default: + util::unreachable("unsupported layout transition! %d", layout); + } + }; + + auto [sourceStage, sourceAccess] = layoutToStageAccess(oldLayout); + auto [destinationStage, destinationAccess] = layoutToStageAccess(newLayout); + + barrier.srcAccessMask = sourceAccess; + barrier.dstAccessMask = destinationAccess; + + vkCmdPipelineBarrier(commandBuffer, sourceStage, destinationStage, 0, 0, + nullptr, 0, nullptr, 1, &barrier); + + vkEndCommandBuffer(commandBuffer); + + VkSubmitInfo submitInfo{}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &commandBuffer; + + Verify() << vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE); + Verify() << vkQueueWaitIdle(queue); + + vkFreeCommandBuffers(g_vkDevice, commandPool, 1, &commandBuffer); +} + +VkBuffer createBuffer(VkDeviceSize size, VkBufferUsageFlags usage) { + VkBufferCreateInfo bufferInfo{}; + bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + bufferInfo.size = size; + bufferInfo.usage = usage; + bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + + Verify() << (size != 0); + + VkBuffer result; + Verify() << vkCreateBuffer(g_vkDevice, &bufferInfo, nullptr, &result); + return result; +} + +uint32_t findMemoryType(uint32_t typeBits, VkMemoryPropertyFlags properties) { + + typeBits &= (1 << g_physicalMemoryProperties.memoryTypeCount) - 1; + + while (typeBits != 0) { + auto typeIndex = std::countr_zero(typeBits); + + if ((g_physicalMemoryProperties.memoryTypes[typeIndex].propertyFlags & + properties) == properties) { + return typeIndex; + } + + typeBits &= ~(1 << typeIndex); + } + + util::unreachable("Failed to find memory type with properties %x", + properties); +} + +VkDeviceMemory allocateMemory(std::uint32_t memoryTypeIndex, + VkDeviceSize size) { + VkMemoryAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + allocInfo.allocationSize = size; + allocInfo.memoryTypeIndex = static_cast(memoryTypeIndex); + + VkDeviceMemory result; + Verify() << vkAllocateMemory(g_vkDevice, &allocInfo, nullptr, &result); + + return result; +} + +VkDeviceMemory allocateMemory(VkMemoryRequirements memRequirements, + VkMemoryPropertyFlags properties) { + + auto memoryType = findMemoryType(memRequirements.memoryTypeBits, properties); + return allocateMemory(memoryType, memRequirements.size); +} + +VkDeviceMemory allocateAndBindBuffer(VkBuffer buffer, + VkMemoryPropertyFlags properties) { + VkMemoryRequirements memRequirements; + vkGetBufferMemoryRequirements(g_vkDevice, buffer, &memRequirements); + + auto result = allocateMemory(memRequirements, properties); + vkBindBufferMemory(g_vkDevice, buffer, result, 0); + + return result; +} + +VkDeviceMemory allocateAndBindImage(VkImage image, + VkMemoryPropertyFlags properties) { + VkMemoryRequirements memRequirements; + vkGetImageMemoryRequirements(g_vkDevice, image, &memRequirements); + + auto result = allocateMemory(memRequirements, properties); + vkBindImageMemory(g_vkDevice, image, result, 0); + + return result; +} + +void copyBuffer(VkQueue queue, VkCommandPool commandPool, VkBuffer srcBuffer, + VkBuffer dstBuffer, VkDeviceSize size) { + VkCommandBufferAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + allocInfo.commandPool = commandPool; + allocInfo.commandBufferCount = 1; + + VkCommandBuffer commandBuffer; + Verify() << vkAllocateCommandBuffers(g_vkDevice, &allocInfo, &commandBuffer); + + VkCommandBufferBeginInfo beginInfo{}; + beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + Verify() << vkBeginCommandBuffer(commandBuffer, &beginInfo); + + VkBufferCopy copyRegion{}; + copyRegion.size = size; + vkCmdCopyBuffer(commandBuffer, srcBuffer, dstBuffer, 1, ©Region); + + Verify() << vkEndCommandBuffer(commandBuffer); + + VkSubmitInfo submitInfo{}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &commandBuffer; + + Verify() << vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE); + Verify() << vkQueueWaitIdle(queue); + + vkFreeCommandBuffers(g_vkDevice, commandPool, 1, &commandBuffer); +} + +void copyImageToBuffer(VkCommandPool commandPool, VkQueue queue, VkImage image, + VkBuffer buffer, uint32_t width, uint32_t height, + uint32_t bufferOffset, uint32_t bufferRowLength, + uint32_t bufferHeight, VkImageAspectFlags aspectFlags) { + VkCommandBufferAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + allocInfo.commandPool = commandPool; + allocInfo.commandBufferCount = 1; + + VkCommandBuffer commandBuffer; + vkAllocateCommandBuffers(g_vkDevice, &allocInfo, &commandBuffer); + + VkCommandBufferBeginInfo beginInfo{}; + beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + vkBeginCommandBuffer(commandBuffer, &beginInfo); + + VkBufferImageCopy region{}; + region.bufferOffset = bufferOffset; + region.bufferRowLength = bufferRowLength; + region.bufferImageHeight = bufferHeight; + region.imageSubresource.aspectMask = aspectFlags; + region.imageSubresource.mipLevel = 0; + region.imageSubresource.baseArrayLayer = 0; + region.imageSubresource.layerCount = 1; + region.imageOffset = {0, 0, 0}; + region.imageExtent = {width, height, 1}; + + vkCmdCopyImageToBuffer(commandBuffer, image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, 1, + ®ion); + + vkEndCommandBuffer(commandBuffer); + + VkSubmitInfo submitInfo{}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &commandBuffer; + + Verify() << vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE); + Verify() << vkQueueWaitIdle(queue); + + vkFreeCommandBuffers(g_vkDevice, commandPool, 1, &commandBuffer); +} + +void copyBufferToImage(VkCommandPool commandPool, VkQueue queue, VkImage image, + VkBuffer buffer, uint32_t width, uint32_t height, + uint32_t bufferOffset, uint32_t bufferRowLength, + uint32_t bufferHeight, VkImageAspectFlags aspectFlags) { + VkCommandBufferAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + allocInfo.commandPool = commandPool; + allocInfo.commandBufferCount = 1; + + VkCommandBuffer commandBuffer; + vkAllocateCommandBuffers(g_vkDevice, &allocInfo, &commandBuffer); + + VkCommandBufferBeginInfo beginInfo{}; + beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + vkBeginCommandBuffer(commandBuffer, &beginInfo); + + VkBufferImageCopy region{}; + region.bufferOffset = bufferOffset; + region.bufferRowLength = bufferRowLength; + region.bufferImageHeight = bufferHeight; + region.imageSubresource.aspectMask = aspectFlags; + region.imageSubresource.mipLevel = 0; + region.imageSubresource.baseArrayLayer = 0; + region.imageSubresource.layerCount = 1; + region.imageOffset = {0, 0, 0}; + region.imageExtent = {width, height, 1}; + + vkCmdCopyBufferToImage(commandBuffer, buffer, image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); + + vkEndCommandBuffer(commandBuffer); + + VkSubmitInfo submitInfo{}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &commandBuffer; + + Verify() << vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE); + Verify() << vkQueueWaitIdle(queue); + + vkFreeCommandBuffers(g_vkDevice, commandPool, 1, &commandBuffer); +} + +static VkRenderPass createRenderPass(VkFormat colorFormat, + VkFormat depthFormat) { + std::array attachments = {}; + // Color attachment + attachments[0].format = colorFormat; + attachments[0].samples = VK_SAMPLE_COUNT_1_BIT; + attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachments[0].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + attachments[0].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + // Depth attachment + attachments[1].format = depthFormat; + attachments[1].samples = VK_SAMPLE_COUNT_1_BIT; + attachments[1].loadOp = depthClearEnable ? VK_ATTACHMENT_LOAD_OP_CLEAR + : VK_ATTACHMENT_LOAD_OP_LOAD; + attachments[1].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[1].stencilLoadOp = stencilClearEnable + ? VK_ATTACHMENT_LOAD_OP_CLEAR + : VK_ATTACHMENT_LOAD_OP_LOAD; + attachments[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[1].initialLayout = + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + VkAttachmentReference colorReference = {}; + colorReference.attachment = 0; + colorReference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + VkAttachmentReference depthReference = {}; + depthReference.attachment = 1; + depthReference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + VkSubpassDescription subpassDescription = {}; + subpassDescription.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpassDescription.colorAttachmentCount = 1; + subpassDescription.pColorAttachments = &colorReference; + subpassDescription.pDepthStencilAttachment = &depthReference; + subpassDescription.inputAttachmentCount = 0; + subpassDescription.pInputAttachments = nullptr; + subpassDescription.preserveAttachmentCount = 0; + subpassDescription.pPreserveAttachments = nullptr; + subpassDescription.pResolveAttachments = nullptr; + + // Subpass dependencies for layout transitions + std::array dependencies; + + dependencies[0].srcSubpass = VK_SUBPASS_EXTERNAL; + dependencies[0].dstSubpass = 0; + dependencies[0].srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + dependencies[0].dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + dependencies[0].srcAccessMask = VK_ACCESS_MEMORY_READ_BIT; + dependencies[0].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + dependencies[0].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT; + + dependencies[1].srcSubpass = 0; + dependencies[1].dstSubpass = VK_SUBPASS_EXTERNAL; + dependencies[1].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + dependencies[1].dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + dependencies[1].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + dependencies[1].dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + dependencies[1].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT; + + VkRenderPassCreateInfo renderPassInfo = {}; + renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + renderPassInfo.attachmentCount = static_cast(attachments.size()); + renderPassInfo.pAttachments = attachments.data(); + renderPassInfo.subpassCount = 1; + renderPassInfo.pSubpasses = &subpassDescription; + renderPassInfo.dependencyCount = static_cast(dependencies.size()); + renderPassInfo.pDependencies = dependencies.data(); + + VkRenderPass renderPass; + Verify() << vkCreateRenderPass(g_vkDevice, &renderPassInfo, nullptr, + &renderPass); + return renderPass; +} + +static VkFramebuffer +createFramebuffer(VkRenderPass renderPass, VkExtent2D extent, + std::span attachments) { + VkFramebufferCreateInfo frameBufferCreateInfo = {}; + frameBufferCreateInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + frameBufferCreateInfo.pNext = NULL; + frameBufferCreateInfo.renderPass = renderPass; + frameBufferCreateInfo.attachmentCount = attachments.size(); + frameBufferCreateInfo.pAttachments = attachments.data(); + frameBufferCreateInfo.width = extent.width; + frameBufferCreateInfo.height = extent.height; + frameBufferCreateInfo.layers = 1; + + VkFramebuffer framebuffer; + Verify() << vkCreateFramebuffer(g_vkDevice, &frameBufferCreateInfo, nullptr, + &framebuffer); + return framebuffer; +} + +static inline VkPipeline createGraphicsPipeline( + VkExtent2D extent, VkPipelineLayout pipelineLayout, VkRenderPass renderPass, + VkPipelineCache pipelineCache, + VkPipelineVertexInputStateCreateInfo vertexInputInfo, + VkPrimitiveTopology topology, + std::span shaders) { + VkPipelineInputAssemblyStateCreateInfo inputAssembly{}; + inputAssembly.sType = + VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + inputAssembly.topology = topology; + inputAssembly.primitiveRestartEnable = VK_FALSE; + + VkPipelineViewportStateCreateInfo viewportState{}; + viewportState.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + viewportState.viewportCount = 1; + viewportState.scissorCount = 1; + + VkPipelineRasterizationStateCreateInfo rasterizer{}; + rasterizer.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rasterizer.depthClampEnable = VK_TRUE; + rasterizer.rasterizerDiscardEnable = VK_FALSE; + rasterizer.polygonMode = VK_POLYGON_MODE_FILL; + rasterizer.cullMode = + (false && cullBack ? VK_CULL_MODE_BACK_BIT : VK_CULL_MODE_NONE) | + (false && cullFront ? VK_CULL_MODE_FRONT_BIT : VK_CULL_MODE_NONE); + + rasterizer.frontFace = + face ? VK_FRONT_FACE_CLOCKWISE : VK_FRONT_FACE_COUNTER_CLOCKWISE; + rasterizer.depthBiasEnable = VK_FALSE; + // rasterizer.depthBiasConstantFactor = 0; + // rasterizer.depthBiasClamp = 0; + // rasterizer.depthBiasSlopeFactor = 0; + rasterizer.lineWidth = 1.0f; + + VkPipelineMultisampleStateCreateInfo multisampling{}; + multisampling.sType = + VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + multisampling.sampleShadingEnable = VK_FALSE; + multisampling.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + + VkPipelineDepthStencilStateCreateInfo depthStencil{}; + depthStencil.sType = + VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + depthStencil.depthTestEnable = depthEnable; + depthStencil.depthWriteEnable = depthWriteEnable; + depthStencil.depthCompareOp = (VkCompareOp)zFunc; + depthStencil.depthBoundsTestEnable = depthBoundsEnable; + // depthStencil.stencilTestEnable = stencilEnable; + // depthStencil.front; + // depthStencil.back; + depthStencil.minDepthBounds = 0.f; + depthStencil.maxDepthBounds = 1.f; + + VkPipelineColorBlendAttachmentState colorBlendAttachment{}; + + colorBlendAttachment.blendEnable = blendEnable; + colorBlendAttachment.srcColorBlendFactor = + blendMultiplierToVkBlendFactor(blendColorSrc); + colorBlendAttachment.dstColorBlendFactor = + blendMultiplierToVkBlendFactor(blendColorDst); + colorBlendAttachment.colorBlendOp = blendFuncToVkBlendOp(blendColorFn); + + if (blendSeparateAlpha) { + colorBlendAttachment.srcAlphaBlendFactor = + blendMultiplierToVkBlendFactor(blendAlphaSrc); + colorBlendAttachment.dstAlphaBlendFactor = + blendMultiplierToVkBlendFactor(blendAlphaDst); + colorBlendAttachment.alphaBlendOp = blendFuncToVkBlendOp(blendAlphaFn); + } else { + colorBlendAttachment.srcAlphaBlendFactor = + colorBlendAttachment.srcColorBlendFactor; + colorBlendAttachment.dstAlphaBlendFactor = + colorBlendAttachment.dstColorBlendFactor; + colorBlendAttachment.alphaBlendOp = colorBlendAttachment.colorBlendOp; + } + + colorBlendAttachment.colorWriteMask = + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + + VkPipelineColorBlendStateCreateInfo colorBlending{}; + colorBlending.sType = + VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + colorBlending.logicOpEnable = VK_FALSE; + colorBlending.logicOp = VK_LOGIC_OP_COPY; + colorBlending.attachmentCount = 1; + colorBlending.pAttachments = &colorBlendAttachment; + colorBlending.blendConstants[0] = 0.0f; + colorBlending.blendConstants[1] = 0.0f; + colorBlending.blendConstants[2] = 0.0f; + colorBlending.blendConstants[3] = 0.0f; + + VkGraphicsPipelineCreateInfo pipelineInfo{}; + pipelineInfo.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pipelineInfo.stageCount = shaders.size(); + pipelineInfo.pStages = shaders.data(); + pipelineInfo.pVertexInputState = &vertexInputInfo; + pipelineInfo.pInputAssemblyState = &inputAssembly; + pipelineInfo.pViewportState = &viewportState; + pipelineInfo.pRasterizationState = &rasterizer; + pipelineInfo.pMultisampleState = &multisampling; + pipelineInfo.pDepthStencilState = &depthStencil; + pipelineInfo.pColorBlendState = &colorBlending; + pipelineInfo.layout = pipelineLayout; + pipelineInfo.renderPass = renderPass; + pipelineInfo.subpass = 0; + pipelineInfo.basePipelineHandle = VK_NULL_HANDLE; + + std::array dynamicStateEnables = {VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR}; + + VkPipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo{}; + pipelineDynamicStateCreateInfo.sType = + VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + pipelineDynamicStateCreateInfo.pDynamicStates = dynamicStateEnables.data(); + pipelineDynamicStateCreateInfo.dynamicStateCount = + static_cast(dynamicStateEnables.size()); + + pipelineInfo.pDynamicState = &pipelineDynamicStateCreateInfo; + + VkPipeline result; + Verify() << vkCreateGraphicsPipelines(g_vkDevice, VK_NULL_HANDLE, 1, + &pipelineInfo, nullptr, &result); + + return result; +} + +static inline VkPipeline +createComputePipeline(VkPipelineLayout pipelineLayout, + const VkPipelineShaderStageCreateInfo &shader) { + VkComputePipelineCreateInfo pipelineInfo{}; + pipelineInfo.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + pipelineInfo.layout = pipelineLayout; + pipelineInfo.stage = shader; + + VkPipeline result; + Verify() << vkCreateComputePipelines(g_vkDevice, VK_NULL_HANDLE, 1, + &pipelineInfo, nullptr, &result); + return result; +} + +static VkDescriptorSet createDescriptorSet(const ShaderModule *shader) { + VkDescriptorSetAllocateInfo allocateInfo{}; + allocateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + allocateInfo.descriptorPool = shader->descriptorPool; + allocateInfo.pSetLayouts = &shader->descriptorSetLayout; + allocateInfo.descriptorSetCount = 1; + + VkDescriptorSet result; + Verify() << vkAllocateDescriptorSets(g_vkDevice, &allocateInfo, &result); + return result; +} + +inline VkDescriptorSetLayoutBinding +createDescriptorSetLayoutBinding(uint32_t binding, uint32_t descriptorCount, + VkDescriptorType descriptorType, + VkShaderStageFlags stageFlags) { + + VkDescriptorSetLayoutBinding result{}; + result.binding = binding; + result.descriptorCount = descriptorCount; + result.descriptorType = descriptorType; + result.pImmutableSamplers = nullptr; + result.stageFlags = stageFlags; + return result; +} + +inline VkVertexInputBindingDescription +createVertexInputBindingDescription(uint32_t binding, uint32_t stride, + VkVertexInputRate inputRate) { + VkVertexInputBindingDescription bindingDescription{}; + + bindingDescription.binding = binding; + bindingDescription.stride = stride; + bindingDescription.inputRate = inputRate; + + return bindingDescription; +} + +inline VkVertexInputAttributeDescription +createVertexInputAttributeDescription(uint32_t location, uint32_t binding, + VkFormat format, uint32_t offset) { + VkVertexInputAttributeDescription result{}; + result.location = location; + result.binding = binding; + result.format = format; + result.offset = offset; + return result; +} + +inline VkImage createImage2D(uint32_t width, uint32_t height, VkFormat format, + VkImageTiling tiling, VkImageUsageFlags usage, + uint32_t mipLevels = 1) { + VkImageCreateInfo imageInfo{}; + imageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + imageInfo.imageType = VK_IMAGE_TYPE_2D; + imageInfo.extent.width = width; + imageInfo.extent.height = height; + imageInfo.extent.depth = 1; + imageInfo.mipLevels = mipLevels; + imageInfo.arrayLayers = 1; + imageInfo.format = format; + imageInfo.tiling = tiling; + imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imageInfo.usage = usage; + imageInfo.samples = VK_SAMPLE_COUNT_1_BIT; + imageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + + VkImage image; + Verify() << vkCreateImage(g_vkDevice, &imageInfo, nullptr, &image); + return image; +} + +inline VkImageSubresourceRange +imageSubresourceRange(VkImageAspectFlags aspectMask, uint32_t baseMipLevel = 0, + uint32_t levelCount = 1, uint32_t baseArrayLayer = 0, + uint32_t layerCount = 1) { + return {aspectMask, baseMipLevel, levelCount, baseArrayLayer, layerCount}; +} + +inline VkImageView createImageView2D(VkImage image, VkFormat format, + VkComponentMapping components, + VkImageSubresourceRange subresourceRange) { + VkImageViewCreateInfo viewInfo{}; + viewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + viewInfo.image = image; + viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D; + viewInfo.format = format; + viewInfo.components = components; + viewInfo.subresourceRange = subresourceRange; + + VkImageView imageView; + Verify() << vkCreateImageView(g_vkDevice, &viewInfo, nullptr, &imageView); + + return imageView; +} + +inline void +updateDescriptorSets(std::span writeSets, + std::span copySets = {}) { + vkUpdateDescriptorSets(g_vkDevice, writeSets.size(), writeSets.data(), + copySets.size(), copySets.data()); +} + +inline VkDescriptorSetLayout createDescriptorSetLayout( + std::span bindings) { + VkDescriptorSetLayoutCreateInfo layoutInfo{}; + layoutInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + layoutInfo.bindingCount = static_cast(bindings.size()); + layoutInfo.pBindings = bindings.data(); + + VkDescriptorSetLayout result; + Verify() << vkCreateDescriptorSetLayout(g_vkDevice, &layoutInfo, nullptr, + &result); + + return result; +} + +inline VkDescriptorPoolSize createDescriptorPoolSize(VkDescriptorType type, + uint32_t descriptorCount) { + VkDescriptorPoolSize result{}; + result.type = type; + result.descriptorCount = descriptorCount; + return result; +} + +inline VkDescriptorPool +createDescriptorPool(uint32_t maxSets, + std::span poolSizes) { + VkDescriptorPoolCreateInfo poolInfo{}; + poolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + poolInfo.poolSizeCount = static_cast(poolSizes.size()); + poolInfo.pPoolSizes = poolSizes.data(); + poolInfo.maxSets = maxSets; + + VkDescriptorPool result; + Verify() << vkCreateDescriptorPool(g_vkDevice, &poolInfo, nullptr, &result); + return result; +} + +inline VkDescriptorBufferInfo +descriptorBufferInfo(VkBuffer buffer, VkDeviceSize offset = 0, + VkDeviceSize range = VK_WHOLE_SIZE) { + return {buffer, offset, range}; +} + +inline VkDescriptorImageInfo descriptorImageInfo(VkSampler sampler, + VkImageView imageView, + VkImageLayout imageLayout) { + return {sampler, imageView, imageLayout}; +} + +inline VkWriteDescriptorSet writeDescriptorSetBuffer( + VkDescriptorSet dstSet, VkDescriptorType type, uint32_t binding, + const VkDescriptorBufferInfo *bufferInfo, std::uint32_t count = 1) { + VkWriteDescriptorSet writeDescriptorSet{}; + writeDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writeDescriptorSet.dstSet = dstSet; + writeDescriptorSet.descriptorType = type; + writeDescriptorSet.dstBinding = binding; + writeDescriptorSet.pBufferInfo = bufferInfo; + writeDescriptorSet.descriptorCount = count; + return writeDescriptorSet; +} + +inline VkWriteDescriptorSet writeDescriptorSetImage( + VkDescriptorSet dstSet, VkDescriptorType type, uint32_t binding, + const VkDescriptorImageInfo *imageInfo, std::uint32_t count) { + VkWriteDescriptorSet writeDescriptorSet{}; + writeDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writeDescriptorSet.dstSet = dstSet; + writeDescriptorSet.descriptorType = type; + writeDescriptorSet.dstBinding = binding; + writeDescriptorSet.pImageInfo = imageInfo; + writeDescriptorSet.descriptorCount = count; + return writeDescriptorSet; +} + +inline VkPipelineLayout +createPipelineLayout(VkDescriptorSetLayout descriptorSetLayout) { + VkPipelineLayoutCreateInfo pipelineLayoutInfo{}; + pipelineLayoutInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + pipelineLayoutInfo.setLayoutCount = 1; + pipelineLayoutInfo.pSetLayouts = &descriptorSetLayout; + + VkPipelineLayout result; + Verify() << vkCreatePipelineLayout(g_vkDevice, &pipelineLayoutInfo, nullptr, + &result); + return result; +} + +inline VkPipelineVertexInputStateCreateInfo createPipelineVertexInputState( + std::span vertexBindingDescriptions, + std::span + vertexAttributeDescriptions, + VkPipelineVertexInputStateCreateFlags flags = 0) { + VkPipelineVertexInputStateCreateInfo vertexInputInfo{}; + vertexInputInfo.sType = + VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vertexInputInfo.flags = flags; + + vertexInputInfo.vertexBindingDescriptionCount = + vertexBindingDescriptions.size(); + vertexInputInfo.pVertexBindingDescriptions = vertexBindingDescriptions.data(); + + vertexInputInfo.vertexAttributeDescriptionCount = + vertexAttributeDescriptions.size(); + vertexInputInfo.pVertexAttributeDescriptions = + vertexAttributeDescriptions.data(); + + return vertexInputInfo; +} + +inline int getBitWidthOfSurfaceFormat(SurfaceFormat format) { + switch (format) { + case kSurfaceFormatInvalid: + return 0; + case kSurfaceFormat8: + return 8; + case kSurfaceFormat16: + return 16; + case kSurfaceFormat8_8: + return 8 + 8; + case kSurfaceFormat32: + return 32; + case kSurfaceFormat16_16: + return 16 + 16; + case kSurfaceFormat10_11_11: + return 10 + 11 + 11; + case kSurfaceFormat11_11_10: + return 11 + 11 + 10; + case kSurfaceFormat10_10_10_2: + return 10 + 10 + 10 + 2; + case kSurfaceFormat2_10_10_10: + return 2 + 10 + 10 + 10; + case kSurfaceFormat8_8_8_8: + return 8 + 8 + 8 + 8; + case kSurfaceFormat32_32: + return 32 + 32; + case kSurfaceFormat16_16_16_16: + return 16 + 16 + 16 + 16; + case kSurfaceFormat32_32_32: + return 32 + 32 + 32; + case kSurfaceFormat32_32_32_32: + return 32 + 32 + 32 + 32; + case kSurfaceFormat5_6_5: + return 5 + 6 + 5; + case kSurfaceFormat1_5_5_5: + return 1 + 5 + 5 + 5; + case kSurfaceFormat5_5_5_1: + return 5 + 5 + 5 + 1; + case kSurfaceFormat4_4_4_4: + return 4 + 4 + 4 + 4; + case kSurfaceFormat8_24: + return 8 + 24; + case kSurfaceFormat24_8: + return 24 + 8; + case kSurfaceFormatX24_8_32: + return 24 + 8 + 32; + case kSurfaceFormatGB_GR: + return 2 + 2; + case kSurfaceFormatBG_RG: + return 0; + case kSurfaceFormat5_9_9_9: + return 5 + 9 + 9 + 9; + case kSurfaceFormatBc1: + return 0; + case kSurfaceFormatBc2: + return 0; + case kSurfaceFormatBc3: + return 32; + case kSurfaceFormatBc4: + return 0; + case kSurfaceFormatBc5: + return 0; + case kSurfaceFormatBc6: + return 0; + case kSurfaceFormatBc7: + return 0; + case kSurfaceFormatFmask8_S2_F1: + return 0; + case kSurfaceFormatFmask8_S4_F1: + return 0; + case kSurfaceFormatFmask8_S8_F1: + return 0; + case kSurfaceFormatFmask8_S2_F2: + return 0; + case kSurfaceFormatFmask8_S4_F2: + return 0; + case kSurfaceFormatFmask8_S4_F4: + return 0; + case kSurfaceFormatFmask16_S16_F1: + return 0; + case kSurfaceFormatFmask16_S8_F2: + return 0; + case kSurfaceFormatFmask32_S16_F2: + return 0; + case kSurfaceFormatFmask32_S8_F4: + return 0; + case kSurfaceFormatFmask32_S8_F8: + return 0; + case kSurfaceFormatFmask64_S16_F4: + return 0; + case kSurfaceFormatFmask64_S16_F8: + return 0; + case kSurfaceFormat4_4: + return 4 + 4; + case kSurfaceFormat6_5_5: + return 6 + 5 + 5; + case kSurfaceFormat1: + return 1; + case kSurfaceFormat1Reversed: + return 0; + } + + return 0; +} + +inline VkFormat surfaceFormatToVkFormat(SurfaceFormat surface, + TextureChannelType channel) { + switch (surface) { + case kSurfaceFormat32: + switch (channel) { + case kTextureChannelTypeUInt: + return VK_FORMAT_R32_UINT; + case kTextureChannelTypeSInt: + return VK_FORMAT_R32_SINT; + case kTextureChannelTypeFloat: + return VK_FORMAT_R32_SFLOAT; + default: + break; + } + break; + + case kSurfaceFormat32_32: + switch (channel) { + case kTextureChannelTypeUInt: + return VK_FORMAT_R32G32_UINT; + case kTextureChannelTypeSInt: + return VK_FORMAT_R32G32_SINT; + case kTextureChannelTypeFloat: + return VK_FORMAT_R32G32_SFLOAT; + default: + break; + } + break; + + case kSurfaceFormat16_16_16_16: + switch (channel) { + case kTextureChannelTypeUNorm: + return VK_FORMAT_R16G16B16A16_UNORM; + case kTextureChannelTypeSNorm: + return VK_FORMAT_R16G16B16A16_SNORM; + case kTextureChannelTypeUScaled: + return VK_FORMAT_R16G16B16A16_USCALED; + case kTextureChannelTypeSScaled: + return VK_FORMAT_R16G16B16A16_SSCALED; + case kTextureChannelTypeUInt: + return VK_FORMAT_R16G16B16A16_UINT; + case kTextureChannelTypeSInt: + return VK_FORMAT_R16G16B16A16_SINT; + case kTextureChannelTypeFloat: + return VK_FORMAT_R16G16B16A16_SFLOAT; + + default: + break; + } + break; + + case kSurfaceFormat32_32_32: + switch (channel) { + case kTextureChannelTypeUInt: + return VK_FORMAT_R32G32B32_UINT; + case kTextureChannelTypeSInt: + return VK_FORMAT_R32G32B32_SINT; + case kTextureChannelTypeFloat: + return VK_FORMAT_R32G32B32_SFLOAT; + default: + break; + } + break; + case kSurfaceFormat32_32_32_32: + switch (channel) { + case kTextureChannelTypeUInt: + return VK_FORMAT_R32G32B32A32_UINT; + case kTextureChannelTypeSInt: + return VK_FORMAT_R32G32B32A32_SINT; + case kTextureChannelTypeFloat: + return VK_FORMAT_R32G32B32A32_SFLOAT; + default: + break; + } + break; + + case kSurfaceFormat8_8_8_8: + switch (channel) { + case kTextureChannelTypeUNorm: + return VK_FORMAT_R8G8B8A8_UNORM; + case kTextureChannelTypeSNorm: + return VK_FORMAT_R8G8B8A8_SNORM; + case kTextureChannelTypeUScaled: + return VK_FORMAT_R8G8B8A8_USCALED; + case kTextureChannelTypeSScaled: + return VK_FORMAT_R8G8B8A8_SSCALED; + case kTextureChannelTypeUInt: + return VK_FORMAT_R8G8B8A8_UINT; + case kTextureChannelTypeSInt: + return VK_FORMAT_R8G8B8A8_SINT; + // case kTextureChannelTypeSNormNoZero: + // return VK_FORMAT_R8G8B8A8_SNORM; + case kTextureChannelTypeSrgb: + return VK_FORMAT_R8G8B8A8_SRGB; + // case kTextureChannelTypeUBNorm: + // return VK_FORMAT_R8G8B8A8_UNORM; + // case kTextureChannelTypeUBNormNoZero: + // return VK_FORMAT_R8G8B8A8_UNORM; + // case kTextureChannelTypeUBInt: + // return VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK; + // case kTextureChannelTypeUBScaled: + // return VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK; + + default: + break; + } + break; + + case kSurfaceFormatBc1: + switch (channel) { + case kTextureChannelTypeSrgb: + return VK_FORMAT_BC1_RGBA_SRGB_BLOCK; + default: + break; + } + + case kSurfaceFormatBc3: + switch (channel) { + case kTextureChannelTypeSrgb: + return VK_FORMAT_BC3_SRGB_BLOCK; + default: + break; + } + + default: + break; + } + + util::unreachable("unimplemented surface format. %x.%x\n", (int)surface, + (int)channel); +} + +static VkPrimitiveTopology getVkPrimitiveType(PrimitiveType type) { + + switch (type) { + case kPrimitiveTypePointList: + return VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + case kPrimitiveTypeLineList: + return VK_PRIMITIVE_TOPOLOGY_LINE_LIST; + case kPrimitiveTypeLineStrip: + return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; + case kPrimitiveTypeTriList: + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + case kPrimitiveTypeTriFan: + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; + case kPrimitiveTypeTriStrip: + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; + case kPrimitiveTypePatch: + return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; + case kPrimitiveTypeLineListAdjacency: + return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY; + case kPrimitiveTypeLineStripAdjacency: + return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY; + case kPrimitiveTypeTriListAdjacency: + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY; + case kPrimitiveTypeTriStripAdjacency: + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY; + case kPrimitiveTypeLineLoop: + return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; // FIXME + + case kPrimitiveTypeRectList: + case kPrimitiveTypeQuadList: + case kPrimitiveTypeQuadStrip: + case kPrimitiveTypePolygon: + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + + default: + util::unreachable(); + } +} + +static std::pair +quadListPrimConverter(std::uint64_t index) { + static constexpr int indecies[] = {0, 1, 2, 2, 3, 0}; + return {index, index / 6 + indecies[index % 6]}; +} + +static std::pair +quadStripPrimConverter(std::uint64_t index) { + static constexpr int indecies[] = {0, 1, 3, 0, 3, 2}; + return {index, (index / 6) * 4 + indecies[index % 6]}; +} + +using ConverterFn = + std::pair(std::uint64_t index); + +static ConverterFn *getPrimConverterFn(PrimitiveType primType, + std::uint32_t *count) { + switch (primType) { + case kPrimitiveTypeQuadList: + *count = *count / 4 * 6; + return quadListPrimConverter; + + case kPrimitiveTypeQuadStrip: + *count = *count / 4 * 6; + return quadStripPrimConverter; + + default: + util::unreachable(); + } +} + +static bool isPrimRequiresConversion(PrimitiveType primType) { + switch (primType) { + case kPrimitiveTypePointList: + case kPrimitiveTypeLineList: + case kPrimitiveTypeLineStrip: + case kPrimitiveTypeTriList: + case kPrimitiveTypeTriFan: + case kPrimitiveTypeTriStrip: + case kPrimitiveTypePatch: + case kPrimitiveTypeLineListAdjacency: + case kPrimitiveTypeLineStripAdjacency: + case kPrimitiveTypeTriListAdjacency: + case kPrimitiveTypeTriStripAdjacency: + return false; + case kPrimitiveTypeLineLoop: // FIXME + util::unreachable(); + return false; + + case kPrimitiveTypeRectList: + return false; // handled by geometry shader + + case kPrimitiveTypeQuadList: + case kPrimitiveTypeQuadStrip: + case kPrimitiveTypePolygon: + return true; + + default: + util::unreachable(); + } +} + +static std::uint32_t getPrimDrawCount(PrimitiveType primType, + std::uint32_t count) { + switch (primType) { + case kPrimitiveTypePointList: + case kPrimitiveTypeLineList: + case kPrimitiveTypeLineStrip: + case kPrimitiveTypeTriList: + case kPrimitiveTypeTriFan: + case kPrimitiveTypeTriStrip: + case kPrimitiveTypePatch: + case kPrimitiveTypeLineListAdjacency: + case kPrimitiveTypeLineStripAdjacency: + case kPrimitiveTypeTriListAdjacency: + case kPrimitiveTypeTriStripAdjacency: + case kPrimitiveTypeRectList: // FIXME + return count; + + case kPrimitiveTypeLineLoop: // FIXME + util::unreachable(); + + case kPrimitiveTypeQuadList: + return (count / 4) * 6; + + case kPrimitiveTypeQuadStrip: + return (count / 2) * 6; + + case kPrimitiveTypePolygon: + util::unreachable(); + + default: + util::unreachable(); + } +} +} // namespace amdgpu::device + +static bool validateSpirv(const std::vector &bin) { + spv_target_env target_env = SPV_ENV_VULKAN_1_3; + spv_context spvContext = spvContextCreate(target_env); + spv_diagnostic diagnostic = nullptr; + spv_const_binary_t binary = {bin.data(), bin.size()}; + spv_result_t error = spvValidate(spvContext, &binary, &diagnostic); + if (error != 0) + spvDiagnosticPrint(diagnostic); + spvDiagnosticDestroy(diagnostic); + spvContextDestroy(spvContext); + return error == 0; +} + +static void printSpirv(const std::vector &bin) { + // spv_target_env target_env = SPV_ENV_VULKAN_1_3; + // spv_context spvContext = spvContextCreate(target_env); + // spv_diagnostic diagnostic = nullptr; + + // spv_result_t error = spvBinaryToText( + // spvContext, bin.data(), bin.size(), + // SPV_BINARY_TO_TEXT_OPTION_PRINT | // SPV_BINARY_TO_TEXT_OPTION_COLOR | + // SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES | + // SPV_BINARY_TO_TEXT_OPTION_COMMENT | SPV_BINARY_TO_TEXT_OPTION_INDENT, + // nullptr, &diagnostic); + + // if (error != 0) { + // spvDiagnosticPrint(diagnostic); + // } + + // spvDiagnosticDestroy(diagnostic); + // spvContextDestroy(spvContext); + + // if (error != 0) { + // return; + // } + + // spirv_cross::CompilerGLSL glsl(bin); + // spirv_cross::CompilerGLSL::Options options; + // options.version = 460; + // options.es = false; + // options.vulkan_semantics = true; + // glsl.set_common_options(options); + // std::printf("%s\n", glsl.compile().c_str()); +} + +static std::optional> +optimizeSpirv(std::span spirv) { + spvtools::Optimizer optimizer(SPV_ENV_VULKAN_1_3); + optimizer.RegisterPerformancePasses(); + optimizer.RegisterPass(spvtools::CreateSimplificationPass()); + + std::vector result; + if (optimizer.Run(spirv.data(), spirv.size(), &result)) { + return result; + } + + util::unreachable(); + return {}; +} + +VkShaderStageFlagBits shaderStageToVk(amdgpu::shader::Stage stage) { + switch (stage) { + case amdgpu::shader::Stage::None: + break; + case amdgpu::shader::Stage::Fragment: + return VK_SHADER_STAGE_FRAGMENT_BIT; + case amdgpu::shader::Stage::Vertex: + return VK_SHADER_STAGE_VERTEX_BIT; + case amdgpu::shader::Stage::Geometry: + return VK_SHADER_STAGE_GEOMETRY_BIT; + case amdgpu::shader::Stage::Compute: + return VK_SHADER_STAGE_COMPUTE_BIT; + } + + return VK_SHADER_STAGE_ALL; +} + +namespace amdgpu::device { +template class Ref { + struct Handle { + T object; + std::atomic references{1}; + + template + requires(std::is_constructible_v) + Handle(ArgsT &&...args) : object(std::forward(args)...) {} + + void incRef() { + if (references.fetch_add(1, std::memory_order_relaxed) > 10000) { + util::unreachable(); + } + } + + void decRef() { + if (references.fetch_sub(1, std::memory_order_relaxed) == 1) { + delete this; + } + } + }; + + Handle *mObject = nullptr; + +public: + Ref() noexcept = default; + Ref(Ref &&other) noexcept : mObject(std::exchange(other.mObject, nullptr)) {} + Ref(const Ref &other) noexcept : mObject(other.mObject) { + if (mObject != nullptr) { + mObject->incRef(); + } + } + + ~Ref() noexcept { + if (mObject != nullptr) { + mObject->decRef(); + } + } + + Ref &operator=(const Ref &other) noexcept { + *this = Ref(other); + return *this; + } + + Ref &operator=(Ref &&other) noexcept { + Ref tmp(std::move(*this)); + mObject = std::exchange(other.mObject, nullptr); + return *this; + } + + static constexpr std::size_t getObjectSize() { return sizeof(Handle); } + static constexpr std::size_t getObjectAlign() { return alignof(Handle); } + + template + requires(std::is_constructible_v) + static Ref CreateAt(void *memory, ArgsT &&...args) { + Ref result; + result.mObject = new (memory) Handle(std::forward(args)...); + return result; + } + + template + requires(std::is_constructible_v) + static Ref Create(ArgsT &&...args) { + Ref result; + result.mObject = new Handle(std::forward(args)...); + return result; + } + + T *get() const { return mObject ? &mObject->object : nullptr; } + + T *operator->() const { return &mObject->object; } + + bool operator==(std::nullptr_t) const { return mObject == nullptr; } + bool operator!=(std::nullptr_t) const { return mObject != nullptr; } + + auto operator<=>(const Ref &other) const = default; +}; + +class DeviceMemory { + VkDeviceMemory mDeviceMemory = VK_NULL_HANDLE; + VkDeviceSize mSize = 0; + unsigned mMemoryTypeIndex = 0; + + DeviceMemory &operator=(const DeviceMemory &) = default; + +public: + DeviceMemory(DeviceMemory &) = delete; + DeviceMemory(DeviceMemory &&other) { *this = std::move(other); } + DeviceMemory() = default; + + ~DeviceMemory() { + if (mDeviceMemory != nullptr) { + vkFreeMemory(g_vkDevice, mDeviceMemory, g_vkAllocator); + } + } + + DeviceMemory &operator=(DeviceMemory &&other) { + *this = other; + other.mDeviceMemory = nullptr; + return *this; + } + + VkDeviceMemory getHandle() const { return mDeviceMemory; } + VkDeviceSize getSize() const { return mSize; } + unsigned getMemoryTypeIndex() const { return mMemoryTypeIndex; } + + static DeviceMemory AllocateFromType(std::size_t size, + unsigned memoryTypeIndex) { + VkMemoryAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + allocInfo.allocationSize = size; + allocInfo.memoryTypeIndex = memoryTypeIndex; + + DeviceMemory result; + Verify() << vkAllocateMemory(g_vkDevice, &allocInfo, g_vkAllocator, + &result.mDeviceMemory); + result.mSize = size; + result.mMemoryTypeIndex = memoryTypeIndex; + return result; + } + + static DeviceMemory Allocate(std::size_t size, unsigned memoryTypeBits, + VkMemoryPropertyFlags properties) { + return AllocateFromType( + size, findPhysicalMemoryTypeIndex(memoryTypeBits, properties)); + } + + static DeviceMemory Allocate(VkMemoryRequirements requirements, + VkMemoryPropertyFlags properties) { + return AllocateFromType( + requirements.size, + findPhysicalMemoryTypeIndex(requirements.memoryTypeBits, properties)); + } + + static DeviceMemory CreateExternalFd(int fd, std::size_t size, + unsigned memoryTypeIndex) { + VkImportMemoryFdInfoKHR importMemoryInfo{ + VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, + nullptr, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, + fd, + }; + + VkMemoryAllocateInfo allocInfo{ + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = &importMemoryInfo, + .allocationSize = size, + .memoryTypeIndex = memoryTypeIndex, + }; + + DeviceMemory result; + Verify() << vkAllocateMemory(g_vkDevice, &allocInfo, g_vkAllocator, + &result.mDeviceMemory); + result.mSize = size; + result.mMemoryTypeIndex = memoryTypeIndex; + return result; + } + static DeviceMemory + CreateExternalHostMemory(void *hostPointer, std::size_t size, + VkMemoryPropertyFlags properties) { + VkMemoryHostPointerPropertiesEXT hostPointerProperties = { + VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT}; + + auto vkGetMemoryHostPointerPropertiesEXT = + (PFN_vkGetMemoryHostPointerPropertiesEXT)vkGetDeviceProcAddr( + g_vkDevice, "vkGetMemoryHostPointerPropertiesEXT"); + + Verify() << vkGetMemoryHostPointerPropertiesEXT( + g_vkDevice, VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, + hostPointer, &hostPointerProperties); + + auto memoryTypeBits = hostPointerProperties.memoryTypeBits; + + VkImportMemoryHostPointerInfoEXT importMemoryInfo = { + VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, + nullptr, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, + hostPointer, + }; + + auto memoryTypeIndex = + findPhysicalMemoryTypeIndex(memoryTypeBits, properties); + + VkMemoryAllocateInfo allocInfo{ + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = &importMemoryInfo, + .allocationSize = size, + .memoryTypeIndex = memoryTypeIndex, + }; + + DeviceMemory result; + Verify() << vkAllocateMemory(g_vkDevice, &allocInfo, g_vkAllocator, + &result.mDeviceMemory); + result.mSize = size; + result.mMemoryTypeIndex = memoryTypeIndex; + return result; + } + + void *map(VkDeviceSize offset, VkDeviceSize size) { + void *result = 0; + Verify() << vkMapMemory(g_vkDevice, mDeviceMemory, offset, size, 0, + &result); + + return result; + } + + void unmap() { vkUnmapMemory(g_vkDevice, mDeviceMemory); } +}; + +struct DeviceMemoryRef { + VkDeviceMemory deviceMemory = VK_NULL_HANDLE; + VkDeviceSize offset = 0; + VkDeviceSize size = 0; + void *data = nullptr; +}; + +class MemoryResource { + DeviceMemory mMemory; + VkMemoryPropertyFlags mProperties = 0; + std::size_t mSize = 0; + std::size_t mAllocationOffset = 0; + char *mData = nullptr; + +public: + MemoryResource(const MemoryResource &) = delete; + + MemoryResource() = default; + MemoryResource(MemoryResource &&other) = default; + MemoryResource &operator=(MemoryResource &&other) = default; + + ~MemoryResource() { + if (mMemory.getHandle() != nullptr && mData != nullptr) { + vkUnmapMemory(g_vkDevice, mMemory.getHandle()); + } + } + + void clear() { mAllocationOffset = 0; } + + static MemoryResource CreateFromFd(int fd, std::size_t size) { + auto properties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + MemoryResource result; + result.mMemory = DeviceMemory::CreateExternalFd( + fd, size, findPhysicalMemoryTypeIndex(~0, properties)); + result.mProperties = properties; + result.mSize = size; + + return result; + } + + static MemoryResource CreateFromHost(void *data, std::size_t size) { + auto properties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + MemoryResource result; + result.mMemory = + DeviceMemory::CreateExternalHostMemory(data, size, properties); + result.mProperties = properties; + result.mSize = size; + + return result; + } + + static MemoryResource CreateHostVisible(std::size_t size) { + auto properties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + MemoryResource result; + result.mMemory = DeviceMemory::Allocate(size, ~0, properties); + result.mProperties = properties; + result.mSize = size; + + void *data = nullptr; + Verify() << vkMapMemory(g_vkDevice, result.mMemory.getHandle(), 0, size, 0, + &data); + result.mData = reinterpret_cast(data); + + return result; + } + + static MemoryResource CreateDeviceLocal(std::size_t size) { + auto properties = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + + MemoryResource result; + result.mMemory = DeviceMemory::Allocate(size, ~0, properties); + result.mProperties = properties; + result.mSize = size; + return result; + } + + DeviceMemoryRef allocate(VkMemoryRequirements requirements) { + if ((requirements.memoryTypeBits & (1 << mMemory.getMemoryTypeIndex())) == + 0) { + util::unreachable(); + } + + auto offset = (mAllocationOffset + requirements.alignment - 1) & + ~(requirements.alignment - 1); + mAllocationOffset = offset + requirements.size; + if (mAllocationOffset > mSize) { + util::unreachable("out of memory resource"); + } + + return {mMemory.getHandle(), offset, requirements.size, + mData ? mData + offset : nullptr}; + } + + DeviceMemoryRef getFromOffset(std::uint64_t offset, std::size_t size) { + return {mMemory.getHandle(), offset, size, nullptr}; + } + + std::size_t getSize() const { return mSize; } + + explicit operator bool() const { return mMemory.getHandle() != nullptr; } +}; + +struct Semaphore { + VkSemaphore mSemaphore = VK_NULL_HANDLE; + + Semaphore &operator=(const Semaphore &) = default; + +public: + Semaphore(const Semaphore &) = delete; + + Semaphore() = default; + Semaphore(Semaphore &&other) { *this = std::move(other); } + + Semaphore &operator=(Semaphore &&other) { + *this = other; + other.mSemaphore = nullptr; + return *this; + } + + ~Semaphore() { + if (mSemaphore != VK_NULL_HANDLE) { + vkDestroySemaphore(g_vkDevice, mSemaphore, nullptr); + } + } + + static Semaphore Create(std::uint64_t initialValue = 0) { + VkSemaphoreTypeCreateInfo typeCreateInfo = { + VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr, + VK_SEMAPHORE_TYPE_TIMELINE, initialValue}; + + VkSemaphoreCreateInfo createInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + &typeCreateInfo, 0}; + + Semaphore result; + Verify() << vkCreateSemaphore(g_vkDevice, &createInfo, nullptr, &result.mSemaphore); + return result; + } + + VkResult wait(std::uint64_t value, uint64_t timeout) const { + VkSemaphoreWaitInfo waitInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, + nullptr, + VK_SEMAPHORE_WAIT_ANY_BIT, + 1, + &mSemaphore, + &value}; + + return vkWaitSemaphores(g_vkDevice, &waitInfo, timeout); + } + + void signal(std::uint64_t value) { + VkSemaphoreSignalInfo signalInfo = { + VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO, + nullptr, + mSemaphore, + value + }; + + Verify() << vkSignalSemaphore(g_vkDevice, &signalInfo); + } + + std::uint64_t getCounterValue() const { + std::uint64_t result = 0; + Verify() << vkGetSemaphoreCounterValue(g_vkDevice, mSemaphore, &result); + return result; + } + + VkSemaphore getHandle() const { + return mSemaphore; + } + + bool operator==(std::nullptr_t) const { return mSemaphore == nullptr; } + bool operator!=(std::nullptr_t) const { return mSemaphore != nullptr; } +}; + +struct CommandBuffer { + VkCommandBuffer mCmdBuffer = VK_NULL_HANDLE; + + CommandBuffer &operator=(const CommandBuffer &) = default; + +public: + CommandBuffer(const CommandBuffer &) = delete; + + CommandBuffer() = default; + CommandBuffer(CommandBuffer &&other) { *this = std::move(other); } + + CommandBuffer &operator=(CommandBuffer &&other) { + *this = other; + other.mCmdBuffer = nullptr; + return *this; + } + + CommandBuffer(VkCommandPool commandPool, + VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + VkCommandBufferUsageFlagBits flags = {}) { + VkCommandBufferAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + allocInfo.level = level; + allocInfo.commandPool = commandPool; + allocInfo.commandBufferCount = 1; + + VkCommandBuffer commandBuffer; + vkAllocateCommandBuffers(g_vkDevice, &allocInfo, &commandBuffer); + + VkCommandBufferBeginInfo beginInfo{}; + beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + beginInfo.flags = flags; + + vkBeginCommandBuffer(commandBuffer, &beginInfo); + } + + void end() { vkEndCommandBuffer(mCmdBuffer); } + + bool operator==(std::nullptr_t) const { return mCmdBuffer == nullptr; } + bool operator!=(std::nullptr_t) const { return mCmdBuffer != nullptr; } +}; + +class Buffer { + VkBuffer mBuffer = VK_NULL_HANDLE; + DeviceMemoryRef mMemory; + + Buffer &operator=(const Buffer &) = default; + +public: + Buffer(const Buffer &) = delete; + + Buffer() = default; + Buffer(Buffer &&other) { *this = std::move(other); } + ~Buffer() { + if (mBuffer != nullptr) { + vkDestroyBuffer(g_vkDevice, mBuffer, g_vkAllocator); + } + } + + Buffer &operator=(Buffer &&other) { + *this = other; + other.mBuffer = nullptr; + return *this; + } + + Buffer(std::size_t size, VkBufferUsageFlags usage, + VkBufferCreateFlags flags = 0, + VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE, + std::span queueFamilyIndices = {}) { + VkBufferCreateInfo bufferInfo{}; + bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + bufferInfo.flags = flags; + bufferInfo.size = size; + bufferInfo.usage = usage; + bufferInfo.sharingMode = sharingMode; + bufferInfo.queueFamilyIndexCount = queueFamilyIndices.size(); + bufferInfo.pQueueFamilyIndices = queueFamilyIndices.data(); + + Verify() << vkCreateBuffer(g_vkDevice, &bufferInfo, g_vkAllocator, + &mBuffer); + } + + static Buffer + CreateExternal(std::size_t size, VkBufferUsageFlags usage, + VkBufferCreateFlags flags = 0, + VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE, + std::span queueFamilyIndices = {}) { + VkExternalMemoryBufferCreateInfo info{ + VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, nullptr, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT}; + + VkBufferCreateInfo bufferInfo{}; + bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + bufferInfo.pNext = &info; + bufferInfo.flags = flags; + bufferInfo.size = size; + bufferInfo.usage = usage; + bufferInfo.sharingMode = sharingMode; + bufferInfo.queueFamilyIndexCount = queueFamilyIndices.size(); + bufferInfo.pQueueFamilyIndices = queueFamilyIndices.data(); + + Buffer result; + + Verify() << vkCreateBuffer(g_vkDevice, &bufferInfo, g_vkAllocator, + &result.mBuffer); + + return result; + } + + static Buffer + Allocate(MemoryResource &pool, std::size_t size, VkBufferUsageFlags usage, + VkBufferCreateFlags flags = 0, + VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE, + std::span queueFamilyIndices = {}) { + Buffer result(size, usage, flags, sharingMode, queueFamilyIndices); + result.allocateAndBind(pool); + + return result; + } + + VkBuffer getHandle() const { return mBuffer; } + + VkMemoryRequirements getMemoryRequirements() const { + VkMemoryRequirements requirements{}; + vkGetBufferMemoryRequirements(g_vkDevice, mBuffer, &requirements); + return requirements; + } + + void allocateAndBind(MemoryResource &pool) { + auto memory = pool.allocate(getMemoryRequirements()); + bindMemory(memory); + } + + void bindMemory(DeviceMemoryRef memory) { + Verify() << vkBindBufferMemory(g_vkDevice, mBuffer, memory.deviceMemory, + memory.offset); + mMemory = memory; + } + + void copyTo(VkCommandBuffer cmdBuffer, VkBuffer dstBuffer, std::span regions) { + vkCmdCopyBuffer(cmdBuffer, mBuffer, dstBuffer, regions.size(), regions.data()); + + VkDependencyInfo depInfo = { + VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + nullptr + }; + vkCmdPipelineBarrier2(cmdBuffer, &depInfo); + } + + const DeviceMemoryRef &getMemory() const { return mMemory; } + bool operator==(std::nullptr_t) const { return mBuffer == nullptr; } + bool operator!=(std::nullptr_t) const { return mBuffer != nullptr; } +}; + +static void readImageToDeviceMemory(void *data, const void *sourceData, + int tileMode, SurfaceFormat format, + std::uint32_t width, std::uint32_t height, + std::uint32_t depth) { + auto pixelSize = (getBitWidthOfSurfaceFormat(format) + 7) / 8; + if (pixelSize == 0) { + pixelSize = 4; + } + auto imageSize = width * height * depth * pixelSize; + + if (tileMode == 8) { + std::memcpy(data, sourceData, imageSize); + } else if (tileMode == 0xa) { + auto src = reinterpret_cast(sourceData); + auto dst = reinterpret_cast(data); + + int tilerIndex = surfaceTiler::precalculateTiles(width, height); + + for (std::uint64_t y = 0; y < height; ++y) { + for (std::uint64_t x = 0; x < width; ++x) { + std::memcpy( + dst + x + y * width, + src + surfaceTiler::getTiledElementByteOffset(tilerIndex, x, y), + sizeof(std::uint32_t)); + } + } + } else if (sourceData != nullptr && tileMode != 0) { + std::fprintf(stderr, "Unsupported tile mode %x\n", tileMode); + std::memcpy(data, sourceData, imageSize); + } +} + +static void writeImage(void *destinationData, const void *data, int tileMode, + SurfaceFormat format, std::uint32_t width, + std::uint32_t height, std::uint32_t depth) { + auto pixelSize = (getBitWidthOfSurfaceFormat(format) + 7) / 8; + + if (pixelSize == 0) { + pixelSize = 4; + } + + auto bufferSize = width * height * depth * pixelSize; + + if (tileMode == 8) { + std::memcpy(destinationData, data, bufferSize); + } else if (tileMode == 0xa) { + auto dst = reinterpret_cast(destinationData); + auto src = reinterpret_cast(data); + + int tilerIndex = + surfaceTiler::precalculateTiles(screenScissorW, screenScissorH); + + for (std::uint64_t y = 0; y < height; ++y) { + for (std::uint64_t x = 0; x < width; ++x) { + std::memcpy( + dst + surfaceTiler::getTiledElementByteOffset(tilerIndex, x, y), + src + x + y * width, sizeof(std::uint32_t)); + } + } + } else if (destinationData != nullptr && tileMode != 0) { + std::fprintf(stderr, "Unsupported tile mode %x\n", tileMode); + std::memcpy(destinationData, data, bufferSize); + } +} + +static void writeImageBuffer(void *destinationData, const Buffer &buffer, + int tileMode, SurfaceFormat format, + std::uint32_t width, std::uint32_t height, + std::uint32_t depth) { + auto pixelSize = (getBitWidthOfSurfaceFormat(format) + 7) / 8; + + if (pixelSize == 0) { + pixelSize = 4; + } + + auto bufferSize = width * height * depth * pixelSize; + auto memory = buffer.getMemory(); + writeImage(destinationData, memory.data, tileMode, format, width, height, + depth); +} + +class Image2D { + VkImage mImage = VK_NULL_HANDLE; + VkFormat mFormat = {}; + VkImageAspectFlags mAspects = {}; + VkImageLayout mLayout = {}; + unsigned mWidth = 0; + unsigned mHeight = 0; + DeviceMemoryRef mMemory; + + Image2D &operator=(const Image2D &) = default; + +public: + Image2D(const Image2D &) = delete; + + Image2D() = default; + Image2D(Image2D &&other) { *this = std::move(other); } + + ~Image2D() { + if (mImage != nullptr) { + vkDestroyImage(g_vkDevice, mImage, g_vkAllocator); + } + } + + Image2D &operator=(Image2D &&other) { + *this = other; + other.mImage = nullptr; + return *this; + } + + Image2D(uint32_t width, uint32_t height, VkFormat format, + VkImageUsageFlags usage, + VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL, + VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT, + VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE, + uint32_t mipLevels = 1, uint32_t arrayLevels = 1, + VkImageLayout initialLayout = VK_IMAGE_LAYOUT_UNDEFINED) { + VkImageCreateInfo imageInfo{}; + imageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + imageInfo.imageType = VK_IMAGE_TYPE_2D; + imageInfo.extent.width = width; + imageInfo.extent.height = height; + imageInfo.extent.depth = 1; + imageInfo.mipLevels = mipLevels; + imageInfo.arrayLayers = arrayLevels; + imageInfo.format = format; + imageInfo.tiling = tiling; + imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imageInfo.usage = usage; + imageInfo.samples = samples; + imageInfo.sharingMode = sharingMode; + + mFormat = format; + + if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + mAspects |= VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + } else { + mAspects |= VK_IMAGE_ASPECT_COLOR_BIT; + } + + mLayout = initialLayout; + mWidth = width; + mHeight = height; + + Verify() << vkCreateImage(g_vkDevice, &imageInfo, nullptr, &mImage); + } + + static Image2D + Allocate(MemoryResource &pool, uint32_t width, uint32_t height, + VkFormat format, VkImageUsageFlags usage, + VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL, + VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT, + VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE, + uint32_t mipLevels = 1, uint32_t arrayLevels = 1, + VkImageLayout initialLayout = VK_IMAGE_LAYOUT_UNDEFINED) { + + Image2D result(width, height, format, usage, tiling, samples, sharingMode, + mipLevels, arrayLevels, initialLayout); + + result.allocateAndBind(pool); + return result; + } + + VkImage getHandle() const { return mImage; } + + VkMemoryRequirements getMemoryRequirements() const { + VkMemoryRequirements requirements{}; + vkGetImageMemoryRequirements(g_vkDevice, mImage, &requirements); + return requirements; + } + + void allocateAndBind(MemoryResource &pool) { + auto memory = pool.allocate(getMemoryRequirements()); + bindMemory(memory); + } + + void bindMemory(DeviceMemoryRef memory) { + mMemory = memory; + Verify() << vkBindImageMemory(g_vkDevice, mImage, memory.deviceMemory, + memory.offset); + } + + void readFromBuffer(DrawContext &ctxt, const Buffer &buffer, + VkImageAspectFlags destAspect) { + transitionLayout(ctxt, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + copyBufferToImage(ctxt.commandPool, ctxt.queue, mImage, buffer.getHandle(), + mWidth, mHeight, 0, 0, 0, destAspect); + } + + void writeToBuffer(DrawContext &ctxt, const Buffer &buffer, + VkImageAspectFlags sourceAspect) { + transitionLayout(ctxt, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + copyImageToBuffer(ctxt.commandPool, ctxt.queue, mImage, buffer.getHandle(), + mWidth, mHeight, 0, 0, 0, sourceAspect); + } + + void read(MemoryResource &pool, DrawContext &ctxt, const void *address, + int tileMode, VkImageAspectFlags destAspect) { + auto transferBuffer = Buffer::Allocate( + pool, mWidth * mHeight * 4, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT); + + readImageToDeviceMemory(transferBuffer.getMemory().data, address, tileMode, + SurfaceFormat::kSurfaceFormat8_8_8_8, // TODO + mWidth, mHeight, 1); + + readFromBuffer(ctxt, transferBuffer, destAspect); + } + + void write(MemoryResource &pool, DrawContext &ctxt, void *address, + int tileMode, VkImageAspectFlags sourceAspect) { + auto transferBuffer = Buffer::Allocate( + pool, mWidth * mHeight * 4, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT); + + writeToBuffer(ctxt, transferBuffer, sourceAspect); + + writeImageBuffer(address, transferBuffer, tileMode, + SurfaceFormat::kSurfaceFormat8_8_8_8, // TODO + mWidth, mHeight, 1); + } + + void transitionLayout(DrawContext &ctxt, VkImageLayout newLayout) { + if (mLayout == newLayout) { + return; + } + + transitionImageLayout(ctxt.commandPool, ctxt.queue, mImage, mAspects, + mLayout, newLayout); + + mLayout = newLayout; + } + + DeviceMemoryRef getMemory() const { return mMemory; } +}; + +struct DirectMemory { + MemoryResource memoryResource; + Buffer buffer; +}; + +static std::map> directMemory; + +static DirectMemory &getDirectMemory(std::uint64_t address, std::size_t size, + std::uint64_t *beginAddress = nullptr) { + auto it = directMemory.lower_bound(address); + + if (it == directMemory.end() || + it->second.memoryResource.getSize() < address) { + auto zone = memoryZoneTable.queryZone(address / kPageSize); + zone.beginAddress *= kPageSize; + zone.endAddress *= kPageSize; + + auto newResource = MemoryResource::CreateFromHost( + (char *)g_rwMemory + zone.beginAddress - g_memoryBase, + zone.endAddress - zone.beginAddress); + + it = directMemory.emplace_hint(it, zone.beginAddress, + DirectMemory{std::move(newResource)}); + } + + if (beginAddress != nullptr) { + *beginAddress = it->first; + } + return it->second; +} + +struct BufferRef { + Buffer *buffer = nullptr; + VkDeviceSize offset = 0; + VkDeviceSize size = 0; +}; + +static BufferRef getDirectBuffer(std::uint64_t address, std::size_t size, + VkBufferUsageFlags usage) { + std::uint64_t beginAddress; + auto &dm = getDirectMemory(address, size, &beginAddress); + + if (dm.buffer == nullptr) { + dm.buffer = Buffer::CreateExternal(dm.memoryResource.getSize(), + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT); + dm.buffer.bindMemory( + dm.memoryResource.getFromOffset(0, dm.memoryResource.getSize())); + } + + return {&dm.buffer, address - beginAddress, size}; +} + +void updateDirectState() { + auto zones = std::move(memoryZoneTable.invalidatedZones); + memoryZoneTable.invalidatedZones = {}; + auto it = directMemory.begin(); + + if (it == directMemory.end()) { + return; + } + + for (auto zone : zones) { + while (it->first > zone) { + if (++it == directMemory.end()) { + return; + } + } + + if (it->first == zone) { + it = directMemory.erase(it); + + if (it == directMemory.end()) { + return; + } + } + } +} + +MemoryResource hostVisibleMemory; +MemoryResource deviceLocalMemory; + +static MemoryResource &getHostVisibleMemory() { + if (!hostVisibleMemory) { + hostVisibleMemory = MemoryResource::CreateHostVisible(1024 * 1024 * 512); + } + + return hostVisibleMemory; +} + +static MemoryResource &getDeviceLocalMemory() { + if (!deviceLocalMemory) { + deviceLocalMemory = MemoryResource::CreateDeviceLocal(1024 * 1024 * 512); + } + + return deviceLocalMemory; +} + +struct RenderState { + DrawContext &ctxt; + amdgpu::RemoteMemory memory; + + struct StoreUniformInfo { + std::uint64_t dstAddress; + std::uint64_t size; + VkDeviceMemory memory; + }; + + StoreUniformInfo storeUniforms[16]; + std::size_t storeUniformsCount = 0; + + std::vector usedMemory; + + std::vector vertexBindings; + std::vector vertexAttrs; + std::vector descriptorSetLayoutBindings; + std::vector writeDescriptorSets; + std::vector buffers; + std::vector descriptorBufferInfos; + std::vector descriptorImageInfos; + std::forward_list images; + std::forward_list buffers2; + std::vector imageViews; + std::vector samplers; + + ~RenderState() { + for (auto buffer : buffers) { + vkDestroyBuffer(g_vkDevice, buffer, nullptr); + } + + for (auto view : imageViews) { + vkDestroyImageView(g_vkDevice, view, nullptr); + } + + images.clear(); + + for (auto sampler : samplers) { + vkDestroySampler(g_vkDevice, sampler, nullptr); + } + + for (auto memory : usedMemory) { + vkFreeMemory(g_vkDevice, memory, nullptr); + } + } + + std::vector + loadShader(shader::Stage stage, std::uint64_t address, + std::uint32_t *userSgprs, std::size_t userSgprsCount, + int &bindingOffset, std::uint32_t dimX = 1, std::uint32_t dimY = 1, + std::uint32_t dimZ = 1) { + auto shader = shader::convert( + memory, stage, address, + std::span(userSgprs, userSgprsCount), + bindingOffset, dimX, dimY, dimZ); + + if (!validateSpirv(shader.spirv)) { + printSpirv(shader.spirv); + dumpShader(memory.getPointer(address)); + util::unreachable(); + } + + if (auto opt = optimizeSpirv(shader.spirv)) { + shader.spirv = std::move(*opt); + } + // printSpirv(shader.spirv); + + // if (stage == shader::Stage::Compute) { + // dumpShader(memory.getPointer(address)); + // printSpirv(shader.spirv); + // } + + bindingOffset += shader.uniforms.size(); + + auto vkStage = shaderStageToVk(stage); + + descriptorBufferInfos.reserve(64); + descriptorImageInfos.reserve(64); + + for (auto &uniform : shader.uniforms) { + VkDescriptorType descriptorType; + switch (uniform.kind) { + case shader::Shader::UniformKind::Buffer: { + descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + + auto vbuffer = reinterpret_cast(uniform.buffer); + auto size = vbuffer->getSize(); + if (size == 0) { + size = 0x10; + } + + auto storageBuffer = getDirectBuffer( + vbuffer->getAddress(), size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT); + + descriptorBufferInfos.push_back(descriptorBufferInfo( + storageBuffer.buffer->getHandle(), storageBuffer.offset, size)); + + writeDescriptorSets.push_back( + writeDescriptorSetBuffer(nullptr, descriptorType, uniform.binding, + &descriptorBufferInfos.back(), 1)); + break; + } + + case shader::Shader::UniformKind::Image: { + descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + + auto tbuffer = reinterpret_cast(uniform.buffer); + auto dataFormat = tbuffer->dfmt; + auto channelType = tbuffer->nfmt; + auto colorFormat = surfaceFormatToVkFormat(dataFormat, channelType); + std::printf("tbuffer address = %lx (%lx), width=%u, " + "height=%u,pitch=%u,type=%u,tiling_idx=%u\n", + tbuffer->getAddress(), tbuffer->baseaddr256, tbuffer->width, + tbuffer->height, tbuffer->pitch, (unsigned)tbuffer->type, + tbuffer->tiling_idx); + std::fflush(stdout); + + assert(tbuffer->width == tbuffer->pitch); + + auto &image = images.emplace_front(Image2D::Allocate( + deviceLocalMemory, tbuffer->width + 1, tbuffer->height + 1, + colorFormat, + VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)); + + image.read(hostVisibleMemory, ctxt, + memory.getPointer(tbuffer->getAddress()), + tbuffer->tiling_idx, VK_IMAGE_ASPECT_COLOR_BIT); + + auto imageView = + createImageView2D(image.getHandle(), colorFormat, {}, + imageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT)); + imageViews.push_back(imageView); + + descriptorImageInfos.push_back( + descriptorImageInfo(VK_NULL_HANDLE, imageView, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)); + + writeDescriptorSets.push_back( + writeDescriptorSetImage(nullptr, descriptorType, uniform.binding, + &descriptorImageInfos.back(), 1)); + + image.transitionLayout(ctxt, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + break; + } + + case shader::Shader::UniformKind::Sampler: { + VkSamplerCreateInfo samplerInfo{}; + // TODO: load S# sampler + samplerInfo.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + samplerInfo.magFilter = VK_FILTER_LINEAR; + samplerInfo.minFilter = VK_FILTER_LINEAR; + samplerInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + samplerInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + samplerInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + samplerInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + samplerInfo.mipLodBias = 0.0f; + samplerInfo.compareOp = VK_COMPARE_OP_NEVER; + samplerInfo.minLod = 0.0f; + samplerInfo.maxLod = 0.0f; + samplerInfo.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + samplerInfo.maxAnisotropy = 1.0; + samplerInfo.anisotropyEnable = VK_FALSE; + + VkSampler sampler; + Verify() << vkCreateSampler(g_vkDevice, &samplerInfo, nullptr, + &sampler); + samplers.push_back(sampler); + + descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + + descriptorImageInfos.push_back(descriptorImageInfo( + sampler, VK_NULL_HANDLE, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)); + + writeDescriptorSets.push_back( + writeDescriptorSetImage(nullptr, descriptorType, uniform.binding, + &descriptorImageInfos.back(), 1)); + break; + } + } + + descriptorSetLayoutBindings.push_back(createDescriptorSetLayoutBinding( + uniform.binding, 1, descriptorType, vkStage)); + } + + return std::move(shader.spirv); + } + + void uploadUniforms() { + for (std::size_t i = 0; i < storeUniformsCount; ++i) { + auto uniform = storeUniforms[i]; + void *data; + vkMapMemory(g_vkDevice, uniform.memory, 0, uniform.size, 0, &data); + std::memcpy(memory.getPointer(uniform.dstAddress), data, uniform.size); + vkUnmapMemory(g_vkDevice, uniform.memory); + } + + storeUniformsCount = 0; + } + + void eliminateFastClear() { + // TODO + // util::unreachable(); + } + + void resolve() { + // TODO: when texture cache will be implemented it MSAA should be done by + // GPU + auto srcBuffer = colorBuffers[0]; + auto dstBuffer = colorBuffers[1]; + + const auto src = memory.getPointer(srcBuffer.base); + auto dst = memory.getPointer(dstBuffer.base); + + if (src == nullptr || dst == nullptr) { + return; + } + + std::memcpy(dst, src, screenScissorH * screenScissorW * 4); + } + + void draw(std::uint32_t count, std::uint64_t indeciesAddress, + std::uint32_t indexCount) { + if (cbColorFormat == CbColorFormat::Disable) { + return; + } + + if (cbColorFormat == CbColorFormat::EliminateFastClear) { + eliminateFastClear(); + return; + } + + if (cbColorFormat == CbColorFormat::Resolve) { + resolve(); + return; + } + + if (pgmVsAddress == 0 || pgmPsAddress == 0) { + return; + } + + if (cbRenderTargetMask == 0 || colorBuffers[0].base == 0) { + return; + } + + updateDirectState(); + + getHostVisibleMemory().clear(); + getDeviceLocalMemory().clear(); + + depthClearEnable = true; + + auto primType = static_cast(vgtPrimitiveType); + + int bindingOffset = 0; + auto vertexShader = loadShader(shader::Stage::Vertex, pgmVsAddress, + userVsData, vsUserSpgrs, bindingOffset); + auto fragmentShader = loadShader(shader::Stage::Fragment, pgmPsAddress, + userPsData, psUserSpgrs, bindingOffset); + + auto colorFormat = VK_FORMAT_R8G8B8A8_SRGB; // TODO + auto depthFormat = VK_FORMAT_D32_SFLOAT_S8_UINT; // TODO + + std::vector colorImages; + std::vector framebufferAttachments; + + for (auto targetMask = cbRenderTargetMask; + auto &colorBuffer : colorBuffers) { + if (targetMask == 0 || colorBuffer.base == 0) { + break; + } + + if ((targetMask & 0xf) == 0) { + targetMask >>= 4; + continue; + } + + targetMask >>= 4; + + // TODO: implement MRT + if (!colorImages.empty()) { + std::printf("MRT!\n"); + break; + } + + auto colorImage = Image2D::Allocate(deviceLocalMemory, screenScissorW, + screenScissorH, colorFormat, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT); + colorImage.read(hostVisibleMemory, ctxt, + memory.getPointer(colorBuffer.base), + colorBuffer.tileModeIndex, VK_IMAGE_ASPECT_COLOR_BIT); + + colorImage.transitionLayout(ctxt, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + + auto colorImageView = + createImageView2D(colorImage.getHandle(), colorFormat, {}, + imageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT)); + + colorImages.push_back(std::move(colorImage)); + framebufferAttachments.push_back(colorImageView); + } + + auto depthImage = Image2D::Allocate( + deviceLocalMemory, screenScissorW, screenScissorH, depthFormat, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + (depthClearEnable || zReadBase == 0 + ? 0 + : VK_IMAGE_USAGE_TRANSFER_DST_BIT)); + + if (!depthClearEnable && zReadBase) { + depthImage.read(hostVisibleMemory, ctxt, memory.getPointer(zReadBase), 8, + VK_IMAGE_ASPECT_DEPTH_BIT); + } + + depthImage.transitionLayout( + ctxt, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + + auto depthImageView = + createImageView2D(depthImage.getHandle(), depthFormat, {}, + imageSubresourceRange(VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + + auto renderPass = createRenderPass(colorFormat, depthFormat); + + framebufferAttachments.push_back(depthImageView); + auto framebuffer = createFramebuffer( + renderPass, {screenScissorW, screenScissorH}, framebufferAttachments); + + ShaderModule shader{}; + + shader.descriptorSetLayout = + createDescriptorSetLayout(descriptorSetLayoutBindings); + + shader.descriptorPool = + createDescriptorPool(64, std::array{createDescriptorPoolSize( + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 64)}); + + auto descriptorSet = createDescriptorSet(&shader); + + for (auto &writeSet : writeDescriptorSets) { + writeSet.dstSet = descriptorSet; + } + + updateDescriptorSets(writeDescriptorSets); + + shader.pipelineLayout = createPipelineLayout(shader.descriptorSetLayout); + + std::vector shaders; + + shaders.push_back(createPipelineShaderStage(ctxt, vertexShader, + VK_SHADER_STAGE_VERTEX_BIT)); + + if (primType == kPrimitiveTypeRectList) { + shaders.push_back(createPipelineShaderStage( + ctxt, spirv_rect_list_geom, VK_SHADER_STAGE_GEOMETRY_BIT)); + } + + shaders.push_back(createPipelineShaderStage(ctxt, fragmentShader, + VK_SHADER_STAGE_FRAGMENT_BIT)); + + shader.pipeline = createGraphicsPipeline( + {screenScissorW, screenScissorH}, shader.pipelineLayout, renderPass, + ctxt.pipelineCache, + createPipelineVertexInputState(vertexBindings, vertexAttrs), + getVkPrimitiveType(primType), shaders); + + VkCommandBufferAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + allocInfo.commandPool = ctxt.commandPool; + allocInfo.commandBufferCount = 1; + + VkCommandBuffer commandBuffer; + Verify() << vkAllocateCommandBuffers(g_vkDevice, &allocInfo, + &commandBuffer); + + VkCommandBufferBeginInfo beginInfo{}; + beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + Verify() << vkBeginCommandBuffer(commandBuffer, &beginInfo); + + VkClearValue clearValues[2]; + clearValues[0].color = {{1.f, 1.f, 1.f, 1.0f}}; + clearValues[1].depthStencil = {depthClear, 0}; + + VkRenderPassBeginInfo renderPassBeginInfo{}; + renderPassBeginInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + renderPassBeginInfo.renderPass = renderPass; + renderPassBeginInfo.framebuffer = framebuffer; + renderPassBeginInfo.renderArea.extent = {screenScissorW, screenScissorH}; + renderPassBeginInfo.clearValueCount = 2; + renderPassBeginInfo.pClearValues = clearValues; + + vkCmdBeginRenderPass(commandBuffer, &renderPassBeginInfo, + VK_SUBPASS_CONTENTS_INLINE); + + vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + shader.pipeline); + + VkViewport viewport{}; + viewport.x = screenScissorX; + viewport.y = (float)screenScissorH - screenScissorY; + viewport.width = screenScissorW; + viewport.height = -(float)screenScissorH; + viewport.minDepth = 0.0f; + viewport.maxDepth = 1.0f; + vkCmdSetViewport(commandBuffer, 0, 1, &viewport); + + VkRect2D scissor{}; + scissor.extent.width = screenScissorW; + scissor.extent.height = screenScissorH; + scissor.offset.x = screenScissorX; + scissor.offset.y = screenScissorY; + vkCmdSetScissor(commandBuffer, 0, 1, &scissor); + + vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + shader.pipelineLayout, 0, 1, &descriptorSet, 0, + nullptr); + + Buffer indexBufferStorage; + BufferRef indexBuffer; + auto needConversion = isPrimRequiresConversion(primType); + VkIndexType vkIndexType = + (indexType & 0x1f) == 0 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32; + + if (needConversion) { + auto indecies = memory.getPointer(indeciesAddress); + if (indecies == nullptr) { + indexCount = count; + } + + unsigned origIndexSize = vkIndexType == VK_INDEX_TYPE_UINT16 ? 16 : 32; + auto converterFn = getPrimConverterFn(primType, &indexCount); + + if (indecies == nullptr) { + if (indexCount < 0x10000) { + vkIndexType = VK_INDEX_TYPE_UINT16; + } else if (indecies) { + vkIndexType = VK_INDEX_TYPE_UINT32; + } + } + + unsigned indexSize = vkIndexType == VK_INDEX_TYPE_UINT16 ? 16 : 32; + auto indexBufferSize = indexSize * indexCount; + + indexBufferStorage = Buffer::Allocate( + hostVisibleMemory, indexBufferSize, + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT); + + void *data = indexBufferStorage.getMemory().data; + + if (indecies == nullptr) { + if (indexSize == 16) { + for (std::uint32_t i = 0; i < indexCount; ++i) { + auto [dstIndex, srcIndex] = converterFn(i); + ((std::uint16_t *)data)[dstIndex] = srcIndex; + } + } else { + for (std::uint32_t i = 0; i < indexCount; ++i) { + auto [dstIndex, srcIndex] = converterFn(i); + ((std::uint32_t *)data)[dstIndex] = srcIndex; + } + } + } else { + if (indexSize == 16) { + for (std::uint32_t i = 0; i < indexCount; ++i) { + auto [dstIndex, srcIndex] = converterFn(i); + std::uint32_t origIndex = + origIndexSize == 16 ? ((std::uint16_t *)indecies)[srcIndex] + : ((std::uint32_t *)indecies)[srcIndex]; + ((std::uint16_t *)data)[dstIndex] = origIndex; + } + + } else { + for (std::uint32_t i = 0; i < indexCount; ++i) { + auto [dstIndex, srcIndex] = converterFn(i); + std::uint32_t origIndex = + origIndexSize == 16 ? ((std::uint16_t *)indecies)[srcIndex] + : ((std::uint32_t *)indecies)[srcIndex]; + ((std::uint32_t *)data)[dstIndex] = origIndex; + } + } + } + + indexBuffer = {&indexBufferStorage, 0, indexBufferSize}; + } else if (indeciesAddress != 0) { + unsigned indexSize = vkIndexType == VK_INDEX_TYPE_UINT16 ? 16 : 32; + auto indexBufferSize = indexSize * indexCount; + indexBuffer = getDirectBuffer(indeciesAddress, indexBufferSize, + VK_BUFFER_USAGE_INDEX_BUFFER_BIT); + } + + if (indexBuffer.buffer == nullptr) { + vkCmdDraw(commandBuffer, count, 1, 0, 0); + } else { + vkCmdBindIndexBuffer(commandBuffer, indexBufferStorage.getHandle(), + indexBuffer.offset, vkIndexType); + vkCmdDrawIndexed(commandBuffer, indexCount, 1, 0, 0, 0); + } + + vkCmdEndRenderPass(commandBuffer); + vkEndCommandBuffer(commandBuffer); + + VkSubmitInfo submitInfo{}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &commandBuffer; + + Verify() << vkQueueSubmit(ctxt.queue, 1, &submitInfo, nullptr); + Verify() << vkQueueWaitIdle(ctxt.queue); + + for (std::size_t i = 0, end = colorImages.size(); i < end; ++i) { + auto &colorImage = colorImages[i]; + auto &colorBuffer = colorBuffers[i]; + + colorImage.write(hostVisibleMemory, ctxt, + memory.getPointer(colorBuffer.base), + colorBuffer.tileModeIndex, VK_IMAGE_ASPECT_COLOR_BIT); + } + + // TODO: implement mrt support + + if (depthWriteEnable && zWriteBase != 0) { + depthImage.write(hostVisibleMemory, ctxt, memory.getPointer(zWriteBase), + 8, VK_IMAGE_ASPECT_DEPTH_BIT); + } + + uploadUniforms(); + + shader.destroy(); + + vkDestroyFramebuffer(g_vkDevice, framebuffer, nullptr); + vkDestroyRenderPass(g_vkDevice, renderPass, nullptr); + + for (auto attachment : framebufferAttachments) { + vkDestroyImageView(g_vkDevice, attachment, nullptr); + } + } + + void dispatch(std::size_t dimX, std::size_t dimY, std::size_t dimZ) { + getHostVisibleMemory().clear(); + getDeviceLocalMemory().clear(); + updateDirectState(); + + int bindingOffset = 0; + + auto computeShader = + loadShader(shader::Stage::Compute, pgmComputeAddress, userComputeData, + computeUserSpgrs, bindingOffset, computeNumThreadX, + computeNumThreadY, computeNumThreadZ); + ShaderModule shader{}; + + shader.descriptorSetLayout = + createDescriptorSetLayout(descriptorSetLayoutBindings); + + shader.descriptorPool = + createDescriptorPool(64, std::array{createDescriptorPoolSize( + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 64)}); + + auto descriptorSet = createDescriptorSet(&shader); + + for (auto &writeSet : writeDescriptorSets) { + writeSet.dstSet = descriptorSet; + } + + updateDescriptorSets(writeDescriptorSets); + + shader.pipelineLayout = createPipelineLayout(shader.descriptorSetLayout); + + shader.pipeline = createComputePipeline( + shader.pipelineLayout, + createPipelineShaderStage(ctxt, computeShader, + VK_SHADER_STAGE_COMPUTE_BIT)); + + VkCommandBufferAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + allocInfo.commandPool = ctxt.commandPool; + allocInfo.commandBufferCount = 1; + + VkCommandBuffer commandBuffer; + vkAllocateCommandBuffers(g_vkDevice, &allocInfo, &commandBuffer); + + VkCommandBufferBeginInfo beginInfo{}; + beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + vkBeginCommandBuffer(commandBuffer, &beginInfo); + + vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, + shader.pipeline); + vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, + shader.pipelineLayout, 0, 1, &descriptorSet, 0, + nullptr); + vkCmdDispatch(commandBuffer, dimX, dimY, dimZ); + vkEndCommandBuffer(commandBuffer); + + VkSubmitInfo submitInfo{}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &commandBuffer; + + Verify() << vkQueueSubmit(ctxt.queue, 1, &submitInfo, nullptr); + Verify() << vkQueueWaitIdle(ctxt.queue); + + uploadUniforms(); + + shader.destroy(); + } +}; +} // namespace amdgpu::device + +void amdgpu::device::draw(RemoteMemory memory, DrawContext &ctxt, + std::uint32_t count, std::uint64_t indeciesAddress, + std::uint32_t indexCount) { + RenderState{ctxt, memory}.draw(count, indeciesAddress, indexCount); +} + +void amdgpu::device::dispatch(RemoteMemory memory, DrawContext &ctxt, + std::size_t dimX, std::size_t dimY, + std::size_t dimZ) { + + RenderState{ctxt, memory}.dispatch(dimX, dimY, dimZ); +} + +enum class EventWriteSource : std::uint8_t { + Immediate32 = 0x1, + Immediate64 = 0x2, + GlobalClockCounter = 0x3, + GpuCoreClockCounter = 0x4, +}; + +struct EopData { + std::uint32_t eventType; + std::uint32_t eventIndex; + std::uint64_t address; + std::uint64_t value; + std::uint8_t dstSel; + std::uint8_t intSel; + EventWriteSource eventSource; +}; + +static std::uint64_t globalClock() { + // TODO + return 0x0; +} + +static std::uint64_t gpuCoreClock() { + // TODO + return 0x0; +} + +static void writeEop(amdgpu::RemoteMemory memory, EopData data) { + switch (data.eventSource) { + case EventWriteSource::Immediate32: { + *memory.getPointer(data.address) = data.value; + break; + } + case EventWriteSource::Immediate64: { + *memory.getPointer(data.address) = data.value; + break; + } + case EventWriteSource::GlobalClockCounter: { + *memory.getPointer(data.address) = globalClock(); + break; + } + case EventWriteSource::GpuCoreClockCounter: { + *memory.getPointer(data.address) = gpuCoreClock(); + break; + } + } +} + +static void drawIndexAuto(amdgpu::RemoteMemory memory, + amdgpu::device::DrawContext &ctxt, + std::uint32_t count) { + draw(memory, ctxt, count, 0, 0); +} + +static void drawIndex2(amdgpu::RemoteMemory memory, + amdgpu::device::DrawContext &ctxt, + std::uint32_t maxSize, std::uint64_t address, + std::uint32_t count) { + draw(memory, ctxt, count, address, maxSize); +} + +void amdgpu::device::handleCommandBuffer(RemoteMemory memory, + DrawContext &ctxt, + std::uint32_t *cmds, + std::uint32_t count) { + bool log = true; + for (std::uint32_t cmdOffset = 0; cmdOffset < count; ++cmdOffset) { + auto cmd = cmds[cmdOffset]; + auto type = getBits(cmd, 31, 30); + + if (type == 0) { + std::printf("!packet type 0!\n"); + auto baseIndex = getBits(cmd, 15, 0); + auto count = getBits(cmd, 29, 16); + std::printf("-- %04x: %08x: baseIndex=%x, count=%d\n", cmdOffset, cmd, + baseIndex, count); + cmdOffset += count; + } else if (type == 1) { + std::printf("Unexpected packet type 1!\n"); + } else if (type == 2) { + std::printf("!packet type 2!\n"); + continue; + } else if (type == 3) { + auto predicate = getBit(cmd, 0); + auto shaderType = getBit(cmd, 1); + auto op = getBits(cmd, 15, 8); + auto len = getBits(cmd, 29, 16) + 1; + + if (log) { + std::printf("-- %04x: %08x: %s len=%d, shaderType = %cX\n", cmdOffset, + cmd, opcodeToString(op).c_str(), len, + 'G' ^ (shaderType << 1)); + + for (std::uint32_t offset = 0; offset < len; ++offset) { + std::printf(" %04x: %08x\n", cmdOffset + 1 + offset, + cmds[cmdOffset + 1 + offset]); + } + } + + switch (op) { + case kOpcodeLOAD_CONST_RAM: { + std::uint64_t addressLo = cmds[cmdOffset + 1]; + std::uint64_t addressHi = cmds[cmdOffset + 2]; + std::uint32_t numDw = getBits(cmds[cmdOffset + 3], 14, 0); + std::uint32_t offset = getBits(cmds[cmdOffset + 4], 15, 0); + if (log) { + std::printf(" ` address=%lx, numDw = %x, offset=%x\n", + addressLo | (addressHi << 32), numDw, offset); + } + break; + } + + case kOpcodeSET_UCONFIG_REG: { + std::uint32_t baseRegOffset = 0xc000 + cmds[cmdOffset + 1]; + + for (std::uint32_t regOffset = 0; regOffset < len - 1; ++regOffset) { + if (log) { + std::printf(" %04x: %04x: %s = 0x%08x\n", + cmdOffset + 2 + regOffset, + (baseRegOffset + regOffset) << 2, + registerToString(baseRegOffset + regOffset).c_str(), + cmds[cmdOffset + 2 + regOffset]); + } + + setRegister(baseRegOffset + regOffset, + cmds[cmdOffset + 2 + regOffset]); + } + break; + } + + case kOpcodeSET_CONTEXT_REG: { + std::uint32_t baseRegOffset = 0xa000 + cmds[cmdOffset + 1]; + + for (std::uint32_t regOffset = 0; regOffset < len - 1; ++regOffset) { + if (log) { + std::printf(" %04x: %04x: %s = 0x%08x\n", + cmdOffset + 2 + regOffset, + (baseRegOffset + regOffset) << 2, + registerToString(baseRegOffset + regOffset).c_str(), + cmds[cmdOffset + 2 + regOffset]); + } + setRegister(baseRegOffset + regOffset, + cmds[cmdOffset + 2 + regOffset]); + } + break; + } + + case kOpcodeSET_SH_REG: { + std::uint32_t baseRegOffset = 0x2c00 + cmds[cmdOffset + 1]; + + for (std::uint32_t regOffset = 0; regOffset < len - 1; ++regOffset) { + if (log) { + std::printf(" %04x: %04x: %s = 0x%08x\n", + cmdOffset + 2 + regOffset, + (baseRegOffset + regOffset) << 2, + registerToString(baseRegOffset + regOffset).c_str(), + cmds[cmdOffset + 2 + regOffset]); + } + + setRegister(baseRegOffset + regOffset, + cmds[cmdOffset + 2 + regOffset]); + } + break; + } + + case kOpcodeWRITE_DATA: { + auto control = cmds[cmdOffset + 1]; + auto destAddrLo = cmds[cmdOffset + 2]; + auto destAddrHi = cmds[cmdOffset + 3]; + auto data = cmds + cmdOffset + 4; + auto size = len - 3; + + // 0 - Micro Engine - ME + // 1 - Prefetch parser - PFP + // 2 - Constant engine - CE + // 3 - Dispatch engine - DE + auto engineSel = getBits(control, 31, 30); + + // wait for confirmation that write complete + auto wrConfirm = getBit(control, 20); + + // do not increment address + auto wrOneAddr = getBit(control, 16); + + // 0 - mem-mapped register + // 1 - memory sync + // 2 - tc/l2 + // 3 - gds + // 4 - reserved + // 5 - memory async + auto dstSel = getBits(control, 11, 8); + + auto memMappedRegisterAddress = getBits(destAddrLo, 15, 0); + auto memory32bit = getBits(destAddrLo, 31, 2); + auto memory64bit = getBits(destAddrLo, 31, 3); + auto gdsOffset = getBits(destAddrLo, 15, 0); + + if (log) { + std::printf(" %04x: control=%x [engineSel=%d, " + "wrConfirm=%d,wrOneAddr=%d, dstSel=%d]\n", + cmdOffset + 1, control, engineSel, wrConfirm, wrOneAddr, + dstSel); + + std::printf(" %04x: destAddrLo=%x " + "[memory32bit=%x,memory64bit=%x,gdsOffset=%x]\n", + cmdOffset + 2, destAddrLo, memory32bit, memory64bit, + gdsOffset); + + std::printf(" %04x: destAddrHi=%x\n", cmdOffset + 3, destAddrHi); + + for (std::uint32_t offset = 4; offset < len; ++offset) { + std::printf(" %04x: %08x\n", cmdOffset + offset, + cmds[cmdOffset + offset]); + } + } + auto address = + destAddrLo | (static_cast(destAddrHi) << 32); + auto dest = memory.getPointer(address); + if (log) { + std::printf(" address=%lx\n", address); + } + for (unsigned i = 0; i < size; ++i) { + dest[i] = data[i]; + } + + break; + } + + case kOpcodeINDEX_TYPE: { + indexType = cmds[cmdOffset + 1]; + break; + } + + case kOpcodeDRAW_INDEX_AUTO: { + drawIndexAuto(memory, ctxt, cmds[cmdOffset + 1]); + break; + } + + case kOpcodeDRAW_INDEX_2: { + auto maxSize = cmds[cmdOffset + 1]; + auto address = cmds[cmdOffset + 2] | + (static_cast(cmds[cmdOffset + 3]) << 32); + auto count = cmds[cmdOffset + 4]; + drawIndex2(memory, ctxt, maxSize, address, count); + break; + } + + case kOpcodeDISPATCH_DIRECT: { + auto dimX = cmds[cmdOffset + 1]; + auto dimY = cmds[cmdOffset + 2]; + auto dimZ = cmds[cmdOffset + 3]; + if (log) { + std::printf(" %04x: DIM X=%u\n", cmdOffset + 1, dimX); + std::printf(" %04x: DIM Y=%u\n", cmdOffset + 2, dimY); + std::printf(" %04x: DIM Z=%u\n", cmdOffset + 3, dimZ); + } + dispatch(memory, ctxt, dimX, dimY, dimZ); + } + + case kOpcodeEVENT_WRITE_EOP: { + EopData eopData{}; + eopData.eventType = getBits(cmds[cmdOffset + 1], 6, 0); + eopData.eventIndex = getBits(cmds[cmdOffset + 1], 12, 8); + eopData.address = + cmds[cmdOffset + 2] | + (static_cast(getBits(cmds[cmdOffset + 3], 16, 0)) + << 32); + eopData.value = cmds[cmdOffset + 4] | + (static_cast(cmds[cmdOffset + 5]) << 32); + eopData.dstSel = 0; + eopData.intSel = getBits(cmds[cmdOffset + 3], 26, 24); + eopData.eventSource = + static_cast(getBits(cmds[cmdOffset + 3], 32, 29)); + writeEop(memory, eopData); + break; + } + + case kOpcodeEVENT_WRITE_EOS: { + std::uint32_t eventType = getBits(cmds[cmdOffset + 1], 6, 0); + std::uint32_t eventIndex = getBits(cmds[cmdOffset + 1], 12, 8); + std::uint64_t address = + cmds[cmdOffset + 2] | + (static_cast(getBits(cmds[cmdOffset + 3], 16, 0)) + << 32); + std::uint32_t command = getBits(cmds[cmdOffset + 3], 32, 16); + + if (log) { + std::printf("address = %#lx\n", address); + std::printf("command = %#x\n", command); + } + if (command == 0x4000) { // store 32bit data + *memory.getPointer(address) = cmds[cmdOffset + 4]; + } else { + util::unreachable(); + } + + break; + } + + case kOpcodeWAIT_REG_MEM: { + auto function = cmds[cmdOffset + 1] & 7; + auto pollAddressLo = cmds[cmdOffset + 2]; + auto pollAddressHi = cmds[cmdOffset + 3]; + auto reference = cmds[cmdOffset + 4]; + auto mask = cmds[cmdOffset + 5]; + auto pollInterval = cmds[cmdOffset + 5]; + + auto pollAddress = + pollAddressLo | (static_cast(pollAddressHi) << 32); + auto pointer = memory.getPointer(pollAddress); + + reference &= mask; + + auto compare = [&](std::uint32_t value, std::uint32_t reference, + int function) { + switch (function) { + case 0: + return true; + case 1: + return value < reference; + case 2: + return value <= reference; + case 3: + return value == reference; + case 4: + return value != reference; + case 5: + return value >= reference; + case 6: + return value > reference; + } + + util::unreachable(); + }; + + if (log) { + std::printf(" polling address %lx, reference = %x, function = %u\n", + pollAddress, reference, function); + std::fflush(stdout); + } + while (true) { + auto value = *pointer & mask; + + if (compare(value, reference, function)) { + break; + } + } + break; + } + + case kOpcodeNOP: + if (log) { + for (std::uint32_t offset = 0; offset < len; ++offset) { + std::printf(" %04x: %08x\n", cmdOffset + 1 + offset, + cmds[cmdOffset + 1 + offset]); + } + } + break; + default: + for (std::uint32_t offset = 0; offset < len; ++offset) { + std::printf(" %04x: %08x\n", cmdOffset + 1 + offset, + cmds[cmdOffset + 1 + offset]); + } + break; + } + + cmdOffset += len; + } + } +} + +void amdgpu::device::AmdgpuDevice::handleSetUpSharedMemory( + std::uint64_t address, std::uint64_t size, std::uint64_t internalSize, + const char *name) { + std::printf("setup shared memory %s: %lx-%lx (internal size is %lx)\n", name, + address, address + size, internalSize); + + memoryFd = ::shm_open(name, O_RDWR, S_IRUSR | S_IWUSR); + + internalMemory = (char *)::mmap(nullptr, internalSize, PROT_READ | PROT_WRITE, + MAP_SHARED, memoryFd, 0); + + memory = RemoteMemory{(char *)::mmap(nullptr, size, PROT_NONE, MAP_SHARED, + memoryFd, internalSize)}; + + memorySize = size; +} +void amdgpu::device::AmdgpuDevice::handleSetFlipStatus( + std::uint64_t shmOffset) { + std::printf("flip status at %lx\n", shmOffset); + //flipStatus = new (internalMemory + shmOffset) amdgpu::bridge::FlipStatus; +} +void amdgpu::device::AmdgpuDevice::handleProtectMemory( + std::uint64_t address, std::uint64_t size, std::uint32_t prot) { + ::mprotect(memory.getPointer(address), size, prot >> 4); +} +void amdgpu::device::AmdgpuDevice::handleCommandBuffer( + std::uint64_t address, std::uint64_t size) { + auto count = size / sizeof(std::uint32_t); + + std::printf("address = %lx, count = %lx\n", address, count); + + amdgpu::device::handleCommandBuffer( + memory, mDc, memory.getPointer(address), count); +} + +void amdgpu::device::AmdgpuDevice::handleFlip(std::uint32_t bufferIndex, + std::uint64_t arg) { + std::printf("requested flip %d\n", bufferIndex); + + auto &buffer = renderBuffers[bufferIndex]; + + /* + if (buffer.pitch == 0 || buffer.height == 0 || buffer.memory == nullptr) { + return; + } +*/ + + flipWasRequested = true; + + VkImageLayout oldImageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + if (buffer.vkDataPoiner == nullptr) { + auto vkImage = createImage2D( + buffer.width, buffer.height, VK_FORMAT_R8G8B8A8_SRGB, + VK_IMAGE_TILING_LINEAR, + VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT); + + VkMemoryRequirements memRequirements; + vkGetImageMemoryRequirements(g_vkDevice, vkImage, &memRequirements); + + auto imageMemory = + allocateAndBindImage(vkImage, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + + buffer.vkImage = vkImage; + buffer.vkImageMemory = imageMemory; + + Verify() << vkMapMemory(g_vkDevice, imageMemory, 0, memRequirements.size, 0, + &buffer.vkDataPoiner); + + buffer.vkImageView = + createImageView2D(vkImage, VK_FORMAT_R8G8B8A8_SRGB, {}, + imageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT)); + + transitionImageLayout(mDc.commandPool, mDc.queue, buffer.vkImage, + VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + oldImageLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + /* + buffer.shader = false && buffer.tilingMode != 0 + ? &mDrawTiledTextureShader + : findShader(overlay::ShaderId::DrawTexture); + buffer.vkDescriptorSet = createDescriptorSet(mDc.device, buffer.shader); + + updateDescriptorSets( + mDc.device, std::array{writeDescriptorSetImage( + buffer.vkDescriptorSet, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 0, + std::array{descriptorImageInfo( + mTextureSampler, buffer.vkImageView, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)})}); + */ + } + + std::uint32_t *internalBuffer = + reinterpret_cast(buffer.vkDataPoiner); + + if (buffer.tilingMode != 0) { + auto src = reinterpret_cast(buffer.memory); + + int tilerIndex = amdgpu::device::surfaceTiler::precalculateTiles( + buffer.width, buffer.height); + + for (std::size_t y = 0; y < buffer.height; ++y) { + for (std::size_t x = 0; x < buffer.width; ++x) { + std::memcpy( + internalBuffer + y * buffer.pitch + x, + src + amdgpu::device::surfaceTiler::getTiledElementByteOffset( + tilerIndex, x, y), + sizeof(std::uint32_t)); + } + } + } else { + std::memcpy(internalBuffer, buffer.memory, + buffer.pitch * buffer.height * sizeof(std::uint32_t)); + } + + /* + copyBufferToImage(mGraphicsCommandPool, mGraphicsQueue, buffer.vkBuffer, + buffer.vkImage, buffer.width, buffer.height, 0, + buffer.pitch, buffer.height); +*/ + if (oldImageLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + transitionImageLayout(mDc.commandPool, mDc.queue, buffer.vkImage, + VK_IMAGE_ASPECT_COLOR_BIT, oldImageLayout, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + } + + transitionImageLayout(mDc.commandPool, mDc.queue, buffer.vkImage, + VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + // std::fprintf(stderr, "Buffer %d %dx%d (%d)\n", bufferIndex, buffer.width, + // buffer.height, buffer.pitch); + + currentBuffer = bufferIndex; + flipArg = arg; + ++flipCount; +} + +void amdgpu::device::AmdgpuDevice::handleDoFlip() {} + +void amdgpu::device::AmdgpuDevice::handleSetBuffer( + std::uint32_t bufferIndex, std::uint64_t address, std::uint32_t width, + std::uint32_t height, std::uint32_t pitch, std::uint32_t pixelFormat, + std::uint32_t tilingMode) { + std::printf("set buffer: %x at %lx. %dx%d (%d)\n", bufferIndex, address, + width, height, pitch); + + auto &buffer = renderBuffers[bufferIndex]; + buffer.memory = memory.getPointer(address); + buffer.pitch = pitch; + buffer.width = width; + buffer.height = height; + buffer.pixelFormat = pixelFormat; + buffer.tilingMode = tilingMode; +} + +void amdgpu::device::AmdgpuDevice::updateFlipStatus() {} + +amdgpu::device::AmdgpuDevice::AmdgpuDevice( + amdgpu::device::DrawContext dc) + : mDc(dc) { + mBridgeCommandBuffer = amdgpu::bridge::createShmCommandBuffer("/amdgpu-cmds"); + + if (mBridgeCommandBuffer == nullptr) { + util::unreachable("Failed to create shm command buffer\n"); + } + + mBridge = amdgpu::bridge::BridgePuller(mBridgeCommandBuffer); +} + +amdgpu::device::AmdgpuDevice::~AmdgpuDevice() { + if (memoryFd != -1) { + ::close(memoryFd); + } + + amdgpu::bridge::destroyShmCommandBuffer(mBridgeCommandBuffer); + amdgpu::bridge::unlinkShm("/amdgpu-cmds"); +} diff --git a/hw/amdgpu/device/src/rect_list.geom.glsl b/hw/amdgpu/device/src/rect_list.geom.glsl new file mode 100644 index 000000000..84123923c --- /dev/null +++ b/hw/amdgpu/device/src/rect_list.geom.glsl @@ -0,0 +1,40 @@ +#version 450 + +layout (triangles) in; +layout (triangle_strip, max_vertices = 4) out; + +void main(void) +{ + vec4 topLeft = gl_in[0].gl_Position; + vec4 right = gl_in[1].gl_Position; + vec4 bottomLeft = gl_in[2].gl_Position; + + vec4 topRight = vec4( + right.x, + topLeft.y, + topLeft.z, + topLeft.w + ); + + vec4 bottomRight = vec4( + right.x, + bottomLeft.y, + topLeft.z, + topLeft.w + ); + + + gl_Position = topLeft; + EmitVertex(); + + gl_Position = bottomLeft; + EmitVertex(); + + gl_Position = topRight; + EmitVertex(); + + gl_Position = bottomRight; + EmitVertex(); + + EndPrimitive(); +} diff --git a/hw/amdgpu/include/amdgpu/RemoteMemory.hpp b/hw/amdgpu/include/amdgpu/RemoteMemory.hpp new file mode 100644 index 000000000..9097e5d3e --- /dev/null +++ b/hw/amdgpu/include/amdgpu/RemoteMemory.hpp @@ -0,0 +1,12 @@ +#pragma once +#include + +namespace amdgpu { +struct RemoteMemory { + char *shmPointer; + + template T *getPointer(std::uint64_t address) const { + return address ? reinterpret_cast(shmPointer + address) : nullptr; + } +}; +} // namespace amdgpu diff --git a/hw/amdgpu/include/util/SourceLocation.hpp b/hw/amdgpu/include/util/SourceLocation.hpp new file mode 100644 index 000000000..1275fbfd4 --- /dev/null +++ b/hw/amdgpu/include/util/SourceLocation.hpp @@ -0,0 +1,31 @@ +#pragma once + +namespace util { +class SourceLocation { +public: + const char *mFileName = {}; + const char *mFunctionName = {}; + unsigned mLine = 0; + unsigned mColumn = 0; + +public: + constexpr SourceLocation(const char *fileName = __builtin_FILE(), + const char *functionName = __builtin_FUNCTION(), + unsigned line = __builtin_LINE(), + unsigned column = +#if __has_builtin(__builtin_COLUMN) + __builtin_COLUMN() +#else + 0 +#endif + ) noexcept + : mFileName(fileName), mFunctionName(functionName), mLine(line), + mColumn(column) { + } + + constexpr unsigned line() const noexcept { return mLine; } + constexpr unsigned column() const noexcept { return mColumn; } + constexpr const char *file_name() const noexcept { return mFileName; } + constexpr const char *function_name() const noexcept { return mFunctionName; } +}; +} // namespace util diff --git a/hw/amdgpu/include/util/Verify.hpp b/hw/amdgpu/include/util/Verify.hpp new file mode 100644 index 000000000..84ed86289 --- /dev/null +++ b/hw/amdgpu/include/util/Verify.hpp @@ -0,0 +1,26 @@ +#pragma once + +#include "SourceLocation.hpp" +#include "unreachable.hpp" + +class Verify { + util::SourceLocation mLocation; + +public: + util::SourceLocation location() const { + return mLocation; + } + + Verify(util::SourceLocation location = util::SourceLocation()) + : mLocation(location) {} + + Verify &operator<<(bool result) { + if (!result) { + util::unreachable("Verification failed at %s: %s:%u:%u", + mLocation.function_name(), mLocation.file_name(), + mLocation.line(), mLocation.column()); + } + + return *this; + } +}; diff --git a/hw/amdgpu/include/util/unreachable.hpp b/hw/amdgpu/include/util/unreachable.hpp new file mode 100644 index 000000000..50d9facfc --- /dev/null +++ b/hw/amdgpu/include/util/unreachable.hpp @@ -0,0 +1,29 @@ +#pragma once + +#include "SourceLocation.hpp" +#include +#include + +namespace util { +[[noreturn]] inline void unreachable_impl() { std::fflush(stdout); __builtin_trap(); } + +[[noreturn]] inline void unreachable(SourceLocation location = {}) { + std::printf("\n"); + std::fflush(stdout); + std::fprintf(stderr, "Unreachable at %s:%u:%u %s\n", location.file_name(), + location.line(), location.column(), location.function_name()); + unreachable_impl(); +} + +[[noreturn]] inline void unreachable(const char *fmt, ...) { + std::printf("\n"); + std::fflush(stdout); + va_list list; + va_start(list, fmt); + std::vfprintf(stderr, fmt, list); + va_end(list); + std::fprintf(stderr, "\n"); + + unreachable_impl(); +} +} // namespace util diff --git a/hw/amdgpu/lib/libspirv/CMakeLists.txt b/hw/amdgpu/lib/libspirv/CMakeLists.txt new file mode 100644 index 000000000..12f815c78 --- /dev/null +++ b/hw/amdgpu/lib/libspirv/CMakeLists.txt @@ -0,0 +1,4 @@ +project(spirv) + +add_library(${PROJECT_NAME} INTERFACE) +target_include_directories(${PROJECT_NAME} INTERFACE include) diff --git a/hw/amdgpu/lib/libspirv/include/spirv/GLSL.std.450.h b/hw/amdgpu/lib/libspirv/include/spirv/GLSL.std.450.h new file mode 100644 index 000000000..54cc00e9a --- /dev/null +++ b/hw/amdgpu/lib/libspirv/include/spirv/GLSL.std.450.h @@ -0,0 +1,131 @@ +/* +** Copyright (c) 2014-2016 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +#ifndef GLSLstd450_H +#define GLSLstd450_H + +static const int GLSLstd450Version = 100; +static const int GLSLstd450Revision = 3; + +enum GLSLstd450 { + GLSLstd450Bad = 0, // Don't use + + GLSLstd450Round = 1, + GLSLstd450RoundEven = 2, + GLSLstd450Trunc = 3, + GLSLstd450FAbs = 4, + GLSLstd450SAbs = 5, + GLSLstd450FSign = 6, + GLSLstd450SSign = 7, + GLSLstd450Floor = 8, + GLSLstd450Ceil = 9, + GLSLstd450Fract = 10, + + GLSLstd450Radians = 11, + GLSLstd450Degrees = 12, + GLSLstd450Sin = 13, + GLSLstd450Cos = 14, + GLSLstd450Tan = 15, + GLSLstd450Asin = 16, + GLSLstd450Acos = 17, + GLSLstd450Atan = 18, + GLSLstd450Sinh = 19, + GLSLstd450Cosh = 20, + GLSLstd450Tanh = 21, + GLSLstd450Asinh = 22, + GLSLstd450Acosh = 23, + GLSLstd450Atanh = 24, + GLSLstd450Atan2 = 25, + + GLSLstd450Pow = 26, + GLSLstd450Exp = 27, + GLSLstd450Log = 28, + GLSLstd450Exp2 = 29, + GLSLstd450Log2 = 30, + GLSLstd450Sqrt = 31, + GLSLstd450InverseSqrt = 32, + + GLSLstd450Determinant = 33, + GLSLstd450MatrixInverse = 34, + + GLSLstd450Modf = 35, // second operand needs an OpVariable to write to + GLSLstd450ModfStruct = 36, // no OpVariable operand + GLSLstd450FMin = 37, + GLSLstd450UMin = 38, + GLSLstd450SMin = 39, + GLSLstd450FMax = 40, + GLSLstd450UMax = 41, + GLSLstd450SMax = 42, + GLSLstd450FClamp = 43, + GLSLstd450UClamp = 44, + GLSLstd450SClamp = 45, + GLSLstd450FMix = 46, + GLSLstd450IMix = 47, // Reserved + GLSLstd450Step = 48, + GLSLstd450SmoothStep = 49, + + GLSLstd450Fma = 50, + GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to + GLSLstd450FrexpStruct = 52, // no OpVariable operand + GLSLstd450Ldexp = 53, + + GLSLstd450PackSnorm4x8 = 54, + GLSLstd450PackUnorm4x8 = 55, + GLSLstd450PackSnorm2x16 = 56, + GLSLstd450PackUnorm2x16 = 57, + GLSLstd450PackHalf2x16 = 58, + GLSLstd450PackDouble2x32 = 59, + GLSLstd450UnpackSnorm2x16 = 60, + GLSLstd450UnpackUnorm2x16 = 61, + GLSLstd450UnpackHalf2x16 = 62, + GLSLstd450UnpackSnorm4x8 = 63, + GLSLstd450UnpackUnorm4x8 = 64, + GLSLstd450UnpackDouble2x32 = 65, + + GLSLstd450Length = 66, + GLSLstd450Distance = 67, + GLSLstd450Cross = 68, + GLSLstd450Normalize = 69, + GLSLstd450FaceForward = 70, + GLSLstd450Reflect = 71, + GLSLstd450Refract = 72, + + GLSLstd450FindILsb = 73, + GLSLstd450FindSMsb = 74, + GLSLstd450FindUMsb = 75, + + GLSLstd450InterpolateAtCentroid = 76, + GLSLstd450InterpolateAtSample = 77, + GLSLstd450InterpolateAtOffset = 78, + + GLSLstd450NMin = 79, + GLSLstd450NMax = 80, + GLSLstd450NClamp = 81, + + GLSLstd450Count +}; + +#endif // #ifndef GLSLstd450_H diff --git a/hw/amdgpu/lib/libspirv/include/spirv/spirv-builder.hpp b/hw/amdgpu/lib/libspirv/include/spirv/spirv-builder.hpp new file mode 100644 index 000000000..63122000d --- /dev/null +++ b/hw/amdgpu/lib/libspirv/include/spirv/spirv-builder.hpp @@ -0,0 +1,2120 @@ +#pragma once + +#include "spirv.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace spirv { +struct Id { + unsigned id{}; + + Id() = default; + explicit Id(unsigned value) : id(value) {} + + explicit operator unsigned() const { + assert(id != 0); + return id; + } + explicit operator bool() const { return id != 0; } + + bool operator==(Id other) const { return id == other.id; } + bool operator!=(Id other) const { return id != other.id; } + bool operator<(Id other) const { return id < other.id; } + bool operator>(Id other) const { return id > other.id; } + bool operator<=(Id other) const { return id <= other.id; } + bool operator>=(Id other) const { return id >= other.id; } +}; + +struct Type : Id {}; +struct ScalarType : Type {}; +struct VoidType : Type {}; +struct BoolType : ScalarType {}; +struct IntType : ScalarType {}; +struct SIntType : IntType {}; +struct UIntType : IntType {}; +struct FloatType : ScalarType {}; +struct VectorType : Type {}; +struct MatrixType : Type {}; +struct SamplerType : Type {}; +struct ImageType : Type {}; +struct SampledImageType : Type {}; +struct ArrayType : Type {}; +struct RuntimeArrayType : Type {}; +struct StructType : Type {}; +struct PointerType : Type {}; +struct FunctionType : Type {}; + +struct ExtInstSet : Id {}; +struct Function : Id {}; +struct Block : Id {}; +struct Value : Id {}; + +struct BoolValue : Value {}; +struct IntValue : Value {}; +struct SIntValue : IntValue {}; +struct UIntValue : IntValue {}; +struct FloatValue : Value {}; +struct StructValue : Value {}; +struct PointerValue : Value {}; +struct VectorValue : Value {}; +struct ArrayValue : Value {}; +struct SamplerValue : Value {}; +struct ImageValue : Value {}; +struct SampledImageValue : Value {}; + +template + requires(std::is_base_of_v) +struct ConstantValue : T {}; + +struct AnyConstantValue : Value { + AnyConstantValue() = default; + + template AnyConstantValue(ConstantValue specialization) { + id = specialization.id; + } + + template + AnyConstantValue &operator=(ConstantValue specialization) { + id = specialization.id; + return *this; + } + + template explicit operator ConstantValue() { + ConstantValue result; + result.id = id; + return result; + } +}; + +template + requires(std::is_base_of_v) +struct VectorOfType : VectorType {}; + +template + requires(std::is_base_of_v) +struct ArrayOfType : ArrayType {}; + +template + requires(std::is_base_of_v) +struct VectorOfValue : VectorValue {}; + +template + requires(std::is_base_of_v) +struct ArrayOfValue : ArrayValue {}; + +template + requires(std::is_base_of_v) +struct PointerToType : PointerType {}; + +template + requires(std::is_base_of_v) +struct PointerToValue : PointerValue {}; + +struct StructPointerValue : Value {}; + +struct VariableValue : PointerValue {}; + +namespace detail { +template struct TypeToValueImpl; + +template <> struct TypeToValueImpl { + using type = Value; +}; +template <> struct TypeToValueImpl { + using type = BoolValue; +}; +template <> struct TypeToValueImpl { + using type = IntValue; +}; +template <> struct TypeToValueImpl { + using type = SIntValue; +}; +template <> struct TypeToValueImpl { + using type = UIntValue; +}; +template <> struct TypeToValueImpl { + using type = FloatValue; +}; +template <> struct TypeToValueImpl { + using type = StructValue; +}; +template <> struct TypeToValueImpl { + using type = PointerValue; +}; +template <> struct TypeToValueImpl { + using type = PointerValue; +}; +template <> struct TypeToValueImpl { + using type = VectorValue; +}; +template <> struct TypeToValueImpl { + using type = ArrayValue; +}; +template <> struct TypeToValueImpl { + using type = SamplerValue; +}; +template <> struct TypeToValueImpl { + using type = ImageValue; +}; +template <> struct TypeToValueImpl { + using type = SampledImageValue; +}; + +template struct TypeToValueImpl> { + using type = PointerToValue; +}; +template struct TypeToValueImpl> { + using type = VectorOfValue; +}; + +template struct TypeToValueImpl> { + using type = ArrayOfValue; +}; +} // namespace detail + +template +using TypeToValue = typename detail::TypeToValueImpl::type; + +template + requires(std::is_base_of_v) +struct ScalarOrVectorOfValue : Value { + ScalarOrVectorOfValue() = default; + + ScalarOrVectorOfValue(TypeToValue scalar) { id = scalar.id; } + ScalarOrVectorOfValue(VectorOfValue vector) { id = vector.id; } +}; + +using ConstantBool = ConstantValue; +using ConstantSInt = ConstantValue; +using ConstantUInt = ConstantValue; +using ConstantInt = ConstantValue; +using ConstantFloat = ConstantValue; + +template + requires(std::is_base_of_v && std::is_base_of_v) +ToT cast(FromT from) { + ToT result; + result.id = from.id; + return result; +} + +inline unsigned calcStringWordCount(std::string_view string) { + return (string.length() + 1 + (sizeof(std::uint32_t) - 1)) / + sizeof(std::uint32_t); +} + +using IdUsesTackerType = + std::unordered_map>; +using IdDefTackerType = std::unordered_map; + +class RegionPusher { + IdUsesTackerType *mIdUses = nullptr; + IdDefTackerType *mIdDefs = nullptr; + std::uint32_t *mBeginPtr = nullptr; + std::uint32_t *mPtr = nullptr; + std::size_t mCount = 0; + + RegionPusher &operator=(const RegionPusher &) = default; + +public: + RegionPusher() = default; + RegionPusher(const RegionPusher &) = delete; + RegionPusher(std::uint32_t *beginPtr, std::uint32_t *ptr, std::size_t count, + IdUsesTackerType *idUses, IdDefTackerType *idDefs) + : mIdUses(idUses), mIdDefs(idDefs), mBeginPtr(beginPtr), mPtr(ptr), + mCount(count) {} + RegionPusher(RegionPusher &&other) { *this = std::move(other); } + + RegionPusher &operator=(RegionPusher &&other) { + *this = other; + other.mCount = 0; + return *this; + } + + ~RegionPusher() { assert(mCount == 0); } + + void pushWord(unsigned word) { + assert(mCount > 0); + *mPtr++ = word; + --mCount; + } + + void pushIdDef(Id id) { + assert(id); + (*mIdDefs)[id.id] = mPtr - mBeginPtr; + pushWord(id.id); + } + + void pushIdUse(Id id) { + assert(id); + (*mIdUses)[id.id].push_back(mPtr - mBeginPtr); + + pushWord(id.id); + } + + void pushString(std::string_view string) { + auto nwords = calcStringWordCount(string); + assert(mCount >= nwords); + + auto dst = reinterpret_cast(mPtr); + std::memcpy(dst, string.data(), string.length()); + std::memset(dst + string.length(), 0, + nwords * sizeof(std::uint32_t) - string.length()); + mPtr += nwords; + mCount -= nwords; + } +}; + +struct IdGenerator { + std::uint32_t bounds = 1; + + template + requires(std::is_base_of_v) + T newId() { + T result; + result.id = bounds++; + return result; + } + + Id newId() { + Id result; + result.id = bounds++; + return result; + } + + void reset() { bounds = 1; } +}; + +class RegionPoint { + const std::vector *mData = nullptr; + std::size_t mOffset = 0; + +public: + RegionPoint() = default; + RegionPoint(const std::vector *data, std::size_t offset) + : mData(data), mOffset(offset) {} + + std::span operator-(RegionPoint other) const { + assert(mData == other.mData); + assert(mOffset >= other.mOffset); + + return {other.mData->data() + other.mOffset, mData->data() + mOffset}; + } +}; + +class Region { + std::vector mData; + IdUsesTackerType mIdUses; + IdDefTackerType mIdDefs; + +public: + Region() = default; + Region(std::size_t expInstCount) { mData.reserve(expInstCount); } + + void clear() { mData.clear(); } + + const std::uint32_t *data() const { return mData.data(); } + std::size_t size() const { return mData.size(); } + + RegionPoint getCurrentPosition() const { return {&mData, mData.size()}; } + + RegionPusher pushOp(spv::Op op, unsigned wordCount) { + assert(wordCount >= 1); + auto offset = mData.size(); + mData.resize(mData.size() + wordCount); + RegionPusher pusher(mData.data(), mData.data() + offset, wordCount, + &mIdUses, &mIdDefs); + pusher.pushWord((static_cast(op) & spv::OpCodeMask) | + (wordCount << spv::WordCountShift)); + + return pusher; + } + + void pushRegion(const Region &other) { + auto offset = mData.size(); + mData.resize(mData.size() + other.size()); + std::memcpy(mData.data() + offset, other.data(), + other.size() * sizeof(std::uint32_t)); + + for (auto &[id, def] : mIdDefs) { + mIdDefs[id] = offset + def; + } + + for (auto &[id, uses] : mIdUses) { + auto &idUses = mIdUses[id]; + idUses.reserve(idUses.size() + uses.size()); + + for (auto use : uses) { + idUses.push_back(offset + use); + } + } + } + + void recreateDefs(std::unordered_map &remap, + IdGenerator &generator) { + auto prevDefs = std::move(mIdDefs); + mIdDefs = {}; + + for (auto [id, def] : prevDefs) { + auto newId = generator.newId().id; + + remap[id] = newId; + mData[def] = newId; + mIdDefs[newId] = def; + } + } + + void + remapUses(const std::unordered_map &remap) { + auto prevUses = std::move(mIdUses); + mIdUses = {}; + + for (auto &[id, uses] : prevUses) { + auto it = remap.find(id); + assert(it != remap.end()); + auto newId = it->second; + + for (auto &use : uses) { + mData[use] = newId; + } + + mIdUses[newId] = std::move(uses); + } + } +}; + +class BlockBuilder { + IdGenerator *mIdGenerator = nullptr; + + template auto newId() -> decltype(mIdGenerator->newId()) { + return mIdGenerator->newId(); + } + +public: + Block id; + Region prefix; + Region phiRegion; + Region variablesRegion; + Region bodyRegion; + Region terminatorRegion; + + BlockBuilder() = default; + BlockBuilder(IdGenerator &idGenerator, Block id, + std::size_t expInstructionsCount) + : mIdGenerator(&idGenerator), bodyRegion{expInstructionsCount}, + terminatorRegion{1}, id(id) {} + + void moveBlock(BlockBuilder &&other) { + prefix.pushRegion(other.prefix); + { + auto region = prefix.pushOp(spv::Op::OpLabel, 2); + region.pushIdDef(id); + } + prefix.pushRegion(phiRegion); + prefix.pushRegion(bodyRegion); + prefix.pushRegion(terminatorRegion); + + id = other.id; + phiRegion = std::move(other.phiRegion); + variablesRegion.pushRegion(other.variablesRegion); + bodyRegion = std::move(other.bodyRegion); + terminatorRegion = std::move(other.terminatorRegion); + } + + Value createExtInst(Type resultType, ExtInstSet set, + std::uint32_t instruction, + std::span operands) { + auto region = bodyRegion.pushOp(spv::Op::OpExtInst, 5 + operands.size()); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(set); + region.pushWord(instruction); + for (auto operand : operands) { + region.pushIdUse(operand); + } + return id; + } + + VariableValue createVariable(Type type, spv::StorageClass storageClass, + std::optional initializer = {}) { + auto region = variablesRegion.pushOp(spv::Op::OpVariable, + 4 + (initializer.has_value() ? 1 : 0)); + auto id = newId(); + region.pushIdUse(type); + region.pushIdDef(id); + region.pushWord(static_cast(storageClass)); + if (initializer.has_value()) { + region.pushIdUse(initializer.value()); + } + return id; + } + + Value createFunctionCall(Type resultType, Function function, + std::span arguments) { + auto region = + bodyRegion.pushOp(spv::Op::OpFunctionCall, 4 + arguments.size()); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(function); + for (auto argument : arguments) { + region.pushIdUse(argument); + } + return id; + } + + // composite + Value createVectorExtractDynamic(Type resultType, Value vector, + IntValue index) { + auto region = bodyRegion.pushOp(spv::Op::OpVectorExtractDynamic, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(vector); + region.pushIdUse(index); + return id; + } + + Value createVectorInsertDynamic(Type resultType, Value vector, + Value component, IntValue index) { + auto region = bodyRegion.pushOp(spv::Op::OpVectorInsertDynamic, 6); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(vector); + region.pushIdUse(component); + region.pushIdUse(index); + return id; + } + + Value createVectorShuffle(Type resultType, Value vector1, Value vector2, + std::span components) { + auto region = + bodyRegion.pushOp(spv::Op::OpVectorShuffle, 5 + components.size()); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(vector1); + region.pushIdUse(vector2); + + for (auto component : components) { + region.pushWord(component); + } + return id; + } + + template + TypeToValue createCompositeConstruct(T resultType, + std::span constituents) { + auto region = bodyRegion.pushOp(spv::Op::OpCompositeConstruct, + 3 + constituents.size()); + auto id = newId>(); + region.pushIdUse(resultType); + region.pushIdDef(id); + + for (auto constituent : constituents) { + region.pushIdUse(constituent); + } + return id; + } + + Value createCompositeExtract(Type resultType, Value composite, + std::span indexes) { + auto region = + bodyRegion.pushOp(spv::Op::OpCompositeExtract, 4 + indexes.size()); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(composite); + + for (auto index : indexes) { + region.pushWord(index); + } + return id; + } + + // arithmetic + template + requires(std::is_base_of_v) + TypeToValue createInst(spv::Op op, T resultType, + std::span> operands) { + auto region = bodyRegion.pushOp(op, 3 + operands.size()); + auto id = newId>(); + region.pushIdUse(resultType); + region.pushIdDef(id); + for (auto operand : operands) { + region.pushIdUse(operand); + } + return id; + } + + Value createInst(spv::Op op, Type resultType, + std::span operands) { + auto region = bodyRegion.pushOp(op, 3 + operands.size()); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + for (auto operand : operands) { + region.pushIdUse(operand); + } + return id; + } + + template + VectorOfValue createInst(spv::Op op, VectorOfType resultType, + std::span> operands) { + auto region = bodyRegion.pushOp(op, 3 + operands.size()); + auto id = newId>(); + region.pushIdUse(resultType); + region.pushIdDef(id); + for (auto operand : operands) { + region.pushIdUse(operand); + } + return id; + } + + template + requires(std::is_same_v || + std::is_same_v, T>) + TypeToValue createSNegate(T resultType, TypeToValue operand) { + return createInst(spv::Op::OpSNegate, resultType, std::array{operand}); + } + + template + requires(std::is_same_v || + std::is_same_v, T>) + TypeToValue createFNegate(T resultType, TypeToValue operand) { + return createInst(spv::Op::OpFNegate, resultType, std::array{operand}); + } + + template + requires(std::is_same_v || std::is_base_of_v || + std::is_same_v, T> || + std::is_same_v, T> || + std::is_same_v, T>) + TypeToValue createIAdd(T resultType, TypeToValue operand1, + TypeToValue operand2) { + return createInst(spv::Op::OpIAdd, resultType, + std::array{operand1, operand2}); + } + + template + requires(std::is_same_v || + std::is_same_v, T>) + TypeToValue createFAdd(T resultType, TypeToValue operand1, + TypeToValue operand2) { + return createInst(spv::Op::OpFAdd, resultType, + std::array{operand1, operand2}); + } + + template + requires(std::is_same_v || std::is_base_of_v || + std::is_same_v, T> || + std::is_same_v, T> || + std::is_same_v, T>) + TypeToValue createISub(T resultType, TypeToValue operand1, + TypeToValue operand2) { + return createInst(spv::Op::OpISub, resultType, + std::array{operand1, operand2}); + } + + template + requires(std::is_same_v || + std::is_same_v, T>) + TypeToValue createFSub(T resultType, TypeToValue operand1, + TypeToValue operand2) { + return createInst(spv::Op::OpFSub, resultType, + std::array{operand1, operand2}); + } + + template + requires(std::is_same_v || std::is_base_of_v || + std::is_same_v, T> || + std::is_same_v, T> || + std::is_same_v, T>) + TypeToValue createIMul(T resultType, TypeToValue operand1, + TypeToValue operand2) { + return createInst(spv::Op::OpIMul, resultType, + std::array{operand1, operand2}); + } + + template + requires(std::is_same_v || + std::is_same_v, T>) + TypeToValue createFMul(T resultType, TypeToValue operand1, + TypeToValue operand2) { + return createInst(spv::Op::OpFMul, resultType, + std::array{operand1, operand2}); + } + + template + requires(std::is_same_v || + std::is_same_v, T>) + TypeToValue createUDiv(T resultType, TypeToValue operand1, + TypeToValue operand2) { + return createInst(spv::Op::OpUDiv, resultType, + std::array{operand1, operand2}); + } + template + requires(std::is_same_v || + std::is_same_v, T>) + TypeToValue createSDiv(T resultType, TypeToValue operand1, + TypeToValue operand2) { + return createInst(spv::Op::OpSDiv, resultType, + std::array{operand1, operand2}); + } + + template + requires(std::is_same_v || + std::is_same_v, T>) + TypeToValue createFDiv(T resultType, TypeToValue operand1, + TypeToValue operand2) { + return createInst(spv::Op::OpFDiv, resultType, + std::array{operand1, operand2}); + } + template + requires(std::is_same_v || + std::is_same_v, T>) + TypeToValue createUMod(T resultType, TypeToValue operand1, + TypeToValue operand2) { + return createInst(spv::Op::OpUMod, resultType, + std::array{operand1, operand2}); + } + template + requires(std::is_same_v || + std::is_same_v, T>) + TypeToValue createSRem(T resultType, TypeToValue operand1, + TypeToValue operand2) { + return createInst(spv::Op::OpSRem, resultType, + std::array{operand1, operand2}); + } + + template + requires(std::is_same_v || + std::is_same_v, T>) + TypeToValue createSMod(T resultType, TypeToValue operand1, + TypeToValue operand2) { + return createInst(spv::Op::OpSMod, resultType, + std::array{operand1, operand2}); + } + template + requires(std::is_same_v || + std::is_same_v, T>) + TypeToValue createFRem(T resultType, TypeToValue operand1, + TypeToValue operand2) { + return createInst(spv::Op::OpFRem, resultType, + std::array{operand1, operand2}); + } + template + requires(std::is_same_v || + std::is_same_v, T>) + TypeToValue createFMod(T resultType, TypeToValue operand1, + TypeToValue operand2) { + return createInst(spv::Op::OpFMod, resultType, + std::array{operand1, operand2}); + } + + Value createIAddCarry(Type resultType, Value operand1, Value operand2) { + return createInst(spv::Op::OpIAddCarry, resultType, + std::array{operand1, operand2}); + } + + Value createISubBorrow(Type resultType, Value operand1, Value operand2) { + return createInst(spv::Op::OpISubBorrow, resultType, + std::array{operand1, operand2}); + } + + Value createUMulExtended(Type resultType, Value operand1, Value operand2) { + return createInst(spv::Op::OpUMulExtended, resultType, + std::array{operand1, operand2}); + } + + Value createSMulExtended(Type resultType, Value operand1, Value operand2) { + return createInst(spv::Op::OpSMulExtended, resultType, + std::array{operand1, operand2}); + } + + Value createPhi(Type resultType, + std::span> values) { + auto region = phiRegion.pushOp(spv::Op::OpPhi, 3 + values.size() * 2); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + for (auto [variable, block] : values) { + region.pushIdUse(variable); + region.pushIdUse(block); + } + return id; + } + + void addBlockToPhis(spirv::Block block, + std::span values) { + auto phi = phiRegion.data(); + spirv::Region newPhi(phiRegion.size() * 2); + + assert(block); + + for (std::size_t i = 0, end = phiRegion.size(), index = 0; i < end; + index++) { + auto opWordCount = phi[i]; + + assert(static_cast(static_cast(opWordCount) & + spv::OpCodeMask) == spv::Op::OpPhi); + auto wordCount = + static_cast(opWordCount) >> spv::WordCountShift; + auto newOp = newPhi.pushOp(spv::Op::OpPhi, wordCount + 2); + + for (std::size_t j = 1; j < wordCount; ++j) { + newOp.pushWord(phi[i + j]); + } + + i += wordCount; + + assert(index < values.size()); + assert(values[index]); + + newOp.pushIdUse(values[index]); + newOp.pushIdUse(block); + } + + phiRegion = std::move(newPhi); + } + + void moveVariablesFrom(BlockBuilder &otherBlock) { + variablesRegion.pushRegion(otherBlock.variablesRegion); + otherBlock.variablesRegion.clear(); + } + + template + requires(std::is_base_of_v) + TypeToValue createPhi(T resultType, + std::span> values) { + return cast>( + createPhi(static_cast(resultType), values)); + } + + void createLoopMerge(Block mergeBlock, Block continueTarget, + spv::LoopControlMask loopControl, + std::span loopControlParameters) { + auto region = terminatorRegion.pushOp(spv::Op::OpLoopMerge, + 4 + loopControlParameters.size()); + region.pushIdUse(mergeBlock); + region.pushIdUse(continueTarget); + region.pushWord(static_cast(loopControl)); + + for (auto loopControlParameter : loopControlParameters) { + region.pushWord(static_cast(loopControlParameter)); + } + } + + void createSelectionMerge(Block mergeBlock, + spv::SelectionControlMask selectionControl) { + auto region = terminatorRegion.pushOp(spv::Op::OpSelectionMerge, 3); + region.pushIdUse(mergeBlock); + region.pushWord(static_cast(selectionControl)); + } + + void createBranch(Block label) { + auto region = terminatorRegion.pushOp(spv::Op::OpBranch, 2); + region.pushIdUse(label); + } + + void createBranchConditional( + BoolValue condition, Block trueLabel, Block falseLabel, + std::optional> weights = {}) { + auto region = terminatorRegion.pushOp(spv::Op::OpBranchConditional, + 4 + (weights.has_value() ? 1 : 0)); + region.pushIdUse(condition); + region.pushIdUse(trueLabel); + region.pushIdUse(falseLabel); + + if (weights.has_value()) { + region.pushWord(weights->first); + region.pushWord(weights->second); + } + } + + void createKill() { + assert(terminatorRegion.size() == 0); + terminatorRegion.pushOp(spv::Op::OpKill, 1); + } + + void createReturn() { + assert(terminatorRegion.size() == 0); + terminatorRegion.pushOp(spv::Op::OpReturn, 1); + } + + void createReturnValue(Value value) { + assert(terminatorRegion.size() == 0); + auto region = terminatorRegion.pushOp(spv::Op::OpReturnValue, 2); + region.pushIdUse(value); + } + + void createUnreachable() { + assert(terminatorRegion.size() == 0); + terminatorRegion.pushOp(spv::Op::OpUnreachable, 1); + } + + Value createLoad(Type resultType, PointerValue pointer, + spv::MemoryAccessMask memoryAccess, + std::span memoryAccessOperands) { + auto region = + bodyRegion.pushOp(spv::Op::OpLoad, 5 + memoryAccessOperands.size()); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(pointer); + region.pushWord(static_cast(memoryAccess)); + + for (auto memoryAccessOperand : memoryAccessOperands) { + region.pushWord(static_cast(memoryAccessOperand)); + } + + return id; + } + + template + requires(std::is_base_of_v) + TypeToValue createLoad(T resultType, PointerValue pointer) { + auto region = bodyRegion.pushOp(spv::Op::OpLoad, 4); + auto id = newId>(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(pointer); + return id; + } + + void createStore(PointerValue pointer, Value object) { + auto region = bodyRegion.pushOp(spv::Op::OpStore, 3); + region.pushIdUse(pointer); + region.pushIdUse(object); + } + + void createStore(PointerValue pointer, Value object, + spv::MemoryAccessMask memoryAccess, + std::span memoryAccessOperands) { + auto region = + bodyRegion.pushOp(spv::Op::OpStore, 4 + memoryAccessOperands.size()); + region.pushIdUse(pointer); + region.pushIdUse(object); + region.pushWord(static_cast(memoryAccess)); + + for (auto memoryAccessOperand : memoryAccessOperands) { + region.pushWord(memoryAccessOperand); + } + } + + void createCopyMemory(PointerValue targetPointer, + PointerValue sourcePointer) { + auto region = bodyRegion.pushOp(spv::Op::OpCopyMemory, 3); + region.pushIdUse(targetPointer); + region.pushIdUse(sourcePointer); + } + + void createCopyMemory(PointerValue targetPointer, PointerValue sourcePointer, + spv::MemoryAccessMask memoryAccess, + std::span memoryAccessOperands) { + auto region = bodyRegion.pushOp(spv::Op::OpCopyMemory, + 4 + memoryAccessOperands.size()); + region.pushIdUse(targetPointer); + region.pushIdUse(sourcePointer); + region.pushWord(static_cast(memoryAccess)); + for (auto memoryAccessOperand : memoryAccessOperands) { + region.pushWord(memoryAccessOperand); + } + } + + void + createCopyMemory(PointerValue targetPointer, PointerValue sourcePointer, + spv::MemoryAccessMask targetMemoryAccess, + std::span targetMemoryAccessOperands, + spv::MemoryAccessMask sourceMemoryAccess, + std::span sourceMemoryAccessOperands) { + auto region = bodyRegion.pushOp(spv::Op::OpCopyMemory, + 5 + targetMemoryAccessOperands.size() + + sourceMemoryAccessOperands.size()); + region.pushIdUse(targetPointer); + region.pushIdUse(sourcePointer); + region.pushWord(static_cast(targetMemoryAccess)); + for (auto memoryAccessOperand : targetMemoryAccessOperands) { + region.pushWord(static_cast(memoryAccessOperand)); + } + region.pushWord(static_cast(sourceMemoryAccess)); + for (auto memoryAccessOperand : sourceMemoryAccessOperands) { + region.pushWord(static_cast(memoryAccessOperand)); + } + } + + UIntValue createArrayLength(UIntType resultType, + PointerToValue structure, + std::uint32_t member) { + auto region = bodyRegion.pushOp(spv::Op::OpArrayLength, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(structure); + region.pushWord(member); + return id; + } + + BoolValue createPtrEqual(BoolType resultType, PointerValue operand1, + PointerValue operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpPtrEqual, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + + BoolValue createPtrNotEqual(BoolType resultType, PointerValue operand1, + PointerValue operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpPtrNotEqual, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + + PointerValue createAccessChain(PointerType resultType, PointerValue base, + std::span indices) { + auto region = bodyRegion.pushOp(spv::Op::OpAccessChain, 4 + indices.size()); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(base); + + for (auto index : indices) { + region.pushIdUse(index); + } + return id; + } + + PointerValue createInBoundsAccessChain(PointerType resultType, + PointerValue base, + std::span indices) { + auto region = + bodyRegion.pushOp(spv::Op::OpInBoundsAccessChain, 4 + indices.size()); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(base); + + for (auto index : indices) { + region.pushIdUse(index); + } + return id; + } + + // conversion + Value createConvertFToU(Type resultType, Value operand) { + auto region = bodyRegion.pushOp(spv::Op::OpConvertFToU, 4); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand); + return id; + } + Value createConvertFToS(Type resultType, Value operand) { + auto region = bodyRegion.pushOp(spv::Op::OpConvertFToS, 4); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand); + return id; + } + template + TypeToValue createConvertSToF(T resultType, SIntValue operand) { + auto region = bodyRegion.pushOp(spv::Op::OpConvertSToF, 4); + auto id = newId>(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand); + return id; + } + template + TypeToValue createConvertUToF(T resultType, UIntValue operand) { + auto region = bodyRegion.pushOp(spv::Op::OpConvertUToF, 4); + auto id = newId>(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand); + return id; + } + template + TypeToValue createUConvert(T resultType, UIntValue operand) { + auto region = bodyRegion.pushOp(spv::Op::OpUConvert, 4); + auto id = newId>(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand); + return id; + } + template + TypeToValue createSConvert(T resultType, SIntValue operand) { + auto region = bodyRegion.pushOp(spv::Op::OpSConvert, 4); + auto id = newId>(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand); + return id; + } + Value createFConvert(Type resultType, Value operand) { + auto region = bodyRegion.pushOp(spv::Op::OpFConvert, 4); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand); + return id; + } + template + TypeToValue createBitcast(T resultType, Value operand) { + auto region = bodyRegion.pushOp(spv::Op::OpBitcast, 4); + auto id = newId>(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand); + return id; + } + + // bit + template + TypeToValue createShiftRightLogical(T resultType, TypeToValue base, + IntValue shift) { + auto region = bodyRegion.pushOp(spv::Op::OpShiftRightLogical, 5); + auto id = newId>(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(base); + region.pushIdUse(shift); + return id; + } + + template + TypeToValue createShiftRightArithmetic(T resultType, TypeToValue base, + IntValue shift) { + auto region = bodyRegion.pushOp(spv::Op::OpShiftRightArithmetic, 5); + auto id = newId>(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(base); + region.pushIdUse(shift); + return id; + } + + template + TypeToValue createShiftLeftLogical(T resultType, TypeToValue base, + IntValue shift) { + auto region = bodyRegion.pushOp(spv::Op::OpShiftLeftLogical, 5); + auto id = newId>(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(base); + region.pushIdUse(shift); + return id; + } + + Value createBitwiseOr(Type resultType, Value operand1, Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpBitwiseOr, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + + Value createBitwiseXor(Type resultType, Value operand1, Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpBitwiseXor, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + + Value createBitwiseAnd(Type resultType, Value operand1, Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpBitwiseAnd, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + + Value createNot(Type resultType, Value operand) { + auto region = bodyRegion.pushOp(spv::Op::OpNot, 4); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand); + return id; + } + + // logic + BoolValue createLogicalEqual(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpLogicalEqual, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + BoolValue createLogicalNotEqual(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpLogicalNotEqual, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + + BoolValue createLogicalOr(BoolType resultType, BoolValue operand1, + BoolValue operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpLogicalOr, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + + BoolValue createLogicalAnd(BoolType resultType, BoolValue operand1, + BoolValue operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpLogicalAnd, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + + BoolValue createLogicalNot(BoolType resultType, BoolValue operand) { + auto region = bodyRegion.pushOp(spv::Op::OpLogicalNot, 4); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand); + return id; + } + + template + TypeToValue createSelect(T resultType, BoolValue condition, Value object1, + Value object2) { + auto region = bodyRegion.pushOp(spv::Op::OpSelect, 6); + auto id = newId>(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(condition); + region.pushIdUse(object1); + region.pushIdUse(object2); + return id; + } + + BoolValue createIEqual(BoolType resultType, Value operand1, Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpIEqual, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + BoolValue createINotEqual(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpINotEqual, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + BoolValue createUGreaterThan(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpUGreaterThan, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + BoolValue createSGreaterThan(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpSGreaterThan, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + BoolValue createUGreaterThanEqual(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpUGreaterThanEqual, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + BoolValue createSGreaterThanEqual(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpSGreaterThanEqual, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + BoolValue createULessThan(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpULessThan, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + BoolValue createSLessThan(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpSLessThan, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + BoolValue createULessThanEqual(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpULessThanEqual, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + BoolValue createSLessThanEqual(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpSLessThanEqual, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + + BoolValue createFOrdEqual(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpFOrdEqual, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + BoolValue createFUnordEqual(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpFUnordEqual, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + BoolValue createFOrdNotEqual(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpFOrdNotEqual, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + BoolValue createFUnordNotEqual(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpFUnordNotEqual, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + BoolValue createFOrdLessThan(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpFOrdLessThan, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + BoolValue createFUnordLessThan(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpFUnordLessThan, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + BoolValue createFOrdLessThanEqual(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpFOrdLessThanEqual, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + BoolValue createFUnordLessThanEqual(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpFUnordLessThanEqual, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + BoolValue createFOrdGreaterThan(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpFOrdGreaterThan, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + BoolValue createFUnordGreaterThan(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpFUnordGreaterThan, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + BoolValue createFOrdGreaterThanEqual(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpFOrdGreaterThanEqual, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + BoolValue createFUnordGreaterThanEqual(BoolType resultType, Value operand1, + Value operand2) { + auto region = bodyRegion.pushOp(spv::Op::OpFUnordGreaterThanEqual, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + region.pushIdUse(operand2); + return id; + } + + // image + SampledImageValue createSampledImage(SampledImageType resultType, + ImageValue image, SamplerValue sampler) { + auto region = bodyRegion.pushOp(spv::Op::OpSampledImage, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(image); + region.pushIdUse(sampler); + return id; + } + VectorOfValue createImageSampleImplicitLod( + VectorOfType resultType, SampledImageValue sampledImage, + ScalarOrVectorOfValue coords, + spv::ImageOperandsMask operands = spv::ImageOperandsMask::MaskNone, + std::span args = {}) { + auto region = bodyRegion.pushOp( + spv::Op::OpImageSampleImplicitLod, + 5 + (operands == spv::ImageOperandsMask::MaskNone ? 0 + : 1 + args.size())); + auto id = newId>(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(sampledImage); + region.pushIdUse(coords); + + if (operands != spv::ImageOperandsMask::MaskNone) { + region.pushWord(static_cast(operands)); + + for (auto arg : args) { + region.pushIdUse(arg); + } + } + + return id; + } + + Value createImageQuerySizeLod(Type resultType, ImageValue image, Value lod) { + auto region = bodyRegion.pushOp(spv::Op::OpImageQuerySizeLod, 5); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(image); + region.pushIdUse(lod); + return id; + } + + Value createImageQuerySize(Type resultType, ImageValue image) { + auto region = bodyRegion.pushOp(spv::Op::OpImageQuerySize, 4); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(image); + return id; + } + + VectorOfValue + createImageQueryLod(VectorOfType resultType, + SampledImageValue sampledImage, + ScalarOrVectorOfValue coords) { + auto region = bodyRegion.pushOp(spv::Op::OpImageQueryLod, 5); + auto id = newId>(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(sampledImage); + region.pushIdUse(coords); + return id; + } + + IntValue createImageQueryLevels(IntType resultType, + ImageValue sampledImage) { + auto region = bodyRegion.pushOp(spv::Op::OpImageQueryLevels, 4); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(sampledImage); + return id; + } + + IntValue createImageQuerySamples(IntType resultType, + ImageValue sampledImage) { + auto region = bodyRegion.pushOp(spv::Op::OpImageQuerySamples, 4); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(sampledImage); + return id; + } +}; + +class FunctionBuilder { + IdGenerator *mIdGenerator = nullptr; + + template auto newId() -> decltype(mIdGenerator->newId()) { + return mIdGenerator->newId(); + } + +public: + Region paramsRegion; + Region bodyRegion; + Function id; + + FunctionBuilder() = default; + FunctionBuilder(IdGenerator &idGenerator, Function id, + std::size_t expInstructionsCount) + : mIdGenerator(&idGenerator), bodyRegion{expInstructionsCount}, id(id) {} + + Value createFunctionParameter(Type resultType) { + auto region = paramsRegion.pushOp(spv::Op::OpFunctionParameter, 3); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + return id; + } + + BlockBuilder createBlockBuilder(std::size_t expInstructionsCount) { + auto id = newId(); + + return BlockBuilder(*mIdGenerator, id, expInstructionsCount); + } + + void insertBlock(const BlockBuilder &builder) { + bodyRegion.pushRegion(builder.prefix); + + auto region = bodyRegion.pushOp(spv::Op::OpLabel, 2); + region.pushIdDef(builder.id); + + bodyRegion.pushRegion(builder.variablesRegion); + bodyRegion.pushRegion(builder.phiRegion); + bodyRegion.pushRegion(builder.bodyRegion); + bodyRegion.pushRegion(builder.terminatorRegion); + } +}; + +class SpirvBuilder { + IdGenerator *mIdGenerator = nullptr; + + Region capabilityRegion; + Region extensionRegion; + Region extInstRegion; + Region memoryModelRegion; + Region entryPointRegion; + Region executionModeRegion; + Region debugRegion; + Region annotationRegion; + Region globalRegion; + Region functionDeclRegion; + Region functionRegion; + + template auto newId() -> decltype(mIdGenerator->newId()) { + return mIdGenerator->newId(); + } + +private: + SpirvBuilder(const SpirvBuilder &) = default; + SpirvBuilder(SpirvBuilder &&) = default; + SpirvBuilder &operator=(SpirvBuilder &&) = default; + +public: + SpirvBuilder() = default; + + SpirvBuilder(IdGenerator &idGenerator, std::size_t expInstructionsCount) + : mIdGenerator(&idGenerator), capabilityRegion{1}, extensionRegion{1}, + extInstRegion{4}, memoryModelRegion{3}, entryPointRegion{1}, + executionModeRegion{1}, debugRegion{0}, annotationRegion{1}, + globalRegion{1}, functionDeclRegion{1}, + functionRegion{expInstructionsCount} {} + + SpirvBuilder clone() const { return *this; } + + void swap(SpirvBuilder &other) { + std::swap(mIdGenerator, other.mIdGenerator); + std::swap(capabilityRegion, other.capabilityRegion); + std::swap(extensionRegion, other.extensionRegion); + std::swap(extInstRegion, other.extInstRegion); + std::swap(memoryModelRegion, other.memoryModelRegion); + std::swap(entryPointRegion, other.entryPointRegion); + std::swap(executionModeRegion, other.executionModeRegion); + std::swap(debugRegion, other.debugRegion); + std::swap(annotationRegion, other.annotationRegion); + std::swap(globalRegion, other.globalRegion); + std::swap(functionDeclRegion, other.functionDeclRegion); + std::swap(functionRegion, other.functionRegion); + } + + void reset() { + mIdGenerator->reset(); + capabilityRegion.clear(); + extensionRegion.clear(); + extInstRegion.clear(); + memoryModelRegion.clear(); + entryPointRegion.clear(); + executionModeRegion.clear(); + debugRegion.clear(); + annotationRegion.clear(); + globalRegion.clear(); + functionDeclRegion.clear(); + functionRegion.clear(); + } + + std::vector build(std::uint32_t spirvVersion, + std::uint32_t generatorMagic) { + const std::size_t headerSize = 5; + std::size_t finalSize = headerSize; + + std::array regions = { + &capabilityRegion, &extensionRegion, &extInstRegion, + &memoryModelRegion, &entryPointRegion, &executionModeRegion, + &debugRegion, &annotationRegion, &globalRegion, + &functionDeclRegion, &functionRegion, + }; + + for (auto region : regions) { + finalSize += region->size(); + } + + std::vector result; + result.resize(finalSize); + + result[0] = spv::MagicNumber; + result[1] = spirvVersion; + result[2] = generatorMagic; + result[3] = mIdGenerator->bounds; + result[4] = 0; // instruction schema + + std::size_t currentOffset = headerSize; + + for (auto region : regions) { + std::memcpy(result.data() + currentOffset, region->data(), + region->size() * sizeof(std::uint32_t)); + currentOffset += region->size(); + } + + return result; + } + + // misc + Value createUndef(Type resultType) { + auto region = globalRegion.pushOp(spv::Op::OpUndef, 3); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + return id; + } + + template + requires(std::is_base_of_v) + TypeToValue createUndef(T resultType) { + return cast>(createUndef(resultType)); + } + + // annotation + void createDecorate(Id target, spv::Decoration decoration, + std::span decorationOperands) { + auto region = annotationRegion.pushOp(spv::Op::OpDecorate, + 3 + decorationOperands.size()); + region.pushIdUse(target); + region.pushWord(static_cast(decoration)); + + for (auto decorationOperand : decorationOperands) { + region.pushWord(decorationOperand); + } + } + + void createMemberDecorate(StructType structureType, std::uint32_t member, + spv::Decoration decoration, + std::span decorationOperands) { + auto region = annotationRegion.pushOp(spv::Op::OpMemberDecorate, + 4 + decorationOperands.size()); + region.pushIdUse(structureType); + region.pushWord(member); + region.pushWord(static_cast(decoration)); + + for (auto decorationOperand : decorationOperands) { + region.pushWord(decorationOperand); + } + } + + void createDecorateId(Id target, spv::Decoration decoration, + std::span decorationOperands) { + auto region = annotationRegion.pushOp(spv::Op::OpDecorateId, + 3 + decorationOperands.size()); + region.pushIdUse(target); + region.pushWord(static_cast(decoration)); + + for (auto decorationOperand : decorationOperands) { + region.pushIdUse(decorationOperand); + } + } + + void createDecorateString( + Id target, spv::Decoration decoration, + std::string_view firstDecorationOperand, + std::span decorationOperands = {}) { + std::size_t decorationOperandsLen = + calcStringWordCount(firstDecorationOperand); + + for (auto decorationOperand : decorationOperands) { + decorationOperandsLen += calcStringWordCount(decorationOperand); + } + + auto region = annotationRegion.pushOp(spv::Op::OpDecorateString, + 3 + decorationOperandsLen); + region.pushIdUse(target); + region.pushWord(static_cast(decoration)); + region.pushString(firstDecorationOperand); + + for (auto decorationOperand : decorationOperands) { + region.pushString(decorationOperand); + } + } + + void createMemberDecorateString( + StructType structType, std::uint32_t member, spv::Decoration decoration, + std::string_view firstDecorationOperand, + std::span decorationOperands = {}) { + std::size_t decorationOperandsLen = + calcStringWordCount(firstDecorationOperand); + + for (auto decorationOperand : decorationOperands) { + decorationOperandsLen += calcStringWordCount(decorationOperand); + } + + auto region = annotationRegion.pushOp(spv::Op::OpMemberDecorateString, + 4 + decorationOperandsLen); + region.pushIdUse(structType); + region.pushWord(member); + region.pushWord(static_cast(decoration)); + region.pushString(firstDecorationOperand); + + for (auto decorationOperand : decorationOperands) { + region.pushString(decorationOperand); + } + } + + // extension + void createExtension(std::string_view name) { + auto region = extensionRegion.pushOp(spv::Op::OpExtension, + 1 + calcStringWordCount(name)); + region.pushString(name); + } + + ExtInstSet createExtInstImport(std::string_view name) { + auto region = extInstRegion.pushOp(spv::Op::OpExtInstImport, + 2 + calcStringWordCount(name)); + auto id = newId(); + region.pushIdDef(id); + region.pushString(name); + return id; + } + + // mode set + void createCapability(spv::Capability cap) { + auto region = capabilityRegion.pushOp(spv::Op::OpCapability, 2); + region.pushWord(static_cast(cap)); + } + + void setMemoryModel(spv::AddressingModel addressingModel, + spv::MemoryModel memoryModel) { + memoryModelRegion.clear(); + auto region = memoryModelRegion.pushOp(spv::Op::OpMemoryModel, 3); + region.pushWord(static_cast(addressingModel)); + region.pushWord(static_cast(memoryModel)); + } + + void createEntryPoint(spv::ExecutionModel executionModel, Function entryPoint, + std::string_view name, + std::span interfaces) { + auto region = entryPointRegion.pushOp(spv::Op::OpEntryPoint, + 3 + calcStringWordCount(name) + + interfaces.size()); + region.pushWord(static_cast(executionModel)); + region.pushIdUse(entryPoint); + region.pushString(name); + for (auto iface : interfaces) { + region.pushIdUse(iface); + } + } + void createExecutionMode(Function entryPoint, spv::ExecutionMode mode, + std::span args) { + auto region = + executionModeRegion.pushOp(spv::Op::OpExecutionMode, 3 + args.size()); + region.pushIdUse(entryPoint); + region.pushWord(static_cast(mode)); + for (auto arg : args) { + region.pushWord(arg); + } + } + + void createExecutionModeId(Function entryPoint, spv::ExecutionMode mode, + std::span args) { + auto region = + executionModeRegion.pushOp(spv::Op::OpExecutionModeId, 3 + args.size()); + region.pushIdUse(entryPoint); + region.pushWord(static_cast(mode)); + for (auto arg : args) { + region.pushIdUse(arg); + } + } + + // type + VoidType createTypeVoid() { + auto region = globalRegion.pushOp(spv::Op::OpTypeVoid, 2); + auto id = newId(); + region.pushIdDef(id); + return id; + } + BoolType createTypeBool() { + auto region = globalRegion.pushOp(spv::Op::OpTypeBool, 2); + auto id = newId(); + region.pushIdDef(id); + return id; + } + IntType createTypeInt(std::uint32_t width, bool signedness) { + auto region = globalRegion.pushOp(spv::Op::OpTypeInt, 4); + auto id = newId(); + region.pushIdDef(id); + region.pushWord(width); + region.pushWord(static_cast(signedness)); + return id; + } + SIntType createTypeSInt(std::uint32_t width) { + return cast(createTypeInt(width, true)); + } + UIntType createTypeUInt(std::uint32_t width) { + return cast(createTypeInt(width, false)); + } + FloatType createTypeFloat(std::uint32_t width) { + auto region = globalRegion.pushOp(spv::Op::OpTypeFloat, 3); + auto id = newId(); + region.pushIdDef(id); + region.pushWord(width); + return id; + } + template + VectorOfType createTypeVector(T componentType, + std::uint32_t componentCount) { + auto region = globalRegion.pushOp(spv::Op::OpTypeVector, 4); + auto id = newId>(); + region.pushIdDef(id); + region.pushIdUse(componentType); + region.pushWord(componentCount); + return id; + } + MatrixType createTypeMatrix(VectorType columnType, + std::uint32_t coulumnCount) { + auto region = globalRegion.pushOp(spv::Op::OpTypeMatrix, 4); + auto id = newId(); + region.pushIdDef(id); + region.pushIdUse(columnType); + region.pushWord(coulumnCount); + return id; + } + + ImageType createTypeImage(Type sampledType, spv::Dim dim, std::uint32_t depth, + std::uint32_t arrayed, std::uint32_t ms, + std::uint32_t sampled, spv::ImageFormat imageFormat, + std::optional access = {}) { + auto region = globalRegion.pushOp(spv::Op::OpTypeImage, + 9 + (access.has_value() ? 1 : 0)); + auto id = newId(); + region.pushIdDef(id); + region.pushIdUse(sampledType); + region.pushWord(static_cast(dim)); + region.pushWord(depth); + region.pushWord(arrayed); + region.pushWord(ms); + region.pushWord(sampled); + region.pushWord(static_cast(imageFormat)); + + if (access.has_value()) { + region.pushWord(static_cast(*access)); + } + + return id; + } + + SamplerType createTypeSampler() { + auto region = globalRegion.pushOp(spv::Op::OpTypeSampler, 2); + auto id = newId(); + region.pushIdDef(id); + return id; + } + + SampledImageType createTypeSampledImage(ImageType imageType) { + auto region = globalRegion.pushOp(spv::Op::OpTypeSampledImage, 3); + auto id = newId(); + region.pushIdDef(id); + region.pushIdUse(imageType); + return id; + } + + ArrayType createTypeArray(Type elementType, AnyConstantValue count) { + auto region = globalRegion.pushOp(spv::Op::OpTypeArray, 4); + auto id = newId(); + region.pushIdDef(id); + region.pushIdUse(elementType); + region.pushIdUse(count); + return id; + } + + RuntimeArrayType createTypeRuntimeArray(Type elementType) { + auto region = globalRegion.pushOp(spv::Op::OpTypeRuntimeArray, 3); + auto id = newId(); + region.pushIdDef(id); + region.pushIdUse(elementType); + + return id; + } + + StructType createTypeStruct(std::span members) { + auto region = + globalRegion.pushOp(spv::Op::OpTypeStruct, 2 + members.size()); + auto id = newId(); + region.pushIdDef(id); + + for (auto member : members) { + region.pushIdUse(member); + } + + return id; + } + + PointerType createTypePointer(spv::StorageClass storageClass, Type type) { + auto region = globalRegion.pushOp(spv::Op::OpTypePointer, 4); + auto id = newId(); + region.pushIdDef(id); + region.pushWord(static_cast(storageClass)); + region.pushIdUse(type); + return id; + } + + template + requires(std::is_base_of_v) + PointerToType createTypePointer(spv::StorageClass storageClass, T type) { + return cast>( + createTypePointer(storageClass, static_cast(type))); + } + + FunctionType createTypeFunction(Type returnType, + std::span parameters) { + auto region = + globalRegion.pushOp(spv::Op::OpTypeFunction, 3 + parameters.size()); + auto id = newId(); + region.pushIdDef(id); + region.pushIdUse(returnType); + + for (auto param : parameters) { + region.pushIdUse(param); + } + + return id; + } + + // constant + ConstantBool createConstantTrue(BoolType type) { + auto region = globalRegion.pushOp(spv::Op::OpConstantTrue, 3); + auto id = newId(); + region.pushIdUse(type); + region.pushIdDef(id); + return id; + } + + ConstantBool createConstantFalse(BoolType type) { + auto region = globalRegion.pushOp(spv::Op::OpConstantFalse, 3); + auto id = newId(); + region.pushIdUse(type); + region.pushIdDef(id); + return id; + } + + template + requires(std::is_base_of_v) + ConstantValue> + createConstant(T type, std::span values) { + auto region = globalRegion.pushOp(spv::Op::OpConstant, 3 + values.size()); + auto id = newId>>(); + region.pushIdUse(type); + region.pushIdDef(id); + for (auto value : values) { + region.pushWord(value); + } + return id; + } + + template + requires(std::is_base_of_v) + ConstantValue> createConstant32(T type, std::uint32_t value) { + return createConstant(type, std::array{value}); + } + + template + requires(std::is_base_of_v) + ConstantValue> createConstant64(T type, std::uint64_t value) { + return createConstant(type, + std::array{static_cast(value), + static_cast(value >> 32)}); + } + + // memory + VariableValue createVariable(Type type, spv::StorageClass storageClass, + std::optional initializer = {}) { + auto region = globalRegion.pushOp(spv::Op::OpVariable, + 4 + (initializer.has_value() ? 1 : 0)); + auto id = newId(); + region.pushIdUse(type); + region.pushIdDef(id); + region.pushWord(static_cast(storageClass)); + if (initializer.has_value()) { + region.pushIdUse(initializer.value()); + } + return id; + } + +private: + void createFunction(Function id, Type resultType, + spv::FunctionControlMask functionControl, + Type functionType) { + auto region = functionRegion.pushOp(spv::Op::OpFunction, 5); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushWord(static_cast(functionControl)); + region.pushIdUse(functionType); + } + + Value createFunctionParameter(Type resultType) { + auto region = functionRegion.pushOp(spv::Op::OpFunctionParameter, 3); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + return id; + } + + void createFunctionEnd() { functionRegion.pushOp(spv::Op::OpFunctionEnd, 1); } + +public: + FunctionBuilder createFunctionBuilder(std::size_t expInstructionsCount) { + auto id = newId(); + return FunctionBuilder(*mIdGenerator, id, expInstructionsCount); + } + + void insertFunctionDeclaration(const FunctionBuilder &function, + Type resultType, + spv::FunctionControlMask functionControl, + Type functionType) { + createFunction(function.id, resultType, functionControl, functionType); + functionRegion.pushRegion(function.paramsRegion); + createFunctionEnd(); + } + + void insertFunction(const FunctionBuilder &function, Type resultType, + spv::FunctionControlMask functionControl, + Type functionType) { + createFunction(function.id, resultType, functionControl, functionType); + functionRegion.pushRegion(function.paramsRegion); + functionRegion.pushRegion(function.bodyRegion); + createFunctionEnd(); + } + + BlockBuilder createBlockBuilder(std::size_t expInstructionsCount) { + auto id = newId(); + + return BlockBuilder(*mIdGenerator, id, expInstructionsCount); + } +}; +} // namespace spirv diff --git a/hw/amdgpu/lib/libspirv/include/spirv/spirv-instruction.hpp b/hw/amdgpu/lib/libspirv/include/spirv/spirv-instruction.hpp new file mode 100644 index 000000000..aaa8eb7fc --- /dev/null +++ b/hw/amdgpu/lib/libspirv/include/spirv/spirv-instruction.hpp @@ -0,0 +1,2422 @@ +#pragma once +#include "spirv.hpp" +#include +#include +#include + +namespace spirv { +enum class OperandKind { + Invalid, + ValueId, + TypeId, + Word, + String, + VariadicId, + VariadicWord, +}; + +enum class OperandDirection { + In, + Out, +}; + +enum class InstructionFlags { + None = 0, + HasResult = 1 << 0, + HasResultType = 1 << 1, +}; + +inline InstructionFlags operator|(InstructionFlags lhs, InstructionFlags rhs) { + return static_cast(static_cast(lhs) | + static_cast(rhs)); +} +inline InstructionFlags operator&(InstructionFlags lhs, InstructionFlags rhs) { + return static_cast(static_cast(lhs) & + static_cast(rhs)); +} + +struct InstructionInfo { + const char *name; + InstructionFlags flags; + OperandKind operands[16]; +}; + +inline const InstructionInfo *getInstructionInfo(spv::Op opcode) { + switch (opcode) { + default: /* unknown opcode */ + break; + case spv::Op::OpNop: { + static InstructionInfo result = {"OpNop", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpUndef: { + static InstructionInfo result = { + "OpUndef", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSourceContinued: { + static InstructionInfo result = { + "OpSourceContinued", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpSource: { + static InstructionInfo result = {"OpSource", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpSourceExtension: { + static InstructionInfo result = { + "OpSourceExtension", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpName: { + static InstructionInfo result = {"OpName", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpMemberName: { + static InstructionInfo result = { + "OpMemberName", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpString: { + static InstructionInfo result = { + "OpString", InstructionFlags::HasResult, {}}; + return &result; + } + case spv::Op::OpLine: { + static InstructionInfo result = {"OpLine", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpExtension: { + static InstructionInfo result = {"OpExtension", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpExtInstImport: { + static InstructionInfo result = { + "OpExtInstImport", InstructionFlags::HasResult, {}}; + return &result; + } + case spv::Op::OpExtInst: { + static InstructionInfo result = { + "OpExtInst", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpMemoryModel: { + static InstructionInfo result = { + "OpMemoryModel", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpEntryPoint: { + static InstructionInfo result = { + "OpEntryPoint", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpExecutionMode: { + static InstructionInfo result = { + "OpExecutionMode", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpCapability: { + static InstructionInfo result = { + "OpCapability", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpTypeVoid: { + static InstructionInfo result = { + "OpTypeVoid", InstructionFlags::HasResult, {}}; + return &result; + } + case spv::Op::OpTypeBool: { + static InstructionInfo result = { + "OpTypeBool", InstructionFlags::HasResult, {}}; + return &result; + } + case spv::Op::OpTypeInt: { + static InstructionInfo result = { + "OpTypeInt", InstructionFlags::HasResult, {}}; + return &result; + } + case spv::Op::OpTypeFloat: { + static InstructionInfo result = { + "OpTypeFloat", InstructionFlags::HasResult, {}}; + return &result; + } + case spv::Op::OpTypeVector: { + static InstructionInfo result = { + "OpTypeVector", InstructionFlags::HasResult, {}}; + return &result; + } + case spv::Op::OpTypeMatrix: { + static InstructionInfo result = { + "OpTypeMatrix", InstructionFlags::HasResult, {}}; + return &result; + } + case spv::Op::OpTypeImage: { + static InstructionInfo result = { + "OpTypeImage", InstructionFlags::HasResult, {}}; + return &result; + } + case spv::Op::OpTypeSampler: { + static InstructionInfo result = { + "OpTypeSampler", InstructionFlags::HasResult, {}}; + return &result; + } + case spv::Op::OpTypeSampledImage: { + static InstructionInfo result = {"OpTypeSampledImage", + InstructionFlags::HasResult, + {}}; + return &result; + } + case spv::Op::OpTypeArray: { + static InstructionInfo result = { + "OpTypeArray", InstructionFlags::HasResult, {}}; + return &result; + } + case spv::Op::OpTypeRuntimeArray: { + static InstructionInfo result = {"OpTypeRuntimeArray", + InstructionFlags::HasResult, + {}}; + return &result; + } + case spv::Op::OpTypeStruct: { + static InstructionInfo result = { + "OpTypeStruct", InstructionFlags::HasResult, {}}; + return &result; + } + case spv::Op::OpTypeOpaque: { + static InstructionInfo result = { + "OpTypeOpaque", InstructionFlags::HasResult, {}}; + return &result; + } + case spv::Op::OpTypePointer: { + static InstructionInfo result = { + "OpTypePointer", InstructionFlags::HasResult, {}}; + return &result; + } + case spv::Op::OpTypeFunction: { + static InstructionInfo result = { + "OpTypeFunction", InstructionFlags::HasResult, {}}; + return &result; + } + case spv::Op::OpTypeEvent: { + static InstructionInfo result = { + "OpTypeEvent", InstructionFlags::HasResult, {}}; + return &result; + } + case spv::Op::OpTypeDeviceEvent: { + static InstructionInfo result = {"OpTypeDeviceEvent", + InstructionFlags::HasResult, + {}}; + return &result; + } + case spv::Op::OpTypeReserveId: { + static InstructionInfo result = { + "OpTypeReserveId", InstructionFlags::HasResult, {}}; + return &result; + } + case spv::Op::OpTypeQueue: { + static InstructionInfo result = { + "OpTypeQueue", InstructionFlags::HasResult, {}}; + return &result; + } + case spv::Op::OpTypePipe: { + static InstructionInfo result = { + "OpTypePipe", InstructionFlags::HasResult, {}}; + return &result; + } + case spv::Op::OpTypeForwardPointer: { + static InstructionInfo result = { + "OpTypeForwardPointer", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpConstantTrue: { + static InstructionInfo result = { + "OpConstantTrue", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpConstantFalse: { + static InstructionInfo result = { + "OpConstantFalse", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpConstant: { + static InstructionInfo result = { + "OpConstant", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpConstantComposite: { + static InstructionInfo result = { + "OpConstantComposite", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpConstantSampler: { + static InstructionInfo result = { + "OpConstantSampler", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpConstantNull: { + static InstructionInfo result = { + "OpConstantNull", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSpecConstantTrue: { + static InstructionInfo result = { + "OpSpecConstantTrue", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSpecConstantFalse: { + static InstructionInfo result = { + "OpSpecConstantFalse", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSpecConstant: { + static InstructionInfo result = { + "OpSpecConstant", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSpecConstantComposite: { + static InstructionInfo result = { + "OpSpecConstantComposite", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSpecConstantOp: { + static InstructionInfo result = { + "OpSpecConstantOp", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFunction: { + static InstructionInfo result = { + "OpFunction", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFunctionParameter: { + static InstructionInfo result = { + "OpFunctionParameter", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFunctionEnd: { + static InstructionInfo result = { + "OpFunctionEnd", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpFunctionCall: { + static InstructionInfo result = { + "OpFunctionCall", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpVariable: { + static InstructionInfo result = { + "OpVariable", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageTexelPointer: { + static InstructionInfo result = { + "OpImageTexelPointer", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpLoad: { + static InstructionInfo result = { + "OpLoad", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpStore: { + static InstructionInfo result = {"OpStore", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpCopyMemory: { + static InstructionInfo result = { + "OpCopyMemory", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpCopyMemorySized: { + static InstructionInfo result = { + "OpCopyMemorySized", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpAccessChain: { + static InstructionInfo result = { + "OpAccessChain", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpInBoundsAccessChain: { + static InstructionInfo result = { + "OpInBoundsAccessChain", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpPtrAccessChain: { + static InstructionInfo result = { + "OpPtrAccessChain", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpArrayLength: { + static InstructionInfo result = { + "OpArrayLength", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGenericPtrMemSemantics: { + static InstructionInfo result = { + "OpGenericPtrMemSemantics", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpInBoundsPtrAccessChain: { + static InstructionInfo result = { + "OpInBoundsPtrAccessChain", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpDecorate: { + static InstructionInfo result = {"OpDecorate", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpMemberDecorate: { + static InstructionInfo result = { + "OpMemberDecorate", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpDecorationGroup: { + static InstructionInfo result = {"OpDecorationGroup", + InstructionFlags::HasResult, + {}}; + return &result; + } + case spv::Op::OpGroupDecorate: { + static InstructionInfo result = { + "OpGroupDecorate", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpGroupMemberDecorate: { + static InstructionInfo result = { + "OpGroupMemberDecorate", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpVectorExtractDynamic: { + static InstructionInfo result = { + "OpVectorExtractDynamic", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpVectorInsertDynamic: { + static InstructionInfo result = { + "OpVectorInsertDynamic", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpVectorShuffle: { + static InstructionInfo result = { + "OpVectorShuffle", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpCompositeConstruct: { + static InstructionInfo result = { + "OpCompositeConstruct", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpCompositeExtract: { + static InstructionInfo result = { + "OpCompositeExtract", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpCompositeInsert: { + static InstructionInfo result = { + "OpCompositeInsert", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpCopyObject: { + static InstructionInfo result = { + "OpCopyObject", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpTranspose: { + static InstructionInfo result = { + "OpTranspose", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSampledImage: { + static InstructionInfo result = { + "OpSampledImage", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageSampleImplicitLod: { + static InstructionInfo result = { + "OpImageSampleImplicitLod", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageSampleExplicitLod: { + static InstructionInfo result = { + "OpImageSampleExplicitLod", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageSampleDrefImplicitLod: { + static InstructionInfo result = { + "OpImageSampleDrefImplicitLod", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageSampleDrefExplicitLod: { + static InstructionInfo result = { + "OpImageSampleDrefExplicitLod", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageSampleProjImplicitLod: { + static InstructionInfo result = { + "OpImageSampleProjImplicitLod", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageSampleProjExplicitLod: { + static InstructionInfo result = { + "OpImageSampleProjExplicitLod", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageSampleProjDrefImplicitLod: { + static InstructionInfo result = { + "OpImageSampleProjDrefImplicitLod", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageSampleProjDrefExplicitLod: { + static InstructionInfo result = { + "OpImageSampleProjDrefExplicitLod", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageFetch: { + static InstructionInfo result = { + "OpImageFetch", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageGather: { + static InstructionInfo result = { + "OpImageGather", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageDrefGather: { + static InstructionInfo result = { + "OpImageDrefGather", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageRead: { + static InstructionInfo result = { + "OpImageRead", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageWrite: { + static InstructionInfo result = { + "OpImageWrite", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpImage: { + static InstructionInfo result = { + "OpImage", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageQueryFormat: { + static InstructionInfo result = { + "OpImageQueryFormat", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageQueryOrder: { + static InstructionInfo result = { + "OpImageQueryOrder", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageQuerySizeLod: { + static InstructionInfo result = { + "OpImageQuerySizeLod", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageQuerySize: { + static InstructionInfo result = { + "OpImageQuerySize", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageQueryLod: { + static InstructionInfo result = { + "OpImageQueryLod", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageQueryLevels: { + static InstructionInfo result = { + "OpImageQueryLevels", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageQuerySamples: { + static InstructionInfo result = { + "OpImageQuerySamples", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpConvertFToU: { + static InstructionInfo result = { + "OpConvertFToU", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpConvertFToS: { + static InstructionInfo result = { + "OpConvertFToS", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpConvertSToF: { + static InstructionInfo result = { + "OpConvertSToF", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpConvertUToF: { + static InstructionInfo result = { + "OpConvertUToF", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpUConvert: { + static InstructionInfo result = { + "OpUConvert", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSConvert: { + static InstructionInfo result = { + "OpSConvert", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFConvert: { + static InstructionInfo result = { + "OpFConvert", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpQuantizeToF16: { + static InstructionInfo result = { + "OpQuantizeToF16", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpConvertPtrToU: { + static InstructionInfo result = { + "OpConvertPtrToU", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSatConvertSToU: { + static InstructionInfo result = { + "OpSatConvertSToU", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSatConvertUToS: { + static InstructionInfo result = { + "OpSatConvertUToS", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpConvertUToPtr: { + static InstructionInfo result = { + "OpConvertUToPtr", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpPtrCastToGeneric: { + static InstructionInfo result = { + "OpPtrCastToGeneric", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGenericCastToPtr: { + static InstructionInfo result = { + "OpGenericCastToPtr", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGenericCastToPtrExplicit: { + static InstructionInfo result = { + "OpGenericCastToPtrExplicit", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpBitcast: { + static InstructionInfo result = { + "OpBitcast", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSNegate: { + static InstructionInfo result = { + "OpSNegate", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFNegate: { + static InstructionInfo result = { + "OpFNegate", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpIAdd: { + static InstructionInfo result = { + "OpIAdd", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFAdd: { + static InstructionInfo result = { + "OpFAdd", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpISub: { + static InstructionInfo result = { + "OpISub", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFSub: { + static InstructionInfo result = { + "OpFSub", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpIMul: { + static InstructionInfo result = { + "OpIMul", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFMul: { + static InstructionInfo result = { + "OpFMul", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpUDiv: { + static InstructionInfo result = { + "OpUDiv", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSDiv: { + static InstructionInfo result = { + "OpSDiv", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFDiv: { + static InstructionInfo result = { + "OpFDiv", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpUMod: { + static InstructionInfo result = { + "OpUMod", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSRem: { + static InstructionInfo result = { + "OpSRem", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSMod: { + static InstructionInfo result = { + "OpSMod", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFRem: { + static InstructionInfo result = { + "OpFRem", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFMod: { + static InstructionInfo result = { + "OpFMod", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpVectorTimesScalar: { + static InstructionInfo result = { + "OpVectorTimesScalar", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpMatrixTimesScalar: { + static InstructionInfo result = { + "OpMatrixTimesScalar", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpVectorTimesMatrix: { + static InstructionInfo result = { + "OpVectorTimesMatrix", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpMatrixTimesVector: { + static InstructionInfo result = { + "OpMatrixTimesVector", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpMatrixTimesMatrix: { + static InstructionInfo result = { + "OpMatrixTimesMatrix", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpOuterProduct: { + static InstructionInfo result = { + "OpOuterProduct", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpDot: { + static InstructionInfo result = { + "OpDot", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpIAddCarry: { + static InstructionInfo result = { + "OpIAddCarry", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpISubBorrow: { + static InstructionInfo result = { + "OpISubBorrow", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpUMulExtended: { + static InstructionInfo result = { + "OpUMulExtended", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSMulExtended: { + static InstructionInfo result = { + "OpSMulExtended", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpAny: { + static InstructionInfo result = { + "OpAny", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpAll: { + static InstructionInfo result = { + "OpAll", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpIsNan: { + static InstructionInfo result = { + "OpIsNan", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpIsInf: { + static InstructionInfo result = { + "OpIsInf", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpIsFinite: { + static InstructionInfo result = { + "OpIsFinite", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpIsNormal: { + static InstructionInfo result = { + "OpIsNormal", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSignBitSet: { + static InstructionInfo result = { + "OpSignBitSet", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpLessOrGreater: { + static InstructionInfo result = { + "OpLessOrGreater", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpOrdered: { + static InstructionInfo result = { + "OpOrdered", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpUnordered: { + static InstructionInfo result = { + "OpUnordered", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpLogicalEqual: { + static InstructionInfo result = { + "OpLogicalEqual", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpLogicalNotEqual: { + static InstructionInfo result = { + "OpLogicalNotEqual", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpLogicalOr: { + static InstructionInfo result = { + "OpLogicalOr", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpLogicalAnd: { + static InstructionInfo result = { + "OpLogicalAnd", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpLogicalNot: { + static InstructionInfo result = { + "OpLogicalNot", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSelect: { + static InstructionInfo result = { + "OpSelect", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpIEqual: { + static InstructionInfo result = { + "OpIEqual", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpINotEqual: { + static InstructionInfo result = { + "OpINotEqual", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpUGreaterThan: { + static InstructionInfo result = { + "OpUGreaterThan", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSGreaterThan: { + static InstructionInfo result = { + "OpSGreaterThan", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpUGreaterThanEqual: { + static InstructionInfo result = { + "OpUGreaterThanEqual", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSGreaterThanEqual: { + static InstructionInfo result = { + "OpSGreaterThanEqual", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpULessThan: { + static InstructionInfo result = { + "OpULessThan", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSLessThan: { + static InstructionInfo result = { + "OpSLessThan", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpULessThanEqual: { + static InstructionInfo result = { + "OpULessThanEqual", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSLessThanEqual: { + static InstructionInfo result = { + "OpSLessThanEqual", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFOrdEqual: { + static InstructionInfo result = { + "OpFOrdEqual", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFUnordEqual: { + static InstructionInfo result = { + "OpFUnordEqual", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFOrdNotEqual: { + static InstructionInfo result = { + "OpFOrdNotEqual", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFUnordNotEqual: { + static InstructionInfo result = { + "OpFUnordNotEqual", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFOrdLessThan: { + static InstructionInfo result = { + "OpFOrdLessThan", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFUnordLessThan: { + static InstructionInfo result = { + "OpFUnordLessThan", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFOrdGreaterThan: { + static InstructionInfo result = { + "OpFOrdGreaterThan", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFUnordGreaterThan: { + static InstructionInfo result = { + "OpFUnordGreaterThan", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFOrdLessThanEqual: { + static InstructionInfo result = { + "OpFOrdLessThanEqual", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFUnordLessThanEqual: { + static InstructionInfo result = { + "OpFUnordLessThanEqual", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFOrdGreaterThanEqual: { + static InstructionInfo result = { + "OpFOrdGreaterThanEqual", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFUnordGreaterThanEqual: { + static InstructionInfo result = { + "OpFUnordGreaterThanEqual", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpShiftRightLogical: { + static InstructionInfo result = { + "OpShiftRightLogical", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpShiftRightArithmetic: { + static InstructionInfo result = { + "OpShiftRightArithmetic", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpShiftLeftLogical: { + static InstructionInfo result = { + "OpShiftLeftLogical", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpBitwiseOr: { + static InstructionInfo result = { + "OpBitwiseOr", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpBitwiseXor: { + static InstructionInfo result = { + "OpBitwiseXor", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpBitwiseAnd: { + static InstructionInfo result = { + "OpBitwiseAnd", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpNot: { + static InstructionInfo result = { + "OpNot", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpBitFieldInsert: { + static InstructionInfo result = { + "OpBitFieldInsert", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpBitFieldSExtract: { + static InstructionInfo result = { + "OpBitFieldSExtract", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpBitFieldUExtract: { + static InstructionInfo result = { + "OpBitFieldUExtract", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpBitReverse: { + static InstructionInfo result = { + "OpBitReverse", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpBitCount: { + static InstructionInfo result = { + "OpBitCount", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpDPdx: { + static InstructionInfo result = { + "OpDPdx", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpDPdy: { + static InstructionInfo result = { + "OpDPdy", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFwidth: { + static InstructionInfo result = { + "OpFwidth", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpDPdxFine: { + static InstructionInfo result = { + "OpDPdxFine", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpDPdyFine: { + static InstructionInfo result = { + "OpDPdyFine", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFwidthFine: { + static InstructionInfo result = { + "OpFwidthFine", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpDPdxCoarse: { + static InstructionInfo result = { + "OpDPdxCoarse", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpDPdyCoarse: { + static InstructionInfo result = { + "OpDPdyCoarse", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpFwidthCoarse: { + static InstructionInfo result = { + "OpFwidthCoarse", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpEmitVertex: { + static InstructionInfo result = { + "OpEmitVertex", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpEndPrimitive: { + static InstructionInfo result = { + "OpEndPrimitive", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpEmitStreamVertex: { + static InstructionInfo result = { + "OpEmitStreamVertex", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpEndStreamPrimitive: { + static InstructionInfo result = { + "OpEndStreamPrimitive", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpControlBarrier: { + static InstructionInfo result = { + "OpControlBarrier", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpMemoryBarrier: { + static InstructionInfo result = { + "OpMemoryBarrier", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpAtomicLoad: { + static InstructionInfo result = { + "OpAtomicLoad", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpAtomicStore: { + static InstructionInfo result = { + "OpAtomicStore", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpAtomicExchange: { + static InstructionInfo result = { + "OpAtomicExchange", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpAtomicCompareExchange: { + static InstructionInfo result = { + "OpAtomicCompareExchange", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpAtomicCompareExchangeWeak: { + static InstructionInfo result = { + "OpAtomicCompareExchangeWeak", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpAtomicIIncrement: { + static InstructionInfo result = { + "OpAtomicIIncrement", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpAtomicIDecrement: { + static InstructionInfo result = { + "OpAtomicIDecrement", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpAtomicIAdd: { + static InstructionInfo result = { + "OpAtomicIAdd", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpAtomicISub: { + static InstructionInfo result = { + "OpAtomicISub", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpAtomicSMin: { + static InstructionInfo result = { + "OpAtomicSMin", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpAtomicUMin: { + static InstructionInfo result = { + "OpAtomicUMin", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpAtomicSMax: { + static InstructionInfo result = { + "OpAtomicSMax", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpAtomicUMax: { + static InstructionInfo result = { + "OpAtomicUMax", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpAtomicAnd: { + static InstructionInfo result = { + "OpAtomicAnd", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpAtomicOr: { + static InstructionInfo result = { + "OpAtomicOr", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpAtomicXor: { + static InstructionInfo result = { + "OpAtomicXor", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpPhi: { + static InstructionInfo result = { + "OpPhi", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpLoopMerge: { + static InstructionInfo result = {"OpLoopMerge", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpSelectionMerge: { + static InstructionInfo result = { + "OpSelectionMerge", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpLabel: { + static InstructionInfo result = { + "OpLabel", InstructionFlags::HasResult, {}}; + return &result; + } + case spv::Op::OpBranch: { + static InstructionInfo result = {"OpBranch", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpBranchConditional: { + static InstructionInfo result = { + "OpBranchConditional", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpSwitch: { + static InstructionInfo result = {"OpSwitch", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpKill: { + static InstructionInfo result = {"OpKill", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpReturn: { + static InstructionInfo result = {"OpReturn", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpReturnValue: { + static InstructionInfo result = { + "OpReturnValue", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpUnreachable: { + static InstructionInfo result = { + "OpUnreachable", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpLifetimeStart: { + static InstructionInfo result = { + "OpLifetimeStart", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpLifetimeStop: { + static InstructionInfo result = { + "OpLifetimeStop", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpGroupAsyncCopy: { + static InstructionInfo result = { + "OpGroupAsyncCopy", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupWaitEvents: { + static InstructionInfo result = { + "OpGroupWaitEvents", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpGroupAll: { + static InstructionInfo result = { + "OpGroupAll", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupAny: { + static InstructionInfo result = { + "OpGroupAny", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupBroadcast: { + static InstructionInfo result = { + "OpGroupBroadcast", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupIAdd: { + static InstructionInfo result = { + "OpGroupIAdd", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupFAdd: { + static InstructionInfo result = { + "OpGroupFAdd", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupFMin: { + static InstructionInfo result = { + "OpGroupFMin", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupUMin: { + static InstructionInfo result = { + "OpGroupUMin", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupSMin: { + static InstructionInfo result = { + "OpGroupSMin", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupFMax: { + static InstructionInfo result = { + "OpGroupFMax", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupUMax: { + static InstructionInfo result = { + "OpGroupUMax", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupSMax: { + static InstructionInfo result = { + "OpGroupSMax", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpReadPipe: { + static InstructionInfo result = { + "OpReadPipe", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpWritePipe: { + static InstructionInfo result = { + "OpWritePipe", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpReservedReadPipe: { + static InstructionInfo result = { + "OpReservedReadPipe", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpReservedWritePipe: { + static InstructionInfo result = { + "OpReservedWritePipe", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpReserveReadPipePackets: { + static InstructionInfo result = { + "OpReserveReadPipePackets", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpReserveWritePipePackets: { + static InstructionInfo result = { + "OpReserveWritePipePackets", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpCommitReadPipe: { + static InstructionInfo result = { + "OpCommitReadPipe", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpCommitWritePipe: { + static InstructionInfo result = { + "OpCommitWritePipe", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpIsValidReserveId: { + static InstructionInfo result = { + "OpIsValidReserveId", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGetNumPipePackets: { + static InstructionInfo result = { + "OpGetNumPipePackets", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGetMaxPipePackets: { + static InstructionInfo result = { + "OpGetMaxPipePackets", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupReserveReadPipePackets: { + static InstructionInfo result = { + "OpGroupReserveReadPipePackets", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupReserveWritePipePackets: { + static InstructionInfo result = { + "OpGroupReserveWritePipePackets", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupCommitReadPipe: { + static InstructionInfo result = { + "OpGroupCommitReadPipe", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpGroupCommitWritePipe: { + static InstructionInfo result = { + "OpGroupCommitWritePipe", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpEnqueueMarker: { + static InstructionInfo result = { + "OpEnqueueMarker", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpEnqueueKernel: { + static InstructionInfo result = { + "OpEnqueueKernel", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGetKernelNDrangeSubGroupCount: { + static InstructionInfo result = { + "OpGetKernelNDrangeSubGroupCount", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGetKernelNDrangeMaxSubGroupSize: { + static InstructionInfo result = { + "OpGetKernelNDrangeMaxSubGroupSize", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGetKernelWorkGroupSize: { + static InstructionInfo result = { + "OpGetKernelWorkGroupSize", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGetKernelPreferredWorkGroupSizeMultiple: { + static InstructionInfo result = { + "OpGetKernelPreferredWorkGroupSizeMultiple", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpRetainEvent: { + static InstructionInfo result = { + "OpRetainEvent", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpReleaseEvent: { + static InstructionInfo result = { + "OpReleaseEvent", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpCreateUserEvent: { + static InstructionInfo result = { + "OpCreateUserEvent", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpIsValidEvent: { + static InstructionInfo result = { + "OpIsValidEvent", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSetUserEventStatus: { + static InstructionInfo result = { + "OpSetUserEventStatus", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpCaptureEventProfilingInfo: { + static InstructionInfo result = { + "OpCaptureEventProfilingInfo", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpGetDefaultQueue: { + static InstructionInfo result = { + "OpGetDefaultQueue", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpBuildNDRange: { + static InstructionInfo result = { + "OpBuildNDRange", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageSparseSampleImplicitLod: { + static InstructionInfo result = { + "OpImageSparseSampleImplicitLod", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageSparseSampleExplicitLod: { + static InstructionInfo result = { + "OpImageSparseSampleExplicitLod", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageSparseSampleDrefImplicitLod: { + static InstructionInfo result = { + "OpImageSparseSampleDrefImplicitLod", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageSparseSampleDrefExplicitLod: { + static InstructionInfo result = { + "OpImageSparseSampleDrefExplicitLod", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageSparseSampleProjImplicitLod: { + static InstructionInfo result = { + "OpImageSparseSampleProjImplicitLod", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageSparseSampleProjExplicitLod: { + static InstructionInfo result = { + "OpImageSparseSampleProjExplicitLod", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageSparseSampleProjDrefImplicitLod: { + static InstructionInfo result = { + "OpImageSparseSampleProjDrefImplicitLod", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageSparseSampleProjDrefExplicitLod: { + static InstructionInfo result = { + "OpImageSparseSampleProjDrefExplicitLod", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageSparseFetch: { + static InstructionInfo result = { + "OpImageSparseFetch", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageSparseGather: { + static InstructionInfo result = { + "OpImageSparseGather", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageSparseDrefGather: { + static InstructionInfo result = { + "OpImageSparseDrefGather", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpImageSparseTexelsResident: { + static InstructionInfo result = { + "OpImageSparseTexelsResident", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpNoLine: { + static InstructionInfo result = {"OpNoLine", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpAtomicFlagTestAndSet: { + static InstructionInfo result = { + "OpAtomicFlagTestAndSet", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpAtomicFlagClear: { + static InstructionInfo result = { + "OpAtomicFlagClear", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpImageSparseRead: { + static InstructionInfo result = { + "OpImageSparseRead", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpSizeOf: { + static InstructionInfo result = { + "OpSizeOf", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpTypePipeStorage: { + static InstructionInfo result = {"OpTypePipeStorage", + InstructionFlags::HasResult, + {}}; + return &result; + } + case spv::Op::OpConstantPipeStorage: { + static InstructionInfo result = { + "OpConstantPipeStorage", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpCreatePipeFromPipeStorage: { + static InstructionInfo result = { + "OpCreatePipeFromPipeStorage", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGetKernelLocalSizeForSubgroupCount: { + static InstructionInfo result = { + "OpGetKernelLocalSizeForSubgroupCount", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGetKernelMaxNumSubgroups: { + static InstructionInfo result = { + "OpGetKernelMaxNumSubgroups", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpTypeNamedBarrier: { + static InstructionInfo result = {"OpTypeNamedBarrier", + InstructionFlags::HasResult, + {}}; + return &result; + } + case spv::Op::OpNamedBarrierInitialize: { + static InstructionInfo result = { + "OpNamedBarrierInitialize", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpMemoryNamedBarrier: { + static InstructionInfo result = { + "OpMemoryNamedBarrier", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpModuleProcessed: { + static InstructionInfo result = { + "OpModuleProcessed", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpExecutionModeId: { + static InstructionInfo result = { + "OpExecutionModeId", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpDecorateId: { + static InstructionInfo result = { + "OpDecorateId", InstructionFlags::None, {}}; + return &result; + } + case spv::Op::OpGroupNonUniformElect: { + static InstructionInfo result = { + "OpGroupNonUniformElect", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformAll: { + static InstructionInfo result = { + "OpGroupNonUniformAll", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformAny: { + static InstructionInfo result = { + "OpGroupNonUniformAny", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformAllEqual: { + static InstructionInfo result = { + "OpGroupNonUniformAllEqual", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformBroadcast: { + static InstructionInfo result = { + "OpGroupNonUniformBroadcast", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformBroadcastFirst: { + static InstructionInfo result = { + "OpGroupNonUniformBroadcastFirst", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformBallot: { + static InstructionInfo result = { + "OpGroupNonUniformBallot", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformInverseBallot: { + static InstructionInfo result = { + "OpGroupNonUniformInverseBallot", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformBallotBitExtract: { + static InstructionInfo result = { + "OpGroupNonUniformBallotBitExtract", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformBallotBitCount: { + static InstructionInfo result = { + "OpGroupNonUniformBallotBitCount", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformBallotFindLSB: { + static InstructionInfo result = { + "OpGroupNonUniformBallotFindLSB", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformBallotFindMSB: { + static InstructionInfo result = { + "OpGroupNonUniformBallotFindMSB", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformShuffle: { + static InstructionInfo result = { + "OpGroupNonUniformShuffle", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformShuffleXor: { + static InstructionInfo result = { + "OpGroupNonUniformShuffleXor", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformShuffleUp: { + static InstructionInfo result = { + "OpGroupNonUniformShuffleUp", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformShuffleDown: { + static InstructionInfo result = { + "OpGroupNonUniformShuffleDown", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformIAdd: { + static InstructionInfo result = { + "OpGroupNonUniformIAdd", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformFAdd: { + static InstructionInfo result = { + "OpGroupNonUniformFAdd", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformIMul: { + static InstructionInfo result = { + "OpGroupNonUniformIMul", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformFMul: { + static InstructionInfo result = { + "OpGroupNonUniformFMul", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformSMin: { + static InstructionInfo result = { + "OpGroupNonUniformSMin", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformUMin: { + static InstructionInfo result = { + "OpGroupNonUniformUMin", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformFMin: { + static InstructionInfo result = { + "OpGroupNonUniformFMin", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformSMax: { + static InstructionInfo result = { + "OpGroupNonUniformSMax", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformUMax: { + static InstructionInfo result = { + "OpGroupNonUniformUMax", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformFMax: { + static InstructionInfo result = { + "OpGroupNonUniformFMax", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformBitwiseAnd: { + static InstructionInfo result = { + "OpGroupNonUniformBitwiseAnd", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformBitwiseOr: { + static InstructionInfo result = { + "OpGroupNonUniformBitwiseOr", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformBitwiseXor: { + static InstructionInfo result = { + "OpGroupNonUniformBitwiseXor", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformLogicalAnd: { + static InstructionInfo result = { + "OpGroupNonUniformLogicalAnd", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformLogicalOr: { + static InstructionInfo result = { + "OpGroupNonUniformLogicalOr", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformLogicalXor: { + static InstructionInfo result = { + "OpGroupNonUniformLogicalXor", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformQuadBroadcast: { + static InstructionInfo result = { + "OpGroupNonUniformQuadBroadcast", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpGroupNonUniformQuadSwap: { + static InstructionInfo result = { + "OpGroupNonUniformQuadSwap", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpCopyLogical: { + static InstructionInfo result = { + "OpCopyLogical", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpPtrEqual: { + static InstructionInfo result = { + "OpPtrEqual", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpPtrNotEqual: { + static InstructionInfo result = { + "OpPtrNotEqual", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + case spv::Op::OpPtrDiff: { + static InstructionInfo result = { + "OpPtrDiff", + InstructionFlags::HasResult | InstructionFlags::HasResultType, + {}}; + return &result; + } + } + + return nullptr; +} + +inline void dump(std::span range, + void (*printId)(std::uint32_t id) = nullptr) { + if (printId == nullptr) { + printId = [](uint32_t id) { std::printf("%%%u", id); }; + } + + while (!range.empty()) { + auto opWordCount = range[0]; + auto op = static_cast(opWordCount & spv::OpCodeMask); + auto wordCount = opWordCount >> spv::WordCountShift; + + if (range.size() < wordCount || wordCount == 0) { + std::printf("\n"); + + for (auto word : range) { + std::printf("%08x ", (unsigned)word); + } + + std::printf("\n"); + + break; + } + + auto info = getInstructionInfo(op); + + if (info == nullptr) { + std::printf("unknown instruction\n"); + range = range.subspan(wordCount); + continue; + } + + auto word = range.data() + 1; + auto wordEnd = range.data() + wordCount; + bool isFirst = true; + + if ((info->flags & InstructionFlags::HasResult) == + InstructionFlags::HasResult) { + std::uint32_t outputTypeId = 0; + + if ((info->flags & InstructionFlags::HasResultType) == + InstructionFlags::HasResultType) { + if (word < wordEnd) { + outputTypeId = *word++; + } + } + + std::uint32_t outputId = word < wordEnd ? *word++ : 0; + + printId(outputId); + if ((info->flags & InstructionFlags::HasResultType) == + InstructionFlags::HasResultType) { + std::printf(": "); + printId(outputTypeId); + } + + std::printf(" = "); + } + + std::printf("%s(", info->name); + + for (auto &op : std::span(info->operands)) { + if (op == OperandKind::Invalid) { + break; + } + + if (word >= wordEnd) { + if (op == OperandKind::VariadicWord || + op == OperandKind::VariadicId) { + break; + } + + std::printf("\n"); + break; + } + + auto currentWord = *word++; + + if (isFirst) { + isFirst = false; + } else { + std::printf(", "); + } + + if (op == OperandKind::VariadicId || + op == OperandKind::TypeId || op == OperandKind::ValueId) { + printId(currentWord); + } else if (op == OperandKind::Word || + op == OperandKind::VariadicWord) { + std::printf("%u", currentWord); + } else if (op == OperandKind::String) { + bool foundEnd = false; + while (true) { + if (reinterpret_cast(currentWord)[3] == '\0') { + foundEnd = true; + break; + } + + if (word >= wordEnd) { + break; + } + + currentWord = *word++; + } + + if (foundEnd) { + std::printf("'%s'", reinterpret_cast(word - 1)); + } else { + std::printf(""); + } + } else { + std::printf(""); + } + } + + while (word < wordEnd) { + if (isFirst) { + isFirst = false; + } else { + std::printf(", "); + } + + auto currentWord = *word++; + + std::printf("%u", currentWord); + } + + std::printf(")\n"); + range = range.subspan(wordCount); + } +} +} // namespace spirv \ No newline at end of file diff --git a/hw/amdgpu/lib/libspirv/include/spirv/spirv.hpp b/hw/amdgpu/lib/libspirv/include/spirv/spirv.hpp new file mode 100644 index 000000000..561c1a8d6 --- /dev/null +++ b/hw/amdgpu/lib/libspirv/include/spirv/spirv.hpp @@ -0,0 +1,2753 @@ +// Copyright (c) 2014-2020 The Khronos Group Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and/or associated documentation files (the "Materials"), +// to deal in the Materials without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Materials, and to permit persons to whom the +// Materials are furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Materials. +// +// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +// +// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +// IN THE MATERIALS. + +// This header is automatically generated by the same tool that creates +// the Binary Section of the SPIR-V specification. + +// Enumeration tokens for SPIR-V, in various styles: +// C, C++, C++11, JSON, Lua, Python, C#, D, Beef +// +// - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +// - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +// - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +// - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +// - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +// - C# will use enum classes in the Specification class located in the "Spv" namespace, +// e.g.: Spv.Specification.SourceLanguage.GLSL +// - D will have tokens under the "spv" module, e.g: spv.SourceLanguage.GLSL +// - Beef will use enum classes in the Specification class located in the "Spv" namespace, +// e.g.: Spv.Specification.SourceLanguage.GLSL +// +// Some tokens act like mask values, which can be OR'd together, +// while others are mutually exclusive. The mask-like ones have +// "Mask" in their name, and a parallel enum that has the shift +// amount (1 << x) for each corresponding enumerant. + +#ifndef spirv_HPP +#define spirv_HPP + +namespace spv { + +typedef unsigned int Id; + +#define SPV_VERSION 0x10600 +#define SPV_REVISION 1 + +static const unsigned int MagicNumber = 0x07230203; +static const unsigned int Version = 0x00010600; +static const unsigned int Revision = 1; +static const unsigned int OpCodeMask = 0xffff; +static const unsigned int WordCountShift = 16; + +enum class SourceLanguage : unsigned { + Unknown = 0, + ESSL = 1, + GLSL = 2, + OpenCL_C = 3, + OpenCL_CPP = 4, + HLSL = 5, + CPP_for_OpenCL = 6, + SYCL = 7, + HERO_C = 8, + Max = 0x7fffffff, +}; + +enum class ExecutionModel : unsigned { + Vertex = 0, + TessellationControl = 1, + TessellationEvaluation = 2, + Geometry = 3, + Fragment = 4, + GLCompute = 5, + Kernel = 6, + TaskNV = 5267, + MeshNV = 5268, + RayGenerationKHR = 5313, + RayGenerationNV = 5313, + IntersectionKHR = 5314, + IntersectionNV = 5314, + AnyHitKHR = 5315, + AnyHitNV = 5315, + ClosestHitKHR = 5316, + ClosestHitNV = 5316, + MissKHR = 5317, + MissNV = 5317, + CallableKHR = 5318, + CallableNV = 5318, + TaskEXT = 5364, + MeshEXT = 5365, + Max = 0x7fffffff, +}; + +enum class AddressingModel : unsigned { + Logical = 0, + Physical32 = 1, + Physical64 = 2, + PhysicalStorageBuffer64 = 5348, + PhysicalStorageBuffer64EXT = 5348, + Max = 0x7fffffff, +}; + +enum class MemoryModel : unsigned { + Simple = 0, + GLSL450 = 1, + OpenCL = 2, + Vulkan = 3, + VulkanKHR = 3, + Max = 0x7fffffff, +}; + +enum class ExecutionMode : unsigned { + Invocations = 0, + SpacingEqual = 1, + SpacingFractionalEven = 2, + SpacingFractionalOdd = 3, + VertexOrderCw = 4, + VertexOrderCcw = 5, + PixelCenterInteger = 6, + OriginUpperLeft = 7, + OriginLowerLeft = 8, + EarlyFragmentTests = 9, + PointMode = 10, + Xfb = 11, + DepthReplacing = 12, + DepthGreater = 14, + DepthLess = 15, + DepthUnchanged = 16, + LocalSize = 17, + LocalSizeHint = 18, + InputPoints = 19, + InputLines = 20, + InputLinesAdjacency = 21, + Triangles = 22, + InputTrianglesAdjacency = 23, + Quads = 24, + Isolines = 25, + OutputVertices = 26, + OutputPoints = 27, + OutputLineStrip = 28, + OutputTriangleStrip = 29, + VecTypeHint = 30, + ContractionOff = 31, + Initializer = 33, + Finalizer = 34, + SubgroupSize = 35, + SubgroupsPerWorkgroup = 36, + SubgroupsPerWorkgroupId = 37, + LocalSizeId = 38, + LocalSizeHintId = 39, + NonCoherentColorAttachmentReadEXT = 4169, + NonCoherentDepthAttachmentReadEXT = 4170, + NonCoherentStencilAttachmentReadEXT = 4171, + SubgroupUniformControlFlowKHR = 4421, + PostDepthCoverage = 4446, + DenormPreserve = 4459, + DenormFlushToZero = 4460, + SignedZeroInfNanPreserve = 4461, + RoundingModeRTE = 4462, + RoundingModeRTZ = 4463, + EarlyAndLateFragmentTestsAMD = 5017, + StencilRefReplacingEXT = 5027, + StencilRefUnchangedFrontAMD = 5079, + StencilRefGreaterFrontAMD = 5080, + StencilRefLessFrontAMD = 5081, + StencilRefUnchangedBackAMD = 5082, + StencilRefGreaterBackAMD = 5083, + StencilRefLessBackAMD = 5084, + OutputLinesEXT = 5269, + OutputLinesNV = 5269, + OutputPrimitivesEXT = 5270, + OutputPrimitivesNV = 5270, + DerivativeGroupQuadsNV = 5289, + DerivativeGroupLinearNV = 5290, + OutputTrianglesEXT = 5298, + OutputTrianglesNV = 5298, + PixelInterlockOrderedEXT = 5366, + PixelInterlockUnorderedEXT = 5367, + SampleInterlockOrderedEXT = 5368, + SampleInterlockUnorderedEXT = 5369, + ShadingRateInterlockOrderedEXT = 5370, + ShadingRateInterlockUnorderedEXT = 5371, + SharedLocalMemorySizeINTEL = 5618, + RoundingModeRTPINTEL = 5620, + RoundingModeRTNINTEL = 5621, + FloatingPointModeALTINTEL = 5622, + FloatingPointModeIEEEINTEL = 5623, + MaxWorkgroupSizeINTEL = 5893, + MaxWorkDimINTEL = 5894, + NoGlobalOffsetINTEL = 5895, + NumSIMDWorkitemsINTEL = 5896, + SchedulerTargetFmaxMhzINTEL = 5903, + StreamingInterfaceINTEL = 6154, + RegisterMapInterfaceINTEL = 6160, + NamedBarrierCountINTEL = 6417, + Max = 0x7fffffff, +}; + +enum class StorageClass : unsigned { + UniformConstant = 0, + Input = 1, + Uniform = 2, + Output = 3, + Workgroup = 4, + CrossWorkgroup = 5, + Private = 6, + Function = 7, + Generic = 8, + PushConstant = 9, + AtomicCounter = 10, + Image = 11, + StorageBuffer = 12, + TileImageEXT = 4172, + CallableDataKHR = 5328, + CallableDataNV = 5328, + IncomingCallableDataKHR = 5329, + IncomingCallableDataNV = 5329, + RayPayloadKHR = 5338, + RayPayloadNV = 5338, + HitAttributeKHR = 5339, + HitAttributeNV = 5339, + IncomingRayPayloadKHR = 5342, + IncomingRayPayloadNV = 5342, + ShaderRecordBufferKHR = 5343, + ShaderRecordBufferNV = 5343, + PhysicalStorageBuffer = 5349, + PhysicalStorageBufferEXT = 5349, + HitObjectAttributeNV = 5385, + TaskPayloadWorkgroupEXT = 5402, + CodeSectionINTEL = 5605, + DeviceOnlyINTEL = 5936, + HostOnlyINTEL = 5937, + Max = 0x7fffffff, +}; + +enum class Dim : unsigned { + Dim1D = 0, + Dim2D = 1, + Dim3D = 2, + Cube = 3, + Rect = 4, + Buffer = 5, + SubpassData = 6, + TileImageDataEXT = 4173, + Max = 0x7fffffff, +}; + +enum class SamplerAddressingMode : unsigned { + None = 0, + ClampToEdge = 1, + Clamp = 2, + Repeat = 3, + RepeatMirrored = 4, + Max = 0x7fffffff, +}; + +enum class SamplerFilterMode : unsigned { + Nearest = 0, + Linear = 1, + Max = 0x7fffffff, +}; + +enum class ImageFormat : unsigned { + Unknown = 0, + Rgba32f = 1, + Rgba16f = 2, + R32f = 3, + Rgba8 = 4, + Rgba8Snorm = 5, + Rg32f = 6, + Rg16f = 7, + R11fG11fB10f = 8, + R16f = 9, + Rgba16 = 10, + Rgb10A2 = 11, + Rg16 = 12, + Rg8 = 13, + R16 = 14, + R8 = 15, + Rgba16Snorm = 16, + Rg16Snorm = 17, + Rg8Snorm = 18, + R16Snorm = 19, + R8Snorm = 20, + Rgba32i = 21, + Rgba16i = 22, + Rgba8i = 23, + R32i = 24, + Rg32i = 25, + Rg16i = 26, + Rg8i = 27, + R16i = 28, + R8i = 29, + Rgba32ui = 30, + Rgba16ui = 31, + Rgba8ui = 32, + R32ui = 33, + Rgb10a2ui = 34, + Rg32ui = 35, + Rg16ui = 36, + Rg8ui = 37, + R16ui = 38, + R8ui = 39, + R64ui = 40, + R64i = 41, + Max = 0x7fffffff, +}; + +enum class ImageChannelOrder : unsigned { + R = 0, + A = 1, + RG = 2, + RA = 3, + RGB = 4, + RGBA = 5, + BGRA = 6, + ARGB = 7, + Intensity = 8, + Luminance = 9, + Rx = 10, + RGx = 11, + RGBx = 12, + Depth = 13, + DepthStencil = 14, + sRGB = 15, + sRGBx = 16, + sRGBA = 17, + sBGRA = 18, + ABGR = 19, + Max = 0x7fffffff, +}; + +enum class ImageChannelDataType : unsigned { + SnormInt8 = 0, + SnormInt16 = 1, + UnormInt8 = 2, + UnormInt16 = 3, + UnormShort565 = 4, + UnormShort555 = 5, + UnormInt101010 = 6, + SignedInt8 = 7, + SignedInt16 = 8, + SignedInt32 = 9, + UnsignedInt8 = 10, + UnsignedInt16 = 11, + UnsignedInt32 = 12, + HalfFloat = 13, + Float = 14, + UnormInt24 = 15, + UnormInt101010_2 = 16, + Max = 0x7fffffff, +}; + +enum class ImageOperandsShift : unsigned { + Bias = 0, + Lod = 1, + Grad = 2, + ConstOffset = 3, + Offset = 4, + ConstOffsets = 5, + Sample = 6, + MinLod = 7, + MakeTexelAvailable = 8, + MakeTexelAvailableKHR = 8, + MakeTexelVisible = 9, + MakeTexelVisibleKHR = 9, + NonPrivateTexel = 10, + NonPrivateTexelKHR = 10, + VolatileTexel = 11, + VolatileTexelKHR = 11, + SignExtend = 12, + ZeroExtend = 13, + Nontemporal = 14, + Offsets = 16, + Max = 0x7fffffff, +}; + +enum class ImageOperandsMask : unsigned { + MaskNone = 0, + Bias = 0x00000001, + Lod = 0x00000002, + Grad = 0x00000004, + ConstOffset = 0x00000008, + Offset = 0x00000010, + ConstOffsets = 0x00000020, + Sample = 0x00000040, + MinLod = 0x00000080, + MakeTexelAvailable = 0x00000100, + MakeTexelAvailableKHR = 0x00000100, + MakeTexelVisible = 0x00000200, + MakeTexelVisibleKHR = 0x00000200, + NonPrivateTexel = 0x00000400, + NonPrivateTexelKHR = 0x00000400, + VolatileTexel = 0x00000800, + VolatileTexelKHR = 0x00000800, + SignExtend = 0x00001000, + ZeroExtend = 0x00002000, + Nontemporal = 0x00004000, + Offsets = 0x00010000, +}; + +enum class FPFastMathModeShift : unsigned { + NotNaN = 0, + NotInf = 1, + NSZ = 2, + AllowRecip = 3, + Fast = 4, + AllowContractFastINTEL = 16, + AllowReassocINTEL = 17, + Max = 0x7fffffff, +}; + +enum class FPFastMathModeMask : unsigned { + MaskNone = 0, + NotNaN = 0x00000001, + NotInf = 0x00000002, + NSZ = 0x00000004, + AllowRecip = 0x00000008, + Fast = 0x00000010, + AllowContractFastINTEL = 0x00010000, + AllowReassocINTEL = 0x00020000, +}; + +enum class FPRoundingMode : unsigned { + RTE = 0, + RTZ = 1, + RTP = 2, + RTN = 3, + Max = 0x7fffffff, +}; + +enum class LinkageType : unsigned { + Export = 0, + Import = 1, + LinkOnceODR = 2, + Max = 0x7fffffff, +}; + +enum class AccessQualifier : unsigned { + ReadOnly = 0, + WriteOnly = 1, + ReadWrite = 2, + Max = 0x7fffffff, +}; + +enum class FunctionParameterAttribute : unsigned { + Zext = 0, + Sext = 1, + ByVal = 2, + Sret = 3, + NoAlias = 4, + NoCapture = 5, + NoWrite = 6, + NoReadWrite = 7, + RuntimeAlignedINTEL = 5940, + Max = 0x7fffffff, +}; + +enum class Decoration : unsigned { + RelaxedPrecision = 0, + SpecId = 1, + Block = 2, + BufferBlock = 3, + RowMajor = 4, + ColMajor = 5, + ArrayStride = 6, + MatrixStride = 7, + GLSLShared = 8, + GLSLPacked = 9, + CPacked = 10, + BuiltIn = 11, + NoPerspective = 13, + Flat = 14, + Patch = 15, + Centroid = 16, + Sample = 17, + Invariant = 18, + Restrict = 19, + Aliased = 20, + Volatile = 21, + Constant = 22, + Coherent = 23, + NonWritable = 24, + NonReadable = 25, + Uniform = 26, + UniformId = 27, + SaturatedConversion = 28, + Stream = 29, + Location = 30, + Component = 31, + Index = 32, + Binding = 33, + DescriptorSet = 34, + Offset = 35, + XfbBuffer = 36, + XfbStride = 37, + FuncParamAttr = 38, + FPRoundingMode = 39, + FPFastMathMode = 40, + LinkageAttributes = 41, + NoContraction = 42, + InputAttachmentIndex = 43, + Alignment = 44, + MaxByteOffset = 45, + AlignmentId = 46, + MaxByteOffsetId = 47, + NoSignedWrap = 4469, + NoUnsignedWrap = 4470, + WeightTextureQCOM = 4487, + BlockMatchTextureQCOM = 4488, + ExplicitInterpAMD = 4999, + OverrideCoverageNV = 5248, + PassthroughNV = 5250, + ViewportRelativeNV = 5252, + SecondaryViewportRelativeNV = 5256, + PerPrimitiveEXT = 5271, + PerPrimitiveNV = 5271, + PerViewNV = 5272, + PerTaskNV = 5273, + PerVertexKHR = 5285, + PerVertexNV = 5285, + NonUniform = 5300, + NonUniformEXT = 5300, + RestrictPointer = 5355, + RestrictPointerEXT = 5355, + AliasedPointer = 5356, + AliasedPointerEXT = 5356, + HitObjectShaderRecordBufferNV = 5386, + BindlessSamplerNV = 5398, + BindlessImageNV = 5399, + BoundSamplerNV = 5400, + BoundImageNV = 5401, + SIMTCallINTEL = 5599, + ReferencedIndirectlyINTEL = 5602, + ClobberINTEL = 5607, + SideEffectsINTEL = 5608, + VectorComputeVariableINTEL = 5624, + FuncParamIOKindINTEL = 5625, + VectorComputeFunctionINTEL = 5626, + StackCallINTEL = 5627, + GlobalVariableOffsetINTEL = 5628, + CounterBuffer = 5634, + HlslCounterBufferGOOGLE = 5634, + HlslSemanticGOOGLE = 5635, + UserSemantic = 5635, + UserTypeGOOGLE = 5636, + FunctionRoundingModeINTEL = 5822, + FunctionDenormModeINTEL = 5823, + RegisterINTEL = 5825, + MemoryINTEL = 5826, + NumbanksINTEL = 5827, + BankwidthINTEL = 5828, + MaxPrivateCopiesINTEL = 5829, + SinglepumpINTEL = 5830, + DoublepumpINTEL = 5831, + MaxReplicatesINTEL = 5832, + SimpleDualPortINTEL = 5833, + MergeINTEL = 5834, + BankBitsINTEL = 5835, + ForcePow2DepthINTEL = 5836, + BurstCoalesceINTEL = 5899, + CacheSizeINTEL = 5900, + DontStaticallyCoalesceINTEL = 5901, + PrefetchINTEL = 5902, + StallEnableINTEL = 5905, + FuseLoopsInFunctionINTEL = 5907, + MathOpDSPModeINTEL = 5909, + AliasScopeINTEL = 5914, + NoAliasINTEL = 5915, + InitiationIntervalINTEL = 5917, + MaxConcurrencyINTEL = 5918, + PipelineEnableINTEL = 5919, + BufferLocationINTEL = 5921, + IOPipeStorageINTEL = 5944, + FunctionFloatingPointModeINTEL = 6080, + SingleElementVectorINTEL = 6085, + VectorComputeCallableFunctionINTEL = 6087, + MediaBlockIOINTEL = 6140, + LatencyControlLabelINTEL = 6172, + LatencyControlConstraintINTEL = 6173, + ConduitKernelArgumentINTEL = 6175, + RegisterMapKernelArgumentINTEL = 6176, + MMHostInterfaceAddressWidthINTEL = 6177, + MMHostInterfaceDataWidthINTEL = 6178, + MMHostInterfaceLatencyINTEL = 6179, + MMHostInterfaceReadWriteModeINTEL = 6180, + MMHostInterfaceMaxBurstINTEL = 6181, + MMHostInterfaceWaitRequestINTEL = 6182, + StableKernelArgumentINTEL = 6183, + Max = 0x7fffffff, +}; + +enum class BuiltIn : unsigned { + Position = 0, + PointSize = 1, + ClipDistance = 3, + CullDistance = 4, + VertexId = 5, + InstanceId = 6, + PrimitiveId = 7, + InvocationId = 8, + Layer = 9, + ViewportIndex = 10, + TessLevelOuter = 11, + TessLevelInner = 12, + TessCoord = 13, + PatchVertices = 14, + FragCoord = 15, + PointCoord = 16, + FrontFacing = 17, + SampleId = 18, + SamplePosition = 19, + SampleMask = 20, + FragDepth = 22, + HelperInvocation = 23, + NumWorkgroups = 24, + WorkgroupSize = 25, + WorkgroupId = 26, + LocalInvocationId = 27, + GlobalInvocationId = 28, + LocalInvocationIndex = 29, + WorkDim = 30, + GlobalSize = 31, + EnqueuedWorkgroupSize = 32, + GlobalOffset = 33, + GlobalLinearId = 34, + SubgroupSize = 36, + SubgroupMaxSize = 37, + NumSubgroups = 38, + NumEnqueuedSubgroups = 39, + SubgroupId = 40, + SubgroupLocalInvocationId = 41, + VertexIndex = 42, + InstanceIndex = 43, + CoreIDARM = 4160, + CoreCountARM = 4161, + CoreMaxIDARM = 4162, + WarpIDARM = 4163, + WarpMaxIDARM = 4164, + SubgroupEqMask = 4416, + SubgroupEqMaskKHR = 4416, + SubgroupGeMask = 4417, + SubgroupGeMaskKHR = 4417, + SubgroupGtMask = 4418, + SubgroupGtMaskKHR = 4418, + SubgroupLeMask = 4419, + SubgroupLeMaskKHR = 4419, + SubgroupLtMask = 4420, + SubgroupLtMaskKHR = 4420, + BaseVertex = 4424, + BaseInstance = 4425, + DrawIndex = 4426, + PrimitiveShadingRateKHR = 4432, + DeviceIndex = 4438, + ViewIndex = 4440, + ShadingRateKHR = 4444, + BaryCoordNoPerspAMD = 4992, + BaryCoordNoPerspCentroidAMD = 4993, + BaryCoordNoPerspSampleAMD = 4994, + BaryCoordSmoothAMD = 4995, + BaryCoordSmoothCentroidAMD = 4996, + BaryCoordSmoothSampleAMD = 4997, + BaryCoordPullModelAMD = 4998, + FragStencilRefEXT = 5014, + ViewportMaskNV = 5253, + SecondaryPositionNV = 5257, + SecondaryViewportMaskNV = 5258, + PositionPerViewNV = 5261, + ViewportMaskPerViewNV = 5262, + FullyCoveredEXT = 5264, + TaskCountNV = 5274, + PrimitiveCountNV = 5275, + PrimitiveIndicesNV = 5276, + ClipDistancePerViewNV = 5277, + CullDistancePerViewNV = 5278, + LayerPerViewNV = 5279, + MeshViewCountNV = 5280, + MeshViewIndicesNV = 5281, + BaryCoordKHR = 5286, + BaryCoordNV = 5286, + BaryCoordNoPerspKHR = 5287, + BaryCoordNoPerspNV = 5287, + FragSizeEXT = 5292, + FragmentSizeNV = 5292, + FragInvocationCountEXT = 5293, + InvocationsPerPixelNV = 5293, + PrimitivePointIndicesEXT = 5294, + PrimitiveLineIndicesEXT = 5295, + PrimitiveTriangleIndicesEXT = 5296, + CullPrimitiveEXT = 5299, + LaunchIdKHR = 5319, + LaunchIdNV = 5319, + LaunchSizeKHR = 5320, + LaunchSizeNV = 5320, + WorldRayOriginKHR = 5321, + WorldRayOriginNV = 5321, + WorldRayDirectionKHR = 5322, + WorldRayDirectionNV = 5322, + ObjectRayOriginKHR = 5323, + ObjectRayOriginNV = 5323, + ObjectRayDirectionKHR = 5324, + ObjectRayDirectionNV = 5324, + RayTminKHR = 5325, + RayTminNV = 5325, + RayTmaxKHR = 5326, + RayTmaxNV = 5326, + InstanceCustomIndexKHR = 5327, + InstanceCustomIndexNV = 5327, + ObjectToWorldKHR = 5330, + ObjectToWorldNV = 5330, + WorldToObjectKHR = 5331, + WorldToObjectNV = 5331, + HitTNV = 5332, + HitKindKHR = 5333, + HitKindNV = 5333, + CurrentRayTimeNV = 5334, + HitTriangleVertexPositionsKHR = 5335, + IncomingRayFlagsKHR = 5351, + IncomingRayFlagsNV = 5351, + RayGeometryIndexKHR = 5352, + WarpsPerSMNV = 5374, + SMCountNV = 5375, + WarpIDNV = 5376, + SMIDNV = 5377, + CullMaskKHR = 6021, + Max = 0x7fffffff, +}; + +enum class SelectionControlShift : unsigned { + Flatten = 0, + DontFlatten = 1, + Max = 0x7fffffff, +}; + +enum class SelectionControlMask : unsigned { + MaskNone = 0, + Flatten = 0x00000001, + DontFlatten = 0x00000002, +}; + +enum class LoopControlShift : unsigned { + Unroll = 0, + DontUnroll = 1, + DependencyInfinite = 2, + DependencyLength = 3, + MinIterations = 4, + MaxIterations = 5, + IterationMultiple = 6, + PeelCount = 7, + PartialCount = 8, + InitiationIntervalINTEL = 16, + MaxConcurrencyINTEL = 17, + DependencyArrayINTEL = 18, + PipelineEnableINTEL = 19, + LoopCoalesceINTEL = 20, + MaxInterleavingINTEL = 21, + SpeculatedIterationsINTEL = 22, + NoFusionINTEL = 23, + LoopCountINTEL = 24, + MaxReinvocationDelayINTEL = 25, + Max = 0x7fffffff, +}; + +enum class LoopControlMask : unsigned { + MaskNone = 0, + Unroll = 0x00000001, + DontUnroll = 0x00000002, + DependencyInfinite = 0x00000004, + DependencyLength = 0x00000008, + MinIterations = 0x00000010, + MaxIterations = 0x00000020, + IterationMultiple = 0x00000040, + PeelCount = 0x00000080, + PartialCount = 0x00000100, + InitiationIntervalINTEL = 0x00010000, + MaxConcurrencyINTEL = 0x00020000, + DependencyArrayINTEL = 0x00040000, + PipelineEnableINTEL = 0x00080000, + LoopCoalesceINTEL = 0x00100000, + MaxInterleavingINTEL = 0x00200000, + SpeculatedIterationsINTEL = 0x00400000, + NoFusionINTEL = 0x00800000, + LoopCountINTEL = 0x01000000, + MaxReinvocationDelayINTEL = 0x02000000, +}; + +enum class FunctionControlShift : unsigned { + Inline = 0, + DontInline = 1, + Pure = 2, + Const = 3, + OptNoneINTEL = 16, + Max = 0x7fffffff, +}; + +enum class FunctionControlMask : unsigned { + MaskNone = 0, + Inline = 0x00000001, + DontInline = 0x00000002, + Pure = 0x00000004, + Const = 0x00000008, + OptNoneINTEL = 0x00010000, +}; + +enum class MemorySemanticsShift : unsigned { + Acquire = 1, + Release = 2, + AcquireRelease = 3, + SequentiallyConsistent = 4, + UniformMemory = 6, + SubgroupMemory = 7, + WorkgroupMemory = 8, + CrossWorkgroupMemory = 9, + AtomicCounterMemory = 10, + ImageMemory = 11, + OutputMemory = 12, + OutputMemoryKHR = 12, + MakeAvailable = 13, + MakeAvailableKHR = 13, + MakeVisible = 14, + MakeVisibleKHR = 14, + Volatile = 15, + Max = 0x7fffffff, +}; + +enum class MemorySemanticsMask : unsigned { + MaskNone = 0, + Acquire = 0x00000002, + Release = 0x00000004, + AcquireRelease = 0x00000008, + SequentiallyConsistent = 0x00000010, + UniformMemory = 0x00000040, + SubgroupMemory = 0x00000080, + WorkgroupMemory = 0x00000100, + CrossWorkgroupMemory = 0x00000200, + AtomicCounterMemory = 0x00000400, + ImageMemory = 0x00000800, + OutputMemory = 0x00001000, + OutputMemoryKHR = 0x00001000, + MakeAvailable = 0x00002000, + MakeAvailableKHR = 0x00002000, + MakeVisible = 0x00004000, + MakeVisibleKHR = 0x00004000, + Volatile = 0x00008000, +}; + +enum class MemoryAccessShift : unsigned { + Volatile = 0, + Aligned = 1, + Nontemporal = 2, + MakePointerAvailable = 3, + MakePointerAvailableKHR = 3, + MakePointerVisible = 4, + MakePointerVisibleKHR = 4, + NonPrivatePointer = 5, + NonPrivatePointerKHR = 5, + AliasScopeINTELMask = 16, + NoAliasINTELMask = 17, + Max = 0x7fffffff, +}; + +enum class MemoryAccessMask : unsigned { + MaskNone = 0, + Volatile = 0x00000001, + Aligned = 0x00000002, + Nontemporal = 0x00000004, + MakePointerAvailable = 0x00000008, + MakePointerAvailableKHR = 0x00000008, + MakePointerVisible = 0x00000010, + MakePointerVisibleKHR = 0x00000010, + NonPrivatePointer = 0x00000020, + NonPrivatePointerKHR = 0x00000020, + AliasScopeINTELMask = 0x00010000, + NoAliasINTELMask = 0x00020000, +}; + +enum class Scope : unsigned { + CrossDevice = 0, + Device = 1, + Workgroup = 2, + Subgroup = 3, + Invocation = 4, + QueueFamily = 5, + QueueFamilyKHR = 5, + ShaderCallKHR = 6, + Max = 0x7fffffff, +}; + +enum class GroupOperation : unsigned { + Reduce = 0, + InclusiveScan = 1, + ExclusiveScan = 2, + ClusteredReduce = 3, + PartitionedReduceNV = 6, + PartitionedInclusiveScanNV = 7, + PartitionedExclusiveScanNV = 8, + Max = 0x7fffffff, +}; + +enum class KernelEnqueueFlags : unsigned { + NoWait = 0, + WaitKernel = 1, + WaitWorkGroup = 2, + Max = 0x7fffffff, +}; + +enum class KernelProfilingInfoShift : unsigned { + CmdExecTime = 0, + Max = 0x7fffffff, +}; + +enum class KernelProfilingInfoMask : unsigned { + MaskNone = 0, + CmdExecTime = 0x00000001, +}; + +enum class Capability : unsigned { + Matrix = 0, + Shader = 1, + Geometry = 2, + Tessellation = 3, + Addresses = 4, + Linkage = 5, + Kernel = 6, + Vector16 = 7, + Float16Buffer = 8, + Float16 = 9, + Float64 = 10, + Int64 = 11, + Int64Atomics = 12, + ImageBasic = 13, + ImageReadWrite = 14, + ImageMipmap = 15, + Pipes = 17, + Groups = 18, + DeviceEnqueue = 19, + LiteralSampler = 20, + AtomicStorage = 21, + Int16 = 22, + TessellationPointSize = 23, + GeometryPointSize = 24, + ImageGatherExtended = 25, + StorageImageMultisample = 27, + UniformBufferArrayDynamicIndexing = 28, + SampledImageArrayDynamicIndexing = 29, + StorageBufferArrayDynamicIndexing = 30, + StorageImageArrayDynamicIndexing = 31, + ClipDistance = 32, + CullDistance = 33, + ImageCubeArray = 34, + SampleRateShading = 35, + ImageRect = 36, + SampledRect = 37, + GenericPointer = 38, + Int8 = 39, + InputAttachment = 40, + SparseResidency = 41, + MinLod = 42, + Sampled1D = 43, + Image1D = 44, + SampledCubeArray = 45, + SampledBuffer = 46, + ImageBuffer = 47, + ImageMSArray = 48, + StorageImageExtendedFormats = 49, + ImageQuery = 50, + DerivativeControl = 51, + InterpolationFunction = 52, + TransformFeedback = 53, + GeometryStreams = 54, + StorageImageReadWithoutFormat = 55, + StorageImageWriteWithoutFormat = 56, + MultiViewport = 57, + SubgroupDispatch = 58, + NamedBarrier = 59, + PipeStorage = 60, + GroupNonUniform = 61, + GroupNonUniformVote = 62, + GroupNonUniformArithmetic = 63, + GroupNonUniformBallot = 64, + GroupNonUniformShuffle = 65, + GroupNonUniformShuffleRelative = 66, + GroupNonUniformClustered = 67, + GroupNonUniformQuad = 68, + ShaderLayer = 69, + ShaderViewportIndex = 70, + UniformDecoration = 71, + CoreBuiltinsARM = 4165, + TileImageColorReadAccessEXT = 4166, + TileImageDepthReadAccessEXT = 4167, + TileImageStencilReadAccessEXT = 4168, + FragmentShadingRateKHR = 4422, + SubgroupBallotKHR = 4423, + DrawParameters = 4427, + WorkgroupMemoryExplicitLayoutKHR = 4428, + WorkgroupMemoryExplicitLayout8BitAccessKHR = 4429, + WorkgroupMemoryExplicitLayout16BitAccessKHR = 4430, + SubgroupVoteKHR = 4431, + StorageBuffer16BitAccess = 4433, + StorageUniformBufferBlock16 = 4433, + StorageUniform16 = 4434, + UniformAndStorageBuffer16BitAccess = 4434, + StoragePushConstant16 = 4435, + StorageInputOutput16 = 4436, + DeviceGroup = 4437, + MultiView = 4439, + VariablePointersStorageBuffer = 4441, + VariablePointers = 4442, + AtomicStorageOps = 4445, + SampleMaskPostDepthCoverage = 4447, + StorageBuffer8BitAccess = 4448, + UniformAndStorageBuffer8BitAccess = 4449, + StoragePushConstant8 = 4450, + DenormPreserve = 4464, + DenormFlushToZero = 4465, + SignedZeroInfNanPreserve = 4466, + RoundingModeRTE = 4467, + RoundingModeRTZ = 4468, + RayQueryProvisionalKHR = 4471, + RayQueryKHR = 4472, + RayTraversalPrimitiveCullingKHR = 4478, + RayTracingKHR = 4479, + TextureSampleWeightedQCOM = 4484, + TextureBoxFilterQCOM = 4485, + TextureBlockMatchQCOM = 4486, + Float16ImageAMD = 5008, + ImageGatherBiasLodAMD = 5009, + FragmentMaskAMD = 5010, + StencilExportEXT = 5013, + ImageReadWriteLodAMD = 5015, + Int64ImageEXT = 5016, + ShaderClockKHR = 5055, + SampleMaskOverrideCoverageNV = 5249, + GeometryShaderPassthroughNV = 5251, + ShaderViewportIndexLayerEXT = 5254, + ShaderViewportIndexLayerNV = 5254, + ShaderViewportMaskNV = 5255, + ShaderStereoViewNV = 5259, + PerViewAttributesNV = 5260, + FragmentFullyCoveredEXT = 5265, + MeshShadingNV = 5266, + ImageFootprintNV = 5282, + MeshShadingEXT = 5283, + FragmentBarycentricKHR = 5284, + FragmentBarycentricNV = 5284, + ComputeDerivativeGroupQuadsNV = 5288, + FragmentDensityEXT = 5291, + ShadingRateNV = 5291, + GroupNonUniformPartitionedNV = 5297, + ShaderNonUniform = 5301, + ShaderNonUniformEXT = 5301, + RuntimeDescriptorArray = 5302, + RuntimeDescriptorArrayEXT = 5302, + InputAttachmentArrayDynamicIndexing = 5303, + InputAttachmentArrayDynamicIndexingEXT = 5303, + UniformTexelBufferArrayDynamicIndexing = 5304, + UniformTexelBufferArrayDynamicIndexingEXT = 5304, + StorageTexelBufferArrayDynamicIndexing = 5305, + StorageTexelBufferArrayDynamicIndexingEXT = 5305, + UniformBufferArrayNonUniformIndexing = 5306, + UniformBufferArrayNonUniformIndexingEXT = 5306, + SampledImageArrayNonUniformIndexing = 5307, + SampledImageArrayNonUniformIndexingEXT = 5307, + StorageBufferArrayNonUniformIndexing = 5308, + StorageBufferArrayNonUniformIndexingEXT = 5308, + StorageImageArrayNonUniformIndexing = 5309, + StorageImageArrayNonUniformIndexingEXT = 5309, + InputAttachmentArrayNonUniformIndexing = 5310, + InputAttachmentArrayNonUniformIndexingEXT = 5310, + UniformTexelBufferArrayNonUniformIndexing = 5311, + UniformTexelBufferArrayNonUniformIndexingEXT = 5311, + StorageTexelBufferArrayNonUniformIndexing = 5312, + StorageTexelBufferArrayNonUniformIndexingEXT = 5312, + RayTracingPositionFetchKHR = 5336, + RayTracingNV = 5340, + RayTracingMotionBlurNV = 5341, + VulkanMemoryModel = 5345, + VulkanMemoryModelKHR = 5345, + VulkanMemoryModelDeviceScope = 5346, + VulkanMemoryModelDeviceScopeKHR = 5346, + PhysicalStorageBufferAddresses = 5347, + PhysicalStorageBufferAddressesEXT = 5347, + ComputeDerivativeGroupLinearNV = 5350, + RayTracingProvisionalKHR = 5353, + CooperativeMatrixNV = 5357, + FragmentShaderSampleInterlockEXT = 5363, + FragmentShaderShadingRateInterlockEXT = 5372, + ShaderSMBuiltinsNV = 5373, + FragmentShaderPixelInterlockEXT = 5378, + DemoteToHelperInvocation = 5379, + DemoteToHelperInvocationEXT = 5379, + RayTracingOpacityMicromapEXT = 5381, + ShaderInvocationReorderNV = 5383, + BindlessTextureNV = 5390, + RayQueryPositionFetchKHR = 5391, + SubgroupShuffleINTEL = 5568, + SubgroupBufferBlockIOINTEL = 5569, + SubgroupImageBlockIOINTEL = 5570, + SubgroupImageMediaBlockIOINTEL = 5579, + RoundToInfinityINTEL = 5582, + FloatingPointModeINTEL = 5583, + IntegerFunctions2INTEL = 5584, + FunctionPointersINTEL = 5603, + IndirectReferencesINTEL = 5604, + AsmINTEL = 5606, + AtomicFloat32MinMaxEXT = 5612, + AtomicFloat64MinMaxEXT = 5613, + AtomicFloat16MinMaxEXT = 5616, + VectorComputeINTEL = 5617, + VectorAnyINTEL = 5619, + ExpectAssumeKHR = 5629, + SubgroupAvcMotionEstimationINTEL = 5696, + SubgroupAvcMotionEstimationIntraINTEL = 5697, + SubgroupAvcMotionEstimationChromaINTEL = 5698, + VariableLengthArrayINTEL = 5817, + FunctionFloatControlINTEL = 5821, + FPGAMemoryAttributesINTEL = 5824, + FPFastMathModeINTEL = 5837, + ArbitraryPrecisionIntegersINTEL = 5844, + ArbitraryPrecisionFloatingPointINTEL = 5845, + UnstructuredLoopControlsINTEL = 5886, + FPGALoopControlsINTEL = 5888, + KernelAttributesINTEL = 5892, + FPGAKernelAttributesINTEL = 5897, + FPGAMemoryAccessesINTEL = 5898, + FPGAClusterAttributesINTEL = 5904, + LoopFuseINTEL = 5906, + FPGADSPControlINTEL = 5908, + MemoryAccessAliasingINTEL = 5910, + FPGAInvocationPipeliningAttributesINTEL = 5916, + FPGABufferLocationINTEL = 5920, + ArbitraryPrecisionFixedPointINTEL = 5922, + USMStorageClassesINTEL = 5935, + RuntimeAlignedAttributeINTEL = 5939, + IOPipesINTEL = 5943, + BlockingPipesINTEL = 5945, + FPGARegINTEL = 5948, + DotProductInputAll = 6016, + DotProductInputAllKHR = 6016, + DotProductInput4x8Bit = 6017, + DotProductInput4x8BitKHR = 6017, + DotProductInput4x8BitPacked = 6018, + DotProductInput4x8BitPackedKHR = 6018, + DotProduct = 6019, + DotProductKHR = 6019, + RayCullMaskKHR = 6020, + BitInstructions = 6025, + GroupNonUniformRotateKHR = 6026, + AtomicFloat32AddEXT = 6033, + AtomicFloat64AddEXT = 6034, + LongConstantCompositeINTEL = 6089, + OptNoneINTEL = 6094, + AtomicFloat16AddEXT = 6095, + DebugInfoModuleINTEL = 6114, + BFloat16ConversionINTEL = 6115, + SplitBarrierINTEL = 6141, + FPGAKernelAttributesv2INTEL = 6161, + FPGALatencyControlINTEL = 6171, + FPGAArgumentInterfacesINTEL = 6174, + GroupUniformArithmeticKHR = 6400, + Max = 0x7fffffff, +}; + +enum class RayFlagsShift : unsigned { + OpaqueKHR = 0, + NoOpaqueKHR = 1, + TerminateOnFirstHitKHR = 2, + SkipClosestHitShaderKHR = 3, + CullBackFacingTrianglesKHR = 4, + CullFrontFacingTrianglesKHR = 5, + CullOpaqueKHR = 6, + CullNoOpaqueKHR = 7, + SkipTrianglesKHR = 8, + SkipAABBsKHR = 9, + ForceOpacityMicromap2StateEXT = 10, + Max = 0x7fffffff, +}; + +enum class RayFlagsMask : unsigned { + MaskNone = 0, + OpaqueKHR = 0x00000001, + NoOpaqueKHR = 0x00000002, + TerminateOnFirstHitKHR = 0x00000004, + SkipClosestHitShaderKHR = 0x00000008, + CullBackFacingTrianglesKHR = 0x00000010, + CullFrontFacingTrianglesKHR = 0x00000020, + CullOpaqueKHR = 0x00000040, + CullNoOpaqueKHR = 0x00000080, + SkipTrianglesKHR = 0x00000100, + SkipAABBsKHR = 0x00000200, + ForceOpacityMicromap2StateEXT = 0x00000400, +}; + +enum class RayQueryIntersection : unsigned { + RayQueryCandidateIntersectionKHR = 0, + RayQueryCommittedIntersectionKHR = 1, + Max = 0x7fffffff, +}; + +enum class RayQueryCommittedIntersectionType : unsigned { + RayQueryCommittedIntersectionNoneKHR = 0, + RayQueryCommittedIntersectionTriangleKHR = 1, + RayQueryCommittedIntersectionGeneratedKHR = 2, + Max = 0x7fffffff, +}; + +enum class RayQueryCandidateIntersectionType : unsigned { + RayQueryCandidateIntersectionTriangleKHR = 0, + RayQueryCandidateIntersectionAABBKHR = 1, + Max = 0x7fffffff, +}; + +enum class FragmentShadingRateShift : unsigned { + Vertical2Pixels = 0, + Vertical4Pixels = 1, + Horizontal2Pixels = 2, + Horizontal4Pixels = 3, + Max = 0x7fffffff, +}; + +enum class FragmentShadingRateMask : unsigned { + MaskNone = 0, + Vertical2Pixels = 0x00000001, + Vertical4Pixels = 0x00000002, + Horizontal2Pixels = 0x00000004, + Horizontal4Pixels = 0x00000008, +}; + +enum class FPDenormMode : unsigned { + Preserve = 0, + FlushToZero = 1, + Max = 0x7fffffff, +}; + +enum class FPOperationMode : unsigned { + IEEE = 0, + ALT = 1, + Max = 0x7fffffff, +}; + +enum class QuantizationModes : unsigned { + TRN = 0, + TRN_ZERO = 1, + RND = 2, + RND_ZERO = 3, + RND_INF = 4, + RND_MIN_INF = 5, + RND_CONV = 6, + RND_CONV_ODD = 7, + Max = 0x7fffffff, +}; + +enum class OverflowModes : unsigned { + WRAP = 0, + SAT = 1, + SAT_ZERO = 2, + SAT_SYM = 3, + Max = 0x7fffffff, +}; + +enum class PackedVectorFormat : unsigned { + PackedVectorFormat4x8Bit = 0, + PackedVectorFormat4x8BitKHR = 0, + Max = 0x7fffffff, +}; + +enum class Op : unsigned { + OpNop = 0, + OpUndef = 1, + OpSourceContinued = 2, + OpSource = 3, + OpSourceExtension = 4, + OpName = 5, + OpMemberName = 6, + OpString = 7, + OpLine = 8, + OpExtension = 10, + OpExtInstImport = 11, + OpExtInst = 12, + OpMemoryModel = 14, + OpEntryPoint = 15, + OpExecutionMode = 16, + OpCapability = 17, + OpTypeVoid = 19, + OpTypeBool = 20, + OpTypeInt = 21, + OpTypeFloat = 22, + OpTypeVector = 23, + OpTypeMatrix = 24, + OpTypeImage = 25, + OpTypeSampler = 26, + OpTypeSampledImage = 27, + OpTypeArray = 28, + OpTypeRuntimeArray = 29, + OpTypeStruct = 30, + OpTypeOpaque = 31, + OpTypePointer = 32, + OpTypeFunction = 33, + OpTypeEvent = 34, + OpTypeDeviceEvent = 35, + OpTypeReserveId = 36, + OpTypeQueue = 37, + OpTypePipe = 38, + OpTypeForwardPointer = 39, + OpConstantTrue = 41, + OpConstantFalse = 42, + OpConstant = 43, + OpConstantComposite = 44, + OpConstantSampler = 45, + OpConstantNull = 46, + OpSpecConstantTrue = 48, + OpSpecConstantFalse = 49, + OpSpecConstant = 50, + OpSpecConstantComposite = 51, + OpSpecConstantOp = 52, + OpFunction = 54, + OpFunctionParameter = 55, + OpFunctionEnd = 56, + OpFunctionCall = 57, + OpVariable = 59, + OpImageTexelPointer = 60, + OpLoad = 61, + OpStore = 62, + OpCopyMemory = 63, + OpCopyMemorySized = 64, + OpAccessChain = 65, + OpInBoundsAccessChain = 66, + OpPtrAccessChain = 67, + OpArrayLength = 68, + OpGenericPtrMemSemantics = 69, + OpInBoundsPtrAccessChain = 70, + OpDecorate = 71, + OpMemberDecorate = 72, + OpDecorationGroup = 73, + OpGroupDecorate = 74, + OpGroupMemberDecorate = 75, + OpVectorExtractDynamic = 77, + OpVectorInsertDynamic = 78, + OpVectorShuffle = 79, + OpCompositeConstruct = 80, + OpCompositeExtract = 81, + OpCompositeInsert = 82, + OpCopyObject = 83, + OpTranspose = 84, + OpSampledImage = 86, + OpImageSampleImplicitLod = 87, + OpImageSampleExplicitLod = 88, + OpImageSampleDrefImplicitLod = 89, + OpImageSampleDrefExplicitLod = 90, + OpImageSampleProjImplicitLod = 91, + OpImageSampleProjExplicitLod = 92, + OpImageSampleProjDrefImplicitLod = 93, + OpImageSampleProjDrefExplicitLod = 94, + OpImageFetch = 95, + OpImageGather = 96, + OpImageDrefGather = 97, + OpImageRead = 98, + OpImageWrite = 99, + OpImage = 100, + OpImageQueryFormat = 101, + OpImageQueryOrder = 102, + OpImageQuerySizeLod = 103, + OpImageQuerySize = 104, + OpImageQueryLod = 105, + OpImageQueryLevels = 106, + OpImageQuerySamples = 107, + OpConvertFToU = 109, + OpConvertFToS = 110, + OpConvertSToF = 111, + OpConvertUToF = 112, + OpUConvert = 113, + OpSConvert = 114, + OpFConvert = 115, + OpQuantizeToF16 = 116, + OpConvertPtrToU = 117, + OpSatConvertSToU = 118, + OpSatConvertUToS = 119, + OpConvertUToPtr = 120, + OpPtrCastToGeneric = 121, + OpGenericCastToPtr = 122, + OpGenericCastToPtrExplicit = 123, + OpBitcast = 124, + OpSNegate = 126, + OpFNegate = 127, + OpIAdd = 128, + OpFAdd = 129, + OpISub = 130, + OpFSub = 131, + OpIMul = 132, + OpFMul = 133, + OpUDiv = 134, + OpSDiv = 135, + OpFDiv = 136, + OpUMod = 137, + OpSRem = 138, + OpSMod = 139, + OpFRem = 140, + OpFMod = 141, + OpVectorTimesScalar = 142, + OpMatrixTimesScalar = 143, + OpVectorTimesMatrix = 144, + OpMatrixTimesVector = 145, + OpMatrixTimesMatrix = 146, + OpOuterProduct = 147, + OpDot = 148, + OpIAddCarry = 149, + OpISubBorrow = 150, + OpUMulExtended = 151, + OpSMulExtended = 152, + OpAny = 154, + OpAll = 155, + OpIsNan = 156, + OpIsInf = 157, + OpIsFinite = 158, + OpIsNormal = 159, + OpSignBitSet = 160, + OpLessOrGreater = 161, + OpOrdered = 162, + OpUnordered = 163, + OpLogicalEqual = 164, + OpLogicalNotEqual = 165, + OpLogicalOr = 166, + OpLogicalAnd = 167, + OpLogicalNot = 168, + OpSelect = 169, + OpIEqual = 170, + OpINotEqual = 171, + OpUGreaterThan = 172, + OpSGreaterThan = 173, + OpUGreaterThanEqual = 174, + OpSGreaterThanEqual = 175, + OpULessThan = 176, + OpSLessThan = 177, + OpULessThanEqual = 178, + OpSLessThanEqual = 179, + OpFOrdEqual = 180, + OpFUnordEqual = 181, + OpFOrdNotEqual = 182, + OpFUnordNotEqual = 183, + OpFOrdLessThan = 184, + OpFUnordLessThan = 185, + OpFOrdGreaterThan = 186, + OpFUnordGreaterThan = 187, + OpFOrdLessThanEqual = 188, + OpFUnordLessThanEqual = 189, + OpFOrdGreaterThanEqual = 190, + OpFUnordGreaterThanEqual = 191, + OpShiftRightLogical = 194, + OpShiftRightArithmetic = 195, + OpShiftLeftLogical = 196, + OpBitwiseOr = 197, + OpBitwiseXor = 198, + OpBitwiseAnd = 199, + OpNot = 200, + OpBitFieldInsert = 201, + OpBitFieldSExtract = 202, + OpBitFieldUExtract = 203, + OpBitReverse = 204, + OpBitCount = 205, + OpDPdx = 207, + OpDPdy = 208, + OpFwidth = 209, + OpDPdxFine = 210, + OpDPdyFine = 211, + OpFwidthFine = 212, + OpDPdxCoarse = 213, + OpDPdyCoarse = 214, + OpFwidthCoarse = 215, + OpEmitVertex = 218, + OpEndPrimitive = 219, + OpEmitStreamVertex = 220, + OpEndStreamPrimitive = 221, + OpControlBarrier = 224, + OpMemoryBarrier = 225, + OpAtomicLoad = 227, + OpAtomicStore = 228, + OpAtomicExchange = 229, + OpAtomicCompareExchange = 230, + OpAtomicCompareExchangeWeak = 231, + OpAtomicIIncrement = 232, + OpAtomicIDecrement = 233, + OpAtomicIAdd = 234, + OpAtomicISub = 235, + OpAtomicSMin = 236, + OpAtomicUMin = 237, + OpAtomicSMax = 238, + OpAtomicUMax = 239, + OpAtomicAnd = 240, + OpAtomicOr = 241, + OpAtomicXor = 242, + OpPhi = 245, + OpLoopMerge = 246, + OpSelectionMerge = 247, + OpLabel = 248, + OpBranch = 249, + OpBranchConditional = 250, + OpSwitch = 251, + OpKill = 252, + OpReturn = 253, + OpReturnValue = 254, + OpUnreachable = 255, + OpLifetimeStart = 256, + OpLifetimeStop = 257, + OpGroupAsyncCopy = 259, + OpGroupWaitEvents = 260, + OpGroupAll = 261, + OpGroupAny = 262, + OpGroupBroadcast = 263, + OpGroupIAdd = 264, + OpGroupFAdd = 265, + OpGroupFMin = 266, + OpGroupUMin = 267, + OpGroupSMin = 268, + OpGroupFMax = 269, + OpGroupUMax = 270, + OpGroupSMax = 271, + OpReadPipe = 274, + OpWritePipe = 275, + OpReservedReadPipe = 276, + OpReservedWritePipe = 277, + OpReserveReadPipePackets = 278, + OpReserveWritePipePackets = 279, + OpCommitReadPipe = 280, + OpCommitWritePipe = 281, + OpIsValidReserveId = 282, + OpGetNumPipePackets = 283, + OpGetMaxPipePackets = 284, + OpGroupReserveReadPipePackets = 285, + OpGroupReserveWritePipePackets = 286, + OpGroupCommitReadPipe = 287, + OpGroupCommitWritePipe = 288, + OpEnqueueMarker = 291, + OpEnqueueKernel = 292, + OpGetKernelNDrangeSubGroupCount = 293, + OpGetKernelNDrangeMaxSubGroupSize = 294, + OpGetKernelWorkGroupSize = 295, + OpGetKernelPreferredWorkGroupSizeMultiple = 296, + OpRetainEvent = 297, + OpReleaseEvent = 298, + OpCreateUserEvent = 299, + OpIsValidEvent = 300, + OpSetUserEventStatus = 301, + OpCaptureEventProfilingInfo = 302, + OpGetDefaultQueue = 303, + OpBuildNDRange = 304, + OpImageSparseSampleImplicitLod = 305, + OpImageSparseSampleExplicitLod = 306, + OpImageSparseSampleDrefImplicitLod = 307, + OpImageSparseSampleDrefExplicitLod = 308, + OpImageSparseSampleProjImplicitLod = 309, + OpImageSparseSampleProjExplicitLod = 310, + OpImageSparseSampleProjDrefImplicitLod = 311, + OpImageSparseSampleProjDrefExplicitLod = 312, + OpImageSparseFetch = 313, + OpImageSparseGather = 314, + OpImageSparseDrefGather = 315, + OpImageSparseTexelsResident = 316, + OpNoLine = 317, + OpAtomicFlagTestAndSet = 318, + OpAtomicFlagClear = 319, + OpImageSparseRead = 320, + OpSizeOf = 321, + OpTypePipeStorage = 322, + OpConstantPipeStorage = 323, + OpCreatePipeFromPipeStorage = 324, + OpGetKernelLocalSizeForSubgroupCount = 325, + OpGetKernelMaxNumSubgroups = 326, + OpTypeNamedBarrier = 327, + OpNamedBarrierInitialize = 328, + OpMemoryNamedBarrier = 329, + OpModuleProcessed = 330, + OpExecutionModeId = 331, + OpDecorateId = 332, + OpGroupNonUniformElect = 333, + OpGroupNonUniformAll = 334, + OpGroupNonUniformAny = 335, + OpGroupNonUniformAllEqual = 336, + OpGroupNonUniformBroadcast = 337, + OpGroupNonUniformBroadcastFirst = 338, + OpGroupNonUniformBallot = 339, + OpGroupNonUniformInverseBallot = 340, + OpGroupNonUniformBallotBitExtract = 341, + OpGroupNonUniformBallotBitCount = 342, + OpGroupNonUniformBallotFindLSB = 343, + OpGroupNonUniformBallotFindMSB = 344, + OpGroupNonUniformShuffle = 345, + OpGroupNonUniformShuffleXor = 346, + OpGroupNonUniformShuffleUp = 347, + OpGroupNonUniformShuffleDown = 348, + OpGroupNonUniformIAdd = 349, + OpGroupNonUniformFAdd = 350, + OpGroupNonUniformIMul = 351, + OpGroupNonUniformFMul = 352, + OpGroupNonUniformSMin = 353, + OpGroupNonUniformUMin = 354, + OpGroupNonUniformFMin = 355, + OpGroupNonUniformSMax = 356, + OpGroupNonUniformUMax = 357, + OpGroupNonUniformFMax = 358, + OpGroupNonUniformBitwiseAnd = 359, + OpGroupNonUniformBitwiseOr = 360, + OpGroupNonUniformBitwiseXor = 361, + OpGroupNonUniformLogicalAnd = 362, + OpGroupNonUniformLogicalOr = 363, + OpGroupNonUniformLogicalXor = 364, + OpGroupNonUniformQuadBroadcast = 365, + OpGroupNonUniformQuadSwap = 366, + OpCopyLogical = 400, + OpPtrEqual = 401, + OpPtrNotEqual = 402, + OpPtrDiff = 403, + OpColorAttachmentReadEXT = 4160, + OpDepthAttachmentReadEXT = 4161, + OpStencilAttachmentReadEXT = 4162, + OpTerminateInvocation = 4416, + OpSubgroupBallotKHR = 4421, + OpSubgroupFirstInvocationKHR = 4422, + OpSubgroupAllKHR = 4428, + OpSubgroupAnyKHR = 4429, + OpSubgroupAllEqualKHR = 4430, + OpGroupNonUniformRotateKHR = 4431, + OpSubgroupReadInvocationKHR = 4432, + OpTraceRayKHR = 4445, + OpExecuteCallableKHR = 4446, + OpConvertUToAccelerationStructureKHR = 4447, + OpIgnoreIntersectionKHR = 4448, + OpTerminateRayKHR = 4449, + OpSDot = 4450, + OpSDotKHR = 4450, + OpUDot = 4451, + OpUDotKHR = 4451, + OpSUDot = 4452, + OpSUDotKHR = 4452, + OpSDotAccSat = 4453, + OpSDotAccSatKHR = 4453, + OpUDotAccSat = 4454, + OpUDotAccSatKHR = 4454, + OpSUDotAccSat = 4455, + OpSUDotAccSatKHR = 4455, + OpTypeRayQueryKHR = 4472, + OpRayQueryInitializeKHR = 4473, + OpRayQueryTerminateKHR = 4474, + OpRayQueryGenerateIntersectionKHR = 4475, + OpRayQueryConfirmIntersectionKHR = 4476, + OpRayQueryProceedKHR = 4477, + OpRayQueryGetIntersectionTypeKHR = 4479, + OpImageSampleWeightedQCOM = 4480, + OpImageBoxFilterQCOM = 4481, + OpImageBlockMatchSSDQCOM = 4482, + OpImageBlockMatchSADQCOM = 4483, + OpGroupIAddNonUniformAMD = 5000, + OpGroupFAddNonUniformAMD = 5001, + OpGroupFMinNonUniformAMD = 5002, + OpGroupUMinNonUniformAMD = 5003, + OpGroupSMinNonUniformAMD = 5004, + OpGroupFMaxNonUniformAMD = 5005, + OpGroupUMaxNonUniformAMD = 5006, + OpGroupSMaxNonUniformAMD = 5007, + OpFragmentMaskFetchAMD = 5011, + OpFragmentFetchAMD = 5012, + OpReadClockKHR = 5056, + OpHitObjectRecordHitMotionNV = 5249, + OpHitObjectRecordHitWithIndexMotionNV = 5250, + OpHitObjectRecordMissMotionNV = 5251, + OpHitObjectGetWorldToObjectNV = 5252, + OpHitObjectGetObjectToWorldNV = 5253, + OpHitObjectGetObjectRayDirectionNV = 5254, + OpHitObjectGetObjectRayOriginNV = 5255, + OpHitObjectTraceRayMotionNV = 5256, + OpHitObjectGetShaderRecordBufferHandleNV = 5257, + OpHitObjectGetShaderBindingTableRecordIndexNV = 5258, + OpHitObjectRecordEmptyNV = 5259, + OpHitObjectTraceRayNV = 5260, + OpHitObjectRecordHitNV = 5261, + OpHitObjectRecordHitWithIndexNV = 5262, + OpHitObjectRecordMissNV = 5263, + OpHitObjectExecuteShaderNV = 5264, + OpHitObjectGetCurrentTimeNV = 5265, + OpHitObjectGetAttributesNV = 5266, + OpHitObjectGetHitKindNV = 5267, + OpHitObjectGetPrimitiveIndexNV = 5268, + OpHitObjectGetGeometryIndexNV = 5269, + OpHitObjectGetInstanceIdNV = 5270, + OpHitObjectGetInstanceCustomIndexNV = 5271, + OpHitObjectGetWorldRayDirectionNV = 5272, + OpHitObjectGetWorldRayOriginNV = 5273, + OpHitObjectGetRayTMaxNV = 5274, + OpHitObjectGetRayTMinNV = 5275, + OpHitObjectIsEmptyNV = 5276, + OpHitObjectIsHitNV = 5277, + OpHitObjectIsMissNV = 5278, + OpReorderThreadWithHitObjectNV = 5279, + OpReorderThreadWithHintNV = 5280, + OpTypeHitObjectNV = 5281, + OpImageSampleFootprintNV = 5283, + OpEmitMeshTasksEXT = 5294, + OpSetMeshOutputsEXT = 5295, + OpGroupNonUniformPartitionNV = 5296, + OpWritePackedPrimitiveIndices4x8NV = 5299, + OpReportIntersectionKHR = 5334, + OpReportIntersectionNV = 5334, + OpIgnoreIntersectionNV = 5335, + OpTerminateRayNV = 5336, + OpTraceNV = 5337, + OpTraceMotionNV = 5338, + OpTraceRayMotionNV = 5339, + OpRayQueryGetIntersectionTriangleVertexPositionsKHR = 5340, + OpTypeAccelerationStructureKHR = 5341, + OpTypeAccelerationStructureNV = 5341, + OpExecuteCallableNV = 5344, + OpTypeCooperativeMatrixNV = 5358, + OpCooperativeMatrixLoadNV = 5359, + OpCooperativeMatrixStoreNV = 5360, + OpCooperativeMatrixMulAddNV = 5361, + OpCooperativeMatrixLengthNV = 5362, + OpBeginInvocationInterlockEXT = 5364, + OpEndInvocationInterlockEXT = 5365, + OpDemoteToHelperInvocation = 5380, + OpDemoteToHelperInvocationEXT = 5380, + OpIsHelperInvocationEXT = 5381, + OpConvertUToImageNV = 5391, + OpConvertUToSamplerNV = 5392, + OpConvertImageToUNV = 5393, + OpConvertSamplerToUNV = 5394, + OpConvertUToSampledImageNV = 5395, + OpConvertSampledImageToUNV = 5396, + OpSamplerImageAddressingModeNV = 5397, + OpSubgroupShuffleINTEL = 5571, + OpSubgroupShuffleDownINTEL = 5572, + OpSubgroupShuffleUpINTEL = 5573, + OpSubgroupShuffleXorINTEL = 5574, + OpSubgroupBlockReadINTEL = 5575, + OpSubgroupBlockWriteINTEL = 5576, + OpSubgroupImageBlockReadINTEL = 5577, + OpSubgroupImageBlockWriteINTEL = 5578, + OpSubgroupImageMediaBlockReadINTEL = 5580, + OpSubgroupImageMediaBlockWriteINTEL = 5581, + OpUCountLeadingZerosINTEL = 5585, + OpUCountTrailingZerosINTEL = 5586, + OpAbsISubINTEL = 5587, + OpAbsUSubINTEL = 5588, + OpIAddSatINTEL = 5589, + OpUAddSatINTEL = 5590, + OpIAverageINTEL = 5591, + OpUAverageINTEL = 5592, + OpIAverageRoundedINTEL = 5593, + OpUAverageRoundedINTEL = 5594, + OpISubSatINTEL = 5595, + OpUSubSatINTEL = 5596, + OpIMul32x16INTEL = 5597, + OpUMul32x16INTEL = 5598, + OpConstantFunctionPointerINTEL = 5600, + OpFunctionPointerCallINTEL = 5601, + OpAsmTargetINTEL = 5609, + OpAsmINTEL = 5610, + OpAsmCallINTEL = 5611, + OpAtomicFMinEXT = 5614, + OpAtomicFMaxEXT = 5615, + OpAssumeTrueKHR = 5630, + OpExpectKHR = 5631, + OpDecorateString = 5632, + OpDecorateStringGOOGLE = 5632, + OpMemberDecorateString = 5633, + OpMemberDecorateStringGOOGLE = 5633, + OpVmeImageINTEL = 5699, + OpTypeVmeImageINTEL = 5700, + OpTypeAvcImePayloadINTEL = 5701, + OpTypeAvcRefPayloadINTEL = 5702, + OpTypeAvcSicPayloadINTEL = 5703, + OpTypeAvcMcePayloadINTEL = 5704, + OpTypeAvcMceResultINTEL = 5705, + OpTypeAvcImeResultINTEL = 5706, + OpTypeAvcImeResultSingleReferenceStreamoutINTEL = 5707, + OpTypeAvcImeResultDualReferenceStreamoutINTEL = 5708, + OpTypeAvcImeSingleReferenceStreaminINTEL = 5709, + OpTypeAvcImeDualReferenceStreaminINTEL = 5710, + OpTypeAvcRefResultINTEL = 5711, + OpTypeAvcSicResultINTEL = 5712, + OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL = 5713, + OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL = 5714, + OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL = 5715, + OpSubgroupAvcMceSetInterShapePenaltyINTEL = 5716, + OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL = 5717, + OpSubgroupAvcMceSetInterDirectionPenaltyINTEL = 5718, + OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL = 5719, + OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL = 5720, + OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL = 5721, + OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL = 5722, + OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL = 5723, + OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL = 5724, + OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL = 5725, + OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL = 5726, + OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL = 5727, + OpSubgroupAvcMceSetAcOnlyHaarINTEL = 5728, + OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL = 5729, + OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL = 5730, + OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL = 5731, + OpSubgroupAvcMceConvertToImePayloadINTEL = 5732, + OpSubgroupAvcMceConvertToImeResultINTEL = 5733, + OpSubgroupAvcMceConvertToRefPayloadINTEL = 5734, + OpSubgroupAvcMceConvertToRefResultINTEL = 5735, + OpSubgroupAvcMceConvertToSicPayloadINTEL = 5736, + OpSubgroupAvcMceConvertToSicResultINTEL = 5737, + OpSubgroupAvcMceGetMotionVectorsINTEL = 5738, + OpSubgroupAvcMceGetInterDistortionsINTEL = 5739, + OpSubgroupAvcMceGetBestInterDistortionsINTEL = 5740, + OpSubgroupAvcMceGetInterMajorShapeINTEL = 5741, + OpSubgroupAvcMceGetInterMinorShapeINTEL = 5742, + OpSubgroupAvcMceGetInterDirectionsINTEL = 5743, + OpSubgroupAvcMceGetInterMotionVectorCountINTEL = 5744, + OpSubgroupAvcMceGetInterReferenceIdsINTEL = 5745, + OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL = 5746, + OpSubgroupAvcImeInitializeINTEL = 5747, + OpSubgroupAvcImeSetSingleReferenceINTEL = 5748, + OpSubgroupAvcImeSetDualReferenceINTEL = 5749, + OpSubgroupAvcImeRefWindowSizeINTEL = 5750, + OpSubgroupAvcImeAdjustRefOffsetINTEL = 5751, + OpSubgroupAvcImeConvertToMcePayloadINTEL = 5752, + OpSubgroupAvcImeSetMaxMotionVectorCountINTEL = 5753, + OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL = 5754, + OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL = 5755, + OpSubgroupAvcImeSetWeightedSadINTEL = 5756, + OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL = 5757, + OpSubgroupAvcImeEvaluateWithDualReferenceINTEL = 5758, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL = 5759, + OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL = 5760, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL = 5761, + OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL = 5762, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL = 5763, + OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL = 5764, + OpSubgroupAvcImeConvertToMceResultINTEL = 5765, + OpSubgroupAvcImeGetSingleReferenceStreaminINTEL = 5766, + OpSubgroupAvcImeGetDualReferenceStreaminINTEL = 5767, + OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL = 5768, + OpSubgroupAvcImeStripDualReferenceStreamoutINTEL = 5769, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL = 5770, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL = 5771, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL = 5772, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL = 5773, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL = 5774, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL = 5775, + OpSubgroupAvcImeGetBorderReachedINTEL = 5776, + OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL = 5777, + OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL = 5778, + OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL = 5779, + OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL = 5780, + OpSubgroupAvcFmeInitializeINTEL = 5781, + OpSubgroupAvcBmeInitializeINTEL = 5782, + OpSubgroupAvcRefConvertToMcePayloadINTEL = 5783, + OpSubgroupAvcRefSetBidirectionalMixDisableINTEL = 5784, + OpSubgroupAvcRefSetBilinearFilterEnableINTEL = 5785, + OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL = 5786, + OpSubgroupAvcRefEvaluateWithDualReferenceINTEL = 5787, + OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL = 5788, + OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL = 5789, + OpSubgroupAvcRefConvertToMceResultINTEL = 5790, + OpSubgroupAvcSicInitializeINTEL = 5791, + OpSubgroupAvcSicConfigureSkcINTEL = 5792, + OpSubgroupAvcSicConfigureIpeLumaINTEL = 5793, + OpSubgroupAvcSicConfigureIpeLumaChromaINTEL = 5794, + OpSubgroupAvcSicGetMotionVectorMaskINTEL = 5795, + OpSubgroupAvcSicConvertToMcePayloadINTEL = 5796, + OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL = 5797, + OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL = 5798, + OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL = 5799, + OpSubgroupAvcSicSetBilinearFilterEnableINTEL = 5800, + OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL = 5801, + OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL = 5802, + OpSubgroupAvcSicEvaluateIpeINTEL = 5803, + OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL = 5804, + OpSubgroupAvcSicEvaluateWithDualReferenceINTEL = 5805, + OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL = 5806, + OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL = 5807, + OpSubgroupAvcSicConvertToMceResultINTEL = 5808, + OpSubgroupAvcSicGetIpeLumaShapeINTEL = 5809, + OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL = 5810, + OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL = 5811, + OpSubgroupAvcSicGetPackedIpeLumaModesINTEL = 5812, + OpSubgroupAvcSicGetIpeChromaModeINTEL = 5813, + OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL = 5814, + OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL = 5815, + OpSubgroupAvcSicGetInterRawSadsINTEL = 5816, + OpVariableLengthArrayINTEL = 5818, + OpSaveMemoryINTEL = 5819, + OpRestoreMemoryINTEL = 5820, + OpArbitraryFloatSinCosPiINTEL = 5840, + OpArbitraryFloatCastINTEL = 5841, + OpArbitraryFloatCastFromIntINTEL = 5842, + OpArbitraryFloatCastToIntINTEL = 5843, + OpArbitraryFloatAddINTEL = 5846, + OpArbitraryFloatSubINTEL = 5847, + OpArbitraryFloatMulINTEL = 5848, + OpArbitraryFloatDivINTEL = 5849, + OpArbitraryFloatGTINTEL = 5850, + OpArbitraryFloatGEINTEL = 5851, + OpArbitraryFloatLTINTEL = 5852, + OpArbitraryFloatLEINTEL = 5853, + OpArbitraryFloatEQINTEL = 5854, + OpArbitraryFloatRecipINTEL = 5855, + OpArbitraryFloatRSqrtINTEL = 5856, + OpArbitraryFloatCbrtINTEL = 5857, + OpArbitraryFloatHypotINTEL = 5858, + OpArbitraryFloatSqrtINTEL = 5859, + OpArbitraryFloatLogINTEL = 5860, + OpArbitraryFloatLog2INTEL = 5861, + OpArbitraryFloatLog10INTEL = 5862, + OpArbitraryFloatLog1pINTEL = 5863, + OpArbitraryFloatExpINTEL = 5864, + OpArbitraryFloatExp2INTEL = 5865, + OpArbitraryFloatExp10INTEL = 5866, + OpArbitraryFloatExpm1INTEL = 5867, + OpArbitraryFloatSinINTEL = 5868, + OpArbitraryFloatCosINTEL = 5869, + OpArbitraryFloatSinCosINTEL = 5870, + OpArbitraryFloatSinPiINTEL = 5871, + OpArbitraryFloatCosPiINTEL = 5872, + OpArbitraryFloatASinINTEL = 5873, + OpArbitraryFloatASinPiINTEL = 5874, + OpArbitraryFloatACosINTEL = 5875, + OpArbitraryFloatACosPiINTEL = 5876, + OpArbitraryFloatATanINTEL = 5877, + OpArbitraryFloatATanPiINTEL = 5878, + OpArbitraryFloatATan2INTEL = 5879, + OpArbitraryFloatPowINTEL = 5880, + OpArbitraryFloatPowRINTEL = 5881, + OpArbitraryFloatPowNINTEL = 5882, + OpLoopControlINTEL = 5887, + OpAliasDomainDeclINTEL = 5911, + OpAliasScopeDeclINTEL = 5912, + OpAliasScopeListDeclINTEL = 5913, + OpFixedSqrtINTEL = 5923, + OpFixedRecipINTEL = 5924, + OpFixedRsqrtINTEL = 5925, + OpFixedSinINTEL = 5926, + OpFixedCosINTEL = 5927, + OpFixedSinCosINTEL = 5928, + OpFixedSinPiINTEL = 5929, + OpFixedCosPiINTEL = 5930, + OpFixedSinCosPiINTEL = 5931, + OpFixedLogINTEL = 5932, + OpFixedExpINTEL = 5933, + OpPtrCastToCrossWorkgroupINTEL = 5934, + OpCrossWorkgroupCastToPtrINTEL = 5938, + OpReadPipeBlockingINTEL = 5946, + OpWritePipeBlockingINTEL = 5947, + OpFPGARegINTEL = 5949, + OpRayQueryGetRayTMinKHR = 6016, + OpRayQueryGetRayFlagsKHR = 6017, + OpRayQueryGetIntersectionTKHR = 6018, + OpRayQueryGetIntersectionInstanceCustomIndexKHR = 6019, + OpRayQueryGetIntersectionInstanceIdKHR = 6020, + OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR = 6021, + OpRayQueryGetIntersectionGeometryIndexKHR = 6022, + OpRayQueryGetIntersectionPrimitiveIndexKHR = 6023, + OpRayQueryGetIntersectionBarycentricsKHR = 6024, + OpRayQueryGetIntersectionFrontFaceKHR = 6025, + OpRayQueryGetIntersectionCandidateAABBOpaqueKHR = 6026, + OpRayQueryGetIntersectionObjectRayDirectionKHR = 6027, + OpRayQueryGetIntersectionObjectRayOriginKHR = 6028, + OpRayQueryGetWorldRayDirectionKHR = 6029, + OpRayQueryGetWorldRayOriginKHR = 6030, + OpRayQueryGetIntersectionObjectToWorldKHR = 6031, + OpRayQueryGetIntersectionWorldToObjectKHR = 6032, + OpAtomicFAddEXT = 6035, + OpTypeBufferSurfaceINTEL = 6086, + OpTypeStructContinuedINTEL = 6090, + OpConstantCompositeContinuedINTEL = 6091, + OpSpecConstantCompositeContinuedINTEL = 6092, + OpConvertFToBF16INTEL = 6116, + OpConvertBF16ToFINTEL = 6117, + OpControlBarrierArriveINTEL = 6142, + OpControlBarrierWaitINTEL = 6143, + OpGroupIMulKHR = 6401, + OpGroupFMulKHR = 6402, + OpGroupBitwiseAndKHR = 6403, + OpGroupBitwiseOrKHR = 6404, + OpGroupBitwiseXorKHR = 6405, + OpGroupLogicalAndKHR = 6406, + OpGroupLogicalOrKHR = 6407, + OpGroupLogicalXorKHR = 6408, + Max = 0x7fffffff, +}; + +#ifdef SPV_ENABLE_UTILITY_CODE +#ifndef __cplusplus +#include +#endif +inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { + *hasResult = *hasResultType = false; + switch (opcode) { + default: /* unknown opcode */ break; + case Op::OpNop: *hasResult = false; *hasResultType = false; break; + case Op::OpUndef: *hasResult = true; *hasResultType = true; break; + case Op::OpSourceContinued: *hasResult = false; *hasResultType = false; break; + case Op::OpSource: *hasResult = false; *hasResultType = false; break; + case Op::OpSourceExtension: *hasResult = false; *hasResultType = false; break; + case Op::OpName: *hasResult = false; *hasResultType = false; break; + case Op::OpMemberName: *hasResult = false; *hasResultType = false; break; + case Op::OpString: *hasResult = true; *hasResultType = false; break; + case Op::OpLine: *hasResult = false; *hasResultType = false; break; + case Op::OpExtension: *hasResult = false; *hasResultType = false; break; + case Op::OpExtInstImport: *hasResult = true; *hasResultType = false; break; + case Op::OpExtInst: *hasResult = true; *hasResultType = true; break; + case Op::OpMemoryModel: *hasResult = false; *hasResultType = false; break; + case Op::OpEntryPoint: *hasResult = false; *hasResultType = false; break; + case Op::OpExecutionMode: *hasResult = false; *hasResultType = false; break; + case Op::OpCapability: *hasResult = false; *hasResultType = false; break; + case Op::OpTypeVoid: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeBool: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeInt: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeFloat: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeVector: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeMatrix: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeImage: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeSampler: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeSampledImage: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeArray: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeRuntimeArray: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeStruct: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeOpaque: *hasResult = true; *hasResultType = false; break; + case Op::OpTypePointer: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeFunction: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeEvent: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeDeviceEvent: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeReserveId: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeQueue: *hasResult = true; *hasResultType = false; break; + case Op::OpTypePipe: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeForwardPointer: *hasResult = false; *hasResultType = false; break; + case Op::OpConstantTrue: *hasResult = true; *hasResultType = true; break; + case Op::OpConstantFalse: *hasResult = true; *hasResultType = true; break; + case Op::OpConstant: *hasResult = true; *hasResultType = true; break; + case Op::OpConstantComposite: *hasResult = true; *hasResultType = true; break; + case Op::OpConstantSampler: *hasResult = true; *hasResultType = true; break; + case Op::OpConstantNull: *hasResult = true; *hasResultType = true; break; + case Op::OpSpecConstantTrue: *hasResult = true; *hasResultType = true; break; + case Op::OpSpecConstantFalse: *hasResult = true; *hasResultType = true; break; + case Op::OpSpecConstant: *hasResult = true; *hasResultType = true; break; + case Op::OpSpecConstantComposite: *hasResult = true; *hasResultType = true; break; + case Op::OpSpecConstantOp: *hasResult = true; *hasResultType = true; break; + case Op::OpFunction: *hasResult = true; *hasResultType = true; break; + case Op::OpFunctionParameter: *hasResult = true; *hasResultType = true; break; + case Op::OpFunctionEnd: *hasResult = false; *hasResultType = false; break; + case Op::OpFunctionCall: *hasResult = true; *hasResultType = true; break; + case Op::OpVariable: *hasResult = true; *hasResultType = true; break; + case Op::OpImageTexelPointer: *hasResult = true; *hasResultType = true; break; + case Op::OpLoad: *hasResult = true; *hasResultType = true; break; + case Op::OpStore: *hasResult = false; *hasResultType = false; break; + case Op::OpCopyMemory: *hasResult = false; *hasResultType = false; break; + case Op::OpCopyMemorySized: *hasResult = false; *hasResultType = false; break; + case Op::OpAccessChain: *hasResult = true; *hasResultType = true; break; + case Op::OpInBoundsAccessChain: *hasResult = true; *hasResultType = true; break; + case Op::OpPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case Op::OpArrayLength: *hasResult = true; *hasResultType = true; break; + case Op::OpGenericPtrMemSemantics: *hasResult = true; *hasResultType = true; break; + case Op::OpInBoundsPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case Op::OpDecorate: *hasResult = false; *hasResultType = false; break; + case Op::OpMemberDecorate: *hasResult = false; *hasResultType = false; break; + case Op::OpDecorationGroup: *hasResult = true; *hasResultType = false; break; + case Op::OpGroupDecorate: *hasResult = false; *hasResultType = false; break; + case Op::OpGroupMemberDecorate: *hasResult = false; *hasResultType = false; break; + case Op::OpVectorExtractDynamic: *hasResult = true; *hasResultType = true; break; + case Op::OpVectorInsertDynamic: *hasResult = true; *hasResultType = true; break; + case Op::OpVectorShuffle: *hasResult = true; *hasResultType = true; break; + case Op::OpCompositeConstruct: *hasResult = true; *hasResultType = true; break; + case Op::OpCompositeExtract: *hasResult = true; *hasResultType = true; break; + case Op::OpCompositeInsert: *hasResult = true; *hasResultType = true; break; + case Op::OpCopyObject: *hasResult = true; *hasResultType = true; break; + case Op::OpTranspose: *hasResult = true; *hasResultType = true; break; + case Op::OpSampledImage: *hasResult = true; *hasResultType = true; break; + case Op::OpImageSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case Op::OpImageSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case Op::OpImageSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case Op::OpImageSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case Op::OpImageSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case Op::OpImageSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case Op::OpImageSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case Op::OpImageSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case Op::OpImageFetch: *hasResult = true; *hasResultType = true; break; + case Op::OpImageGather: *hasResult = true; *hasResultType = true; break; + case Op::OpImageDrefGather: *hasResult = true; *hasResultType = true; break; + case Op::OpImageRead: *hasResult = true; *hasResultType = true; break; + case Op::OpImageWrite: *hasResult = false; *hasResultType = false; break; + case Op::OpImage: *hasResult = true; *hasResultType = true; break; + case Op::OpImageQueryFormat: *hasResult = true; *hasResultType = true; break; + case Op::OpImageQueryOrder: *hasResult = true; *hasResultType = true; break; + case Op::OpImageQuerySizeLod: *hasResult = true; *hasResultType = true; break; + case Op::OpImageQuerySize: *hasResult = true; *hasResultType = true; break; + case Op::OpImageQueryLod: *hasResult = true; *hasResultType = true; break; + case Op::OpImageQueryLevels: *hasResult = true; *hasResultType = true; break; + case Op::OpImageQuerySamples: *hasResult = true; *hasResultType = true; break; + case Op::OpConvertFToU: *hasResult = true; *hasResultType = true; break; + case Op::OpConvertFToS: *hasResult = true; *hasResultType = true; break; + case Op::OpConvertSToF: *hasResult = true; *hasResultType = true; break; + case Op::OpConvertUToF: *hasResult = true; *hasResultType = true; break; + case Op::OpUConvert: *hasResult = true; *hasResultType = true; break; + case Op::OpSConvert: *hasResult = true; *hasResultType = true; break; + case Op::OpFConvert: *hasResult = true; *hasResultType = true; break; + case Op::OpQuantizeToF16: *hasResult = true; *hasResultType = true; break; + case Op::OpConvertPtrToU: *hasResult = true; *hasResultType = true; break; + case Op::OpSatConvertSToU: *hasResult = true; *hasResultType = true; break; + case Op::OpSatConvertUToS: *hasResult = true; *hasResultType = true; break; + case Op::OpConvertUToPtr: *hasResult = true; *hasResultType = true; break; + case Op::OpPtrCastToGeneric: *hasResult = true; *hasResultType = true; break; + case Op::OpGenericCastToPtr: *hasResult = true; *hasResultType = true; break; + case Op::OpGenericCastToPtrExplicit: *hasResult = true; *hasResultType = true; break; + case Op::OpBitcast: *hasResult = true; *hasResultType = true; break; + case Op::OpSNegate: *hasResult = true; *hasResultType = true; break; + case Op::OpFNegate: *hasResult = true; *hasResultType = true; break; + case Op::OpIAdd: *hasResult = true; *hasResultType = true; break; + case Op::OpFAdd: *hasResult = true; *hasResultType = true; break; + case Op::OpISub: *hasResult = true; *hasResultType = true; break; + case Op::OpFSub: *hasResult = true; *hasResultType = true; break; + case Op::OpIMul: *hasResult = true; *hasResultType = true; break; + case Op::OpFMul: *hasResult = true; *hasResultType = true; break; + case Op::OpUDiv: *hasResult = true; *hasResultType = true; break; + case Op::OpSDiv: *hasResult = true; *hasResultType = true; break; + case Op::OpFDiv: *hasResult = true; *hasResultType = true; break; + case Op::OpUMod: *hasResult = true; *hasResultType = true; break; + case Op::OpSRem: *hasResult = true; *hasResultType = true; break; + case Op::OpSMod: *hasResult = true; *hasResultType = true; break; + case Op::OpFRem: *hasResult = true; *hasResultType = true; break; + case Op::OpFMod: *hasResult = true; *hasResultType = true; break; + case Op::OpVectorTimesScalar: *hasResult = true; *hasResultType = true; break; + case Op::OpMatrixTimesScalar: *hasResult = true; *hasResultType = true; break; + case Op::OpVectorTimesMatrix: *hasResult = true; *hasResultType = true; break; + case Op::OpMatrixTimesVector: *hasResult = true; *hasResultType = true; break; + case Op::OpMatrixTimesMatrix: *hasResult = true; *hasResultType = true; break; + case Op::OpOuterProduct: *hasResult = true; *hasResultType = true; break; + case Op::OpDot: *hasResult = true; *hasResultType = true; break; + case Op::OpIAddCarry: *hasResult = true; *hasResultType = true; break; + case Op::OpISubBorrow: *hasResult = true; *hasResultType = true; break; + case Op::OpUMulExtended: *hasResult = true; *hasResultType = true; break; + case Op::OpSMulExtended: *hasResult = true; *hasResultType = true; break; + case Op::OpAny: *hasResult = true; *hasResultType = true; break; + case Op::OpAll: *hasResult = true; *hasResultType = true; break; + case Op::OpIsNan: *hasResult = true; *hasResultType = true; break; + case Op::OpIsInf: *hasResult = true; *hasResultType = true; break; + case Op::OpIsFinite: *hasResult = true; *hasResultType = true; break; + case Op::OpIsNormal: *hasResult = true; *hasResultType = true; break; + case Op::OpSignBitSet: *hasResult = true; *hasResultType = true; break; + case Op::OpLessOrGreater: *hasResult = true; *hasResultType = true; break; + case Op::OpOrdered: *hasResult = true; *hasResultType = true; break; + case Op::OpUnordered: *hasResult = true; *hasResultType = true; break; + case Op::OpLogicalEqual: *hasResult = true; *hasResultType = true; break; + case Op::OpLogicalNotEqual: *hasResult = true; *hasResultType = true; break; + case Op::OpLogicalOr: *hasResult = true; *hasResultType = true; break; + case Op::OpLogicalAnd: *hasResult = true; *hasResultType = true; break; + case Op::OpLogicalNot: *hasResult = true; *hasResultType = true; break; + case Op::OpSelect: *hasResult = true; *hasResultType = true; break; + case Op::OpIEqual: *hasResult = true; *hasResultType = true; break; + case Op::OpINotEqual: *hasResult = true; *hasResultType = true; break; + case Op::OpUGreaterThan: *hasResult = true; *hasResultType = true; break; + case Op::OpSGreaterThan: *hasResult = true; *hasResultType = true; break; + case Op::OpUGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case Op::OpSGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case Op::OpULessThan: *hasResult = true; *hasResultType = true; break; + case Op::OpSLessThan: *hasResult = true; *hasResultType = true; break; + case Op::OpULessThanEqual: *hasResult = true; *hasResultType = true; break; + case Op::OpSLessThanEqual: *hasResult = true; *hasResultType = true; break; + case Op::OpFOrdEqual: *hasResult = true; *hasResultType = true; break; + case Op::OpFUnordEqual: *hasResult = true; *hasResultType = true; break; + case Op::OpFOrdNotEqual: *hasResult = true; *hasResultType = true; break; + case Op::OpFUnordNotEqual: *hasResult = true; *hasResultType = true; break; + case Op::OpFOrdLessThan: *hasResult = true; *hasResultType = true; break; + case Op::OpFUnordLessThan: *hasResult = true; *hasResultType = true; break; + case Op::OpFOrdGreaterThan: *hasResult = true; *hasResultType = true; break; + case Op::OpFUnordGreaterThan: *hasResult = true; *hasResultType = true; break; + case Op::OpFOrdLessThanEqual: *hasResult = true; *hasResultType = true; break; + case Op::OpFUnordLessThanEqual: *hasResult = true; *hasResultType = true; break; + case Op::OpFOrdGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case Op::OpFUnordGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case Op::OpShiftRightLogical: *hasResult = true; *hasResultType = true; break; + case Op::OpShiftRightArithmetic: *hasResult = true; *hasResultType = true; break; + case Op::OpShiftLeftLogical: *hasResult = true; *hasResultType = true; break; + case Op::OpBitwiseOr: *hasResult = true; *hasResultType = true; break; + case Op::OpBitwiseXor: *hasResult = true; *hasResultType = true; break; + case Op::OpBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case Op::OpNot: *hasResult = true; *hasResultType = true; break; + case Op::OpBitFieldInsert: *hasResult = true; *hasResultType = true; break; + case Op::OpBitFieldSExtract: *hasResult = true; *hasResultType = true; break; + case Op::OpBitFieldUExtract: *hasResult = true; *hasResultType = true; break; + case Op::OpBitReverse: *hasResult = true; *hasResultType = true; break; + case Op::OpBitCount: *hasResult = true; *hasResultType = true; break; + case Op::OpDPdx: *hasResult = true; *hasResultType = true; break; + case Op::OpDPdy: *hasResult = true; *hasResultType = true; break; + case Op::OpFwidth: *hasResult = true; *hasResultType = true; break; + case Op::OpDPdxFine: *hasResult = true; *hasResultType = true; break; + case Op::OpDPdyFine: *hasResult = true; *hasResultType = true; break; + case Op::OpFwidthFine: *hasResult = true; *hasResultType = true; break; + case Op::OpDPdxCoarse: *hasResult = true; *hasResultType = true; break; + case Op::OpDPdyCoarse: *hasResult = true; *hasResultType = true; break; + case Op::OpFwidthCoarse: *hasResult = true; *hasResultType = true; break; + case Op::OpEmitVertex: *hasResult = false; *hasResultType = false; break; + case Op::OpEndPrimitive: *hasResult = false; *hasResultType = false; break; + case Op::OpEmitStreamVertex: *hasResult = false; *hasResultType = false; break; + case Op::OpEndStreamPrimitive: *hasResult = false; *hasResultType = false; break; + case Op::OpControlBarrier: *hasResult = false; *hasResultType = false; break; + case Op::OpMemoryBarrier: *hasResult = false; *hasResultType = false; break; + case Op::OpAtomicLoad: *hasResult = true; *hasResultType = true; break; + case Op::OpAtomicStore: *hasResult = false; *hasResultType = false; break; + case Op::OpAtomicExchange: *hasResult = true; *hasResultType = true; break; + case Op::OpAtomicCompareExchange: *hasResult = true; *hasResultType = true; break; + case Op::OpAtomicCompareExchangeWeak: *hasResult = true; *hasResultType = true; break; + case Op::OpAtomicIIncrement: *hasResult = true; *hasResultType = true; break; + case Op::OpAtomicIDecrement: *hasResult = true; *hasResultType = true; break; + case Op::OpAtomicIAdd: *hasResult = true; *hasResultType = true; break; + case Op::OpAtomicISub: *hasResult = true; *hasResultType = true; break; + case Op::OpAtomicSMin: *hasResult = true; *hasResultType = true; break; + case Op::OpAtomicUMin: *hasResult = true; *hasResultType = true; break; + case Op::OpAtomicSMax: *hasResult = true; *hasResultType = true; break; + case Op::OpAtomicUMax: *hasResult = true; *hasResultType = true; break; + case Op::OpAtomicAnd: *hasResult = true; *hasResultType = true; break; + case Op::OpAtomicOr: *hasResult = true; *hasResultType = true; break; + case Op::OpAtomicXor: *hasResult = true; *hasResultType = true; break; + case Op::OpPhi: *hasResult = true; *hasResultType = true; break; + case Op::OpLoopMerge: *hasResult = false; *hasResultType = false; break; + case Op::OpSelectionMerge: *hasResult = false; *hasResultType = false; break; + case Op::OpLabel: *hasResult = true; *hasResultType = false; break; + case Op::OpBranch: *hasResult = false; *hasResultType = false; break; + case Op::OpBranchConditional: *hasResult = false; *hasResultType = false; break; + case Op::OpSwitch: *hasResult = false; *hasResultType = false; break; + case Op::OpKill: *hasResult = false; *hasResultType = false; break; + case Op::OpReturn: *hasResult = false; *hasResultType = false; break; + case Op::OpReturnValue: *hasResult = false; *hasResultType = false; break; + case Op::OpUnreachable: *hasResult = false; *hasResultType = false; break; + case Op::OpLifetimeStart: *hasResult = false; *hasResultType = false; break; + case Op::OpLifetimeStop: *hasResult = false; *hasResultType = false; break; + case Op::OpGroupAsyncCopy: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupWaitEvents: *hasResult = false; *hasResultType = false; break; + case Op::OpGroupAll: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupAny: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupBroadcast: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupIAdd: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupFAdd: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupFMin: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupUMin: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupSMin: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupFMax: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupUMax: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupSMax: *hasResult = true; *hasResultType = true; break; + case Op::OpReadPipe: *hasResult = true; *hasResultType = true; break; + case Op::OpWritePipe: *hasResult = true; *hasResultType = true; break; + case Op::OpReservedReadPipe: *hasResult = true; *hasResultType = true; break; + case Op::OpReservedWritePipe: *hasResult = true; *hasResultType = true; break; + case Op::OpReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case Op::OpReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case Op::OpCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case Op::OpCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case Op::OpIsValidReserveId: *hasResult = true; *hasResultType = true; break; + case Op::OpGetNumPipePackets: *hasResult = true; *hasResultType = true; break; + case Op::OpGetMaxPipePackets: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case Op::OpGroupCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case Op::OpEnqueueMarker: *hasResult = true; *hasResultType = true; break; + case Op::OpEnqueueKernel: *hasResult = true; *hasResultType = true; break; + case Op::OpGetKernelNDrangeSubGroupCount: *hasResult = true; *hasResultType = true; break; + case Op::OpGetKernelNDrangeMaxSubGroupSize: *hasResult = true; *hasResultType = true; break; + case Op::OpGetKernelWorkGroupSize: *hasResult = true; *hasResultType = true; break; + case Op::OpGetKernelPreferredWorkGroupSizeMultiple: *hasResult = true; *hasResultType = true; break; + case Op::OpRetainEvent: *hasResult = false; *hasResultType = false; break; + case Op::OpReleaseEvent: *hasResult = false; *hasResultType = false; break; + case Op::OpCreateUserEvent: *hasResult = true; *hasResultType = true; break; + case Op::OpIsValidEvent: *hasResult = true; *hasResultType = true; break; + case Op::OpSetUserEventStatus: *hasResult = false; *hasResultType = false; break; + case Op::OpCaptureEventProfilingInfo: *hasResult = false; *hasResultType = false; break; + case Op::OpGetDefaultQueue: *hasResult = true; *hasResultType = true; break; + case Op::OpBuildNDRange: *hasResult = true; *hasResultType = true; break; + case Op::OpImageSparseSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case Op::OpImageSparseSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case Op::OpImageSparseSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case Op::OpImageSparseSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case Op::OpImageSparseSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case Op::OpImageSparseSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case Op::OpImageSparseSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case Op::OpImageSparseSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case Op::OpImageSparseFetch: *hasResult = true; *hasResultType = true; break; + case Op::OpImageSparseGather: *hasResult = true; *hasResultType = true; break; + case Op::OpImageSparseDrefGather: *hasResult = true; *hasResultType = true; break; + case Op::OpImageSparseTexelsResident: *hasResult = true; *hasResultType = true; break; + case Op::OpNoLine: *hasResult = false; *hasResultType = false; break; + case Op::OpAtomicFlagTestAndSet: *hasResult = true; *hasResultType = true; break; + case Op::OpAtomicFlagClear: *hasResult = false; *hasResultType = false; break; + case Op::OpImageSparseRead: *hasResult = true; *hasResultType = true; break; + case Op::OpSizeOf: *hasResult = true; *hasResultType = true; break; + case Op::OpTypePipeStorage: *hasResult = true; *hasResultType = false; break; + case Op::OpConstantPipeStorage: *hasResult = true; *hasResultType = true; break; + case Op::OpCreatePipeFromPipeStorage: *hasResult = true; *hasResultType = true; break; + case Op::OpGetKernelLocalSizeForSubgroupCount: *hasResult = true; *hasResultType = true; break; + case Op::OpGetKernelMaxNumSubgroups: *hasResult = true; *hasResultType = true; break; + case Op::OpTypeNamedBarrier: *hasResult = true; *hasResultType = false; break; + case Op::OpNamedBarrierInitialize: *hasResult = true; *hasResultType = true; break; + case Op::OpMemoryNamedBarrier: *hasResult = false; *hasResultType = false; break; + case Op::OpModuleProcessed: *hasResult = false; *hasResultType = false; break; + case Op::OpExecutionModeId: *hasResult = false; *hasResultType = false; break; + case Op::OpDecorateId: *hasResult = false; *hasResultType = false; break; + case Op::OpGroupNonUniformElect: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformAll: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformAny: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformAllEqual: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformBroadcast: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformBroadcastFirst: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformBallot: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformInverseBallot: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformBallotBitExtract: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformBallotBitCount: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformBallotFindLSB: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformBallotFindMSB: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformShuffle: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformShuffleXor: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformShuffleUp: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformShuffleDown: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformIAdd: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformFAdd: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformIMul: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformFMul: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformSMin: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformUMin: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformFMin: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformSMax: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformUMax: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformFMax: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformBitwiseOr: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformBitwiseXor: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformLogicalAnd: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformLogicalOr: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformLogicalXor: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformQuadBroadcast: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformQuadSwap: *hasResult = true; *hasResultType = true; break; + case Op::OpCopyLogical: *hasResult = true; *hasResultType = true; break; + case Op::OpPtrEqual: *hasResult = true; *hasResultType = true; break; + case Op::OpPtrNotEqual: *hasResult = true; *hasResultType = true; break; + case Op::OpPtrDiff: *hasResult = true; *hasResultType = true; break; + case Op::OpColorAttachmentReadEXT: *hasResult = true; *hasResultType = true; break; + case Op::OpDepthAttachmentReadEXT: *hasResult = true; *hasResultType = true; break; + case Op::OpStencilAttachmentReadEXT: *hasResult = true; *hasResultType = true; break; + case Op::OpTerminateInvocation: *hasResult = false; *hasResultType = false; break; + case Op::OpSubgroupBallotKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupFirstInvocationKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupNonUniformRotateKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpTraceRayKHR: *hasResult = false; *hasResultType = false; break; + case Op::OpExecuteCallableKHR: *hasResult = false; *hasResultType = false; break; + case Op::OpConvertUToAccelerationStructureKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpIgnoreIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case Op::OpTerminateRayKHR: *hasResult = false; *hasResultType = false; break; + case Op::OpSDot: *hasResult = true; *hasResultType = true; break; + case Op::OpUDot: *hasResult = true; *hasResultType = true; break; + case Op::OpSUDot: *hasResult = true; *hasResultType = true; break; + case Op::OpSDotAccSat: *hasResult = true; *hasResultType = true; break; + case Op::OpUDotAccSat: *hasResult = true; *hasResultType = true; break; + case Op::OpSUDotAccSat: *hasResult = true; *hasResultType = true; break; + case Op::OpTypeRayQueryKHR: *hasResult = true; *hasResultType = false; break; + case Op::OpRayQueryInitializeKHR: *hasResult = false; *hasResultType = false; break; + case Op::OpRayQueryTerminateKHR: *hasResult = false; *hasResultType = false; break; + case Op::OpRayQueryGenerateIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case Op::OpRayQueryConfirmIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case Op::OpRayQueryProceedKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpRayQueryGetIntersectionTypeKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpImageSampleWeightedQCOM: *hasResult = true; *hasResultType = true; break; + case Op::OpImageBoxFilterQCOM: *hasResult = true; *hasResultType = true; break; + case Op::OpImageBlockMatchSSDQCOM: *hasResult = true; *hasResultType = true; break; + case Op::OpImageBlockMatchSADQCOM: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupUMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupSMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupFMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupUMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupSMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case Op::OpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break; + case Op::OpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break; + case Op::OpReadClockKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpHitObjectRecordHitMotionNV: *hasResult = false; *hasResultType = false; break; + case Op::OpHitObjectRecordHitWithIndexMotionNV: *hasResult = false; *hasResultType = false; break; + case Op::OpHitObjectRecordMissMotionNV: *hasResult = false; *hasResultType = false; break; + case Op::OpHitObjectGetWorldToObjectNV: *hasResult = true; *hasResultType = true; break; + case Op::OpHitObjectGetObjectToWorldNV: *hasResult = true; *hasResultType = true; break; + case Op::OpHitObjectGetObjectRayDirectionNV: *hasResult = true; *hasResultType = true; break; + case Op::OpHitObjectGetObjectRayOriginNV: *hasResult = true; *hasResultType = true; break; + case Op::OpHitObjectTraceRayMotionNV: *hasResult = false; *hasResultType = false; break; + case Op::OpHitObjectGetShaderRecordBufferHandleNV: *hasResult = true; *hasResultType = true; break; + case Op::OpHitObjectGetShaderBindingTableRecordIndexNV: *hasResult = true; *hasResultType = true; break; + case Op::OpHitObjectRecordEmptyNV: *hasResult = false; *hasResultType = false; break; + case Op::OpHitObjectTraceRayNV: *hasResult = false; *hasResultType = false; break; + case Op::OpHitObjectRecordHitNV: *hasResult = false; *hasResultType = false; break; + case Op::OpHitObjectRecordHitWithIndexNV: *hasResult = false; *hasResultType = false; break; + case Op::OpHitObjectRecordMissNV: *hasResult = false; *hasResultType = false; break; + case Op::OpHitObjectExecuteShaderNV: *hasResult = false; *hasResultType = false; break; + case Op::OpHitObjectGetCurrentTimeNV: *hasResult = true; *hasResultType = true; break; + case Op::OpHitObjectGetAttributesNV: *hasResult = false; *hasResultType = false; break; + case Op::OpHitObjectGetHitKindNV: *hasResult = true; *hasResultType = true; break; + case Op::OpHitObjectGetPrimitiveIndexNV: *hasResult = true; *hasResultType = true; break; + case Op::OpHitObjectGetGeometryIndexNV: *hasResult = true; *hasResultType = true; break; + case Op::OpHitObjectGetInstanceIdNV: *hasResult = true; *hasResultType = true; break; + case Op::OpHitObjectGetInstanceCustomIndexNV: *hasResult = true; *hasResultType = true; break; + case Op::OpHitObjectGetWorldRayDirectionNV: *hasResult = true; *hasResultType = true; break; + case Op::OpHitObjectGetWorldRayOriginNV: *hasResult = true; *hasResultType = true; break; + case Op::OpHitObjectGetRayTMaxNV: *hasResult = true; *hasResultType = true; break; + case Op::OpHitObjectGetRayTMinNV: *hasResult = true; *hasResultType = true; break; + case Op::OpHitObjectIsEmptyNV: *hasResult = true; *hasResultType = true; break; + case Op::OpHitObjectIsHitNV: *hasResult = true; *hasResultType = true; break; + case Op::OpHitObjectIsMissNV: *hasResult = true; *hasResultType = true; break; + case Op::OpReorderThreadWithHitObjectNV: *hasResult = false; *hasResultType = false; break; + case Op::OpReorderThreadWithHintNV: *hasResult = false; *hasResultType = false; break; + case Op::OpTypeHitObjectNV: *hasResult = true; *hasResultType = false; break; + case Op::OpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break; + case Op::OpEmitMeshTasksEXT: *hasResult = false; *hasResultType = false; break; + case Op::OpSetMeshOutputsEXT: *hasResult = false; *hasResultType = false; break; + case Op::OpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break; + case Op::OpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break; + case Op::OpReportIntersectionNV: *hasResult = true; *hasResultType = true; break; + case Op::OpIgnoreIntersectionNV: *hasResult = false; *hasResultType = false; break; + case Op::OpTerminateRayNV: *hasResult = false; *hasResultType = false; break; + case Op::OpTraceNV: *hasResult = false; *hasResultType = false; break; + case Op::OpTraceMotionNV: *hasResult = false; *hasResultType = false; break; + case Op::OpTraceRayMotionNV: *hasResult = false; *hasResultType = false; break; + case Op::OpRayQueryGetIntersectionTriangleVertexPositionsKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break; + case Op::OpExecuteCallableNV: *hasResult = false; *hasResultType = false; break; + case Op::OpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break; + case Op::OpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break; + case Op::OpCooperativeMatrixStoreNV: *hasResult = false; *hasResultType = false; break; + case Op::OpCooperativeMatrixMulAddNV: *hasResult = true; *hasResultType = true; break; + case Op::OpCooperativeMatrixLengthNV: *hasResult = true; *hasResultType = true; break; + case Op::OpBeginInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case Op::OpEndInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case Op::OpDemoteToHelperInvocation: *hasResult = false; *hasResultType = false; break; + case Op::OpIsHelperInvocationEXT: *hasResult = true; *hasResultType = true; break; + case Op::OpConvertUToImageNV: *hasResult = true; *hasResultType = true; break; + case Op::OpConvertUToSamplerNV: *hasResult = true; *hasResultType = true; break; + case Op::OpConvertImageToUNV: *hasResult = true; *hasResultType = true; break; + case Op::OpConvertSamplerToUNV: *hasResult = true; *hasResultType = true; break; + case Op::OpConvertUToSampledImageNV: *hasResult = true; *hasResultType = true; break; + case Op::OpConvertSampledImageToUNV: *hasResult = true; *hasResultType = true; break; + case Op::OpSamplerImageAddressingModeNV: *hasResult = false; *hasResultType = false; break; + case Op::OpSubgroupShuffleINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupShuffleDownINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupShuffleUpINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupShuffleXorINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case Op::OpSubgroupImageBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupImageBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case Op::OpSubgroupImageMediaBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupImageMediaBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case Op::OpUCountLeadingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpUCountTrailingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpAbsISubINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpAbsUSubINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpIAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpUAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpIAverageINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpUAverageINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpIAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpUAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpISubSatINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpUSubSatINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpIMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpUMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpConstantFunctionPointerINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpFunctionPointerCallINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpAsmTargetINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpAsmINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpAsmCallINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpAtomicFMinEXT: *hasResult = true; *hasResultType = true; break; + case Op::OpAtomicFMaxEXT: *hasResult = true; *hasResultType = true; break; + case Op::OpAssumeTrueKHR: *hasResult = false; *hasResultType = false; break; + case Op::OpExpectKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpDecorateString: *hasResult = false; *hasResultType = false; break; + case Op::OpMemberDecorateString: *hasResult = false; *hasResultType = false; break; + case Op::OpVmeImageINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpTypeVmeImageINTEL: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeAvcImePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeAvcRefPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeAvcSicPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeAvcMcePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeAvcMceResultINTEL: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeAvcImeResultINTEL: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeAvcImeResultSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeAvcImeResultDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeAvcImeSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeAvcImeDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeAvcRefResultINTEL: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeAvcSicResultINTEL: *hasResult = true; *hasResultType = false; break; + case Op::OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceSetInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceSetInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceSetAcOnlyHaarINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceConvertToImePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceConvertToImeResultINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceConvertToRefPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceConvertToRefResultINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceConvertToSicPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceConvertToSicResultINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceGetMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceGetInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceGetBestInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceGetInterMajorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceGetInterMinorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceGetInterDirectionsINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceGetInterMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceGetInterReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeSetSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeSetDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeRefWindowSizeINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeAdjustRefOffsetINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeSetMaxMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeSetWeightedSadINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeGetSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeGetDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeStripDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeGetBorderReachedINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcFmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcBmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcRefConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcRefSetBidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcRefSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcRefEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcRefConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicConfigureSkcINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicConfigureIpeLumaINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicConfigureIpeLumaChromaINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicGetMotionVectorMaskINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicEvaluateIpeINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicGetIpeLumaShapeINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicGetPackedIpeLumaModesINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicGetIpeChromaModeINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSubgroupAvcSicGetInterRawSadsINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpVariableLengthArrayINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpSaveMemoryINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpRestoreMemoryINTEL: *hasResult = false; *hasResultType = false; break; + case Op::OpArbitraryFloatSinCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatCastINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatCastFromIntINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatCastToIntINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatAddINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatSubINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatMulINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatDivINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatGTINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatGEINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatLTINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatLEINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatEQINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatRecipINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatRSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatCbrtINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatHypotINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatLogINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatLog2INTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatLog10INTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatLog1pINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatExpINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatExp2INTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatExp10INTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatExpm1INTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatSinINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatCosINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatSinCosINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatSinPiINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatASinINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatASinPiINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatACosINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatACosPiINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatATanINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatATanPiINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatATan2INTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatPowINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatPowRINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpArbitraryFloatPowNINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpLoopControlINTEL: *hasResult = false; *hasResultType = false; break; + case Op::OpAliasDomainDeclINTEL: *hasResult = true; *hasResultType = false; break; + case Op::OpAliasScopeDeclINTEL: *hasResult = true; *hasResultType = false; break; + case Op::OpAliasScopeListDeclINTEL: *hasResult = true; *hasResultType = false; break; + case Op::OpFixedSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpFixedRecipINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpFixedRsqrtINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpFixedSinINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpFixedCosINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpFixedSinCosINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpFixedSinPiINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpFixedCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpFixedSinCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpFixedLogINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpFixedExpINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpPtrCastToCrossWorkgroupINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpCrossWorkgroupCastToPtrINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpReadPipeBlockingINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpWritePipeBlockingINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpFPGARegINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpRayQueryGetRayTMinKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpRayQueryGetRayFlagsKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpRayQueryGetIntersectionTKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpRayQueryGetIntersectionInstanceCustomIndexKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpRayQueryGetIntersectionInstanceIdKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpRayQueryGetIntersectionGeometryIndexKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpRayQueryGetIntersectionPrimitiveIndexKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpRayQueryGetIntersectionBarycentricsKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpRayQueryGetIntersectionFrontFaceKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpRayQueryGetIntersectionObjectRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpRayQueryGetIntersectionObjectRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpRayQueryGetWorldRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpRayQueryGetWorldRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpRayQueryGetIntersectionObjectToWorldKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpRayQueryGetIntersectionWorldToObjectKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpAtomicFAddEXT: *hasResult = true; *hasResultType = true; break; + case Op::OpTypeBufferSurfaceINTEL: *hasResult = true; *hasResultType = false; break; + case Op::OpTypeStructContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case Op::OpConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case Op::OpSpecConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case Op::OpConvertFToBF16INTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpConvertBF16ToFINTEL: *hasResult = true; *hasResultType = true; break; + case Op::OpControlBarrierArriveINTEL: *hasResult = false; *hasResultType = false; break; + case Op::OpControlBarrierWaitINTEL: *hasResult = false; *hasResultType = false; break; + case Op::OpGroupIMulKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupFMulKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupBitwiseAndKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupBitwiseOrKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupBitwiseXorKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupLogicalAndKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupLogicalOrKHR: *hasResult = true; *hasResultType = true; break; + case Op::OpGroupLogicalXorKHR: *hasResult = true; *hasResultType = true; break; + } +} +#endif /* SPV_ENABLE_UTILITY_CODE */ + +// Overload bitwise operators for mask bit combining + +constexpr ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); } +constexpr ImageOperandsMask operator&(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) & unsigned(b)); } +constexpr ImageOperandsMask operator^(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) ^ unsigned(b)); } +constexpr ImageOperandsMask operator~(ImageOperandsMask a) { return ImageOperandsMask(~unsigned(a)); } +constexpr FPFastMathModeMask operator|(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) | unsigned(b)); } +constexpr FPFastMathModeMask operator&(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) & unsigned(b)); } +constexpr FPFastMathModeMask operator^(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) ^ unsigned(b)); } +constexpr FPFastMathModeMask operator~(FPFastMathModeMask a) { return FPFastMathModeMask(~unsigned(a)); } +constexpr SelectionControlMask operator|(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) | unsigned(b)); } +constexpr SelectionControlMask operator&(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) & unsigned(b)); } +constexpr SelectionControlMask operator^(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) ^ unsigned(b)); } +constexpr SelectionControlMask operator~(SelectionControlMask a) { return SelectionControlMask(~unsigned(a)); } +constexpr LoopControlMask operator|(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) | unsigned(b)); } +constexpr LoopControlMask operator&(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) & unsigned(b)); } +constexpr LoopControlMask operator^(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) ^ unsigned(b)); } +constexpr LoopControlMask operator~(LoopControlMask a) { return LoopControlMask(~unsigned(a)); } +constexpr FunctionControlMask operator|(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) | unsigned(b)); } +constexpr FunctionControlMask operator&(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) & unsigned(b)); } +constexpr FunctionControlMask operator^(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) ^ unsigned(b)); } +constexpr FunctionControlMask operator~(FunctionControlMask a) { return FunctionControlMask(~unsigned(a)); } +constexpr MemorySemanticsMask operator|(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) | unsigned(b)); } +constexpr MemorySemanticsMask operator&(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) & unsigned(b)); } +constexpr MemorySemanticsMask operator^(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) ^ unsigned(b)); } +constexpr MemorySemanticsMask operator~(MemorySemanticsMask a) { return MemorySemanticsMask(~unsigned(a)); } +constexpr MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) | unsigned(b)); } +constexpr MemoryAccessMask operator&(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) & unsigned(b)); } +constexpr MemoryAccessMask operator^(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) ^ unsigned(b)); } +constexpr MemoryAccessMask operator~(MemoryAccessMask a) { return MemoryAccessMask(~unsigned(a)); } +constexpr KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); } +constexpr KernelProfilingInfoMask operator&(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) & unsigned(b)); } +constexpr KernelProfilingInfoMask operator^(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) ^ unsigned(b)); } +constexpr KernelProfilingInfoMask operator~(KernelProfilingInfoMask a) { return KernelProfilingInfoMask(~unsigned(a)); } +constexpr RayFlagsMask operator|(RayFlagsMask a, RayFlagsMask b) { return RayFlagsMask(unsigned(a) | unsigned(b)); } +constexpr RayFlagsMask operator&(RayFlagsMask a, RayFlagsMask b) { return RayFlagsMask(unsigned(a) & unsigned(b)); } +constexpr RayFlagsMask operator^(RayFlagsMask a, RayFlagsMask b) { return RayFlagsMask(unsigned(a) ^ unsigned(b)); } +constexpr RayFlagsMask operator~(RayFlagsMask a) { return RayFlagsMask(~unsigned(a)); } +constexpr FragmentShadingRateMask operator|(FragmentShadingRateMask a, FragmentShadingRateMask b) { return FragmentShadingRateMask(unsigned(a) | unsigned(b)); } +constexpr FragmentShadingRateMask operator&(FragmentShadingRateMask a, FragmentShadingRateMask b) { return FragmentShadingRateMask(unsigned(a) & unsigned(b)); } +constexpr FragmentShadingRateMask operator^(FragmentShadingRateMask a, FragmentShadingRateMask b) { return FragmentShadingRateMask(unsigned(a) ^ unsigned(b)); } +constexpr FragmentShadingRateMask operator~(FragmentShadingRateMask a) { return FragmentShadingRateMask(~unsigned(a)); } + +} // end namespace spv + +#endif // #ifndef spirv_HPP + diff --git a/hw/amdgpu/shader/CMakeLists.txt b/hw/amdgpu/shader/CMakeLists.txt new file mode 100644 index 000000000..90c2ad1d9 --- /dev/null +++ b/hw/amdgpu/shader/CMakeLists.txt @@ -0,0 +1,22 @@ +project(libamdgpu-shader) +set(PROJECT_PATH amdgpu/shader) + +set(SRC + src/cf.cpp + src/scf.cpp + src/CfBuilder.cpp + src/Converter.cpp + src/ConverterContext.cpp + src/Fragment.cpp + src/Function.cpp + src/Instruction.cpp + src/RegisterState.cpp + src/TypeId.cpp +) + +add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC}) +target_link_libraries(${PROJECT_NAME} PUBLIC spirv amdgpu::base) +target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH}) +set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "") +add_library(amdgpu::shader ALIAS ${PROJECT_NAME}) +set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON) diff --git a/hw/amdgpu/shader/include/amdgpu/shader/AccessOp.hpp b/hw/amdgpu/shader/include/amdgpu/shader/AccessOp.hpp new file mode 100644 index 000000000..8557a54ae --- /dev/null +++ b/hw/amdgpu/shader/include/amdgpu/shader/AccessOp.hpp @@ -0,0 +1,18 @@ +#pragma once + +namespace amdgpu::shader { +enum class AccessOp { None = 0, Load = 1 << 0, Store = 1 << 1 }; + +constexpr AccessOp operator|(AccessOp lhs, AccessOp rhs) { + return static_cast(static_cast(lhs) | static_cast(rhs)); +} +constexpr AccessOp operator&(AccessOp lhs, AccessOp rhs) { + return static_cast(static_cast(lhs) & static_cast(rhs)); +} +constexpr AccessOp &operator|=(AccessOp &lhs, AccessOp rhs) { + return ((lhs = lhs | rhs)); +} +constexpr AccessOp &operator&=(AccessOp &lhs, AccessOp rhs) { + return ((lhs = lhs & rhs)); +} +} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/BufferKind.hpp b/hw/amdgpu/shader/include/amdgpu/shader/BufferKind.hpp new file mode 100644 index 000000000..09dc226e0 --- /dev/null +++ b/hw/amdgpu/shader/include/amdgpu/shader/BufferKind.hpp @@ -0,0 +1,5 @@ +#pragma once + +namespace amdgpu::shader { +enum class BufferKind { VBuffer, TBuffer }; +} diff --git a/hw/amdgpu/shader/include/amdgpu/shader/CfBuilder.hpp b/hw/amdgpu/shader/include/amdgpu/shader/CfBuilder.hpp new file mode 100644 index 000000000..92ad6093d --- /dev/null +++ b/hw/amdgpu/shader/include/amdgpu/shader/CfBuilder.hpp @@ -0,0 +1,8 @@ +#pragma once +#include "cf.hpp" +#include + +namespace amdgpu::shader { +cf::BasicBlock *buildCf(cf::Context &ctxt, RemoteMemory memory, + std::uint64_t entryPoint); +} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/Converter.hpp b/hw/amdgpu/shader/include/amdgpu/shader/Converter.hpp new file mode 100644 index 000000000..817eed622 --- /dev/null +++ b/hw/amdgpu/shader/include/amdgpu/shader/Converter.hpp @@ -0,0 +1,35 @@ +#pragma once + +#include "Stage.hpp" +#include "AccessOp.hpp" + +#include + +#include +#include +#include + +namespace amdgpu::shader { +struct Shader { + enum class UniformKind { + Buffer, + Sampler, + Image + }; + + struct UniformInfo { + std::uint32_t binding; + std::uint32_t buffer[8]; + UniformKind kind; + AccessOp accessOp; + }; + + std::vector uniforms; + std::vector spirv; +}; + +Shader convert(RemoteMemory memory, Stage stage, std::uint64_t entry, + std::span userSpgrs, int bindingOffset, + std::uint32_t dimX = 1, std::uint32_t dimY = 1, + std::uint32_t dimZ = 1); +} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/ConverterContext.hpp b/hw/amdgpu/shader/include/amdgpu/shader/ConverterContext.hpp new file mode 100644 index 000000000..c6707097b --- /dev/null +++ b/hw/amdgpu/shader/include/amdgpu/shader/ConverterContext.hpp @@ -0,0 +1,257 @@ +#pragma once + +#include "Fragment.hpp" +#include "Function.hpp" +#include "RegisterId.hpp" +#include "Stage.hpp" +#include "TypeId.hpp" +#include "Uniform.hpp" +#include "Value.hpp" +#include "scf.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace amdgpu::shader { +/* +struct MaterializedFunction { + spirv::Function function; + spirv::FunctionType type; + spirv::Type returnType; + + std::vector> args; + std::vector> results; +}; +*/ + +class ConverterContext { + Stage mStage; + RemoteMemory mMemory; + spirv::IdGenerator mGenerator; + spirv::SpirvBuilder mBuilder{mGenerator, 1024}; + static constexpr auto kGenericTypesCount = + static_cast(TypeId::Void) + 1; + spirv::Type mTypes[kGenericTypesCount]; + spirv::PointerType mPtrTypes[13][kGenericTypesCount]; + spirv::RuntimeArrayType mRuntimeArrayTypes[kGenericTypesCount]; + spirv::VariableValue mThreadId; + spirv::VariableValue mWorkgroupId; + spirv::VariableValue mLocalInvocationId; + spirv::VariableValue mPerVertex; + spirv::VariableValue mFragCoord; + std::vector mInterfaces; + std::map mIns; + std::map mOuts; + + std::map mConstantFloat32Map; + std::map mConstantUint32Map; + std::map mConstantSint32Map; + std::map mConstantUint64Map; + + struct FunctionType { + spirv::Type resultType; + std::vector params; + spirv::FunctionType id; + }; + + std::vector mFunctionTypes; + + struct StructTypeEntry { + spirv::StructType id; + std::vector members; + spirv::PointerType ptrTypes[13]; + + bool match(std::span other) { + if (members.size() != other.size()) { + return false; + } + + for (std::size_t i = 0; i < other.size(); ++i) { + if (members[i] != other[i]) { + return false; + } + } + + return true; + } + }; + + std::vector mStructTypes; + + std::forward_list mFragments; + std::forward_list mFunctions; + + spirv::ConstantBool mTrue; + spirv::ConstantBool mFalse; + + std::vector mUniforms; + spirv::ExtInstSet mGlslStd450; + spirv::Function mDiscardFn; + +public: + ConverterContext(RemoteMemory memory, Stage stage) : mMemory(memory), mStage(stage) { + mGlslStd450 = mBuilder.createExtInstImport("GLSL.std.450"); + } + + const decltype(mInterfaces) &getInterfaces() const { + return mInterfaces; + } + + spirv::SpirvBuilder &getBuilder() { return mBuilder; } + RemoteMemory getMemory() const { return mMemory; } + spirv::ExtInstSet getGlslStd450() const { return mGlslStd450; } + std::optional getTypeIdOf(spirv::Type type) const; + + spirv::StructType findStructType(std::span members); + spirv::StructType getStructType(std::span members); + spirv::PointerType getStructPointerType(spv::StorageClass storageClass, + spirv::StructType structType); + spirv::Type getType(TypeId id); + + spirv::PointerType getPointerType(spv::StorageClass storageClass, TypeId id) { + assert(static_cast(storageClass) < 13); + auto &type = mPtrTypes[static_cast(storageClass)] + [static_cast(id)]; + + if (!type) { + type = mBuilder.createTypePointer(storageClass, getType(id)); + } + + return type; + } + + spirv::RuntimeArrayType getRuntimeArrayType(TypeId id); + + spirv::UIntType getUInt32Type() { + return spirv::cast(getType(TypeId::UInt32)); + } + spirv::UIntType getUInt64Type() { + return spirv::cast(getType(TypeId::UInt64)); + } + + spirv::VectorOfType getUint32x2Type() { + return spirv::cast>( + getType(TypeId::UInt32x2)); + } + + spirv::VectorOfType getUint32x3Type() { + return spirv::cast>( + getType(TypeId::UInt32x3)); + } + + spirv::VectorOfType getUint32x4Type() { + return spirv::cast>( + getType(TypeId::UInt32x4)); + } + + spirv::ArrayOfType getArrayUint32x8Type() { + return spirv::cast>(getType(TypeId::ArrayUInt32x8)); + } + + spirv::ArrayOfType getArrayUint32x16Type() { + return spirv::cast>(getType(TypeId::ArrayUInt32x16)); + } + + spirv::SIntType getSint32Type() { + return spirv::cast(getType(TypeId::SInt32)); + } + spirv::SIntType getSint64Type() { + return spirv::cast(getType(TypeId::SInt64)); + } + + spirv::FloatType getFloat32Type() { + return spirv::cast(getType(TypeId::Float32)); + } + + spirv::VectorOfType getFloat32x4Type() { + return spirv::cast>( + getType(TypeId::Float32x4)); + } + + spirv::VectorOfType getFloat32x3Type() { + return spirv::cast>( + getType(TypeId::Float32x3)); + } + + spirv::VectorOfType getFloat32x2Type() { + return spirv::cast>( + getType(TypeId::Float32x2)); + } + + spirv::BoolType getBoolType() { + return spirv::cast(getType(TypeId::Bool)); + } + + spirv::VoidType getVoidType() { + return spirv::cast(getType(TypeId::Void)); + } + + spirv::ConstantBool getTrue() { + if (!mTrue) { + mTrue = mBuilder.createConstantTrue(getBoolType()); + } + return mTrue; + } + spirv::ConstantBool getFalse() { + if (!mFalse) { + mFalse = mBuilder.createConstantFalse(getBoolType()); + } + return mFalse; + } + + spirv::ConstantUInt getUInt64(std::uint64_t value); + spirv::ConstantUInt getUInt32(std::uint32_t value); + spirv::ConstantSInt getSInt32(std::uint32_t value); + spirv::ConstantFloat getFloat32Raw(std::uint32_t value); + + spirv::ConstantFloat getFloat32(float id) { + return getFloat32Raw(std::bit_cast(id)); + } + + spirv::SamplerType getSamplerType() { + return spirv::cast(getType(TypeId::Sampler)); + } + spirv::ImageType getImage2DType() { + return spirv::cast(getType(TypeId::Image2D)); + } + spirv::SampledImageType getSampledImage2DType() { + return spirv::cast(getType(TypeId::SampledImage2D)); + } + + UniformInfo *createStorageBuffer(TypeId type); + UniformInfo *getOrCreateStorageBuffer(std::uint32_t *vbuffer, TypeId type); + UniformInfo *getOrCreateUniformConstant(std::uint32_t *buffer, std::size_t size, TypeId type); + spirv::VariableValue getThreadId(); + spirv::VariableValue getWorkgroupId(); + spirv::VariableValue getLocalInvocationId(); + spirv::VariableValue getPerVertex(); + spirv::VariableValue getFragCoord(); + spirv::VariableValue getIn(unsigned location); + spirv::VariableValue getOut(unsigned location); + + spirv::Function getDiscardFn(); + + std::optional findUint32Value(spirv::Value id) const; + std::optional findSint32Value(spirv::Value id) const; + std::optional findFloat32Value(spirv::Value id) const; + spirv::FunctionType getFunctionType(spirv::Type resultType, + std::span params); + + Function *createFunction(std::size_t expectedSize); + Fragment *createFragment(std::size_t expectedSize); + + std::vector &getUniforms() { + return mUniforms; + } +}; +} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/Fragment.hpp b/hw/amdgpu/shader/include/amdgpu/shader/Fragment.hpp new file mode 100644 index 000000000..c3533af5d --- /dev/null +++ b/hw/amdgpu/shader/include/amdgpu/shader/Fragment.hpp @@ -0,0 +1,95 @@ +#pragma once + +#include "FragmentTerminator.hpp" +#include "Instruction.hpp" +#include "RegisterId.hpp" +#include "RegisterState.hpp" +#include "Stage.hpp" +#include "TypeId.hpp" +#include "Uniform.hpp" +#include "scf.hpp" + +#include +#include +#include + +namespace amdgpu::shader { +enum class OperandGetFlags { + None, + PreserveType = 1 << 0 +}; + +struct Function; +class ConverterContext; + +struct Fragment { + ConverterContext *context = nullptr; + Function *function = nullptr; + spirv::Block entryBlockId; + spirv::BlockBuilder builder; + RegisterState *registers = nullptr; + + std::set values; + std::set outputs; + + std::vector predecessors; + std::uint64_t jumpAddress = 0; + spirv::BoolValue branchCondition; + + void appendBranch(Fragment &other) { + other.predecessors.push_back(this); + } + + void injectValuesFromPreds(); + + // std::optional findInput(spirv::Value value); + // Value addInput(RegisterId id, spirv::Type type); + spirv::SamplerValue createSampler(RegisterId base); + spirv::ImageValue createImage(RegisterId base, bool r128); // TODO: params + Value createCompositeExtract(Value composite, std::uint32_t member); + Value getOperand(RegisterId id, TypeId type, OperandGetFlags flags = OperandGetFlags::None); + void setOperand(RegisterId id, Value value); + void setVcc(Value value); + void setScc(Value value); + spirv::BoolValue getScc(); + spirv::Value createBitcast(spirv::Type to, spirv::Type from, spirv::Value value); + + Value getScalarOperand(int id, TypeId type, OperandGetFlags flags = OperandGetFlags::None) { + return getOperand(RegisterId::Scalar(id), type, flags); + } + Value getVectorOperand(int id, TypeId type, OperandGetFlags flags = OperandGetFlags::None) { + return getOperand(RegisterId::Vector(id), type, flags); + } + Value getAttrOperand(int id, TypeId type, OperandGetFlags flags = OperandGetFlags::None) { + return getOperand(RegisterId::Attr(id), type, flags); + } + Value getVccLo() { + return getOperand(RegisterId::VccLo, TypeId::UInt32); + } + Value getVccHi() { + return getOperand(RegisterId::VccHi, TypeId::UInt32); + } + Value getExecLo() { + return getOperand(RegisterId::ExecLo, TypeId::UInt32); + } + Value getExecHi() { + return getOperand(RegisterId::ExecHi, TypeId::UInt32); + } + void setScalarOperand(int id, Value value) { + setOperand(RegisterId::Scalar(id), value); + } + void setVectorOperand(int id, Value value) { + setOperand(RegisterId::Vector(id), value); + } + void setExportTarget(int id, Value value) { + setOperand(RegisterId::Export(id), value); + } + // void createCallTo(MaterializedFunction *other); + void convert(std::uint64_t size); + +private: + Value getRegister(RegisterId id); + Value getRegister(RegisterId id, spirv::Type type); + void setRegister(RegisterId id, Value value); +}; +} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/FragmentTerminator.hpp b/hw/amdgpu/shader/include/amdgpu/shader/FragmentTerminator.hpp new file mode 100644 index 000000000..48ad5a031 --- /dev/null +++ b/hw/amdgpu/shader/include/amdgpu/shader/FragmentTerminator.hpp @@ -0,0 +1,11 @@ +#pragma once + +namespace amdgpu::shader { +enum class FragmentTerminator { + None, + EndProgram, + CallToReg, + BranchToReg, + Branch, +}; +} diff --git a/hw/amdgpu/shader/include/amdgpu/shader/Function.hpp b/hw/amdgpu/shader/include/amdgpu/shader/Function.hpp new file mode 100644 index 000000000..3c11e4596 --- /dev/null +++ b/hw/amdgpu/shader/include/amdgpu/shader/Function.hpp @@ -0,0 +1,31 @@ +#pragma once +#include "Fragment.hpp" +#include "RegisterId.hpp" +#include "spirv/spirv-builder.hpp" +#include + +namespace amdgpu::shader { +class ConverterContext; + +struct Function { + ConverterContext *context = nullptr; + Stage stage = Stage::None; + std::span userSgprs; + std::span userVgprs; + Fragment entryFragment; + Fragment exitFragment; + std::map inputs; + spirv::FunctionBuilder builder; + std::vector fragments; + + Value getInput(RegisterId id); + Value createInput(RegisterId id); + void createExport(spirv::BlockBuilder &builder, unsigned index, Value value); + spirv::Type getResultType(); + spirv::FunctionType getFunctionType(); + + Fragment *createFragment(); + + void insertReturn(); +}; +} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/Instruction.hpp b/hw/amdgpu/shader/include/amdgpu/shader/Instruction.hpp new file mode 100644 index 000000000..ced8d2b95 --- /dev/null +++ b/hw/amdgpu/shader/include/amdgpu/shader/Instruction.hpp @@ -0,0 +1,1972 @@ +#pragma once + +#include +#include + +namespace amdgpu::shader { +inline constexpr std::uint32_t genMask(std::uint32_t offset, + std::uint32_t bitCount) { + return ((1u << bitCount) - 1u) << offset; +} + +inline constexpr std::uint32_t getMaskEnd(std::uint32_t mask) { + return 32 - std::countl_zero(mask); +} + +inline constexpr std::uint32_t fetchMaskedValue(std::uint32_t hex, + std::uint32_t mask) { + return (hex & mask) >> std::countr_zero(mask); +} + +enum SurfaceFormat { + kSurfaceFormatInvalid = 0x00000000, + kSurfaceFormat8 = 0x00000001, + kSurfaceFormat16 = 0x00000002, + kSurfaceFormat8_8 = 0x00000003, + kSurfaceFormat32 = 0x00000004, + kSurfaceFormat16_16 = 0x00000005, + kSurfaceFormat10_11_11 = 0x00000006, + kSurfaceFormat11_11_10 = 0x00000007, + kSurfaceFormat10_10_10_2 = 0x00000008, + kSurfaceFormat2_10_10_10 = 0x00000009, + kSurfaceFormat8_8_8_8 = 0x0000000a, + kSurfaceFormat32_32 = 0x0000000b, + kSurfaceFormat16_16_16_16 = 0x0000000c, + kSurfaceFormat32_32_32 = 0x0000000d, + kSurfaceFormat32_32_32_32 = 0x0000000e, +}; +enum TextureChannelType { + kTextureChannelTypeUNorm = 0x00000000, + kTextureChannelTypeSNorm = 0x00000001, + kTextureChannelTypeUScaled = 0x00000002, + kTextureChannelTypeSScaled = 0x00000003, + kTextureChannelTypeUInt = 0x00000004, + kTextureChannelTypeSInt = 0x00000005, + kTextureChannelTypeSNormNoZero = 0x00000006, + kTextureChannelTypeFloat = 0x00000007, +}; + +inline int getScalarInstSize(int id) { return id == 255 ? 1 : 0; } + +struct Sop1 { + enum class Op { + S_MOV_B32 = 3, + S_MOV_B64, + S_CMOV_B32, + S_CMOV_B64, + S_NOT_B32, + S_NOT_B64, + S_WQM_B32, + S_WQM_B64, + S_BREV_B32, + S_BREV_B64, + S_BCNT0_I32_B32, + S_BCNT0_I32_B64, + S_BCNT1_I32_B32, + S_BCNT1_I32_B64, + S_FF0_I32_B32, + S_FF0_I32_B64, + S_FF1_I32_B32, + S_FF1_I32_B64, + S_FLBIT_I32_B32, + S_FLBIT_I32_B64, + S_FLBIT_I32, + S_FLBIT_I32_I64, + S_SEXT_I32_I8, + S_SEXT_I32_I16, + S_BITSET0_B32, + S_BITSET0_B64, + S_BITSET1_B32, + S_BITSET1_B64, + S_GETPC_B64, + S_SETPC_B64, + S_SWAPPC_B64, + S_RFE_B64, + S_AND_SAVEEXEC_B64 = 36, + S_OR_SAVEEXEC_B64, + S_XOR_SAVEEXEC_B64, + S_ANDN2_SAVEEXEC_B64, + S_ORN2_SAVEEXEC_B64, + S_NAND_SAVEEXEC_B64, + S_NOR_SAVEEXEC_B64, + S_XNOR_SAVEEXEC_B64, + S_QUADMASK_B32, + S_QUADMASK_B64, + S_MOVRELS_B32, + S_MOVRELS_B64, + S_MOVRELD_B32, + S_MOVRELD_B64, + S_CBRANCH_JOIN, + S_ABS_I32 = 52, + S_MOV_FED_B32, + }; + + static constexpr int kMinInstSize = 1; + + static constexpr auto ssrc0Mask = genMask(0, 8); + static constexpr auto opMask = genMask(getMaskEnd(ssrc0Mask), 8); + static constexpr auto sdstMask = genMask(getMaskEnd(opMask), 7); + + const std::uint32_t *inst; + + const std::uint32_t ssrc0 = fetchMaskedValue(inst[0], ssrc0Mask); + const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); + const std::uint32_t sdst = fetchMaskedValue(inst[0], sdstMask); + + Sop1(const std::uint32_t *inst) : inst(inst) {} + + int size() const { return kMinInstSize + getScalarInstSize(ssrc0); } + + void dump() const; +}; + +struct Sopk { + enum class Op { + S_MOVK_I32, + S_CMOVK_I32 = 2, + S_CMPK_EQ_I32, + S_CMPK_LG_I32, + S_CMPK_GT_I32, + S_CMPK_GE_I32, + S_CMPK_LT_I32, + S_CMPK_LE_I32, + S_CMPK_EQ_U32, + S_CMPK_LG_U32, + S_CMPK_GT_U32, + S_CMPK_GE_U32, + S_CMPK_LT_U32, + S_CMPK_LE_U32, + S_ADDK_I32, + S_MULK_I32, + S_CBRANCH_I_FORK, + S_GETREG_B32, + S_SETREG_B32, + S_SETREG_IMM + }; + + static constexpr int kMinInstSize = 1; + + static constexpr auto simmMask = genMask(0, 16); + static constexpr auto sdstMask = genMask(getMaskEnd(simmMask), 7); + static constexpr auto opMask = genMask(getMaskEnd(sdstMask), 5); + + const std::uint32_t *inst; + + const std::int16_t simm = (std::int16_t)fetchMaskedValue(inst[0], simmMask); + const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); + const std::uint32_t sdst = fetchMaskedValue(inst[0], sdstMask); + + Sopk(const std::uint32_t *inst) : inst(inst) {} + + int size() const { return kMinInstSize; } + + void dump() const; +}; + +struct Sopc { + enum class Op { + S_CMP_EQ_I32, + S_CMP_LG_I32, + S_CMP_GT_I32, + S_CMP_GE_I32, + S_CMP_LT_I32, + S_CMP_LE_I32, + S_CMP_EQ_U32, + S_CMP_LG_U32, + S_CMP_GT_U32, + S_CMP_GE_U32, + S_CMP_LT_U32, + S_CMP_LE_U32, + S_BITCMP0_B32, + S_BITCMP1_B32, + S_BITCMP0_B64, + S_BITCMP1_B64, + S_SETVSKIP, + S_ILLEGALD + }; + + static constexpr int kMinInstSize = 1; + + static constexpr auto ssrc0Mask = genMask(0, 8); + static constexpr auto ssrc1Mask = genMask(getMaskEnd(ssrc0Mask), 8); + static constexpr auto opMask = genMask(getMaskEnd(ssrc1Mask), 7); + + const std::uint32_t *inst; + + const std::uint32_t ssrc0 = fetchMaskedValue(inst[0], ssrc0Mask); + const std::uint32_t ssrc1 = fetchMaskedValue(inst[0], ssrc1Mask); + const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); + + Sopc(const std::uint32_t *inst) : inst(inst) {} + + int size() const { return kMinInstSize + getScalarInstSize(ssrc0); } + + void dump() const; +}; + +struct Sop2 { + enum class Op { + S_ADD_U32, + S_SUB_U32, + S_ADD_I32, + S_SUB_I32, + S_ADDC_U32, + S_SUBB_U32, + S_MIN_I32, + S_MIN_U32, + S_MAX_I32, + S_MAX_U32, + S_CSELECT_B32, + S_CSELECT_B64, + S_AND_B32 = 14, + S_AND_B64, + S_OR_B32, + S_OR_B64, + S_XOR_B32, + S_XOR_B64, + S_ANDN2_B32, + S_ANDN2_B64, + S_ORN2_B32, + S_ORN2_B64, + S_NAND_B32, + S_NAND_B64, + S_NOR_B32, + S_NOR_B64, + S_XNOR_B32, + S_XNOR_B64, + S_LSHL_B32, + S_LSHL_B64, + S_LSHR_B32, + S_LSHR_B64, + S_ASHR_I32, + S_ASHR_I64, + S_BFM_B32, + S_BFM_B64, + S_MUL_I32, + S_BFE_U32, + S_BFE_I32, + S_BFE_U64, + S_BFE_I64, + S_CBRANCH_G_FORK, + S_ABSDIFF_I32, + S_LSHL1_ADD_U32, + S_LSHL2_ADD_U32, + S_LSHL3_ADD_U32, + S_LSHL4_ADD_U32, + S_PACK_LL_B32_B16, + S_PACK_LH_B32_B16, + S_PACK_HH_B32_B16, + S_MUL_HI_U32, + S_MUL_HI_I32, + }; + + static constexpr int kMinInstSize = 1; + + static constexpr auto ssrc0Mask = genMask(0, 8); + static constexpr auto ssrc1Mask = genMask(getMaskEnd(ssrc0Mask), 8); + static constexpr auto sdstMask = genMask(getMaskEnd(ssrc1Mask), 7); + static constexpr auto opMask = genMask(getMaskEnd(sdstMask), 7); + + const std::uint32_t *inst; + const std::uint32_t ssrc0 = fetchMaskedValue(inst[0], ssrc0Mask); + const std::uint32_t ssrc1 = fetchMaskedValue(inst[0], ssrc1Mask); + const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); + const std::uint32_t sdst = fetchMaskedValue(inst[0], sdstMask); + + Sop2(const std::uint32_t *inst) : inst(inst) {} + + int size() const { + return kMinInstSize + getScalarInstSize(ssrc0) + getScalarInstSize(ssrc1); + } + + void dump() const; +}; + +struct Sopp { + enum class Op { + S_NOP, + S_ENDPGM, + S_BRANCH, + S_CBRANCH_SCC0 = 4, + S_CBRANCH_SCC1, + S_CBRANCH_VCCZ, + S_CBRANCH_VCCNZ, + S_CBRANCH_EXECZ, + S_CBRANCH_EXECNZ, + S_BARRIER, + S_WAITCNT = 12, + S_SETHALT, + S_SLEEP, + S_SETPRIO, + S_SENDMSG, + S_SENDMSGHALT, + S_TRAP, + S_ICACHE_INV, + S_INCPERFLEVEL, + S_DECPERFLEVEL, + S_TTRACEDATA, + S_CBRANCH_CDBGSYS = 23, + S_CBRANCH_CDBGUSER = 24, + S_CBRANCH_CDBGSYS_OR_USER = 25, + S_CBRANCH_CDBGSYS_AND_USER = 26, + }; + + static constexpr int kMinInstSize = 1; + + static constexpr auto simmMask = genMask(0, 16); + static constexpr auto opMask = genMask(getMaskEnd(simmMask), 7); + + const std::uint32_t *inst; + const std::int16_t simm = (std::int16_t)fetchMaskedValue(inst[0], simmMask); + const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); + + Sopp(const std::uint32_t *inst) : inst(inst) {} + + int size() const { return kMinInstSize; } + + void dump() const; +}; + +struct Vop1 { + enum class Op { + V_NOP, + V_MOV_B32, + V_READFIRSTLANE_B32, + V_CVT_I32_F64, + V_CVT_F64_I32, + V_CVT_F32_I32, + V_CVT_F32_U32, + V_CVT_U32_F32, + V_CVT_I32_F32, + V_MOV_FED_B32, + V_CVT_F16_F32, + V_CVT_F32_F16, + V_CVT_RPI_I32_F32, + V_CVT_FLR_I32_F32, + V_CVT_OFF_F32_I4, + V_CVT_F32_F64, + V_CVT_F64_F32, + V_CVT_F32_UBYTE0, + V_CVT_F32_UBYTE1, + V_CVT_F32_UBYTE2, + V_CVT_F32_UBYTE3, + V_CVT_U32_F64, + V_CVT_F64_U32, + V_FRACT_F32 = 32, + V_TRUNC_F32, + V_CEIL_F32, + V_RNDNE_F32, + V_FLOOR_F32, + V_EXP_F32, + V_LOG_CLAMP_F32, + V_LOG_F32, + V_RCP_CLAMP_F32, + V_RCP_LEGACY_F32, + V_RCP_F32, + V_RCP_IFLAG_F32, + V_RSQ_CLAMP_F32, + V_RSQ_LEGACY_F32, + V_RSQ_F32, + V_RCP_F64, + V_RCP_CLAMP_F64, + V_RSQ_F64, + V_RSQ_CLAMP_F64, + V_SQRT_F32, + V_SQRT_F64, + V_SIN_F32, + V_COS_F32, + V_NOT_B32, + V_BFREV_B32, + V_FFBH_U32, + V_FFBL_B32, + V_FFBH_I32, + V_FREXP_EXP_I32_F64, + V_FREXP_MANT_F64, + V_FRACT_F64, + V_FREXP_EXP_I32_F32, + V_FREXP_MANT_F32, + V_CLREXCP, + V_MOVRELD_B32, + V_MOVRELS_B32, + V_MOVRELSD_B32, + V_CVT_F16_U16 = 80, + V_CVT_F16_I16, + V_CVT_U16_F16, + V_CVT_I16_F16, + V_RCP_F16, + V_SQRT_F16, + V_RSQ_F16, + V_LOG_F16, + V_EXP_F16, + V_FREXP_MANT_F16, + V_FREXP_EXP_I16_F16, + V_FLOOR_F16, + V_CEIL_F16, + V_TRUNC_F16, + V_RNDNE_F16, + V_FRACT_F16, + V_SIN_F16, + V_COS_F16, + V_SAT_PK_U8_I16, + V_CVT_NORM_I16_F16, + V_CVT_NORM_U16_F16, + V_SWAP_B32, + }; + + static constexpr int kMinInstSize = 1; + + static constexpr auto src0Mask = genMask(0, 9); + static constexpr auto opMask = genMask(getMaskEnd(src0Mask), 8); + static constexpr auto vdstMask = genMask(getMaskEnd(opMask), 8); + + const std::uint32_t *inst; + const std::uint32_t src0 = fetchMaskedValue(inst[0], src0Mask); + const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); + const std::uint32_t vdst = fetchMaskedValue(inst[0], vdstMask); + + int size() const { return kMinInstSize + getScalarInstSize(src0); } + + Vop1(const std::uint32_t *inst) : inst(inst) {} + + void dump() const; +}; + +struct Vop2 { + enum class Op { + V_CNDMASK_B32, + V_READLANE_B32, + V_WRITELANE_B32, + V_ADD_F32, + V_SUB_F32, + V_SUBREV_F32, + V_MAC_LEGACY_F32, + V_MUL_LEGACY_F32, + V_MUL_F32, + V_MUL_I32_I24, + V_MUL_HI_I32_I24, + V_MUL_U32_U24, + V_MUL_HI_U32_U24, + V_MIN_LEGACY_F32, + V_MAX_LEGACY_F32, + V_MIN_F32, + V_MAX_F32, + V_MIN_I32, + V_MAX_I32, + V_MIN_U32, + V_MAX_U32, + V_LSHR_B32, + V_LSHRREV_B32, + V_ASHR_I32, + V_ASHRREV_I32, + V_LSHL_B32, + V_LSHLREV_B32, + V_AND_B32, + V_OR_B32, + V_XOR_B32, + V_BFM_B32, + V_MAC_F32, + V_MADMK_F32, + V_MADAK_F32, + V_BCNT_U32_B32, + V_MBCNT_LO_U32_B32, + V_MBCNT_HI_U32_B32, + V_ADD_I32, + V_SUB_I32, + V_SUBREV_I32, + V_ADDC_U32, + V_SUBB_U32, + V_SUBBREV_U32, + V_LDEXP_F32, + V_CVT_PKACCUM_U8_F32, + V_CVT_PKNORM_I16_F32, + V_CVT_PKNORM_U16_F32, + V_CVT_PKRTZ_F16_F32, + V_CVT_PK_U16_U32, + V_CVT_PK_I16_I32, + }; + + static constexpr int kMinInstSize = 1; + static constexpr auto src0Mask = genMask(0, 9); + static constexpr auto vsrc1Mask = genMask(getMaskEnd(src0Mask), 8); + static constexpr auto vdstMask = genMask(getMaskEnd(vsrc1Mask), 8); + static constexpr auto opMask = genMask(getMaskEnd(vdstMask), 6); + + const std::uint32_t *inst; + const std::uint32_t src0 = fetchMaskedValue(inst[0], src0Mask); + const std::uint32_t vsrc1 = fetchMaskedValue(inst[0], vsrc1Mask); + const std::uint32_t vdst = fetchMaskedValue(inst[0], vdstMask); + const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); + + Vop2(const std::uint32_t *inst) : inst(inst) {} + + int size() const { + int result = kMinInstSize + getScalarInstSize(src0); + + if (op == Vop2::Op::V_MADMK_F32 || op == Vop2::Op::V_MADAK_F32) { + result += 1; + } + + return result; + } + void dump() const; +}; + +struct Vop3 { + enum class Op { + V3_CMP_F_F32, + V3_CMP_LT_F32, + V3_CMP_EQ_F32, + V3_CMP_LE_F32, + V3_CMP_GT_F32, + V3_CMP_LG_F32, + V3_CMP_GE_F32, + V3_CMP_O_F32, + V3_CMP_U_F32, + V3_CMP_NGE_F32, + V3_CMP_NLG_F32, + V3_CMP_NGT_F32, + V3_CMP_NLE_F32, + V3_CMP_NEQ_F32, + V3_CMP_NLT_F32, + V3_CMP_TRU_F32, + V3_CMPX_F_F32, + V3_CMPX_LT_F32, + V3_CMPX_EQ_F32, + V3_CMPX_LE_F32, + V3_CMPX_GT_F32, + V3_CMPX_LG_F32, + V3_CMPX_GE_F32, + V3_CMPX_O_F32, + V3_CMPX_U_F32, + V3_CMPX_NGE_F32, + V3_CMPX_NLG_F32, + V3_CMPX_NGT_F32, + V3_CMPX_NLE_F32, + V3_CMPX_NEQ_F32, + V3_CMPX_NLT_F32, + V3_CMPX_TRU_F32, + V3_CMP_F_F64, + V3_CMP_LT_F64, + V3_CMP_EQ_F64, + V3_CMP_LE_F64, + V3_CMP_GT_F64, + V3_CMP_LG_F64, + V3_CMP_GE_F64, + V3_CMP_O_F64, + V3_CMP_U_F64, + V3_CMP_NGE_F64, + V3_CMP_NLG_F64, + V3_CMP_NGT_F64, + V3_CMP_NLE_F64, + V3_CMP_NEQ_F64, + V3_CMP_NLT_F64, + V3_CMP_TRU_F64, + V3_CMPX_F_F64, + V3_CMPX_LT_F64, + V3_CMPX_EQ_F64, + V3_CMPX_LE_F64, + V3_CMPX_GT_F64, + V3_CMPX_LG_F64, + V3_CMPX_GE_F64, + V3_CMPX_O_F64, + V3_CMPX_U_F64, + V3_CMPX_NGE_F64, + V3_CMPX_NLG_F64, + V3_CMPX_NGT_F64, + V3_CMPX_NLE_F64, + V3_CMPX_NEQ_F64, + V3_CMPX_NLT_F64, + V3_CMPX_TRU_F64, + V3_CMPS_F_F32, + V3_CMPS_LT_F32, + V3_CMPS_EQ_F32, + V3_CMPS_LE_F32, + V3_CMPS_GT_F32, + V3_CMPS_LG_F32, + V3_CMPS_GE_F32, + V3_CMPS_O_F32, + V3_CMPS_U_F32, + V3_CMPS_NGE_F32, + V3_CMPS_NLG_F32, + V3_CMPS_NGT_F32, + V3_CMPS_NLE_F32, + V3_CMPS_NEQ_F32, + V3_CMPS_NLT_F32, + V3_CMPS_TRU_F32, + V3_CMPSX_F_F32, + V3_CMPSX_LT_F32, + V3_CMPSX_EQ_F32, + V3_CMPSX_LE_F32, + V3_CMPSX_GT_F32, + V3_CMPSX_LG_F32, + V3_CMPSX_GE_F32, + V3_CMPSX_O_F32, + V3_CMPSX_U_F32, + V3_CMPSX_NGE_F32, + V3_CMPSX_NLG_F32, + V3_CMPSX_NGT_F32, + V3_CMPSX_NLE_F32, + V3_CMPSX_NEQ_F32, + V3_CMPSX_NLT_F32, + V3_CMPSX_TRU_F32, + V3_CMPS_F_F64, + V3_CMPS_LT_F64, + V3_CMPS_EQ_F64, + V3_CMPS_LE_F64, + V3_CMPS_GT_F64, + V3_CMPS_LG_F64, + V3_CMPS_GE_F64, + V3_CMPS_O_F64, + V3_CMPS_U_F64, + V3_CMPS_NGE_F64, + V3_CMPS_NLG_F64, + V3_CMPS_NGT_F64, + V3_CMPS_NLE_F64, + V3_CMPS_NEQ_F64, + V3_CMPS_NLT_F64, + V3_CMPS_TRU_F64, + V3_CMPSX_F_F64, + V3_CMPSX_LT_F64, + V3_CMPSX_EQ_F64, + V3_CMPSX_LE_F64, + V3_CMPSX_GT_F64, + V3_CMPSX_LG_F64, + V3_CMPSX_GE_F64, + V3_CMPSX_O_F64, + V3_CMPSX_U_F64, + V3_CMPSX_NGE_F64, + V3_CMPSX_NLG_F64, + V3_CMPSX_NGT_F64, + V3_CMPSX_NLE_F64, + V3_CMPSX_NEQ_F64, + V3_CMPSX_NLT_F64, + V3_CMPSX_TRU_F64, + V3_CMP_F_I32, + V3_CMP_LT_I32, + V3_CMP_EQ_I32, + V3_CMP_LE_I32, + V3_CMP_GT_I32, + V3_CMP_NE_I32, + V3_CMP_GE_I32, + V3_CMP_T_I32, + V3_CMP_CLASS_F32, + V3_CMP_LT_I16, + V3_CMP_EQ_I16, + V3_CMP_LE_I16, + V3_CMP_GT_I16, + V3_CMP_NE_I16, + V3_CMP_GE_I16, + V3_CMP_CLASS_F16, + V3_CMPX_F_I32, + V3_CMPX_LT_I32, + V3_CMPX_EQ_I32, + V3_CMPX_LE_I32, + V3_CMPX_GT_I32, + V3_CMPX_NE_I32, + V3_CMPX_GE_I32, + V3_CMPX_T_I32, + V3_CMPX_CLASS_F32, + V3_CMPX_LT_I16, + V3_CMPX_EQ_I16, + V3_CMPX_LE_I16, + V3_CMPX_GT_I16, + V3_CMPX_NE_I16, + V3_CMPX_GE_I16, + V3_CMPX_CLASS_F16, + V3_CMP_F_I64, + V3_CMP_LT_I64, + V3_CMP_EQ_I64, + V3_CMP_LE_I64, + V3_CMP_GT_I64, + V3_CMP_NE_I64, + V3_CMP_GE_I64, + V3_CMP_T_I64, + V3_CMP_CLASS_F64, + V3_CMP_LT_U16, + V3_CMP_EQ_U16, + V3_CMP_LE_U16, + V3_CMP_GT_U16, + V3_CMP_NE_U16, + V3_CMP_GE_U16, + V3_CMPX_F_I64 = 176, + V3_CMPX_LT_I64, + V3_CMPX_EQ_I64, + V3_CMPX_LE_I64, + V3_CMPX_GT_I64, + V3_CMPX_NE_I64, + V3_CMPX_GE_I64, + V3_CMPX_T_I64, + V3_CMPX_CLASS_F64, + V3_CMPX_LT_U16, + V3_CMPX_EQ_U16, + V3_CMPX_LE_U16, + V3_CMPX_GT_U16, + V3_CMPX_NE_U16, + V3_CMPX_GE_U16, + V3_CMP_F_U32 = 192, + V3_CMP_LT_U32, + V3_CMP_EQ_U32, + V3_CMP_LE_U32, + V3_CMP_GT_U32, + V3_CMP_NE_U32, + V3_CMP_GE_U32, + V3_CMP_T_U32, + V3_CMP_F_F16, + V3_CMP_LT_F16, + V3_CMP_EQ_F16, + V3_CMP_LE_F16, + V3_CMP_GT_F16, + V3_CMP_LG_F16, + V3_CMP_GE_F16, + V3_CMP_O_F16, + V3_CMPX_F_U32, + V3_CMPX_LT_U32, + V3_CMPX_EQ_U32, + V3_CMPX_LE_U32, + V3_CMPX_GT_U32, + V3_CMPX_NE_U32, + V3_CMPX_GE_U32, + V3_CMPX_T_U32, + V3_CMPX_F_F16, + V3_CMPX_LT_F16, + V3_CMPX_EQ_F16, + V3_CMPX_LE_F16, + V3_CMPX_GT_F16, + V3_CMPX_LG_F16, + V3_CMPX_GE_F16, + V3_CMPX_O_F16, + V3_CMP_F_U64, + V3_CMP_LT_U64, + V3_CMP_EQ_U64, + V3_CMP_LE_U64, + V3_CMP_GT_U64, + V3_CMP_NE_U64, + V3_CMP_GE_U64, + V3_CMP_T_U64, + V3_CMP_U_F16, + V3_CMP_NGE_F16, + V3_CMP_NLG_F16, + V3_CMP_NGT_F16, + V3_CMP_NLE_F16, + V3_CMP_NEQ_F16, + V3_CMP_NLT_F16, + V3_CMP_TRU_F16, + V3_CMPX_F_U64, + V3_CMPX_LT_U64, + V3_CMPX_EQ_U64, + V3_CMPX_LE_U64, + V3_CMPX_GT_U64, + V3_CMPX_NE_U64, + V3_CMPX_GE_U64, + V3_CMPX_T_U64, + V3_CNDMASK_B32 = 256, + V3_READLANE_B32, + V3_WRITELANE_B32, + V3_ADD_F32, + V3_SUB_F32, + V3_SUBREV_F32, + V3_MAC_LEGACY_F32, + V3_MUL_LEGACY_F32, + V3_MUL_F32, + V3_MUL_I32_I24, + V3_MUL_HI_I32_I24, + V3_MUL_U32_U24, + V3_MUL_HI_U32_U24, + V3_MIN_LEGACY_F32, + V3_MAX_LEGACY_F32, + V3_MIN_F32, + V3_MAX_F32, + V3_MIN_I32, + V3_MAX_I32, + V3_MIN_U32, + V3_MAX_U32, + V3_LSHR_B32, + V3_LSHRREV_B32, + V3_ASHR_I32, + V3_ASHRREV_I32, + V3_LSHL_B32, + V3_LSHLREV_B32, + V3_AND_B32, + V3_OR_B32, + V3_XOR_B32, + V3_BFM_B32, + V3_MAC_F32, + V3_MADMK_F32, + V3_MADAK_F32, + V3_BCNT_U32_B32, + V3_MBCNT_LO_U32_B32, + V3_MBCNT_HI_U32_B32, + V3_ADD_I32, + V3_SUB_I32, + V3_SUBREV_I32, + V3_ADDC_U32, + V3_SUBB_U32, + V3_SUBBREV_U32, + V3_LDEXP_F32, + V3_CVT_PKACCUM_U8_F32, + V3_CVT_PKNORM_I16_F32, + V3_CVT_PKNORM_U16_F32, + V3_CVT_PKRTZ_F16_F32, + V3_CVT_PK_U16_U32, + V3_CVT_PK_I16_I32, + V3_MAD_LEGACY_F32 = 320, + V3_MAD_F32, + V3_MAD_I32_I24, + V3_MAD_U32_U24, + V3_CUBEID_F32, + V3_CUBESC_F32, + V3_CUBETC_F32, + V3_CUBEMA_F32, + V3_BFE_U32, + V3_BFE_I32, + V3_BFI_B32, + V3_FMA_F32, + V3_FMA_F64, + V3_LERP_U8, + V3_ALIGNBIT_B32, + V3_ALIGNBYTE_B32, + V3_MULLIT_F32, + V3_MIN3_F32, + V3_MIN3_I32, + V3_MIN3_U32, + V3_MAX3_F32, + V3_MAX3_I32, + V3_MAX3_U32, + V3_MED3_F32, + V3_MED3_I32, + V3_MED3_U32, + V3_SAD_U8, + V3_SAD_HI_U8, + V3_SAD_U16, + V3_SAD_U32, + V3_CVT_PK_U8_F32, + V3_DIV_FIXUP_F32, + V3_DIV_FIXUP_F64, + V3_LSHL_B64, + V3_LSHR_B64, + V3_ASHR_I64, + V3_ADD_F64, + V3_MUL_F64, + V3_MIN_F64, + V3_MAX_F64, + V3_LDEXP_F64, + V3_MUL_LO_U32, + V3_MUL_HI_U32, + V3_MUL_LO_I32, + V3_MUL_HI_I32, + V3_DIV_SCALE_F32, + V3_DIV_SCALE_F64, + V3_DIV_FMAS_F32, + V3_DIV_FMAS_F64, + V3_MSAD_U8, + V3_QSAD_U8, + V3_MQSAD_U8, + V3_TRIG_PREOP_F64, + V3_NOP = 384, + V3_MOV_B32, + V3_READFIRSTLANE_B32, + V3_CVT_I32_F64, + V3_CVT_F64_I32, + V3_CVT_F32_I32, + V3_CVT_F32_U32, + V3_CVT_U32_F32, + V3_CVT_I32_F32, + V3_MOV_FED_B32, + V3_CVT_F16_F32, + V3_CVT_F32_F16, + V3_CVT_RPI_I32_F32, + V3_CVT_FLR_I32_F32, + V3_CVT_OFF_F32_I4, + V3_CVT_F32_F64, + V3_CVT_F64_F32, + V3_CVT_F32_UBYTE0, + V3_CVT_F32_UBYTE1, + V3_CVT_F32_UBYTE2, + V3_CVT_F32_UBYTE3, + V3_CVT_U32_F64, + V3_CVT_F64_U32, + V3_FRACT_F32 = 416, + V3_TRUNC_F32, + V3_CEIL_F32, + V3_RNDNE_F32, + V3_FLOOR_F32, + V3_EXP_F32, + V3_LOG_CLAMP_F32, + V3_LOG_F32, + V3_RCP_CLAMP_F32, + V3_RCP_LEGACY_F32, + V3_RCP_F32, + V3_RCP_IFLAG_F32, + V3_RSQ_CLAMP_F32, + V3_RSQ_LEGACY_F32, + V3_RSQ_F32, + V3_RCP_F64, + V3_RCP_CLAMP_F64, + V3_RSQ_F64, + V3_RSQ_CLAMP_F64, + V3_SQRT_F32, + V3_SQRT_F64, + V3_SIN_F32, + V3_COS_F32, + V3_NOT_B32, + V3_BFREV_B32, + V3_FFBH_U32, + V3_FFBL_B32, + V3_FFBH_I32, + V3_FREXP_EXP_I32_F64, + V3_FREXP_MANT_F64, + V3_FRACT_F64, + V3_FREXP_EXP_I32_F32, + V3_FREXP_MANT_F32, + V3_CLREXCP, + V3_MOVRELD_B32, + V3_MOVRELS_B32, + V3_MOVRELSD_B32, + }; + + static constexpr int kMinInstSize = 2; + static constexpr auto vdstMask = genMask(0, 8); + + static constexpr auto absMask = genMask(getMaskEnd(vdstMask), 3); + static constexpr auto abs0Mask = genMask(getMaskEnd(vdstMask), 1); + static constexpr auto abs1Mask = genMask(getMaskEnd(abs0Mask), 1); + static constexpr auto abs2Mask = genMask(getMaskEnd(abs1Mask), 1); + static constexpr auto clmpMask = genMask(getMaskEnd(absMask), 1); + + static constexpr auto sdstMask = genMask(getMaskEnd(vdstMask), 7); + + static_assert(getMaskEnd(clmpMask) + 5 == getMaskEnd(sdstMask) + 2); + + static constexpr auto opMask = genMask(getMaskEnd(clmpMask) + 5, 9); + + static constexpr auto src0Mask = genMask(0, 9); + static constexpr auto src1Mask = genMask(getMaskEnd(src0Mask), 9); + static constexpr auto src2Mask = genMask(getMaskEnd(src1Mask), 9); + static constexpr auto omodMask = genMask(getMaskEnd(src2Mask), 2); + static constexpr auto negMask = genMask(getMaskEnd(omodMask), 3); + static constexpr auto neg0Mask = genMask(getMaskEnd(omodMask), 1); + static constexpr auto neg1Mask = genMask(getMaskEnd(neg0Mask), 1); + static constexpr auto neg2Mask = genMask(getMaskEnd(neg1Mask), 1); + + const std::uint32_t *inst; + const std::uint32_t vdst = fetchMaskedValue(inst[0], vdstMask); + const std::uint32_t abs = fetchMaskedValue(inst[0], absMask); + const std::uint32_t clmp = fetchMaskedValue(inst[0], clmpMask); + const std::uint32_t sdst = fetchMaskedValue(inst[0], sdstMask); + const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); + + const std::uint32_t src0 = fetchMaskedValue(inst[1], src0Mask); + const std::uint32_t src1 = fetchMaskedValue(inst[1], src1Mask); + const std::uint32_t src2 = fetchMaskedValue(inst[1], src2Mask); + const std::uint32_t omod = fetchMaskedValue(inst[1], omodMask); + const std::uint32_t neg = fetchMaskedValue(inst[1], negMask); + + Vop3(const std::uint32_t *inst) : inst(inst) {} + + int size() const { + return kMinInstSize + getScalarInstSize(src0) + getScalarInstSize(src1) + + getScalarInstSize(src2); + } + + void dump() const; +}; + +struct Vopc { + enum class Op { + V_CMP_F_F32, + V_CMP_LT_F32, + V_CMP_EQ_F32, + V_CMP_LE_F32, + V_CMP_GT_F32, + V_CMP_LG_F32, + V_CMP_GE_F32, + V_CMP_O_F32, + V_CMP_U_F32, + V_CMP_NGE_F32, + V_CMP_NLG_F32, + V_CMP_NGT_F32, + V_CMP_NLE_F32, + V_CMP_NEQ_F32, + V_CMP_NLT_F32, + V_CMP_TRU_F32, + V_CMPX_F_F32, + V_CMPX_LT_F32, + V_CMPX_EQ_F32, + V_CMPX_LE_F32, + V_CMPX_GT_F32, + V_CMPX_LG_F32, + V_CMPX_GE_F32, + V_CMPX_O_F32, + V_CMPX_U_F32, + V_CMPX_NGE_F32, + V_CMPX_NLG_F32, + V_CMPX_NGT_F32, + V_CMPX_NLE_F32, + V_CMPX_NEQ_F32, + V_CMPX_NLT_F32, + V_CMPX_TRU_F32, + V_CMP_F_F64, + V_CMP_LT_F64, + V_CMP_EQ_F64, + V_CMP_LE_F64, + V_CMP_GT_F64, + V_CMP_LG_F64, + V_CMP_GE_F64, + V_CMP_O_F64, + V_CMP_U_F64, + V_CMP_NGE_F64, + V_CMP_NLG_F64, + V_CMP_NGT_F64, + V_CMP_NLE_F64, + V_CMP_NEQ_F64, + V_CMP_NLT_F64, + V_CMP_TRU_F64, + V_CMPX_F_F64, + V_CMPX_LT_F64, + V_CMPX_EQ_F64, + V_CMPX_LE_F64, + V_CMPX_GT_F64, + V_CMPX_LG_F64, + V_CMPX_GE_F64, + V_CMPX_O_F64, + V_CMPX_U_F64, + V_CMPX_NGE_F64, + V_CMPX_NLG_F64, + V_CMPX_NGT_F64, + V_CMPX_NLE_F64, + V_CMPX_NEQ_F64, + V_CMPX_NLT_F64, + V_CMPX_TRU_F64, + V_CMPS_F_F32, + V_CMPS_LT_F32, + V_CMPS_EQ_F32, + V_CMPS_LE_F32, + V_CMPS_GT_F32, + V_CMPS_LG_F32, + V_CMPS_GE_F32, + V_CMPS_O_F32, + V_CMPS_U_F32, + V_CMPS_NGE_F32, + V_CMPS_NLG_F32, + V_CMPS_NGT_F32, + V_CMPS_NLE_F32, + V_CMPS_NEQ_F32, + V_CMPS_NLT_F32, + V_CMPS_TRU_F32, + V_CMPSX_F_F32, + V_CMPSX_LT_F32, + V_CMPSX_EQ_F32, + V_CMPSX_LE_F32, + V_CMPSX_GT_F32, + V_CMPSX_LG_F32, + V_CMPSX_GE_F32, + V_CMPSX_O_F32, + V_CMPSX_U_F32, + V_CMPSX_NGE_F32, + V_CMPSX_NLG_F32, + V_CMPSX_NGT_F32, + V_CMPSX_NLE_F32, + V_CMPSX_NEQ_F32, + V_CMPSX_NLT_F32, + V_CMPSX_TRU_F32, + V_CMPS_F_F64, + V_CMPS_LT_F64, + V_CMPS_EQ_F64, + V_CMPS_LE_F64, + V_CMPS_GT_F64, + V_CMPS_LG_F64, + V_CMPS_GE_F64, + V_CMPS_O_F64, + V_CMPS_U_F64, + V_CMPS_NGE_F64, + V_CMPS_NLG_F64, + V_CMPS_NGT_F64, + V_CMPS_NLE_F64, + V_CMPS_NEQ_F64, + V_CMPS_NLT_F64, + V_CMPS_TRU_F64, + V_CMPSX_F_F64, + V_CMPSX_LT_F64, + V_CMPSX_EQ_F64, + V_CMPSX_LE_F64, + V_CMPSX_GT_F64, + V_CMPSX_LG_F64, + V_CMPSX_GE_F64, + V_CMPSX_O_F64, + V_CMPSX_U_F64, + V_CMPSX_NGE_F64, + V_CMPSX_NLG_F64, + V_CMPSX_NGT_F64, + V_CMPSX_NLE_F64, + V_CMPSX_NEQ_F64, + V_CMPSX_NLT_F64, + V_CMPSX_TRU_F64, + V_CMP_F_I32, + V_CMP_LT_I32, + V_CMP_EQ_I32, + V_CMP_LE_I32, + V_CMP_GT_I32, + V_CMP_NE_I32, + V_CMP_GE_I32, + V_CMP_T_I32, + V_CMP_CLASS_F32, + V_CMP_LT_I16, + V_CMP_EQ_I16, + V_CMP_LE_I16, + V_CMP_GT_I16, + V_CMP_NE_I16, + V_CMP_GE_I16, + V_CMP_CLASS_F16, + V_CMPX_F_I32, + V_CMPX_LT_I32, + V_CMPX_EQ_I32, + V_CMPX_LE_I32, + V_CMPX_GT_I32, + V_CMPX_NE_I32, + V_CMPX_GE_I32, + V_CMPX_T_I32, + V_CMPX_CLASS_F32, + V_CMPX_LT_I16, + V_CMPX_EQ_I16, + V_CMPX_LE_I16, + V_CMPX_GT_I16, + V_CMPX_NE_I16, + V_CMPX_GE_I16, + V_CMPX_CLASS_F16, + V_CMP_F_I64, + V_CMP_LT_I64, + V_CMP_EQ_I64, + V_CMP_LE_I64, + V_CMP_GT_I64, + V_CMP_NE_I64, + V_CMP_GE_I64, + V_CMP_T_I64, + V_CMP_CLASS_F64, + V_CMP_LT_U16, + V_CMP_EQ_U16, + V_CMP_LE_U16, + V_CMP_GT_U16, + V_CMP_NE_U16, + V_CMP_GE_U16, + V_CMPX_F_I64 = 176, + V_CMPX_LT_I64, + V_CMPX_EQ_I64, + V_CMPX_LE_I64, + V_CMPX_GT_I64, + V_CMPX_NE_I64, + V_CMPX_GE_I64, + V_CMPX_T_I64, + V_CMPX_CLASS_F64, + V_CMPX_LT_U16, + V_CMPX_EQ_U16, + V_CMPX_LE_U16, + V_CMPX_GT_U16, + V_CMPX_NE_U16, + V_CMPX_GE_U16, + V_CMP_F_U32 = 192, + V_CMP_LT_U32, + V_CMP_EQ_U32, + V_CMP_LE_U32, + V_CMP_GT_U32, + V_CMP_NE_U32, + V_CMP_GE_U32, + V_CMP_T_U32, + V_CMP_F_F16, + V_CMP_LT_F16, + V_CMP_EQ_F16, + V_CMP_LE_F16, + V_CMP_GT_F16, + V_CMP_LG_F16, + V_CMP_GE_F16, + V_CMP_O_F16, + V_CMPX_F_U32, + V_CMPX_LT_U32, + V_CMPX_EQ_U32, + V_CMPX_LE_U32, + V_CMPX_GT_U32, + V_CMPX_NE_U32, + V_CMPX_GE_U32, + V_CMPX_T_U32, + V_CMPX_F_F16, + V_CMPX_LT_F16, + V_CMPX_EQ_F16, + V_CMPX_LE_F16, + V_CMPX_GT_F16, + V_CMPX_LG_F16, + V_CMPX_GE_F16, + V_CMPX_O_F16, + V_CMP_F_U64, + V_CMP_LT_U64, + V_CMP_EQ_U64, + V_CMP_LE_U64, + V_CMP_GT_U64, + V_CMP_NE_U64, + V_CMP_GE_U64, + V_CMP_T_U64, + V_CMP_U_F16, + V_CMP_NGE_F16, + V_CMP_NLG_F16, + V_CMP_NGT_F16, + V_CMP_NLE_F16, + V_CMP_NEQ_F16, + V_CMP_NLT_F16, + V_CMP_TRU_F16, + V_CMPX_F_U64, + V_CMPX_LT_U64, + V_CMPX_EQ_U64, + V_CMPX_LE_U64, + V_CMPX_GT_U64, + V_CMPX_NE_U64, + V_CMPX_GE_U64, + V_CMPX_T_U64, + V_CMPX_U_F16, + V_CMPX_NGE_F16, + V_CMPX_NLG_F16, + V_CMPX_NGT_F16, + V_CMPX_NLE_F16, + V_CMPX_NEQ_F16, + V_CMPX_NLT_F16, + V_CMPX_TRU_F16, + }; + + static constexpr int kMinInstSize = 1; + + static constexpr auto src0Mask = genMask(0, 9); + static constexpr auto vsrc1Mask = genMask(getMaskEnd(src0Mask), 8); + static constexpr auto opMask = genMask(getMaskEnd(vsrc1Mask), 8); + + const std::uint32_t *inst; + const std::uint16_t src0 = fetchMaskedValue(inst[0], src0Mask); + const std::uint8_t vsrc1 = fetchMaskedValue(inst[0], vsrc1Mask); + const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); + + Vopc(const std::uint32_t *inst) : inst(inst) {} + + int size() const { return kMinInstSize; } + + void dump() const; +}; + +struct Smrd { + enum class Op { + S_LOAD_DWORD, + S_LOAD_DWORDX2, + S_LOAD_DWORDX4, + S_LOAD_DWORDX8, + S_LOAD_DWORDX16, + S_BUFFER_LOAD_DWORD = 8, + S_BUFFER_LOAD_DWORDX2, + S_BUFFER_LOAD_DWORDX4, + S_BUFFER_LOAD_DWORDX8, + S_BUFFER_LOAD_DWORDX16, + S_DCACHE_INV_VOL = 29, + S_MEMTIME, + S_DCACHE_INV, + }; + + static constexpr int kMinInstSize = 1; + static constexpr auto offsetMask = genMask(0, 8); + static constexpr auto immMask = genMask(getMaskEnd(offsetMask), 1); + static constexpr auto sbaseMask = genMask(getMaskEnd(immMask), 6); + static constexpr auto sdstMask = genMask(getMaskEnd(sbaseMask), 7); + static constexpr auto opMask = genMask(getMaskEnd(sdstMask), 5); + + const std::uint32_t *inst; + const std::uint32_t offset = fetchMaskedValue(inst[0], offsetMask); + const std::uint32_t imm = fetchMaskedValue(inst[0], immMask); + const std::uint32_t sbase = fetchMaskedValue(inst[0], sbaseMask); + const std::uint32_t sdst = fetchMaskedValue(inst[0], sdstMask); + const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); + + Smrd(const std::uint32_t *inst) : inst(inst) {} + + int size() const { return kMinInstSize; } + void dump() const; +}; + +struct Mubuf { + enum class Op { + BUFFER_LOAD_FORMAT_X, + BUFFER_LOAD_FORMAT_XY, + BUFFER_LOAD_FORMAT_XYZ, + BUFFER_LOAD_FORMAT_XYZW, + BUFFER_STORE_FORMAT_X, + BUFFER_STORE_FORMAT_XY, + BUFFER_STORE_FORMAT_XYZ, + BUFFER_STORE_FORMAT_XYZW, + BUFFER_LOAD_UBYTE, + BUFFER_LOAD_SBYTE, + BUFFER_LOAD_USHORT, + BUFFER_LOAD_SSHORT, + BUFFER_LOAD_DWORD, + BUFFER_LOAD_DWORDX2, + BUFFER_LOAD_DWORDX4, + BUFFER_LOAD_DWORDX3, + BUFFER_STORE_BYTE = 24, + BUFFER_STORE_SHORT = 26, + BUFFER_STORE_DWORD = 28, + BUFFER_STORE_DWORDX2, + BUFFER_STORE_DWORDX4, + BUFFER_STORE_DWORDX3, + BUFFER_ATOMIC_SWAP = 48, + BUFFER_ATOMIC_CMPSWAP, + BUFFER_ATOMIC_ADD, + BUFFER_ATOMIC_SUB, + BUFFER_ATOMIC_RSUB, + BUFFER_ATOMIC_SMIN, + BUFFER_ATOMIC_UMIN, + BUFFER_ATOMIC_SMAX, + BUFFER_ATOMIC_UMAX, + BUFFER_ATOMIC_AND, + BUFFER_ATOMIC_OR, + BUFFER_ATOMIC_XOR, + BUFFER_ATOMIC_INC, + BUFFER_ATOMIC_DEC, + BUFFER_ATOMIC_FCMPSWAP, + BUFFER_ATOMIC_FMIN, + BUFFER_ATOMIC_FMAX, + BUFFER_ATOMIC_SWAP_X2 = 80, + BUFFER_ATOMIC_CMPSWAP_X2, + BUFFER_ATOMIC_ADD_X2, + BUFFER_ATOMIC_SUB_X2, + BUFFER_ATOMIC_RSUB_X2, + BUFFER_ATOMIC_SMIN_X2, + BUFFER_ATOMIC_UMIN_X2, + BUFFER_ATOMIC_SMAX_X2, + BUFFER_ATOMIC_UMAX_X2, + BUFFER_ATOMIC_AND_X2, + BUFFER_ATOMIC_OR_X2, + BUFFER_ATOMIC_XOR_X2, + BUFFER_ATOMIC_INC_X2, + BUFFER_ATOMIC_DEC_X2, + BUFFER_ATOMIC_FCMPSWAP_X2, + BUFFER_ATOMIC_FMIN_X2, + BUFFER_ATOMIC_FMAX_X2, + BUFFER_WBINVL1_SC_VOL = 112, + BUFFER_WBINVL1, + }; + + static constexpr int kMinInstSize = 2; + static constexpr auto offsetMask = genMask(0, 12); + static constexpr auto offenMask = genMask(getMaskEnd(offsetMask), 1); + static constexpr auto idxenMask = genMask(getMaskEnd(offenMask), 1); + static constexpr auto glcMask = genMask(getMaskEnd(idxenMask), 1); + static constexpr auto ldsMask = genMask(getMaskEnd(glcMask) + 1, 1); + static constexpr auto opMask = genMask(getMaskEnd(ldsMask) + 1, 7); + + static constexpr auto vaddrMask = genMask(0, 8); + static constexpr auto vdataMask = genMask(getMaskEnd(vaddrMask), 8); + static constexpr auto srsrcMask = genMask(getMaskEnd(vdataMask), 5); + static constexpr auto slcMask = genMask(getMaskEnd(srsrcMask) + 1, 1); + static constexpr auto tfeMask = genMask(getMaskEnd(slcMask), 1); + static constexpr auto soffsetMask = genMask(getMaskEnd(tfeMask), 8); + + const std::uint32_t *inst; + std::uint16_t offset = fetchMaskedValue(inst[0], offsetMask); + bool offen = fetchMaskedValue(inst[0], offenMask); + bool idxen = fetchMaskedValue(inst[0], idxenMask); + bool glc = fetchMaskedValue(inst[0], glcMask); + bool lds = fetchMaskedValue(inst[0], ldsMask); + Op op = static_cast(fetchMaskedValue(inst[0], opMask)); + + std::uint8_t vaddr = fetchMaskedValue(inst[1], vaddrMask); + std::uint8_t vdata = fetchMaskedValue(inst[1], vdataMask); + std::uint8_t srsrc = fetchMaskedValue(inst[1], srsrcMask); + bool slc = fetchMaskedValue(inst[1], slcMask); + bool tfe = fetchMaskedValue(inst[1], tfeMask); + std::uint8_t soffset = fetchMaskedValue(inst[1], soffsetMask); + + Mubuf(const std::uint32_t *inst) : inst(inst) {} + + int size() const { return kMinInstSize; } + + void dump() const; +}; + +struct Mtbuf { + enum class Op { + TBUFFER_LOAD_FORMAT_X, + TBUFFER_LOAD_FORMAT_XY, + TBUFFER_LOAD_FORMAT_XYZ, + TBUFFER_LOAD_FORMAT_XYZW, + TBUFFER_STORE_FORMAT_X, + TBUFFER_STORE_FORMAT_XY, + TBUFFER_STORE_FORMAT_XYZ, + TBUFFER_STORE_FORMAT_XYZW, + }; + static constexpr int kMinInstSize = 2; + + static constexpr auto offsetMask = genMask(0, 12); + static constexpr auto offenMask = genMask(getMaskEnd(offsetMask), 1); + static constexpr auto idxenMask = genMask(getMaskEnd(offenMask), 1); + static constexpr auto glcMask = genMask(getMaskEnd(idxenMask), 1); + static constexpr auto opMask = genMask(getMaskEnd(glcMask) + 1, 3); + static constexpr auto dfmtMask = genMask(getMaskEnd(opMask), 4); + static constexpr auto nfmtMask = genMask(getMaskEnd(dfmtMask), 4); + + static constexpr auto vaddrMask = genMask(0, 8); + static constexpr auto vdataMask = genMask(getMaskEnd(vaddrMask), 8); + static constexpr auto srsrcMask = genMask(getMaskEnd(vdataMask), 5); + static constexpr auto slcMask = genMask(getMaskEnd(srsrcMask) + 1, 1); + static constexpr auto tfeMask = genMask(getMaskEnd(slcMask), 1); + static constexpr auto soffsetMask = genMask(getMaskEnd(tfeMask), 8); + + const std::uint32_t *inst; + const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); + + std::uint16_t offset = fetchMaskedValue(inst[0], offsetMask); + bool offen = fetchMaskedValue(inst[0], offenMask); + bool idxen = fetchMaskedValue(inst[0], idxenMask); + bool glc = fetchMaskedValue(inst[0], glcMask); + SurfaceFormat dfmt = (SurfaceFormat)fetchMaskedValue(inst[0], dfmtMask); + TextureChannelType nfmt = (TextureChannelType)fetchMaskedValue(inst[0], nfmtMask); + + std::uint8_t vaddr = fetchMaskedValue(inst[1], vaddrMask); + std::uint8_t vdata = fetchMaskedValue(inst[1], vdataMask); + std::uint8_t srsrc = fetchMaskedValue(inst[1], srsrcMask); + bool slc = fetchMaskedValue(inst[1], slcMask); + bool tfe = fetchMaskedValue(inst[1], tfeMask); + std::uint8_t soffset = fetchMaskedValue(inst[1], soffsetMask); + + Mtbuf(const std::uint32_t *inst) : inst(inst) {} + + int size() const { return kMinInstSize; } + + void dump() const; +}; + +struct Mimg { + enum class Op { + IMAGE_LOAD, + IMAGE_LOAD_MIP, + IMAGE_LOAD_PCK, + IMAGE_LOAD_PCK_SGN, + IMAGE_LOAD_MIP_PCK, + IMAGE_LOAD_MIP_PCK_SGN, + IMAGE_STORE = 8, + IMAGE_STORE_MIP, + IMAGE_STORE_PCK, + IMAGE_STORE_MIP_PCK, + IMAGE_GET_RESINFO = 14, + IMAGE_ATOMIC_SWAP, + IMAGE_ATOMIC_CMPSWAP, + IMAGE_ATOMIC_ADD, + IMAGE_ATOMIC_SUB, + IMAGE_ATOMIC_RSUB, + IMAGE_ATOMIC_SMIN, + IMAGE_ATOMIC_UMIN, + IMAGE_ATOMIC_SMAX, + IMAGE_ATOMIC_UMAX, + IMAGE_ATOMIC_AND, + IMAGE_ATOMIC_OR, + IMAGE_ATOMIC_XOR, + IMAGE_ATOMIC_INC, + IMAGE_ATOMIC_DEC, + IMAGE_ATOMIC_FCMPSWAP, + IMAGE_ATOMIC_FMIN, + IMAGE_ATOMIC_FMAX, + IMAGE_SAMPLE, + IMAGE_SAMPLE_CL, + IMAGE_SAMPLE_D, + IMAGE_SAMPLE_D_CL, + IMAGE_SAMPLE_L, + IMAGE_SAMPLE_B, + IMAGE_SAMPLE_B_CL, + IMAGE_SAMPLE_LZ, + IMAGE_SAMPLE_C, + IMAGE_SAMPLE_C_CL, + IMAGE_SAMPLE_C_D, + IMAGE_SAMPLE_C_D_CL, + IMAGE_SAMPLE_C_L, + IMAGE_SAMPLE_C_B, + IMAGE_SAMPLE_C_B_CL, + IMAGE_SAMPLE_C_LZ, + IMAGE_SAMPLE_O, + IMAGE_SAMPLE_CL_O, + IMAGE_SAMPLE_D_O, + IMAGE_SAMPLE_D_CL_O, + IMAGE_SAMPLE_L_O, + IMAGE_SAMPLE_B_O, + IMAGE_SAMPLE_B_CL_O, + IMAGE_SAMPLE_LZ_O, + IMAGE_SAMPLE_C_O, + IMAGE_SAMPLE_C_CL_O, + IMAGE_SAMPLE_C_D_O, + IMAGE_SAMPLE_C_D_CL_O, + IMAGE_SAMPLE_C_L_O, + IMAGE_SAMPLE_C_B_O, + IMAGE_SAMPLE_C_B_CL_O, + IMAGE_SAMPLE_C_LZ_O, + IMAGE_GATHER4, + IMAGE_GATHER4_CL, + IMAGE_GATHER4_L = 68, + IMAGE_GATHER4_B, + IMAGE_GATHER4_B_CL, + IMAGE_GATHER4_LZ, + IMAGE_GATHER4_C, + IMAGE_GATHER4_C_CL, + IMAGE_GATHER4_C_L = 76, + IMAGE_GATHER4_C_B, + IMAGE_GATHER4_C_B_CL, + IMAGE_GATHER4_C_LZ, + IMAGE_GATHER4_O, + IMAGE_GATHER4_CL_O, + IMAGE_GATHER4_L_O = 84, + IMAGE_GATHER4_B_O, + IMAGE_GATHER4_B_CL_O, + IMAGE_GATHER4_LZ_O, + IMAGE_GATHER4_C_O, + IMAGE_GATHER4_C_CL_O, + IMAGE_GATHER4_C_L_O = 92, + IMAGE_GATHER4_C_B_O, + IMAGE_GATHER4_C_B_CL_O, + IMAGE_GATHER4_C_LZ_O, + IMAGE_GET_LOD, + IMAGE_SAMPLE_CD = 104, + IMAGE_SAMPLE_CD_CL, + IMAGE_SAMPLE_C_CD, + IMAGE_SAMPLE_C_CD_CL, + IMAGE_SAMPLE_CD_O, + IMAGE_SAMPLE_CD_CL_O, + IMAGE_SAMPLE_C_CD_O, + IMAGE_SAMPLE_C_CD_CL_O, + }; + + static constexpr int kMinInstSize = 2; + + static constexpr auto dmaskMask = genMask(8, 4); + static constexpr auto unrmMask = genMask(getMaskEnd(dmaskMask), 1); + static constexpr auto glcMask = genMask(getMaskEnd(unrmMask), 1); + static constexpr auto daMask = genMask(getMaskEnd(glcMask), 1); + static constexpr auto r128Mask = genMask(getMaskEnd(daMask), 1); + static constexpr auto tfeMask = genMask(getMaskEnd(r128Mask), 1); + static constexpr auto lweMask = genMask(getMaskEnd(tfeMask), 1); + static constexpr auto opMask = genMask(getMaskEnd(lweMask), 7); + static constexpr auto slcMask = genMask(getMaskEnd(opMask), 1); + + static constexpr auto vaddrMask = genMask(0, 8); + static constexpr auto vdataMask = genMask(getMaskEnd(vaddrMask), 8); + static constexpr auto srsrcMask = genMask(getMaskEnd(vdataMask), 5); + static constexpr auto ssampMask = genMask(getMaskEnd(srsrcMask), 5); + + const std::uint32_t *inst; + const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); + + std::uint8_t dmask = fetchMaskedValue(inst[0], dmaskMask); + bool unrm = fetchMaskedValue(inst[0], unrmMask); + bool glc = fetchMaskedValue(inst[0], glcMask); + bool da = fetchMaskedValue(inst[0], daMask); + bool r128 = fetchMaskedValue(inst[0], r128Mask); + bool tfe = fetchMaskedValue(inst[0], tfeMask); + bool lwe = fetchMaskedValue(inst[0], lweMask); + bool slc = fetchMaskedValue(inst[0], slcMask); + + std::uint8_t vaddr = fetchMaskedValue(inst[1], vaddrMask); + std::uint8_t vdata = fetchMaskedValue(inst[1], vdataMask); + std::uint8_t srsrc = fetchMaskedValue(inst[1], srsrcMask); + std::uint8_t ssamp = fetchMaskedValue(inst[1], ssampMask); + + Mimg(const std::uint32_t *inst) : inst(inst) {} + + int size() const { return kMinInstSize; } + + void dump() const; +}; + +struct Ds { + enum class Op { + DS_ADD_U32, + DS_SUB_U32, + DS_RSUB_U32, + DS_INC_U32, + DS_DEC_U32, + DS_MIN_I32, + DS_MAX_I32, + DS_MIN_U32, + DS_MAX_U32, + DS_AND_B32, + DS_OR_B32, + DS_XOR_B32, + DS_MSKOR_B32, + DS_WRITE_B32, + DS_WRITE2_B32, + DS_WRITE2ST64_B32, + DS_CMPST_B32, + DS_CMPST_F32, + DS_MIN_F32, + DS_MAX_F32, + DS_NOP, + DS_GWS_SEMA_RELEASE_ALL = 24, + DS_GWS_INIT, + DS_GWS_SEMA_V, + DS_GWS_SEMA_BR, + DS_GWS_SEMA_P, + DS_GWS_BARRIER, + DS_WRITE_B8, + DS_WRITE_B16, + DS_ADD_RTN_U32, + DS_SUB_RTN_U32, + DS_RSUB_RTN_U32, + DS_INC_RTN_U32, + DS_DEC_RTN_U32, + DS_MIN_RTN_I32, + DS_MAX_RTN_I32, + DS_MIN_RTN_U32, + DS_MAX_RTN_U32, + DS_AND_RTN_B32, + DS_OR_RTN_B32, + DS_XOR_RTN_B32, + DS_MSKOR_RTN_B32, + DS_WRXCHG_RTN_B32, + DS_WRXCHG2_RTN_B32, + DS_WRXCHG2ST64_RTN_B32, + DS_CMPST_RTN_B32, + DS_CMPST_RTN_F32, + DS_MIN_RTN_F32, + DS_MAX_RTN_F32, + DS_WRAP_RTN_B32, + DS_SWIZZLE_B32, + DS_READ_B32, + DS_READ2_B32, + DS_READ2ST64_B32, + DS_READ_I8, + DS_READ_U8, + DS_READ_I16, + DS_READ_U16, + DS_CONSUME, + DS_APPEND, + DS_ORDERED_COUNT, + DS_ADD_U64, + DS_SUB_U64, + DS_RSUB_U64, + DS_INC_U64, + DS_DEC_U64, + DS_MIN_I64, + DS_MAX_I64, + DS_MIN_U64, + DS_MAX_U64, + DS_AND_B64, + DS_OR_B64, + DS_XOR_B64, + DS_MSKOR_B64, + DS_WRITE_B64, + DS_WRITE2_B64, + DS_WRITE2ST64_B64, + DS_CMPST_B64, + DS_CMPST_F64, + DS_MIN_F64, + DS_MAX_F64, + DS_ADD_RTN_U64 = 96, + DS_SUB_RTN_U64, + DS_RSUB_RTN_U64, + DS_INC_RTN_U64, + DS_DEC_RTN_U64, + DS_MIN_RTN_I64, + DS_MAX_RTN_I64, + DS_MIN_RTN_U64, + DS_MAX_RTN_U64, + DS_AND_RTN_B64, + DS_OR_RTN_B64, + DS_XOR_RTN_B64, + DS_MSKOR_RTN_B64, + DS_WRXCHG_RTN_B64, + DS_WRXCHG2_RTN_B64, + DS_WRXCHG2ST64_RTN_B64, + DS_CMPST_RTN_B64, + DS_CMPST_RTN_F64, + DS_MIN_RTN_F64, + DS_MAX_RTN_F64, + DS_READ_B64 = 118, + DS_READ2_B64, + DS_READ2ST64_B64, + DS_CONDXCHG32_RTN_B64 = 126, + DS_ADD_SRC2_U32 = 128, + DS_SUB_SRC2_U32, + DS_RSUB_SRC2_U32, + DS_INC_SRC2_U32, + DS_DEC_SRC2_U32, + DS_MIN_SRC2_I32, + DS_MAX_SRC2_I32, + DS_MIN_SRC2_U32, + DS_MAX_SRC2_U32, + DS_AND_SRC2_B32, + DS_OR_SRC2_B32, + DS_XOR_SRC2_B32, + DS_WRITE_SRC2_B32, + DS_MIN_SRC2_F32 = 146, + DS_MAX_SRC2_F32, + DS_ADD_SRC2_U64 = 192, + DS_SUB_SRC2_U64, + DS_RSUB_SRC2_U64, + DS_INC_SRC2_U64, + DS_DEC_SRC2_U64, + DS_MIN_SRC2_I64, + DS_MAX_SRC2_I64, + DS_MIN_SRC2_U64, + DS_MAX_SRC2_U64, + DS_AND_SRC2_B64, + DS_OR_SRC2_B64, + DS_XOR_SRC2_B64, + DS_WRITE_SRC2_B64, + DS_MIN_SRC2_F64 = 210, + DS_MAX_SRC2_F64, + DS_WRITE_B96 = 222, + DS_WRITE_B128, + DS_CONDXCHG32_RTN_B128 = 253, + DS_READ_B96, + DS_READ_B128, + }; + + static constexpr int kMinInstSize = 2; + static constexpr auto offset0Mask = genMask(0, 8); + static constexpr auto offset1Mask = genMask(getMaskEnd(offset0Mask), 8); + static constexpr auto gdsMask = genMask(getMaskEnd(offset1Mask) + 1, 1); + static constexpr auto opMask = genMask(getMaskEnd(gdsMask), 8); + + static constexpr auto addrMask = genMask(0, 8); + static constexpr auto data0Mask = genMask(getMaskEnd(addrMask), 8); + static constexpr auto data1Mask = genMask(getMaskEnd(data0Mask), 8); + static constexpr auto vdstMask = genMask(getMaskEnd(data1Mask), 8); + + const std::uint32_t *inst; + const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); + + Ds(const std::uint32_t *inst) : inst(inst) {} + + int size() const { return kMinInstSize; } + + void dump() const; +}; + +struct Vintrp { + enum class Op { V_INTERP_P1_F32, V_INTERP_P2_F32, V_INTERP_MOV_F32 }; + + static constexpr int kMinInstSize = 1; + static constexpr auto vsrcMask = genMask(0, 8); + static constexpr auto attrChanMask = genMask(getMaskEnd(vsrcMask), 2); + static constexpr auto attrMask = genMask(getMaskEnd(attrChanMask), 6); + static constexpr auto opMask = genMask(getMaskEnd(attrMask), 2); + static constexpr auto vdstMask = genMask(getMaskEnd(opMask), 8); + + const std::uint32_t *inst; + uint32_t vsrc = fetchMaskedValue(inst[0], vsrcMask); + uint32_t attrChan = fetchMaskedValue(inst[0], attrChanMask); + uint32_t attr = fetchMaskedValue(inst[0], attrMask); + const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); + uint32_t vdst = fetchMaskedValue(inst[0], vdstMask); + + Vintrp(const std::uint32_t *inst) : inst(inst) {} + + int size() const { return kMinInstSize; } + + void dump() const; +}; + +struct Exp { + static constexpr int kMinInstSize = 2; + + static constexpr auto enMask = genMask(0, 4); + static constexpr auto targetMask = genMask(getMaskEnd(enMask), 6); + static constexpr auto comprMask = genMask(getMaskEnd(targetMask), 1); + static constexpr auto doneMask = genMask(getMaskEnd(comprMask), 1); + static constexpr auto vmMask = genMask(getMaskEnd(doneMask), 1); + + static constexpr auto vsrc0Mask = genMask(0, 8); + static constexpr auto vsrc1Mask = genMask(getMaskEnd(vsrc0Mask), 8); + static constexpr auto vsrc2Mask = genMask(getMaskEnd(vsrc1Mask), 8); + static constexpr auto vsrc3Mask = genMask(getMaskEnd(vsrc2Mask), 8); + + const std::uint32_t *inst; + + std::uint8_t en = fetchMaskedValue(inst[0], enMask); + std::uint8_t target = fetchMaskedValue(inst[0], targetMask); + bool compr = fetchMaskedValue(inst[0], comprMask); + bool done = fetchMaskedValue(inst[0], doneMask); + bool vm = fetchMaskedValue(inst[0], vmMask); + std::uint8_t vsrc0 = fetchMaskedValue(inst[1], vsrc0Mask); + std::uint8_t vsrc1 = fetchMaskedValue(inst[1], vsrc1Mask); + std::uint8_t vsrc2 = fetchMaskedValue(inst[1], vsrc2Mask); + std::uint8_t vsrc3 = fetchMaskedValue(inst[1], vsrc3Mask); + + Exp(const std::uint32_t *inst) : inst(inst) {} + + int size() const { return kMinInstSize; } + + void dump() const; +}; + +enum class InstructionClass : std::uint8_t { + Invalid, + Vop2, + Sop2, + Sopk, + Smrd, + Vop3, + Mubuf, + Mtbuf, + Mimg, + Ds, + Vintrp, + Exp, + Vop1, + Vopc, + Sop1, + Sopc, + Sopp, +}; + +static constexpr std::uint32_t kInstMask1 = + static_cast(~0u << (32 - 1)); +static constexpr std::uint32_t kInstMask2 = + static_cast(~0u << (32 - 2)); +static constexpr std::uint32_t kInstMask4 = + static_cast(~0u << (32 - 4)); +static constexpr std::uint32_t kInstMask5 = + static_cast(~0u << (32 - 5)); +static constexpr std::uint32_t kInstMask6 = + static_cast(~0u << (32 - 6)); +static constexpr std::uint32_t kInstMask7 = + static_cast(~0u << (32 - 7)); +static constexpr std::uint32_t kInstMask9 = + static_cast(~0u << (32 - 9)); + +static constexpr std::uint32_t kInstMaskValVop2 = 0b0u << (32 - 1); +static constexpr std::uint32_t kInstMaskValSop2 = 0b10u << (32 - 2); +static constexpr std::uint32_t kInstMaskValSopk = 0b1011u << (32 - 4); +static constexpr std::uint32_t kInstMaskValSmrd = 0b11000u << (32 - 5); +static constexpr std::uint32_t kInstMaskValVop3 = 0b110100u << (32 - 6); +static constexpr std::uint32_t kInstMaskValMubuf = 0b111000u << (32 - 6); +static constexpr std::uint32_t kInstMaskValMtbuf = 0b111010u << (32 - 6); +static constexpr std::uint32_t kInstMaskValMimg = 0b111100u << (32 - 6); +static constexpr std::uint32_t kInstMaskValDs = 0b110110u << (32 - 6); +static constexpr std::uint32_t kInstMaskValVintrp = 0b110010u << (32 - 6); +static constexpr std::uint32_t kInstMaskValExp = 0b111110u << (32 - 6); +static constexpr std::uint32_t kInstMaskValVop1 = 0b0111111u << (32 - 7); +static constexpr std::uint32_t kInstMaskValVopC = 0b0111110u << (32 - 7); +static constexpr std::uint32_t kInstMaskValSop1 = 0b101111101u << (32 - 9); +static constexpr std::uint32_t kInstMaskValSopc = 0b101111110u << (32 - 9); +static constexpr std::uint32_t kInstMaskValSopp = 0b101111111u << (32 - 9); + +inline InstructionClass getInstructionClass(std::uint32_t instr) { + switch (instr & kInstMask9) { + case kInstMaskValSop1: + return InstructionClass::Sop1; + case kInstMaskValSopc: + return InstructionClass::Sopc; + case kInstMaskValSopp: + return InstructionClass::Sopp; + } + + switch (instr & kInstMask7) { + case kInstMaskValVop1: + return InstructionClass::Vop1; + case kInstMaskValVopC: + return InstructionClass::Vopc; + } + + switch (instr & kInstMask6) { + case kInstMaskValVop3: + return InstructionClass::Vop3; + case kInstMaskValMubuf: + return InstructionClass::Mubuf; + case kInstMaskValMtbuf: + return InstructionClass::Mtbuf; + case kInstMaskValMimg: + return InstructionClass::Mimg; + case kInstMaskValDs: + return InstructionClass::Ds; + case kInstMaskValVintrp: + return InstructionClass::Vintrp; + case kInstMaskValExp: + return InstructionClass::Exp; + } + + if ((instr & kInstMask5) == kInstMaskValSmrd) { + return InstructionClass::Smrd; + } + + if ((instr & kInstMask4) == kInstMaskValSopk) { + return InstructionClass::Sopk; + } + + if ((instr & kInstMask2) == kInstMaskValSop2) { + return InstructionClass::Sop2; + } + + if ((instr & kInstMask1) == kInstMaskValVop2) { + return InstructionClass::Vop2; + } + + return InstructionClass::Invalid; +} + +struct Instruction { + const std::uint32_t *inst; + InstructionClass instClass = getInstructionClass(*inst); + + Instruction(const std::uint32_t *inst) : inst(inst) {} + + int size() const { + switch (instClass) { + case InstructionClass::Vop2: + return Vop2(inst).size(); + case InstructionClass::Sop2: + return Sop2(inst).size(); + case InstructionClass::Sopk: + return Sopk(inst).size(); + case InstructionClass::Smrd: + return Smrd(inst).size(); + case InstructionClass::Vop3: + return Vop3(inst).size(); + case InstructionClass::Mubuf: + return Mubuf(inst).size(); + case InstructionClass::Mtbuf: + return Mtbuf(inst).size(); + case InstructionClass::Mimg: + return Mimg(inst).size(); + case InstructionClass::Ds: + return Ds(inst).size(); + case InstructionClass::Vintrp: + return Vintrp(inst).size(); + case InstructionClass::Exp: + return Exp(inst).size(); + case InstructionClass::Vop1: + return Vop1(inst).size(); + case InstructionClass::Vopc: + return Vopc(inst).size(); + case InstructionClass::Sop1: + return Sop1(inst).size(); + case InstructionClass::Sopc: + return Sopc(inst).size(); + case InstructionClass::Sopp: + return Sopp(inst).size(); + + case InstructionClass::Invalid: + break; + } + + return 1; + } + + void dump() const; +}; + +const char *instructionClassToString(InstructionClass instrClass); +const char *opcodeToString(InstructionClass instClass, int op); + +const char *sop1OpcodeToString(Sop1::Op op); +const char *sop2OpcodeToString(Sop2::Op op); +const char *sopkOpcodeToString(Sopk::Op op); +const char *sopcOpcodeToString(Sopc::Op op); +const char *soppOpcodeToString(Sopp::Op op); +const char *vop2OpcodeToString(Vop2::Op op); +const char *vop1OpcodeToString(Vop1::Op op); +const char *vopcOpcodeToString(Vopc::Op op); +const char *vop3OpcodeToString(Vop3::Op op); +const char *smrdOpcodeToString(Smrd::Op op); +const char *mubufOpcodeToString(Mubuf::Op op); +const char *mtbufOpcodeToString(Mtbuf::Op op); +const char *mimgOpcodeToString(Mimg::Op op); +const char *dsOpcodeToString(Ds::Op op); +const char *vintrpOpcodeToString(Vintrp::Op op); +} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/RegisterId.hpp b/hw/amdgpu/shader/include/amdgpu/shader/RegisterId.hpp new file mode 100644 index 000000000..d4fd13094 --- /dev/null +++ b/hw/amdgpu/shader/include/amdgpu/shader/RegisterId.hpp @@ -0,0 +1,102 @@ +#pragma once + +#include + +namespace amdgpu::shader { +class RegisterId { + static constexpr std::uint32_t kScalarOperandsOffset = 0; + static constexpr std::uint32_t kScalarOperandsCount = 256; + static constexpr std::uint32_t kVectorOperandsOffset = + kScalarOperandsOffset + kScalarOperandsCount; + static constexpr std::uint32_t kVectorOperandsCount = 512; + static constexpr std::uint32_t kExportOperandsOffset = + kVectorOperandsOffset + kVectorOperandsCount; + static constexpr std::uint32_t kExportOperandsCount = 64; + static constexpr std::uint32_t kAttrOperandsOffset = + kExportOperandsOffset + kExportOperandsCount; + static constexpr std::uint32_t kAttrOperandsCount = 32; + static constexpr std::uint32_t kOperandsCount = + kAttrOperandsOffset + kAttrOperandsCount; + + static constexpr std::uint32_t kRegisterVccLoId = kScalarOperandsOffset + 106; + static constexpr std::uint32_t kRegisterVccHiId = kScalarOperandsOffset + 107; + static constexpr std::uint32_t kRegisterM0Id = kScalarOperandsOffset + 124; + static constexpr std::uint32_t kRegisterExecLoId = + kScalarOperandsOffset + 126; + static constexpr std::uint32_t kRegisterExecHiId = + kScalarOperandsOffset + 127; + static constexpr std::uint32_t kRegisterSccId = kScalarOperandsOffset + 253; + static constexpr std::uint32_t kRegisterLdsDirect = + kScalarOperandsOffset + 254; + +public: + enum enum_type : std::uint32_t { + Invalid = ~static_cast(0), + + VccLo = kRegisterVccLoId, + VccHi = kRegisterVccHiId, + M0 = kRegisterM0Id, + ExecLo = kRegisterExecLoId, + ExecHi = kRegisterExecHiId, + Scc = kRegisterSccId, + LdsDirect = kRegisterLdsDirect, + } raw = Invalid; + + RegisterId(enum_type value) : raw(value) {} + + operator enum_type() const { return raw; } + + static RegisterId Raw(std::uint32_t index) { + return static_cast(index); + } + static RegisterId Scalar(std::uint32_t index) { + return static_cast(index + kScalarOperandsOffset); + } + static RegisterId Vector(std::uint32_t index) { + return static_cast(index + kVectorOperandsOffset); + } + static RegisterId Export(std::uint32_t index) { + return static_cast(index + kExportOperandsOffset); + } + static RegisterId Attr(std::uint32_t index) { + return static_cast(index + kAttrOperandsOffset); + } + + bool isScalar() const { + return raw >= kScalarOperandsOffset && + raw < kScalarOperandsOffset + kScalarOperandsCount; + } + bool isVector() const { + return raw >= kVectorOperandsOffset && + raw < kVectorOperandsOffset + kVectorOperandsCount; + } + bool isExport() const { + return raw >= kExportOperandsOffset && + raw < kExportOperandsOffset + kExportOperandsCount; + } + bool isAttr() const { + return raw >= kAttrOperandsOffset && + raw < kAttrOperandsOffset + kAttrOperandsCount; + } + + unsigned getOffset() const { + if (isScalar()) { + return raw - kScalarOperandsOffset; + } + + if (isVector()) { + return raw - kVectorOperandsOffset; + } + + if (isExport()) { + return raw - kExportOperandsOffset; + } + + if (isAttr()) { + return raw - kAttrOperandsOffset; + } + + return raw; + } +}; +} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/RegisterState.hpp b/hw/amdgpu/shader/include/amdgpu/shader/RegisterState.hpp new file mode 100644 index 000000000..c0f11f764 --- /dev/null +++ b/hw/amdgpu/shader/include/amdgpu/shader/RegisterState.hpp @@ -0,0 +1,27 @@ +#pragma once +#include "RegisterId.hpp" +#include "Value.hpp" +#include + +namespace amdgpu::shader { +struct RegisterState { + std::uint64_t pc; + + Value sgprs[104]; + Value vccLo; + Value vccHi; + Value m0; + Value execLo; + Value execHi; + Value scc; + Value ldsDirect; + Value vgprs[512]; + Value attrs[32]; + + Value getRegister(RegisterId regId); + void setRegister(RegisterId regId, Value value); + +private: + Value getRegisterImpl(RegisterId regId); +}; +} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/Stage.hpp b/hw/amdgpu/shader/include/amdgpu/shader/Stage.hpp new file mode 100644 index 000000000..3228af053 --- /dev/null +++ b/hw/amdgpu/shader/include/amdgpu/shader/Stage.hpp @@ -0,0 +1,5 @@ +#pragma once + +namespace amdgpu::shader { +enum class Stage { None, Vertex, Fragment, Geometry, Compute }; +} diff --git a/hw/amdgpu/shader/include/amdgpu/shader/TypeId.hpp b/hw/amdgpu/shader/include/amdgpu/shader/TypeId.hpp new file mode 100644 index 000000000..57b70f0a3 --- /dev/null +++ b/hw/amdgpu/shader/include/amdgpu/shader/TypeId.hpp @@ -0,0 +1,57 @@ +#pragma once + +#include + +namespace amdgpu::shader { +struct TypeId { + enum { + Bool, + SInt8, + UInt8, + SInt16, + UInt16, + SInt32, + UInt32, + UInt32x2, + UInt32x3, + UInt32x4, + UInt64, + SInt64, + ArrayUInt32x8, + ArrayUInt32x16, + Float16, + Float32, + Float32x2, + Float32x3, + Float32x4, + Float64, + ArrayFloat32x8, + ArrayFloat32x16, + Sampler, + Image2D, + SampledImage2D, + + Void // should be last + } raw = Void; + + using enum_type = decltype(raw); + + TypeId() = default; + TypeId(enum_type value) : raw(value) {} + operator enum_type() const { return raw; } + + TypeId getBaseType() const; + std::size_t getSize() const; + std::size_t getElementsCount() const; + + bool isSignedInt() const { + return raw == TypeId::SInt8 || raw == TypeId::SInt16 || + raw == TypeId::SInt32 || raw == TypeId::SInt64; + } + + bool isFloatPoint() const { + return raw == TypeId::Float16 || raw == TypeId::Float32 || + raw == TypeId::Float64; + } +}; +} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/Uniform.hpp b/hw/amdgpu/shader/include/amdgpu/shader/Uniform.hpp new file mode 100644 index 000000000..ab8878912 --- /dev/null +++ b/hw/amdgpu/shader/include/amdgpu/shader/Uniform.hpp @@ -0,0 +1,20 @@ +#pragma once + +#include "AccessOp.hpp" +#include "TypeId.hpp" +#include "spirv/spirv-builder.hpp" + +#include +#include + +namespace amdgpu::shader { +struct UniformInfo { + std::uint32_t buffer[8]; + int index; + TypeId typeId; + spirv::PointerType type; + spirv::VariableValue variable; + AccessOp accessOp = AccessOp::None; + bool isBuffer; +}; +} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/Value.hpp b/hw/amdgpu/shader/include/amdgpu/shader/Value.hpp new file mode 100644 index 000000000..b98d93d8e --- /dev/null +++ b/hw/amdgpu/shader/include/amdgpu/shader/Value.hpp @@ -0,0 +1,15 @@ +#pragma once +#include + +namespace amdgpu::shader { +struct Value { + spirv::Type type; + spirv::Value value; + + Value() = default; + Value(spirv::Type type, spirv::Value value) : type(type), value(value) {} + + explicit operator bool() const { return static_cast(value); } + bool operator==(Value other) const { return value == other.value; } +}; +} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/cf.hpp b/hw/amdgpu/shader/include/amdgpu/shader/cf.hpp new file mode 100644 index 000000000..8fcedc8c3 --- /dev/null +++ b/hw/amdgpu/shader/include/amdgpu/shader/cf.hpp @@ -0,0 +1,146 @@ +#pragma once + +#include +#include +#include +#include + +namespace cf { +enum class TerminatorKind { + None, + Branch, + BranchToUnknown, + Return, +}; + +class BasicBlock { + std::uint64_t address; + std::uint64_t size = 0; + + std::set predecessors; + BasicBlock *successors[2]{}; + TerminatorKind terminator = TerminatorKind::None; + +public: + explicit BasicBlock(std::uint64_t address, std::uint64_t size = 0) + : address(address), size(size) {} + + BasicBlock(const BasicBlock &) = delete; + + void setSize(std::uint64_t newSize) { size = newSize; } + std::uint64_t getSize() const { return size; } + std::uint64_t getAddress() const { return address; } + TerminatorKind getTerminator() const { return terminator; } + + void createConditionalBranch(BasicBlock *ifTrue, BasicBlock *ifFalse); + void createBranch(BasicBlock *target); + void createBranchToUnknown(); + void createReturn(); + + void replaceSuccessor(BasicBlock *origBB, BasicBlock *newBB); + void replacePredecessor(BasicBlock *origBB, BasicBlock *newBB) { + origBB->replaceSuccessor(this, newBB); + } + + template T> void walk(T &&cb) { + std::vector workStack; + std::set processed; + + workStack.push_back(this); + processed.insert(this); + + while (!workStack.empty()) { + auto block = workStack.back(); + workStack.pop_back(); + + block->walkSuccessors([&](BasicBlock *successor) { + if (processed.insert(successor).second) { + workStack.push_back(successor); + } + }); + + cb(*block); + } + } + + template T> void walkSuccessors(T &&cb) const { + if (successors[0]) { + cb(successors[0]); + + if (successors[1]) { + cb(successors[1]); + } + } + } + + template T> void walkPredecessors(T &&cb) const { + for (auto pred : predecessors) { + cb(pred); + } + } + + std::size_t getPredecessorsCount() const { return predecessors.size(); } + + bool hasDirectPredecessor(const BasicBlock &block) const; + bool hasPredecessor(const BasicBlock &block) const; + + std::size_t getSuccessorsCount() const { + if (successors[0] == nullptr) { + return 0; + } + + return successors[1] != nullptr ? 2 : 1; + } + + BasicBlock *getSuccessor(std::size_t index) const { return successors[index]; } + + void split(BasicBlock *target); +}; + +class Context { + std::map> basicBlocks; + +public: + BasicBlock *getBasicBlockAt(std::uint64_t address) { + if (auto it = basicBlocks.find(address); it != basicBlocks.end()) { + return &it->second; + } + + return nullptr; + } + + BasicBlock *getBasicBlock(std::uint64_t address) { + if (auto it = basicBlocks.lower_bound(address); it != basicBlocks.end()) { + auto bb = &it->second; + + if (bb->getAddress() <= address && + bb->getAddress() + bb->getSize() > address) { + return bb; + } + } + + return nullptr; + } + + BasicBlock *getOrCreateBasicBlock(std::uint64_t address, bool split = true) { + auto it = basicBlocks.lower_bound(address); + + if (it != basicBlocks.end()) { + auto bb = &it->second; + + if (bb->getAddress() <= address && + bb->getAddress() + bb->getSize() > address) { + if (split && bb->getAddress() != address) { + auto result = &basicBlocks.emplace_hint(it, address, address)->second; + bb->split(result); + return result; + } + + return bb; + } + } + + return &basicBlocks.emplace_hint(it, address, address)->second; + } +}; +} // namespace cf diff --git a/hw/amdgpu/shader/include/amdgpu/shader/scf.hpp b/hw/amdgpu/shader/include/amdgpu/shader/scf.hpp new file mode 100644 index 000000000..d439ee048 --- /dev/null +++ b/hw/amdgpu/shader/include/amdgpu/shader/scf.hpp @@ -0,0 +1,371 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace cf { +class BasicBlock; +} + +namespace scf { +class BasicBlock; +struct PrintOptions { + unsigned char identCount = 2; + char identChar = ' '; + std::function + blockPrinter; + + std::string makeIdent(unsigned depth) const { + return std::string(depth * identCount, identChar); + } +}; + +class Node { + Node *mParent = nullptr; + Node *mNext = nullptr; + Node *mPrev = nullptr; + +public: + virtual ~Node() = default; + virtual void print(const PrintOptions &options, unsigned depth) = 0; + virtual bool isEqual(const Node &other) const { + return this == &other; + } + + void dump() { + print({}, 0); + } + + void setParent(Node *parent) { + mParent = parent; + } + + Node *getParent() const { + return mParent; + } + + template requires(std::is_base_of_v) + auto getParent() const -> decltype(dynCast(mParent)) { + return dynCast(mParent); + } + + Node *getNext() const { + return mNext; + } + + Node *getPrev() const { + return mPrev; + } + + friend class Block; +}; + +template + requires(std::is_base_of_v && std::is_base_of_v) && + requires(ST *s) { dynamic_cast(s); } +T *dynCast(ST *s) { + return dynamic_cast(s); +} + +template + requires(std::is_base_of_v && std::is_base_of_v) && + requires(const ST *s) { dynamic_cast(s); } +const T *dynCast(const ST *s) { + return dynamic_cast(s); +} + +inline bool isNodeEqual(const Node *lhs, const Node *rhs) { + if (lhs == rhs) { + return true; + } + + return lhs != nullptr && rhs != nullptr && lhs->isEqual(*rhs); +} + +struct UnknownBlock final : Node { + void print(const PrintOptions &options, unsigned depth) override { + std::printf("%sunknown\n", options.makeIdent(depth).c_str()); + } + + bool isEqual(const Node &other) const override { + return this == &other || dynCast(&other) != nullptr; + } +}; + +struct Return final : Node { + void print(const PrintOptions &options, unsigned depth) override { + std::printf("%sreturn\n", options.makeIdent(depth).c_str()); + } + + bool isEqual(const Node &other) const override { + return this == &other || dynCast(&other) != nullptr; + } +}; + + +class Context; + +class Block final : public Node { + Node *mBegin = nullptr; + Node *mEnd = nullptr; + + void *mUserData = nullptr; + +public: + void print(const PrintOptions &options, unsigned depth) override { + std::printf("%s{\n", options.makeIdent(depth).c_str()); + + for (auto node = mBegin; node != nullptr; node = node->getNext()) { + node->print(options, depth + 1); + } + std::printf("%s}\n", options.makeIdent(depth).c_str()); + } + + bool isEmpty() const { + return mBegin == nullptr; + } + + Node *getRootNode() const { + return mBegin; + } + Node *getLastNode() const { + return mEnd; + } + + void setUserData(void *data) { + mUserData = data; + } + void* getUserData() const { + return mUserData; + } + template + T* getUserData() const { + return static_cast(mUserData); + } + + void eraseFrom(Node *endBefore); + void splitInto(Block *target, Node *splitPoint); + Block *split(Context &context, Node *splitPoint); + + void append(Node *node) { + assert(node->mParent == nullptr); + assert(node->mPrev == nullptr); + assert(node->mNext == nullptr); + + node->mParent = this; + node->mPrev = mEnd; + + if (mEnd != nullptr) { + mEnd->mNext = node; + } + + if (mBegin == nullptr) { + mBegin = node; + } + + mEnd = node; + } + + void detachNode(Node *node) { + if (node->mPrev != nullptr) { + node->mPrev->mNext = node->mNext; + } + + if (node->mNext != nullptr) { + node->mNext->mPrev = node->mPrev; + } + + if (mBegin == node) { + mBegin = node->mNext; + } + + if (mEnd == node) { + mEnd = node->mPrev; + } + + node->mNext = nullptr; + node->mPrev = nullptr; + node->mParent = nullptr; + } + + bool isEqual(const Node &other) const override { + if (this == &other) { + return true; + } + + auto otherBlock = dynCast(&other); + + if (otherBlock == nullptr) { + return false; + } + + auto thisIt = mBegin; + auto otherIt = otherBlock->mBegin; + + while (thisIt != nullptr && otherIt != nullptr) { + if (!thisIt->isEqual(*otherIt)) { + return false; + } + + thisIt = thisIt->mNext; + otherIt = otherIt->mNext; + } + + return thisIt == otherIt; + } +}; + +class BasicBlock final : public Node { + std::uint64_t address; + std::uint64_t size = 0; + +public: + explicit BasicBlock(std::uint64_t address, std::uint64_t size = 0) + : address(address), size(size) {} + + std::uint64_t getSize() const { return size; } + std::uint64_t getAddress() const { return address; } + + void print(const PrintOptions &options, unsigned depth) override { + std::printf( + "%sbb%lx\n", + std::string(depth * options.identCount, options.identChar).c_str(), + getAddress()); + if (depth != 0 && options.blockPrinter) { + options.blockPrinter(options, depth + 1, this); + } + } + + Block *getBlock() const { + return dynCast(getParent()); + } + + bool isEqual(const Node &other) const override { + if (this == &other) { + return true; + } + + if (auto otherBlock = dynCast(&other)) { + return address == otherBlock->address; + } + + return false; + } +}; + +struct IfElse final : Node { + Block *ifTrue; + Block *ifFalse; + + IfElse(Block *ifTrue, Block *ifFalse) : ifTrue(ifTrue), ifFalse(ifFalse) { + ifTrue->setParent(this); + ifFalse->setParent(this); + } + + void print(const PrintOptions &options, unsigned depth) override { + if (ifTrue->isEmpty()) { + std::printf("%sif false\n", options.makeIdent(depth).c_str()); + ifFalse->print(options, depth); + return; + } + + std::printf("%sif true\n", options.makeIdent(depth).c_str()); + ifTrue->print(options, depth); + if (!ifFalse->isEmpty()) { + std::printf("%selse\n", options.makeIdent(depth).c_str()); + ifFalse->print(options, depth); + } + } + + bool isEqual(const Node &other) const override { + if (this == &other) { + return true; + } + + if (auto otherBlock = dynCast(&other)) { + return ifTrue->isEqual(*otherBlock->ifTrue) && + ifFalse->isEqual(*otherBlock->ifFalse); + } + + return false; + } +}; + +struct Jump final : Node { + BasicBlock *target; + + Jump(BasicBlock *target) : target(target) {} + + bool isEqual(const Node &other) const override { + if (this == &other) { + return true; + } + + if (auto otherJump = dynCast(&other)) { + return target == otherJump->target; + } + + return false; + } + + void print(const PrintOptions &options, unsigned depth) override { + std::printf("%sjump ", options.makeIdent(depth).c_str()); + target->print(options, 0); + } +}; + +struct Loop final : Node { + Block *body; + + Loop(Block *body) : body(body) { + body->setParent(this); + } + + bool isEqual(const Node &other) const override { + if (this == &other) { + return true; + } + + if (auto otherLoop = dynCast(&other)) { + return body->isEqual(*otherLoop->body); + } + + return false; + } + + void print(const PrintOptions &options, unsigned depth) override { + std::printf("%sloop {\n", options.makeIdent(depth).c_str()); + body->print(options, depth + 1); + std::printf("%s}\n", options.makeIdent(depth).c_str()); + } +}; + +struct Break final : Node { + bool isEqual(const Node &other) const override { + return this == &other || dynCast(&other) != nullptr; + } + + void print(const PrintOptions &options, unsigned depth) override { + std::printf("%sbreak\n", options.makeIdent(depth).c_str()); + } +}; + +class Context { + std::forward_list> mNodes; + +public: + template + requires(std::is_constructible_v) + T *create(ArgsT &&...args) { + auto result = new T(std::forward(args)...); + mNodes.push_front(std::unique_ptr{result}); + return result; + } +}; + +scf::Block *structurize(Context &ctxt, cf::BasicBlock *bb); +void makeUniqueBasicBlocks(Context &ctxt, Block *block); +} // namespace scf diff --git a/hw/amdgpu/shader/src/CfBuilder.cpp b/hw/amdgpu/shader/src/CfBuilder.cpp new file mode 100644 index 000000000..51f435929 --- /dev/null +++ b/hw/amdgpu/shader/src/CfBuilder.cpp @@ -0,0 +1,187 @@ +#include "CfBuilder.hpp" +#include "Instruction.hpp" +#include +#include +#include + +using namespace amdgpu; +using namespace amdgpu::shader; + +struct CfgBuilder { + cf::Context *context; + RemoteMemory memory; + + std::size_t analyzeBb(cf::BasicBlock *bb, std::uint64_t *successors, + std::size_t *successorsCount, auto pushWork) { + auto address = bb->getAddress(); + auto instBegin = memory.getPointer(address); + auto instHex = instBegin; + + while (true) { + auto instruction = Instruction(instHex); + auto size = instruction.size(); + auto pc = address + ((instHex - instBegin) << 2); + instHex += size; + + if (instruction.instClass == InstructionClass::Sop1) { + Sop1 sop1{instHex - size}; + + if (sop1.op == Sop1::Op::S_SETPC_B64 || + sop1.op == Sop1::Op::S_SWAPPC_B64) { + bb->createBranchToUnknown(); + break; + } + + continue; + } + + if (instruction.instClass == InstructionClass::Sopp) { + Sopp sopp{instHex - size}; + + if (sopp.op == Sopp::Op::S_ENDPGM) { + bb->createReturn(); + break; + } + + bool isEnd = false; + switch (sopp.op) { + case Sopp::Op::S_BRANCH: + successors[0] = pc + ((size + sopp.simm) << 2); + *successorsCount = 1; + + isEnd = true; + break; + + case Sopp::Op::S_CBRANCH_SCC0: + case Sopp::Op::S_CBRANCH_SCC1: + case Sopp::Op::S_CBRANCH_VCCZ: + case Sopp::Op::S_CBRANCH_VCCNZ: + case Sopp::Op::S_CBRANCH_EXECZ: + case Sopp::Op::S_CBRANCH_EXECNZ: + successors[0] = pc + ((size + sopp.simm) << 2); + successors[1] = pc + (size << 2); + *successorsCount = 2; + isEnd = true; + break; + + default: + break; + } + + if (isEnd) { + break; + } + continue; + } + + // move instruction that requires EXEC test to separate bb + if (instruction.instClass == InstructionClass::Vop2 || + instruction.instClass == InstructionClass::Vop3 || + instruction.instClass == InstructionClass::Mubuf || + instruction.instClass == InstructionClass::Mtbuf || + instruction.instClass == InstructionClass::Mimg || + instruction.instClass == InstructionClass::Ds || + instruction.instClass == InstructionClass::Vintrp || + instruction.instClass == InstructionClass::Exp || + instruction.instClass == InstructionClass::Vop1 || + instruction.instClass == InstructionClass::Vopc || + instruction.instClass == InstructionClass::Smrd) { + *successorsCount = 1; + + if (instBegin != instHex - size) { + // if it is not first instruction in block, move end to prev + // instruction, successor is current instruction + instHex -= size; + successors[0] = pc; + break; + } + + successors[0] = pc + (size << 2); + break; + } + } + + return (instHex - instBegin) << 2; + } + + cf::BasicBlock *buildCfg(std::uint64_t entryPoint) { + std::vector workList; + workList.push_back(entryPoint); + std::unordered_set processed; + processed.insert(entryPoint); + + struct BranchInfo { + std::uint64_t source; + std::size_t count; + std::uint64_t targets[2]; + }; + + std::vector branches; + + while (!workList.empty()) { + auto address = workList.back(); + workList.pop_back(); + + auto bb = context->getOrCreateBasicBlock(address); + + if (bb->getSize() != 0) { + continue; + } + + std::uint64_t successors[2]; + std::size_t successorsCount = 0; + std::size_t size = analyzeBb(bb, successors, &successorsCount, + [&](std::uint64_t address) { + if (processed.insert(address).second) { + workList.push_back(address); + } + }); + bb->setSize(size); + + if (successorsCount == 2) { + auto succ0Address = successors[0]; + auto succ1Address = successors[1]; + + branches.push_back( + {address + size - 4, 2, {successors[0], successors[1]}}); + + if (processed.insert(successors[0]).second) { + workList.push_back(successors[0]); + } + if (processed.insert(successors[1]).second) { + workList.push_back(successors[1]); + } + } else if (successorsCount == 1) { + branches.push_back({address + size - 4, 1, {successors[0]}}); + + if (processed.insert(successors[0]).second) { + workList.push_back(successors[0]); + } + } + } + + for (auto branch : branches) { + auto bb = context->getBasicBlock(branch.source); + assert(bb); + if (branch.count == 2) { + bb->createConditionalBranch( + context->getBasicBlockAt(branch.targets[0]), + context->getBasicBlockAt(branch.targets[1])); + } else { + bb->createBranch(context->getBasicBlockAt(branch.targets[0])); + } + } + + return context->getBasicBlockAt(entryPoint); + } +}; + +cf::BasicBlock *amdgpu::shader::buildCf(cf::Context &ctxt, + RemoteMemory memory, + std::uint64_t entryPoint) { + CfgBuilder builder; + builder.context = &ctxt; + builder.memory = memory; + + return builder.buildCfg(entryPoint); +} diff --git a/hw/amdgpu/shader/src/Converter.cpp b/hw/amdgpu/shader/src/Converter.cpp new file mode 100644 index 000000000..124ed706b --- /dev/null +++ b/hw/amdgpu/shader/src/Converter.cpp @@ -0,0 +1,389 @@ +#include "Converter.hpp" +#include "CfBuilder.hpp" +#include "ConverterContext.hpp" +#include "Fragment.hpp" +#include "FragmentTerminator.hpp" +#include "Instruction.hpp" +#include "RegisterId.hpp" +#include "RegisterState.hpp" +#include "cf.hpp" +#include "amdgpu/RemoteMemory.hpp" +#include "scf.hpp" +#include "util/unreachable.hpp" +#include +#include +#include +#include +#include +#include +#include +#include + +static void printInstructions(const scf::PrintOptions &options, unsigned depth, + std::uint32_t *instBegin, std::size_t size) { + auto instHex = instBegin; + auto instEnd = instBegin + size / sizeof(std::uint32_t); + + while (instHex < instEnd) { + auto instruction = amdgpu::shader::Instruction(instHex); + std::printf("%s", options.makeIdent(depth).c_str()); + instruction.dump(); + std::printf("\n"); + instHex += instruction.size(); + } +} + +namespace amdgpu::shader { +class Converter { + scf::Context *scfContext; + cf::Context cfContext; + RemoteMemory memory; + Function *function = nullptr; + std::forward_list states; + std::vector freeStates; + +public: + void convertFunction(RemoteMemory mem, scf::Context *scfCtxt, + scf::Block *block, Function *fn) { + scfContext = scfCtxt; + function = fn; + memory = mem; + + auto lastFragment = convertBlock(block, &function->entryFragment); + + if (lastFragment != nullptr) { + lastFragment->builder.createBranch(fn->exitFragment.entryBlockId); + lastFragment->appendBranch(fn->exitFragment); + } + + initState(&fn->exitFragment); + } + +private: + RegisterState *allocateState() { + if (freeStates.empty()) { + return &states.emplace_front(); + } + + auto result = freeStates.back(); + freeStates.pop_back(); + *result = {}; + return result; + } + + void releaseState(RegisterState *state) { + assert(state != nullptr); + freeStates.push_back(state); + } + + void initState(Fragment *fragment, std::uint64_t address = 0) { + if (fragment->registers == nullptr) { + fragment->registers = allocateState(); + } + + if (address != 0) { + fragment->registers->pc = address; + } + + fragment->injectValuesFromPreds(); + fragment->predecessors.clear(); + } + + void releaseStateOf(Fragment *frag) { + releaseState(frag->registers); + frag->registers = nullptr; + frag->values = {}; + frag->outputs = {}; + } + + bool needInjectExecTest(Fragment *fragment) { + auto inst = memory.getPointer(fragment->registers->pc); + auto instClass = getInstructionClass(*inst); + return instClass == InstructionClass::Vop2 || + instClass == InstructionClass::Vop3 || + instClass == InstructionClass::Mubuf || + instClass == InstructionClass::Mtbuf || + instClass == InstructionClass::Mimg || + instClass == InstructionClass::Ds || + instClass == InstructionClass::Vintrp || + instClass == InstructionClass::Exp || + instClass == InstructionClass::Vop1 || + instClass == InstructionClass::Vopc/* || + instClass == InstructionClass::Smrd*/; + } + + spirv::BoolValue createExecTest(Fragment *fragment) { + auto context = fragment->context; + auto &builder = fragment->builder; + auto boolT = context->getBoolType(); + auto uint32_0 = context->getUInt32(0); + auto loIsNotZero = + builder.createINotEqual(boolT, fragment->getExecLo().value, uint32_0); + auto hiIsNotZero = + builder.createINotEqual(boolT, fragment->getExecHi().value, uint32_0); + + return builder.createLogicalOr(boolT, loIsNotZero, hiIsNotZero); + } + + Fragment *convertBlock(scf::Block *block, Fragment *rootFragment) { + Fragment *currentFragment = nullptr; + + for (scf::Node *node = block->getRootNode(); node != nullptr; + node = node->getNext()) { + + if (auto bb = dynCast(node)) { + if (currentFragment == nullptr) { + currentFragment = rootFragment; + } else { + auto newFragment = function->createFragment(); + currentFragment->appendBranch(*newFragment); + currentFragment->builder.createBranch(newFragment->entryBlockId); + currentFragment = newFragment; + } + + initState(currentFragment, bb->getAddress()); + for (auto pred : currentFragment->predecessors) { + releaseStateOf(pred); + } + + if (needInjectExecTest(currentFragment)) { + auto bodyFragment = function->createFragment(); + auto mergeFragment = function->createFragment(); + + auto cond = createExecTest(currentFragment); + + currentFragment->appendBranch(*bodyFragment); + currentFragment->appendBranch(*mergeFragment); + currentFragment->builder.createSelectionMerge( + mergeFragment->entryBlockId, {}); + currentFragment->builder.createBranchConditional( + cond, bodyFragment->entryBlockId, mergeFragment->entryBlockId); + + initState(bodyFragment, bb->getAddress()); + bodyFragment->convert(bb->getSize()); + + bodyFragment->appendBranch(*mergeFragment); + bodyFragment->builder.createBranch(mergeFragment->entryBlockId); + + initState(mergeFragment); + releaseState(currentFragment->registers); + releaseState(bodyFragment->registers); + + currentFragment = mergeFragment; + } else { + currentFragment->convert(bb->getSize()); + } + continue; + } + + if (auto ifElse = dynCast(node)) { + auto ifTrueFragment = function->createFragment(); + auto ifFalseFragment = function->createFragment(); + auto mergeFragment = function->createFragment(); + + currentFragment->appendBranch(*ifTrueFragment); + currentFragment->appendBranch(*ifFalseFragment); + + currentFragment->builder.createSelectionMerge( + mergeFragment->entryBlockId, {}); + currentFragment->builder.createBranchConditional( + currentFragment->branchCondition, ifTrueFragment->entryBlockId, + ifFalseFragment->entryBlockId); + + auto ifTrueLastBlock = convertBlock(ifElse->ifTrue, ifTrueFragment); + auto ifFalseLastBlock = convertBlock(ifElse->ifFalse, ifFalseFragment); + + if (ifTrueLastBlock != nullptr) { + ifTrueLastBlock->builder.createBranch(mergeFragment->entryBlockId); + ifTrueLastBlock->appendBranch(*mergeFragment); + + if (ifTrueLastBlock->registers == nullptr) { + initState(ifTrueLastBlock); + } + } + + if (ifFalseLastBlock != nullptr) { + ifFalseLastBlock->builder.createBranch(mergeFragment->entryBlockId); + ifFalseLastBlock->appendBranch(*mergeFragment); + + if (ifFalseLastBlock->registers == nullptr) { + initState(ifFalseLastBlock); + } + } + + releaseStateOf(currentFragment); + initState(mergeFragment); + + if (ifTrueLastBlock != nullptr) { + releaseStateOf(ifTrueLastBlock); + } + + if (ifFalseLastBlock != nullptr) { + releaseStateOf(ifFalseLastBlock); + } + currentFragment = mergeFragment; + continue; + } + + if (dynCast(node)) { + auto jumpAddress = currentFragment->jumpAddress; + + std::printf("jump to %lx\n", jumpAddress); + std::fflush(stdout); + + if (jumpAddress == 0) { + util::unreachable("no jump register on unknown block"); + } + + auto block = buildCf(cfContext, memory, jumpAddress); + auto basicBlockPrinter = [this](const scf::PrintOptions &opts, + unsigned depth, scf::BasicBlock *bb) { + printInstructions(opts, depth, + memory.getPointer(bb->getAddress()), + bb->getSize()); + }; + auto scfBlock = scf::structurize(*scfContext, block); + scfBlock->print({.blockPrinter = basicBlockPrinter}, 0); + std::fflush(stdout); + + auto targetFragment = function->createFragment(); + currentFragment->builder.createBranch(targetFragment->entryBlockId); + currentFragment->appendBranch(*targetFragment); + auto result = convertBlock(scfBlock, targetFragment); + + if (currentFragment->registers == nullptr) { + initState(targetFragment); + releaseStateOf(currentFragment); + } + + return result; + } + + if (dynCast(node)) { + currentFragment->appendBranch(function->exitFragment); + currentFragment->builder.createBranch( + function->exitFragment.entryBlockId); + return nullptr; + } + + util::unreachable(); + } + + return currentFragment != nullptr ? currentFragment : rootFragment; + } +}; +}; // namespace amdgpu::shader + +amdgpu::shader::Shader amdgpu::shader::convert( + RemoteMemory memory, Stage stage, std::uint64_t entry, + std::span userSpgrs, int bindingOffset, + std::uint32_t dimX, std::uint32_t dimY, std::uint32_t dimZ) { + ConverterContext ctxt(memory, stage); + auto &builder = ctxt.getBuilder(); + builder.createCapability(spv::Capability::Shader); + builder.createCapability(spv::Capability::ImageQuery); + builder.createCapability(spv::Capability::ImageBuffer); + builder.createCapability(spv::Capability::UniformAndStorageBuffer8BitAccess); + builder.createCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); + builder.createCapability(spv::Capability::Int64); + builder.setMemoryModel(spv::AddressingModel::Logical, + spv::MemoryModel::GLSL450); + + scf::Context scfContext; + scf::Block *entryBlock = nullptr; + { + cf::Context cfContext; + auto entryBB = buildCf(cfContext, memory, entry); + entryBlock = scf::structurize(scfContext, entryBB); + } + + std::printf("========== stage: %u, user sgprs: %zu\n", (unsigned)stage, + userSpgrs.size()); + std::printf("structurized CFG:\n"); + + auto basicBlockPrinter = [memory](const scf::PrintOptions &opts, + unsigned depth, scf::BasicBlock *bb) { + printInstructions(opts, depth, + memory.getPointer(bb->getAddress()), + bb->getSize()); + }; + + entryBlock->print({.blockPrinter = basicBlockPrinter}, 0); + std::printf("==========\n"); + + auto mainFunction = ctxt.createFunction(0); + mainFunction->userSgprs = userSpgrs; + mainFunction->stage = stage; + + Converter converter; + converter.convertFunction(memory, &scfContext, entryBlock, mainFunction); + + Shader result; + + std::fflush(stdout); + mainFunction->exitFragment.outputs.clear(); + + for (auto &uniform : ctxt.getUniforms()) { + auto &newUniform = result.uniforms.emplace_back(); + newUniform.binding = bindingOffset++; + + for (int i = 0; i < 8; ++i) { + newUniform.buffer[i] = uniform.buffer[i]; + } + + std::uint32_t descriptorSet = 0; + + ctxt.getBuilder().createDecorate( + uniform.variable, spv::Decoration::DescriptorSet, {{descriptorSet}}); + ctxt.getBuilder().createDecorate(uniform.variable, spv::Decoration::Binding, + {{newUniform.binding}}); + + switch (uniform.typeId) { + case TypeId::Sampler: + newUniform.kind = Shader::UniformKind::Sampler; + break; + case TypeId::Image2D: + newUniform.kind = Shader::UniformKind::Image; + break; + default: + newUniform.kind = Shader::UniformKind::Buffer; + break; + } + + newUniform.accessOp = uniform.accessOp; + } + + mainFunction->insertReturn(); + + for (auto frag : mainFunction->fragments) { + mainFunction->builder.insertBlock(frag->builder); + } + + mainFunction->builder.insertBlock(mainFunction->exitFragment.builder); + + builder.insertFunction(mainFunction->builder, mainFunction->getResultType(), + spv::FunctionControlMask::MaskNone, + mainFunction->getFunctionType()); + + if (stage == Stage::Vertex) { + builder.createEntryPoint(spv::ExecutionModel::Vertex, + mainFunction->builder.id, "main", + ctxt.getInterfaces()); + } else if (stage == Stage::Fragment) { + builder.createEntryPoint(spv::ExecutionModel::Fragment, + mainFunction->builder.id, "main", + ctxt.getInterfaces()); + builder.createExecutionMode(mainFunction->builder.id, + spv::ExecutionMode::OriginUpperLeft, {}); + } else if (stage == Stage::Compute) { + builder.createEntryPoint(spv::ExecutionModel::GLCompute, + mainFunction->builder.id, "main", + ctxt.getInterfaces()); + builder.createExecutionMode(mainFunction->builder.id, + spv::ExecutionMode::LocalSize, + {{dimX, dimY, dimZ}}); + } + + result.spirv = ctxt.getBuilder().build(SPV_VERSION, 0); + return result; +} diff --git a/hw/amdgpu/shader/src/ConverterContext.cpp b/hw/amdgpu/shader/src/ConverterContext.cpp new file mode 100644 index 000000000..ceb2c3cb0 --- /dev/null +++ b/hw/amdgpu/shader/src/ConverterContext.cpp @@ -0,0 +1,567 @@ +#include "ConverterContext.hpp" +#include "util/unreachable.hpp" +using namespace amdgpu::shader; + +std::optional ConverterContext::getTypeIdOf(spirv::Type type) const { + for (int i = 0; i < kGenericTypesCount; ++i) { + if (mTypes[i] == type) { + return static_cast(i); + } + } + + return std::nullopt; +} + +spirv::StructType +ConverterContext::findStructType(std::span members) { + for (auto &structType : mStructTypes) { + if (structType.match(members)) { + return structType.id; + } + } + + return {}; +} + +spirv::StructType +ConverterContext::getStructType(std::span members) { + for (auto &structType : mStructTypes) { + if (structType.match(members)) { + return structType.id; + } + } + + auto &newType = mStructTypes.emplace_back(); + newType.id = mBuilder.createTypeStruct(members); + newType.members.reserve(members.size()); + for (auto member : members) { + newType.members.push_back(member); + } + return newType.id; +} + +spirv::PointerType +ConverterContext::getStructPointerType(spv::StorageClass storageClass, + spirv::StructType structType) { + StructTypeEntry *entry = nullptr; + for (auto &structType : mStructTypes) { + if (structType.id != structType.id) { + continue; + } + + entry = &structType; + } + + if (entry == nullptr) { + util::unreachable("Struct type not found"); + } + + auto &ptrType = entry->ptrTypes[static_cast(storageClass)]; + + if (!ptrType) { + ptrType = mBuilder.createTypePointer(storageClass, structType); + } + + return ptrType; +} + +spirv::Type ConverterContext::getType(TypeId id) { + auto &type = mTypes[static_cast(id)]; + + if (type) { + return type; + } + + switch (id) { + case TypeId::Void: + return ((type = mBuilder.createTypeVoid())); + case TypeId::Bool: + return ((type = mBuilder.createTypeBool())); + case TypeId::SInt8: + return ((type = mBuilder.createTypeSInt(8))); + case TypeId::UInt8: + return ((type = mBuilder.createTypeUInt(8))); + case TypeId::SInt16: + return ((type = mBuilder.createTypeSInt(16))); + case TypeId::UInt16: + return ((type = mBuilder.createTypeUInt(16))); + case TypeId::SInt32: + return ((type = mBuilder.createTypeSInt(32))); + case TypeId::UInt32: + return ((type = mBuilder.createTypeUInt(32))); + case TypeId::UInt32x2: + return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 2))); + case TypeId::UInt32x3: + return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 3))); + case TypeId::UInt32x4: + return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 4))); + case TypeId::UInt64: + return ((type = mBuilder.createTypeUInt(64))); + case TypeId::SInt64: + return ((type = mBuilder.createTypeSInt(64))); + case TypeId::ArrayUInt32x8: + type = mBuilder.createTypeArray(getType(TypeId::UInt32x4), getUInt32(2)); + getBuilder().createDecorate(type, spv::Decoration::ArrayStride, + std::array{static_cast(16)}); + case TypeId::ArrayUInt32x16: + type = mBuilder.createTypeArray(getType(TypeId::UInt32x4), getUInt32(4)); + getBuilder().createDecorate(type, spv::Decoration::ArrayStride, + std::array{static_cast(16)}); + return type; + case TypeId::Float16: + return ((type = mBuilder.createTypeFloat(16))); + case TypeId::Float32: + return ((type = mBuilder.createTypeFloat(32))); + case TypeId::Float32x2: + return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 2))); + case TypeId::Float32x3: + return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 3))); + case TypeId::Float32x4: + return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 4))); + case TypeId::Float64: + return ((type = mBuilder.createTypeFloat(64))); + case TypeId::ArrayFloat32x8: + type = mBuilder.createTypeArray(getType(TypeId::Float32x4), getUInt32(2)); + getBuilder().createDecorate(type, spv::Decoration::ArrayStride, + std::array{static_cast(16)}); + return type; + case TypeId::ArrayFloat32x16: + type = mBuilder.createTypeArray(getType(TypeId::Float32x4), getUInt32(4)); + getBuilder().createDecorate(type, spv::Decoration::ArrayStride, + std::array{static_cast(16)}); + return type; + + case TypeId::Image2D: + return ((type = getBuilder().createTypeImage(getFloat32Type(), + spv::Dim::Dim2D, 0, 0, 0, 1, + spv::ImageFormat::Unknown))); + case TypeId::SampledImage2D: + return ((type = getBuilder().createTypeSampledImage(getImage2DType()))); + + case TypeId::Sampler: + return ((type = getBuilder().createTypeSampler())); + } + + util::unreachable(); +} + +spirv::RuntimeArrayType ConverterContext::getRuntimeArrayType(TypeId id) { + auto &type = mRuntimeArrayTypes[static_cast(id)]; + + if (!type) { + type = mBuilder.createTypeRuntimeArray(getType(id)); + mBuilder.createDecorate(type, spv::Decoration::ArrayStride, + {{(std::uint32_t)id.getSize()}}); + } + + return type; +} + +spirv::ConstantUInt ConverterContext::getUInt64(std::uint64_t value) { + auto &id = mConstantUint64Map[value]; + if (!id) { + id = mBuilder.createConstant64(getUInt64Type(), value); + } + return id; +} + +spirv::ConstantUInt ConverterContext::getUInt32(std::uint32_t value) { + auto &id = mConstantUint32Map[value]; + if (!id) { + id = mBuilder.createConstant32(getUInt32Type(), value); + } + return id; +} + +spirv::ConstantSInt ConverterContext::getSInt32(std::uint32_t value) { + auto &id = mConstantSint32Map[value]; + if (!id) { + id = mBuilder.createConstant32(getSint32Type(), value); + } + return id; +} + +spirv::ConstantFloat ConverterContext::getFloat32Raw(std::uint32_t value) { + auto &id = mConstantFloat32Map[value]; + if (!id) { + id = mBuilder.createConstant32(getFloat32Type(), value); + } + return id; +} + +UniformInfo *ConverterContext::createStorageBuffer(TypeId type) { + std::array uniformStructMembers{getRuntimeArrayType(type)}; + auto uniformStruct = findStructType(uniformStructMembers); + + if (!uniformStruct) { + uniformStruct = getStructType(uniformStructMembers); + + getBuilder().createDecorate(uniformStruct, spv::Decoration::Block, {}); + + getBuilder().createMemberDecorate( + uniformStruct, 0, spv::Decoration::Offset, + std::array{static_cast(0)}); + } + + auto uniformType = + getStructPointerType(spv::StorageClass::StorageBuffer, uniformStruct); + auto uniformVariable = getBuilder().createVariable( + uniformType, spv::StorageClass::StorageBuffer); + + mInterfaces.push_back(uniformVariable); + + auto &newUniform = mUniforms.emplace_back(); + newUniform.index = mUniforms.size() - 1; + newUniform.typeId = type; + newUniform.type = uniformType; + newUniform.variable = uniformVariable; + newUniform.isBuffer = true; + std::printf("new storage buffer %u of type %u\n", newUniform.index, + newUniform.typeId.raw); + return &newUniform; +} + +UniformInfo *ConverterContext::getOrCreateStorageBuffer(std::uint32_t *vbuffer, + TypeId type) { + for (auto &uniform : mUniforms) { + if (std::memcmp(uniform.buffer, vbuffer, sizeof(std::uint32_t) * 4)) { + continue; + } + + if (uniform.typeId != type) { + util::unreachable("getOrCreateStorageBuffer: access to the uniform with " + "different type"); + } + + if (!uniform.isBuffer) { + util::unreachable("getOrCreateStorageBuffer: uniform was constant"); + } + + // std::printf("reuse storage buffer %u of type %u\n", uniform.index, + // uniform.typeId.raw); + return &uniform; + } + + auto newUniform = createStorageBuffer(type); + std::memcpy(newUniform->buffer, vbuffer, sizeof(std::uint32_t) * 4); + return newUniform; +} + +UniformInfo *ConverterContext::getOrCreateUniformConstant(std::uint32_t *buffer, + std::size_t size, + TypeId type) { + for (auto &uniform : mUniforms) { + if (std::memcmp(uniform.buffer, buffer, sizeof(std::uint32_t) * size)) { + continue; + } + + if (uniform.typeId != type) { + util::unreachable( + "getOrCreateUniformConstant: access to the uniform with " + "different type"); + } + + if (uniform.isBuffer) { + util::unreachable("getOrCreateUniformConstant: uniform was buffer"); + } + + return &uniform; + } + + auto uniformType = getPointerType(spv::StorageClass::UniformConstant, type); + auto uniformVariable = getBuilder().createVariable( + uniformType, spv::StorageClass::UniformConstant); + mInterfaces.push_back(uniformVariable); + + auto &newUniform = mUniforms.emplace_back(); + newUniform.index = mUniforms.size() - 1; + newUniform.typeId = type; + newUniform.type = uniformType; + newUniform.variable = uniformVariable; + newUniform.isBuffer = false; + std::memcpy(newUniform.buffer, buffer, sizeof(std::uint32_t) * size); + + return &newUniform; +} + +spirv::VariableValue ConverterContext::getThreadId() { + if (mThreadId) { + return mThreadId; + } + + auto inputType = getPointerType(spv::StorageClass::Input, TypeId::UInt32); + mThreadId = mBuilder.createVariable(inputType, spv::StorageClass::Input); + + if (mStage == Stage::Vertex) { + mBuilder.createDecorate( + mThreadId, spv::Decoration::BuiltIn, + std::array{static_cast(spv::BuiltIn::VertexIndex)}); + } else { + util::unreachable(); + } + + mInterfaces.push_back(mThreadId); + + return mThreadId; +} + +spirv::VariableValue ConverterContext::getWorkgroupId() { + if (mWorkgroupId) { + return mWorkgroupId; + } + + if (mStage != Stage::Compute) { + util::unreachable(); + } + + auto workgroupIdType = + getPointerType(spv::StorageClass::Input, TypeId::UInt32x3); + mWorkgroupId = + mBuilder.createVariable(workgroupIdType, spv::StorageClass::Input); + + mBuilder.createDecorate( + mWorkgroupId, spv::Decoration::BuiltIn, + {{static_cast(spv::BuiltIn::WorkgroupId)}}); + mInterfaces.push_back(mWorkgroupId); + + return mWorkgroupId; +} + +spirv::VariableValue ConverterContext::getLocalInvocationId() { + if (mLocalInvocationId) { + return mLocalInvocationId; + } + + if (mStage != Stage::Compute) { + util::unreachable(); + } + + auto localInvocationIdType = + getPointerType(spv::StorageClass::Input, TypeId::UInt32x3); + mLocalInvocationId = + mBuilder.createVariable(localInvocationIdType, spv::StorageClass::Input); + + mBuilder.createDecorate( + mLocalInvocationId, spv::Decoration::BuiltIn, + std::array{static_cast(spv::BuiltIn::LocalInvocationId)}); + + mInterfaces.push_back(mLocalInvocationId); + + return mLocalInvocationId; +} + +spirv::VariableValue ConverterContext::getPerVertex() { + if (mPerVertex) { + return mPerVertex; + } + + auto floatT = getFloat32Type(); + auto float4T = getFloat32x4Type(); + + auto uintConst1 = getUInt32(1); + auto arr1Float = mBuilder.createTypeArray(floatT, uintConst1); + + auto gl_PerVertexStructT = mBuilder.createTypeStruct(std::array{ + static_cast(float4T), + static_cast(floatT), + static_cast(arr1Float), + static_cast(arr1Float), + }); + + mBuilder.createDecorate(gl_PerVertexStructT, spv::Decoration::Block, {}); + mBuilder.createMemberDecorate( + gl_PerVertexStructT, 0, spv::Decoration::BuiltIn, + std::array{static_cast(spv::BuiltIn::Position)}); + mBuilder.createMemberDecorate( + gl_PerVertexStructT, 1, spv::Decoration::BuiltIn, + std::array{static_cast(spv::BuiltIn::PointSize)}); + mBuilder.createMemberDecorate( + gl_PerVertexStructT, 2, spv::Decoration::BuiltIn, + std::array{static_cast(spv::BuiltIn::ClipDistance)}); + mBuilder.createMemberDecorate( + gl_PerVertexStructT, 3, spv::Decoration::BuiltIn, + std::array{static_cast(spv::BuiltIn::CullDistance)}); + + auto gl_PerVertexPtrT = mBuilder.createTypePointer(spv::StorageClass::Output, + gl_PerVertexStructT); + mPerVertex = + mBuilder.createVariable(gl_PerVertexPtrT, spv::StorageClass::Output); + + mInterfaces.push_back(mPerVertex); + return mPerVertex; +} + +spirv::VariableValue ConverterContext::getFragCoord() { + if (mFragCoord) { + return mFragCoord; + } + + auto inputType = getPointerType(spv::StorageClass::Input, TypeId::Float32x4); + mFragCoord = + mBuilder.createVariable(inputType, spv::StorageClass::Input); + + mBuilder.createDecorate(mFragCoord, spv::Decoration::BuiltIn, + {{static_cast(spv::BuiltIn::FragCoord)}}); + + mInterfaces.push_back(mFragCoord); + return mFragCoord; +} + +spirv::VariableValue ConverterContext::getIn(unsigned location) { + auto [it, inserted] = mIns.try_emplace(location); + if (!inserted) { + return it->second; + } + + auto inputType = getPointerType(spv::StorageClass::Input, TypeId::Float32x4); + auto inputVariable = + mBuilder.createVariable(inputType, spv::StorageClass::Input); + + mBuilder.createDecorate(inputVariable, spv::Decoration::Location, + {{location}}); + + mInterfaces.push_back(inputVariable); + it->second = inputVariable; + return inputVariable; +} + +spirv::VariableValue ConverterContext::getOut(unsigned location) { + auto [it, inserted] = mOuts.try_emplace(location); + if (!inserted) { + return it->second; + } + auto outputType = + getPointerType(spv::StorageClass::Output, TypeId::Float32x4); + auto outputVariable = + mBuilder.createVariable(outputType, spv::StorageClass::Output); + + mBuilder.createDecorate(outputVariable, spv::Decoration::Location, + {{location}}); + + mInterfaces.push_back(outputVariable); + it->second = outputVariable; + return outputVariable; +} + +spirv::Function ConverterContext::getDiscardFn() { + if (mDiscardFn) { + return mDiscardFn; + } + + if (mStage != Stage::Fragment) { + util::unreachable(); + } + + auto fn = mBuilder.createFunctionBuilder(5); + mDiscardFn = fn.id; + auto entry = fn.createBlockBuilder(5); + entry.createKill(); + + fn.insertBlock(entry); + mBuilder.insertFunction(fn, getVoidType(), {}, + getFunctionType(getVoidType(), {})); + + return mDiscardFn; +} + +std::optional +ConverterContext::findUint32Value(spirv::Value id) const { + for (auto [value, constId] : mConstantUint32Map) { + if (constId == id) { + return value; + } + } + + return std::nullopt; +} + +std::optional +ConverterContext::findSint32Value(spirv::Value id) const { + for (auto [value, constId] : mConstantSint32Map) { + if (constId == id) { + return value; + } + } + + return std::nullopt; +} + +std::optional ConverterContext::findFloat32Value(spirv::Value id) const { + for (auto [value, constId] : mConstantFloat32Map) { + if (constId == id) { + return std::bit_cast(value); + } + } + + return std::nullopt; +} + +spirv::FunctionType +ConverterContext::getFunctionType(spirv::Type resultType, + std::span params) { + for (auto fnType : mFunctionTypes) { + if (fnType.resultType != resultType) { + continue; + } + + if (fnType.params.size() != params.size()) { + continue; + } + + bool match = true; + for (std::size_t i = 0, end = params.size(); i < end; ++i) { + if (fnType.params[i] != params[i]) { + match = false; + break; + } + } + if (!match) { + continue; + } + + return fnType.id; + } + + auto id = mBuilder.createTypeFunction(resultType, params); + + std::vector paramsVec; + paramsVec.reserve(params.size()); + + for (auto param : params) { + paramsVec.push_back(param); + } + + mFunctionTypes.push_back(FunctionType{ + .resultType = resultType, .params = std::move(paramsVec), .id = id}); + + return id; +} + +Function *ConverterContext::createFunction(std::size_t expectedSize) { + auto result = &mFunctions.emplace_front(); + + result->context = this; + result->entryFragment.context = this; + result->entryFragment.function = result; + result->entryFragment.builder = mBuilder.createBlockBuilder(expectedSize); + result->entryFragment.entryBlockId = result->entryFragment.builder.id; + result->fragments.push_back(&result->entryFragment); + + result->exitFragment.context = this; + result->exitFragment.function = result; + result->exitFragment.builder = mBuilder.createBlockBuilder(0); + result->exitFragment.entryBlockId = result->exitFragment.builder.id; + result->builder = mBuilder.createFunctionBuilder(expectedSize); + + return result; +} + +Fragment *ConverterContext::createFragment(std::size_t expectedSize) { + auto result = &mFragments.emplace_front(); + + result->context = this; + result->builder = mBuilder.createBlockBuilder(expectedSize); + result->entryBlockId = result->builder.id; + + return result; +} diff --git a/hw/amdgpu/shader/src/Fragment.cpp b/hw/amdgpu/shader/src/Fragment.cpp new file mode 100644 index 000000000..507ae5691 --- /dev/null +++ b/hw/amdgpu/shader/src/Fragment.cpp @@ -0,0 +1,5380 @@ +#include "Fragment.hpp" +#include "ConverterContext.hpp" +#include "RegisterId.hpp" +#include "RegisterState.hpp" + +#include +#include +#include + +#include + +using namespace amdgpu::shader; + +namespace { +std::uint32_t getChannelsCount(SurfaceFormat format) { + switch (format) { + case kSurfaceFormat8: + return 1; + case kSurfaceFormat16: + return 1; + case kSurfaceFormat8_8: + return 2; + case kSurfaceFormat32: + return 1; + case kSurfaceFormat16_16: + return 2; + case kSurfaceFormat10_11_11: + return 3; + case kSurfaceFormat11_11_10: + return 3; + case kSurfaceFormat10_10_10_2: + return 4; + case kSurfaceFormat2_10_10_10: + return 4; + case kSurfaceFormat8_8_8_8: + return 4; + case kSurfaceFormat32_32: + return 2; + case kSurfaceFormat16_16_16_16: + return 4; + case kSurfaceFormat32_32_32: + return 3; + case kSurfaceFormat32_32_32_32: + return 4; + default: + util::unreachable(); + } +} + +std::uint32_t sizeOfFormat(SurfaceFormat format) { + switch (format) { + case kSurfaceFormat8: + return 8; + case kSurfaceFormat16: + return 16; + case kSurfaceFormat8_8: + return 16; + case kSurfaceFormat32: + return 32; + case kSurfaceFormat16_16: + return 32; + case kSurfaceFormat10_11_11: + return 32; + case kSurfaceFormat11_11_10: + return 32; + case kSurfaceFormat10_10_10_2: + return 32; + case kSurfaceFormat2_10_10_10: + return 32; + case kSurfaceFormat8_8_8_8: + return 32; + case kSurfaceFormat32_32: + return 64; + case kSurfaceFormat16_16_16_16: + return 64; + case kSurfaceFormat32_32_32: + return 96; + case kSurfaceFormat32_32_32_32: + return 128; + default: + util::unreachable(); + } +} + +TypeId pickBufferType(SurfaceFormat surfaceFormat, + TextureChannelType channelType) { + auto size = sizeOfFormat(surfaceFormat) / getChannelsCount(surfaceFormat); + + if (size == 8) { + switch (channelType) { + case kTextureChannelTypeUNorm: + case kTextureChannelTypeUScaled: + case kTextureChannelTypeUInt: + return TypeId::UInt8; + + default: + return TypeId::SInt8; + } + } + + if (size == 16) { + switch (channelType) { + case kTextureChannelTypeUNorm: + case kTextureChannelTypeUScaled: + case kTextureChannelTypeUInt: + return TypeId::UInt16; + + case kTextureChannelTypeFloat: + return TypeId::Float16; + + default: + return TypeId::SInt16; + } + } + + if (size == 32) { + switch (channelType) { + case kTextureChannelTypeUNorm: + case kTextureChannelTypeUScaled: + case kTextureChannelTypeUInt: + return TypeId::UInt32; + + case kTextureChannelTypeFloat: + return TypeId::Float32; + + default: + return TypeId::SInt32; + } + } + + if (size == 64) { + switch (channelType) { + case kTextureChannelTypeUNorm: + case kTextureChannelTypeUScaled: + case kTextureChannelTypeUInt: + return TypeId::UInt64; + + case kTextureChannelTypeFloat: + return TypeId::Float64; + + default: + return TypeId::SInt64; + } + } + + util::unreachable(); +} + +spirv::Type convertFromFormat(spirv::Value *result, int count, + Fragment &fragment, std::uint32_t *vBufferData, + spirv::UIntValue offset, + SurfaceFormat surfaceFormat, + TextureChannelType channelType) { + auto loadType = pickBufferType(surfaceFormat, channelType); + + auto uniform = + fragment.context->getOrCreateStorageBuffer(vBufferData, loadType); + uniform->accessOp |= AccessOp::Load; + + auto storageBufferPointerType = fragment.context->getPointerType( + spv::StorageClass::StorageBuffer, loadType); + + auto &builder = fragment.builder; + + switch (surfaceFormat) { + case kSurfaceFormat8: + case kSurfaceFormat8_8: + case kSurfaceFormat8_8_8_8: + case kSurfaceFormat16: + case kSurfaceFormat16_16: + case kSurfaceFormat16_16_16_16: + case kSurfaceFormat32: + case kSurfaceFormat32_32: + case kSurfaceFormat32_32_32: + case kSurfaceFormat32_32_32_32: { + // format not requires bit fetching + auto totalChannelsCount = getChannelsCount(surfaceFormat); + auto channelSize = sizeOfFormat(surfaceFormat) / 8 / totalChannelsCount; + auto channelsCount = std::min(count, totalChannelsCount); + + if (channelSize != 1) { + offset = builder.createUDiv(fragment.context->getUInt32Type(), offset, + fragment.context->getUInt32(channelSize)); + } + + int channel = 0; + auto resultType = fragment.context->getType(loadType); + for (; channel < channelsCount; ++channel) { + auto channelOffset = offset; + + if (channel != 0) { + channelOffset = + builder.createIAdd(fragment.context->getUInt32Type(), channelOffset, + fragment.context->getUInt32(channel)); + } + + auto uniformPointerValue = fragment.builder.createAccessChain( + storageBufferPointerType, uniform->variable, + {{fragment.context->getUInt32(0), channelOffset}}); + + auto channelValue = fragment.builder.createLoad( + fragment.context->getType(loadType), uniformPointerValue); + switch (channelType) { + case kTextureChannelTypeFloat: + case kTextureChannelTypeSInt: + case kTextureChannelTypeUInt: + result[channel] = channelValue; + break; + + case kTextureChannelTypeUNorm: { + auto maxValue = + (static_cast(1) << (channelSize * 8)) - 1; + + auto uintChannelValue = spirv::cast(channelValue); + + if (loadType != TypeId::UInt32) { + uintChannelValue = builder.createUConvert( + fragment.context->getUInt32Type(), uintChannelValue); + } + + auto floatChannelValue = builder.createConvertUToF( + fragment.context->getFloat32Type(), uintChannelValue); + floatChannelValue = builder.createFDiv( + fragment.context->getFloat32Type(), floatChannelValue, + fragment.context->getFloat32(maxValue)); + result[channel] = floatChannelValue; + resultType = fragment.context->getFloat32Type(); + break; + } + + case kTextureChannelTypeSNorm: { + auto maxValue = + (static_cast(1) << (channelSize * 8 - 1)) - 1; + + auto uintChannelValue = spirv::cast(channelValue); + + if (loadType != TypeId::SInt32) { + uintChannelValue = builder.createSConvert( + fragment.context->getSint32Type(), uintChannelValue); + } + + auto floatChannelValue = builder.createConvertSToF( + fragment.context->getFloat32Type(), uintChannelValue); + + floatChannelValue = builder.createFDiv( + fragment.context->getFloat32Type(), floatChannelValue, + fragment.context->getFloat32(maxValue)); + + auto glslStd450 = fragment.context->getGlslStd450(); + floatChannelValue = + spirv::cast(fragment.builder.createExtInst( + fragment.context->getFloat32Type(), glslStd450, + GLSLstd450FClamp, + {{floatChannelValue, fragment.context->getFloat32(-1), + fragment.context->getFloat32(1)}})); + result[channel] = floatChannelValue; + resultType = fragment.context->getFloat32Type(); + break; + } + + case kTextureChannelTypeUScaled: { + auto uintChannelValue = spirv::cast(channelValue); + + if (loadType != TypeId::UInt32) { + uintChannelValue = builder.createUConvert( + fragment.context->getUInt32Type(), uintChannelValue); + } + + auto floatChannelValue = builder.createConvertUToF( + fragment.context->getFloat32Type(), uintChannelValue); + + result[channel] = floatChannelValue; + resultType = fragment.context->getFloat32Type(); + break; + } + + case kTextureChannelTypeSScaled: { + auto uintChannelValue = spirv::cast(channelValue); + + if (loadType != TypeId::SInt32) { + uintChannelValue = builder.createSConvert( + fragment.context->getSint32Type(), uintChannelValue); + } + + auto floatChannelValue = builder.createConvertSToF( + fragment.context->getFloat32Type(), uintChannelValue); + + result[channel] = floatChannelValue; + resultType = fragment.context->getFloat32Type(); + break; + } + + case kTextureChannelTypeSNormNoZero: { + auto maxValue = + (static_cast(1) << (channelSize * 8)) - 1; + + auto uintChannelValue = spirv::cast(channelValue); + + if (loadType != TypeId::SInt32) { + uintChannelValue = builder.createSConvert( + fragment.context->getSint32Type(), uintChannelValue); + } + + auto floatChannelValue = builder.createConvertSToF( + fragment.context->getFloat32Type(), uintChannelValue); + + floatChannelValue = builder.createFMul( + fragment.context->getFloat32Type(), floatChannelValue, + fragment.context->getFloat32(2)); + floatChannelValue = builder.createFAdd( + fragment.context->getFloat32Type(), floatChannelValue, + fragment.context->getFloat32(1)); + + floatChannelValue = builder.createFDiv( + fragment.context->getFloat32Type(), floatChannelValue, + fragment.context->getFloat32(maxValue)); + + result[channel] = floatChannelValue; + resultType = fragment.context->getFloat32Type(); + break; + } + + default: + util::unreachable("unimplemented channel type %u", channelType); + } + } + + for (; channel < count; ++channel) { + result[channel] = + fragment.createBitcast(resultType, fragment.context->getUInt32Type(), + fragment.context->getUInt32(0)); + } + return resultType; + } + + default: + break; + } + + util::unreachable("unimplemented conversion type. %u.%u", surfaceFormat, + channelType); +} + +void convertToFormat(RegisterId sourceRegister, int count, Fragment &fragment, + std::uint32_t *vBufferData, spirv::UIntValue offset, + SurfaceFormat surfaceFormat, + TextureChannelType channelType) { + + auto storeType = pickBufferType(surfaceFormat, channelType); + + auto uniform = + fragment.context->getOrCreateStorageBuffer(vBufferData, storeType); + uniform->accessOp |= AccessOp::Store; + + auto uniformPointerType = fragment.context->getPointerType( + spv::StorageClass::StorageBuffer, storeType); + + auto &builder = fragment.builder; + switch (surfaceFormat) { + case kSurfaceFormat8: + case kSurfaceFormat8_8: + case kSurfaceFormat8_8_8_8: + case kSurfaceFormat16: + case kSurfaceFormat16_16: + case kSurfaceFormat16_16_16_16: + case kSurfaceFormat32: + case kSurfaceFormat32_32: + case kSurfaceFormat32_32_32: + case kSurfaceFormat32_32_32_32: { + // format not requires bit fetching + auto totalChannelsCount = getChannelsCount(surfaceFormat); + auto channelSize = sizeOfFormat(surfaceFormat) / 8 / totalChannelsCount; + auto channelsCount = std::min(count, totalChannelsCount); + + if (channelSize != 1) { + offset = builder.createUDiv(fragment.context->getUInt32Type(), offset, + fragment.context->getUInt32(channelSize)); + } + + int channel = 0; + + for (; channel < channelsCount; ++channel) { + auto channelOffset = offset; + + if (channel != 0) { + channelOffset = + builder.createIAdd(fragment.context->getUInt32Type(), channelOffset, + fragment.context->getUInt32(channel)); + } + + auto uniformPointerValue = fragment.builder.createAccessChain( + uniformPointerType, uniform->variable, + {{fragment.context->getUInt32(0), channelOffset}}); + + switch (channelType) { + case kTextureChannelTypeFloat: + case kTextureChannelTypeSInt: + case kTextureChannelTypeUInt: + fragment.builder.createStore( + uniformPointerValue, + fragment + .getOperand(RegisterId::Raw(sourceRegister + channel), + storeType) + .value); + break; + + default: + util::unreachable("unimplemented channel type %u", channelType); + } + } + + for (; channel < count; ++channel) { + auto channelOffset = + builder.createIAdd(fragment.context->getUInt32Type(), offset, + fragment.context->getUInt32(channel)); + auto uniformPointerValue = fragment.builder.createAccessChain( + uniformPointerType, uniform->variable, + {{fragment.context->getUInt32(0), channelOffset}}); + + fragment.builder.createStore( + uniformPointerValue, + fragment.createBitcast(fragment.context->getType(storeType), + fragment.context->getUInt32Type(), + fragment.context->getUInt32(0))); + } + + return; + } + + default: + break; + } + + util::unreachable("unimplemented conversion type. %u.%u", surfaceFormat, + channelType); +} + +struct GnmVBuffer { + uint64_t base : 44; + uint64_t mtype_L1s : 2; + uint64_t mtype_L2 : 2; + uint64_t stride : 14; + uint64_t cache_swizzle : 1; + uint64_t swizzle_en : 1; + + uint32_t num_records; + + uint32_t dst_sel_x : 3; + uint32_t dst_sel_y : 3; + uint32_t dst_sel_z : 3; + uint32_t dst_sel_w : 3; + + TextureChannelType nfmt : 3; + SurfaceFormat dfmt : 4; + uint32_t element_size : 2; + uint32_t index_stride : 2; + uint32_t addtid_en : 1; + uint32_t reserved0 : 1; + uint32_t hash_en : 1; + uint32_t reserved1 : 1; + uint32_t mtype : 3; + uint32_t type : 2; + + std::uint64_t getAddress() const { return base; } + + uint32_t getStride() const { return stride; } + + uint32_t getSize() const { + uint32_t stride = getStride(); + uint32_t numElements = getNumRecords(); + return stride ? numElements * stride : numElements; + } + + uint32_t getNumRecords() const { return num_records; } + uint32_t getElementSize() const { return element_size; } + uint32_t getIndexStrideSize() const { return index_stride; } + SurfaceFormat getSurfaceFormat() const { return (SurfaceFormat)dfmt; } + TextureChannelType getChannelType() const { return (TextureChannelType)nfmt; } +}; + +static_assert(sizeof(GnmVBuffer) == sizeof(std::uint64_t) * 2); + +enum class TextureType { + Dim1D = 8, + Dim2D, + Dim3D, + Cube, + Array1D, + Array2D, + Msaa2D, + MsaaArray2D, +}; + +struct GnmTBuffer { + uint64_t baseaddr256 : 38; + uint64_t mtype_L2 : 2; + uint64_t min_lod : 12; + SurfaceFormat dfmt : 6; + TextureChannelType nfmt : 4; + uint64_t mtype01 : 2; + + uint64_t width : 14; + uint64_t height : 14; + uint64_t perfMod : 3; + uint64_t interlaced : 1; + uint64_t dst_sel_x : 3; + uint64_t dst_sel_y : 3; + uint64_t dst_sel_z : 3; + uint64_t dst_sel_w : 3; + uint64_t base_level : 4; + uint64_t last_level : 4; + uint64_t tiling_idx : 5; + uint64_t pow2pad : 1; + uint64_t mtype2 : 1; + uint64_t : 1; // reserved + TextureType type : 4; + + uint64_t depth : 13; + uint64_t pitch : 14; + uint64_t : 5; // reserved + uint64_t base_array : 13; + uint64_t last_array : 13; + uint64_t : 6; // reserved + + uint64_t min_lod_warn : 12; // fixed point 4.8 + uint64_t counter_bank_id : 8; + uint64_t LOD_hdw_cnt_en : 1; + uint64_t : 42; // reserved + + std::uint64_t getAddress() const { + return static_cast(static_cast(baseaddr256)) + << 8; + } +}; + +static_assert(sizeof(GnmTBuffer) == sizeof(std::uint64_t) * 4); + +enum class CmpKind { + F, + LT, + EQ, + LE, + GT, + LG, + GE, + O, + U, + NGE, + NLG, + NGT, + NLE, + NEQ, + NLT, + NE, + TRU, + T = TRU +}; + +enum class CmpFlags { None = 0, X = 1 << 0, S = 1 << 1, SX = S | X }; +inline CmpFlags operator&(CmpFlags a, CmpFlags b) { + return static_cast(static_cast(a) & static_cast(b)); +} + +Value doCmpOp(Fragment &fragment, TypeId type, spirv::Value src0, + spirv::Value src1, CmpKind kind, CmpFlags flags) { + spirv::BoolValue cmp; + auto boolT = fragment.context->getBoolType(); + + switch (kind) { + case CmpKind::F: + cmp = fragment.context->getFalse(); + break; + case CmpKind::LT: + if (type.isFloatPoint()) { + cmp = fragment.builder.createFOrdLessThan(boolT, src0, src1); + } else if (type.isSignedInt()) { + cmp = fragment.builder.createSLessThan(boolT, src0, src1); + } else { + cmp = fragment.builder.createULessThan(boolT, src0, src1); + } + break; + case CmpKind::EQ: + if (type.isFloatPoint()) { + cmp = fragment.builder.createFOrdEqual(boolT, src0, src1); + } else { + cmp = fragment.builder.createIEqual(boolT, src0, src1); + } + break; + case CmpKind::LE: + if (type.isFloatPoint()) { + cmp = fragment.builder.createFOrdLessThanEqual(boolT, src0, src1); + } else if (type.isSignedInt()) { + cmp = fragment.builder.createSLessThanEqual(boolT, src0, src1); + } else { + cmp = fragment.builder.createULessThanEqual(boolT, src0, src1); + } + break; + case CmpKind::GT: + if (type.isFloatPoint()) { + cmp = fragment.builder.createFOrdGreaterThan(boolT, src0, src1); + } else if (type.isSignedInt()) { + cmp = fragment.builder.createSGreaterThan(boolT, src0, src1); + } else { + cmp = fragment.builder.createUGreaterThan(boolT, src0, src1); + } + break; + case CmpKind::LG: + cmp = fragment.builder.createFOrdNotEqual(boolT, src0, src1); + break; + case CmpKind::GE: + if (type.isFloatPoint()) { + cmp = fragment.builder.createFOrdGreaterThanEqual(boolT, src0, src1); + } else if (type.isSignedInt()) { + cmp = fragment.builder.createSGreaterThanEqual(boolT, src0, src1); + } else { + cmp = fragment.builder.createUGreaterThanEqual(boolT, src0, src1); + } + break; + case CmpKind::O: + cmp = fragment.builder.createLogicalAnd( + boolT, fragment.builder.createFOrdEqual(boolT, src0, src0), + fragment.builder.createFOrdEqual(boolT, src1, src1)); + break; + case CmpKind::U: + cmp = fragment.builder.createLogicalAnd( + boolT, fragment.builder.createFUnordNotEqual(boolT, src0, src0), + fragment.builder.createFUnordNotEqual(boolT, src1, src1)); + break; + case CmpKind::NGE: + cmp = fragment.builder.createFUnordLessThan(boolT, src0, src1); + break; + case CmpKind::NLG: + cmp = fragment.builder.createFUnordGreaterThanEqual(boolT, src0, src1); + break; + case CmpKind::NGT: + cmp = fragment.builder.createFUnordLessThanEqual(boolT, src0, src1); + break; + case CmpKind::NLE: + cmp = fragment.builder.createFUnordGreaterThan(boolT, src0, src1); + break; + case CmpKind::NE: + case CmpKind::NEQ: + if (type.isFloatPoint()) { + cmp = fragment.builder.createFUnordNotEqual(boolT, src0, src1); + } else { + cmp = fragment.builder.createINotEqual(boolT, src0, src1); + } + break; + case CmpKind::NLT: + cmp = fragment.builder.createFUnordGreaterThanEqual(boolT, src0, src1); + break; + case CmpKind::TRU: + cmp = fragment.context->getTrue(); + break; + } + + if (!cmp) { + util::unreachable(); + } + + auto uint32T = fragment.context->getUInt32Type(); + auto uint32_0 = fragment.context->getUInt32(0); + auto result = fragment.builder.createSelect( + uint32T, cmp, fragment.context->getUInt32(1), uint32_0); + + if ((flags & CmpFlags::X) == CmpFlags::X) { + fragment.setOperand(RegisterId::ExecLo, {uint32T, result}); + fragment.setOperand(RegisterId::ExecHi, {uint32T, uint32_0}); + } + + // TODO: handle flags + return {uint32T, result}; +}; + +void convertVop2(Fragment &fragment, Vop2 inst) { + fragment.registers->pc += Vop2::kMinInstSize * sizeof(std::uint32_t); + switch (inst.op) { + case Vop2::Op::V_CVT_PKRTZ_F16_F32: { + auto float2T = fragment.context->getType(TypeId::Float32x2); + auto uintT = fragment.context->getType(TypeId::UInt32); + auto glslStd450 = fragment.context->getGlslStd450(); + + auto src0 = fragment.getScalarOperand(inst.src0, TypeId::Float32).value; + auto src1 = fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value; + + auto src = fragment.builder.createCompositeConstruct( + float2T, std::array{src0, src1}); + auto dst = fragment.builder.createExtInst( + uintT, glslStd450, GLSLstd450PackHalf2x16, std::array{src}); + + fragment.setVectorOperand(inst.vdst, {uintT, dst}); + break; + } + case Vop2::Op::V_AND_B32: { + auto src0 = fragment.getScalarOperand(inst.src0, TypeId::UInt32).value; + auto src1 = fragment.getVectorOperand(inst.vsrc1, TypeId::UInt32).value; + auto uintT = fragment.context->getType(TypeId::UInt32); + + fragment.setVectorOperand( + inst.vdst, + {uintT, fragment.builder.createBitwiseAnd(uintT, src0, src1)}); + break; + } + + case Vop2::Op::V_OR_B32: { + auto src0 = fragment.getScalarOperand(inst.src0, TypeId::UInt32).value; + auto src1 = fragment.getVectorOperand(inst.vsrc1, TypeId::UInt32).value; + auto uintT = fragment.context->getType(TypeId::UInt32); + + fragment.setVectorOperand( + inst.vdst, + {uintT, fragment.builder.createBitwiseOr(uintT, src0, src1)}); + break; + } + + case Vop2::Op::V_ADD_I32: { + auto src0 = fragment.getScalarOperand(inst.src0, TypeId::UInt32).value; + auto src1 = fragment.getVectorOperand(inst.vsrc1, TypeId::UInt32).value; + auto uintT = fragment.context->getType(TypeId::UInt32); + auto resultStruct = + fragment.context->getStructType(std::array{uintT, uintT}); + auto result = fragment.builder.createIAddCarry(resultStruct, src0, src1); + fragment.setVectorOperand( + inst.vdst, + {uintT, fragment.builder.createCompositeExtract( + uintT, result, std::array{static_cast(0)})}); + fragment.setVcc( + {uintT, fragment.builder.createCompositeExtract( + uintT, result, std::array{static_cast(1)})}); + // TODO: update vcc hi + break; + } + + case Vop2::Op::V_SUB_I32: { + auto src0 = fragment.getScalarOperand(inst.src0, TypeId::UInt32).value; + auto src1 = fragment.getVectorOperand(inst.vsrc1, TypeId::UInt32).value; + auto uintT = fragment.context->getType(TypeId::UInt32); + auto resultStruct = + fragment.context->getStructType(std::array{uintT, uintT}); + auto result = fragment.builder.createISubBorrow(resultStruct, src0, src1); + fragment.setVectorOperand( + inst.vdst, + {uintT, fragment.builder.createCompositeExtract( + uintT, result, std::array{static_cast(0)})}); + fragment.setVcc( + {uintT, fragment.builder.createCompositeExtract( + uintT, result, std::array{static_cast(1)})}); + // TODO: update vcc hi + break; + } + + case Vop2::Op::V_MAC_F32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::Float32).value); + auto src1 = spirv::cast( + fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); + auto dst = spirv::cast( + fragment.getVectorOperand(inst.vdst, TypeId::Float32).value); + auto floatT = fragment.context->getFloat32Type(); + + auto result = fragment.builder.createFAdd( + floatT, fragment.builder.createFMul(floatT, src0, src1), dst); + + fragment.setVectorOperand(inst.vdst, {floatT, result}); + break; + } + + case Vop2::Op::V_MAC_LEGACY_F32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::Float32).value); + auto src1 = spirv::cast( + fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); + auto dst = spirv::cast( + fragment.getVectorOperand(inst.vdst, TypeId::Float32).value); + auto floatT = fragment.context->getFloat32Type(); + auto boolT = fragment.context->getBoolType(); + auto float0 = fragment.context->getFloat32(0); + + auto src0IsZero = fragment.builder.createFOrdEqual(boolT, src0, float0); + auto src1IsZero = fragment.builder.createFOrdEqual(boolT, src1, float0); + auto anySrcIsZero = + fragment.builder.createLogicalOr(boolT, src0IsZero, src1IsZero); + + auto result = fragment.builder.createFAdd( + floatT, + fragment.builder.createSelect( + floatT, anySrcIsZero, float0, + fragment.builder.createFMul(floatT, src0, src1)), + dst); + + fragment.setVectorOperand(inst.vdst, {floatT, result}); + break; + } + + case Vop2::Op::V_MUL_F32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::Float32).value); + auto src1 = spirv::cast( + fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); + auto floatT = fragment.context->getFloat32Type(); + + auto result = fragment.builder.createFMul(floatT, src0, src1); + + fragment.setVectorOperand(inst.vdst, {floatT, result}); + break; + } + + case Vop2::Op::V_ADD_F32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::Float32).value); + auto src1 = spirv::cast( + fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); + auto floatT = fragment.context->getFloat32Type(); + + auto result = fragment.builder.createFAdd(floatT, src0, src1); + + fragment.setVectorOperand(inst.vdst, {floatT, result}); + break; + } + + case Vop2::Op::V_SUB_F32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::Float32).value); + auto src1 = spirv::cast( + fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); + auto floatT = fragment.context->getFloat32Type(); + + auto result = fragment.builder.createFSub(floatT, src0, src1); + + fragment.setVectorOperand(inst.vdst, {floatT, result}); + break; + } + case Vop2::Op::V_SUBREV_F32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::Float32).value); + auto src1 = spirv::cast( + fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); + auto floatT = fragment.context->getFloat32Type(); + + auto result = fragment.builder.createFSub(floatT, src1, src0); + + fragment.setVectorOperand(inst.vdst, {floatT, result}); + break; + } + case Vop2::Op::V_SUBREV_I32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::SInt32).value); + auto src1 = spirv::cast( + fragment.getVectorOperand(inst.vsrc1, TypeId::SInt32).value); + auto floatT = fragment.context->getSint32Type(); + + auto result = fragment.builder.createISub(floatT, src1, src0); + + fragment.setVectorOperand(inst.vdst, {floatT, result}); + break; + } + + case Vop2::Op::V_MIN_F32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::Float32).value); + auto src1 = spirv::cast( + fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); + auto floatT = fragment.context->getFloat32Type(); + auto boolT = fragment.context->getBoolType(); + + auto result = fragment.builder.createSelect( + floatT, fragment.builder.createFOrdLessThan(boolT, src0, src1), src0, + src1); + + fragment.setVectorOperand(inst.vdst, {floatT, result}); + break; + } + + case Vop2::Op::V_MAX_F32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::Float32).value); + auto src1 = spirv::cast( + fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); + auto floatT = fragment.context->getFloat32Type(); + auto boolT = fragment.context->getBoolType(); + + auto result = fragment.builder.createSelect( + floatT, fragment.builder.createFOrdGreaterThanEqual(boolT, src0, src1), + src0, src1); + + fragment.setVectorOperand(inst.vdst, {floatT, result}); + break; + } + + case Vop2::Op::V_MUL_LEGACY_F32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::Float32).value); + auto src1 = spirv::cast( + fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); + auto floatT = fragment.context->getFloat32Type(); + auto boolT = fragment.context->getBoolType(); + auto float0 = fragment.context->getFloat32(0); + + auto src0IsZero = fragment.builder.createFOrdEqual(boolT, src0, float0); + auto src1IsZero = fragment.builder.createFOrdEqual(boolT, src1, float0); + auto anySrcIsZero = + fragment.builder.createLogicalOr(boolT, src0IsZero, src1IsZero); + + auto result = fragment.builder.createSelect( + floatT, anySrcIsZero, float0, + fragment.builder.createFMul(floatT, src0, src1)); + + fragment.setVectorOperand(inst.vdst, {floatT, result}); + break; + } + + case Vop2::Op::V_MADAK_F32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::Float32).value); + auto src1 = spirv::cast( + fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); + auto constant = spirv::cast( + fragment.getScalarOperand(255, TypeId::Float32).value); + auto floatT = fragment.context->getFloat32Type(); + + auto result = fragment.builder.createFAdd( + floatT, fragment.builder.createFMul(floatT, src0, src1), constant); + + fragment.setVectorOperand(inst.vdst, {floatT, result}); + break; + } + + case Vop2::Op::V_MADMK_F32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::Float32).value); + auto src1 = spirv::cast( + fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); + auto constant = spirv::cast( + fragment.getScalarOperand(255, TypeId::Float32).value); + auto floatT = fragment.context->getFloat32Type(); + + auto result = fragment.builder.createFAdd( + floatT, fragment.builder.createFMul(floatT, src0, constant), src1); + + fragment.setVectorOperand(inst.vdst, {floatT, result}); + break; + } + + case Vop2::Op::V_LSHL_B32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::UInt32).value); + auto src1 = spirv::cast( + fragment.getVectorOperand(inst.vsrc1, TypeId::UInt32).value); + auto uintT = fragment.context->getType(TypeId::UInt32); + + fragment.setVectorOperand( + inst.vdst, + {uintT, fragment.builder.createShiftLeftLogical(uintT, src0, src1)}); + break; + } + + case Vop2::Op::V_LSHLREV_B32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::UInt32).value); + auto src1 = spirv::cast( + fragment.getVectorOperand(inst.vsrc1, TypeId::UInt32).value); + auto uintT = fragment.context->getType(TypeId::UInt32); + + fragment.setVectorOperand( + inst.vdst, + {uintT, fragment.builder.createShiftLeftLogical(uintT, src1, src0)}); + break; + } + + case Vop2::Op::V_LSHR_B32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::UInt32).value); + auto src1 = spirv::cast( + fragment.getVectorOperand(inst.vsrc1, TypeId::UInt32).value); + auto uintT = fragment.context->getType(TypeId::UInt32); + + fragment.setVectorOperand( + inst.vdst, + {uintT, fragment.builder.createShiftRightLogical(uintT, src0, src1)}); + break; + } + + case Vop2::Op::V_LSHRREV_B32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::UInt32).value); + auto src1 = spirv::cast( + fragment.getVectorOperand(inst.vsrc1, TypeId::UInt32).value); + auto uintT = fragment.context->getType(TypeId::UInt32); + + fragment.setVectorOperand( + inst.vdst, + {uintT, fragment.builder.createShiftRightLogical(uintT, src1, src0)}); + break; + } + + case Vop2::Op::V_ASHR_I32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::SInt32).value); + auto src1 = spirv::cast( + fragment.getVectorOperand(inst.vsrc1, TypeId::SInt32).value); + auto sintT = fragment.context->getType(TypeId::SInt32); + + fragment.setVectorOperand( + inst.vdst, {sintT, fragment.builder.createShiftRightArithmetic( + sintT, src0, src1)}); + break; + } + + case Vop2::Op::V_ASHRREV_I32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::SInt32).value); + auto src1 = spirv::cast( + fragment.getVectorOperand(inst.vsrc1, TypeId::SInt32).value); + auto sintT = fragment.context->getType(TypeId::SInt32); + + fragment.setVectorOperand( + inst.vdst, {sintT, fragment.builder.createShiftRightArithmetic( + sintT, src1, src0)}); + break; + } + + case Vop2::Op::V_CNDMASK_B32: { + auto src0 = fragment.getScalarOperand(inst.src0, TypeId::UInt32).value; + auto src1 = fragment.getVectorOperand(inst.vsrc1, TypeId::UInt32).value; + auto vcc = fragment.getVccLo(); + + auto cmp = fragment.builder.createINotEqual(fragment.context->getBoolType(), + vcc.value, + fragment.context->getUInt32(0)); + + auto uint32T = fragment.context->getUInt32Type(); + auto result = fragment.builder.createSelect(uint32T, cmp, src1, src0); + fragment.setVectorOperand(inst.vdst, {uint32T, result}); + break; + } + + default: + inst.dump(); + util::unreachable(); + } +} +void convertSop2(Fragment &fragment, Sop2 inst) { + fragment.registers->pc += Sop2::kMinInstSize * sizeof(std::uint32_t); + switch (inst.op) { + case Sop2::Op::S_ADD_U32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); + auto resultT = fragment.context->getUInt32Type(); + auto result = fragment.builder.createIAdd(resultT, src0, src1); + fragment.setScc({resultT, result}); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + case Sop2::Op::S_ADD_I32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::SInt32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::SInt32).value); + auto resultT = fragment.context->getSint32Type(); + auto result = fragment.builder.createIAdd(resultT, src0, src1); + fragment.setScc({resultT, result}); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + + case Sop2::Op::S_ASHR_I32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::SInt32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); + + auto resultT = fragment.context->getSint32Type(); + src1 = spirv::cast(fragment.builder.createBitwiseAnd( + resultT, src1, fragment.context->getUInt32(0x3f))); + + auto result = + fragment.builder.createShiftRightArithmetic(resultT, src0, src1); + fragment.setScc({resultT, result}); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + case Sop2::Op::S_ASHR_I64: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::SInt64).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); + + auto resultT = fragment.context->getSint64Type(); + src1 = spirv::cast(fragment.builder.createBitwiseAnd( + resultT, src1, fragment.context->getUInt32(0x3f))); + + auto result = + fragment.builder.createShiftRightArithmetic(resultT, src0, src1); + fragment.setScc({resultT, result}); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + + case Sop2::Op::S_LSHR_B32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); + + auto resultT = fragment.context->getUInt32Type(); + src1 = spirv::cast(fragment.builder.createBitwiseAnd( + resultT, src1, fragment.context->getUInt32(0x1f))); + + auto result = fragment.builder.createShiftRightLogical(resultT, src0, src1); + fragment.setScc({resultT, result}); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + case Sop2::Op::S_LSHR_B64: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt64).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); + + auto resultT = fragment.context->getUInt64Type(); + src1 = spirv::cast(fragment.builder.createBitwiseAnd( + resultT, src1, fragment.context->getUInt32(0x3f))); + + auto result = fragment.builder.createShiftRightLogical(resultT, src0, src1); + fragment.setScc({resultT, result}); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + + case Sop2::Op::S_LSHL_B32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); + + auto resultT = fragment.context->getUInt32Type(); + src1 = spirv::cast(fragment.builder.createBitwiseAnd( + resultT, src1, fragment.context->getUInt32(0x1f))); + + auto result = fragment.builder.createShiftLeftLogical(resultT, src0, src1); + fragment.setScc({resultT, result}); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + case Sop2::Op::S_LSHL_B64: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt64).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt64).value); + + auto resultT = fragment.context->getUInt64Type(); + src1 = spirv::cast(fragment.builder.createBitwiseAnd( + resultT, src1, fragment.context->getUInt32(0x3f))); + + auto result = fragment.builder.createShiftLeftLogical(resultT, src0, src1); + fragment.setScc({resultT, result}); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + + case Sop2::Op::S_CSELECT_B32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); + + auto resultT = fragment.context->getUInt32Type(); + auto result = + fragment.builder.createSelect(resultT, fragment.getScc(), src0, src1); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + + case Sop2::Op::S_CSELECT_B64: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt64).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt64).value); + + auto resultT = fragment.context->getUInt64Type(); + auto result = + fragment.builder.createSelect(resultT, fragment.getScc(), src0, src1); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + + case Sop2::Op::S_MUL_I32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::SInt32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::SInt32).value); + auto resultT = fragment.context->getSint32Type(); + auto result = fragment.builder.createIMul(resultT, src0, src1); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + case Sop2::Op::S_AND_B32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); + auto resultT = fragment.context->getUInt32Type(); + auto result = fragment.builder.createBitwiseAnd(resultT, src0, src1); + fragment.setScc({resultT, result}); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + case Sop2::Op::S_ANDN2_B32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); + auto resultT = fragment.context->getUInt32Type(); + auto result = fragment.builder.createBitwiseAnd( + resultT, src0, fragment.builder.createNot(resultT, src1)); + fragment.setScc({resultT, result}); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + case Sop2::Op::S_AND_B64: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt64).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt64).value); + auto resultT = fragment.context->getUInt64Type(); + auto result = fragment.builder.createBitwiseAnd(resultT, src0, src1); + fragment.setScc({resultT, result}); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + case Sop2::Op::S_ANDN2_B64: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt64).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt64).value); + auto resultT = fragment.context->getUInt64Type(); + auto result = fragment.builder.createBitwiseAnd( + resultT, src0, fragment.builder.createNot(resultT, src1)); + fragment.setScc({resultT, result}); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + case Sop2::Op::S_OR_B32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); + auto resultT = fragment.context->getUInt32Type(); + auto result = fragment.builder.createBitwiseOr(resultT, src0, src1); + fragment.setScc({resultT, result}); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + case Sop2::Op::S_OR_B64: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt64).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt64).value); + auto resultT = fragment.context->getUInt64Type(); + auto result = fragment.builder.createBitwiseOr(resultT, src0, src1); + fragment.setScc({resultT, result}); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + case Sop2::Op::S_NAND_B32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); + auto resultT = fragment.context->getUInt32Type(); + auto result = fragment.builder.createNot( + resultT, fragment.builder.createBitwiseAnd(resultT, src0, src1)); + fragment.setScc({resultT, result}); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + case Sop2::Op::S_NAND_B64: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt64).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt64).value); + auto resultT = fragment.context->getUInt64Type(); + auto result = fragment.builder.createNot( + resultT, fragment.builder.createBitwiseAnd(resultT, src0, src1)); + fragment.setScc({resultT, result}); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + case Sop2::Op::S_NOR_B32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); + auto resultT = fragment.context->getUInt32Type(); + auto result = fragment.builder.createNot( + resultT, fragment.builder.createBitwiseOr(resultT, src0, src1)); + fragment.setScc({resultT, result}); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + case Sop2::Op::S_NOR_B64: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt64).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt64).value); + auto resultT = fragment.context->getUInt64Type(); + auto result = fragment.builder.createNot( + resultT, fragment.builder.createBitwiseOr(resultT, src0, src1)); + fragment.setScc({resultT, result}); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + + case Sop2::Op::S_BFE_U32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); + + auto operandT = fragment.context->getUInt32Type(); + + auto offset = + spirv::cast(fragment.builder.createBitwiseAnd( + operandT, src1, fragment.context->getUInt32(0x1f))); + auto size = spirv::cast(fragment.builder.createBitwiseAnd( + operandT, + fragment.builder.createShiftRightLogical( + operandT, src1, fragment.context->getUInt32(16)), + fragment.context->getUInt32(0x7f))); + + auto field = + fragment.builder.createShiftRightLogical(operandT, src0, offset); + auto mask = fragment.builder.createISub( + operandT, + fragment.builder.createShiftLeftLogical( + operandT, fragment.context->getUInt32(1), size), + fragment.context->getUInt32(1)); + + auto result = fragment.builder.createBitwiseAnd(operandT, field, mask); + auto resultT = fragment.context->getUInt32Type(); + fragment.setScc({resultT, result}); + fragment.setScalarOperand(inst.sdst, {resultT, result}); + break; + } + + default: + inst.dump(); + util::unreachable(); + } +} +void convertSopk(Fragment &fragment, Sopk inst) { + fragment.registers->pc += Sopk::kMinInstSize * sizeof(std::uint32_t); + switch (inst.op) { + case Sopk::Op::S_MOVK_I32: + fragment.setScalarOperand(inst.sdst, + {fragment.context->getSint32Type(), + fragment.context->getSInt32(inst.simm)}); + break; + default: + inst.dump(); + util::unreachable(); + } +} +void convertSmrd(Fragment &fragment, Smrd inst) { + fragment.registers->pc += Smrd::kMinInstSize * sizeof(std::uint32_t); + + auto getOffset = [&](std::int32_t adv = 0) -> spirv::IntValue { + if (inst.imm) { + return fragment.context->getUInt32(inst.offset + adv); + } + + auto resultT = fragment.context->getUInt32Type(); + auto resultV = fragment.getScalarOperand(inst.offset, TypeId::UInt32).value; + + if (auto constVal = fragment.context->findUint32Value(resultV)) { + return fragment.context->getUInt32(*constVal / 4 + adv); + } + + auto result = fragment.builder.createUDiv( + resultT, spirv::cast(resultV), + fragment.context->getUInt32(4)); + + if (adv != 0) { + result = fragment.builder.createIAdd(resultT, result, + fragment.context->getUInt32(adv)); + } + return result; + }; + + switch (inst.op) { + case Smrd::Op::S_BUFFER_LOAD_DWORD: + case Smrd::Op::S_BUFFER_LOAD_DWORDX2: + case Smrd::Op::S_BUFFER_LOAD_DWORDX4: + case Smrd::Op::S_BUFFER_LOAD_DWORDX8: + case Smrd::Op::S_BUFFER_LOAD_DWORDX16: { + std::uint32_t count = 1 + << (static_cast(inst.op) - + static_cast(Smrd::Op::S_BUFFER_LOAD_DWORD)); + auto vBuffer0 = + fragment.getScalarOperand((inst.sbase << 1) + 0, TypeId::UInt32); + auto vBuffer1 = + fragment.getScalarOperand((inst.sbase << 1) + 1, TypeId::UInt32); + auto vBuffer2 = + fragment.getScalarOperand((inst.sbase << 1) + 2, TypeId::UInt32); + auto vBuffer3 = + fragment.getScalarOperand((inst.sbase << 1) + 3, TypeId::UInt32); + + auto optVBuffer0Value = fragment.context->findUint32Value(vBuffer0.value); + auto optVBuffer1Value = fragment.context->findUint32Value(vBuffer1.value); + auto optVBuffer2Value = fragment.context->findUint32Value(vBuffer2.value); + auto optVBuffer3Value = fragment.context->findUint32Value(vBuffer3.value); + + if (optVBuffer0Value && optVBuffer1Value && optVBuffer2Value && + optVBuffer3Value) { + std::uint32_t vBufferData[] = {*optVBuffer0Value, *optVBuffer1Value, + *optVBuffer2Value, *optVBuffer3Value}; + auto vbuffer = reinterpret_cast(vBufferData); + // std::printf("vBuffer address = %lx\n", vbuffer->getAddress()); + + auto valueT = fragment.context->getFloat32Type(); + auto uniform = fragment.context->getOrCreateStorageBuffer( + vBufferData, TypeId::Float32); + uniform->accessOp |= AccessOp::Load; + auto storageBufferPointerType = fragment.context->getPointerType( + spv::StorageClass::StorageBuffer, TypeId::Float32); + + for (std::uint32_t i = 0; i < count; ++i) { + auto storageBufferPointerValue = fragment.builder.createAccessChain( + storageBufferPointerType, uniform->variable, + {{fragment.context->getUInt32(0), getOffset(i)}}); + + auto value = + fragment.builder.createLoad(valueT, storageBufferPointerValue); + fragment.setScalarOperand(inst.sdst + i, {valueT, value}); + } + } else { + // FIXME: implement runtime V# buffer fetching + util::unreachable(); + } + break; + } + + case Smrd::Op::S_LOAD_DWORD: + case Smrd::Op::S_LOAD_DWORDX2: + case Smrd::Op::S_LOAD_DWORDX4: + case Smrd::Op::S_LOAD_DWORDX8: + case Smrd::Op::S_LOAD_DWORDX16: { + std::uint32_t count = 1 << (static_cast(inst.op) - + static_cast(Smrd::Op::S_LOAD_DWORD)); + + auto uint32T = fragment.context->getUInt32Type(); + auto sgprLo = fragment.getScalarOperand(inst.sbase << 1, TypeId::UInt32); + auto sgprHi = + fragment.getScalarOperand((inst.sbase << 1) + 1, TypeId::UInt32); + auto optLoAddress = fragment.context->findUint32Value(sgprLo.value); + auto optHiAddress = fragment.context->findUint32Value(sgprHi.value); + + if (inst.imm && optLoAddress && optHiAddress) { + // if it is imm and address is known, read the values now + auto memory = fragment.context->getMemory(); + auto address = + *optLoAddress | (static_cast(*optHiAddress) << 32); + + auto data = + memory.getPointer(address + (inst.offset << 2)); + for (std::uint32_t i = 0; i < count; ++i) { + fragment.setScalarOperand( + inst.sdst + i, {uint32T, fragment.context->getUInt32(data[i])}); + } + } else { + // FIXME: implement + // TODO: create uniform and do load from it + util::unreachable(); + } + + break; + } + + default: + inst.dump(); + util::unreachable(); + } +} +void convertVop3(Fragment &fragment, Vop3 inst) { + fragment.registers->pc += Vop3::kMinInstSize * sizeof(std::uint32_t); + + auto applyOmod = [&](Value result) -> Value { + switch (inst.omod) { + case 1: + return {result.type, fragment.builder.createFMul( + spirv::cast(result.type), + spirv::cast(result.value), + fragment.context->getFloat32(2))}; + + case 2: + return {result.type, fragment.builder.createFMul( + spirv::cast(result.type), + spirv::cast(result.value), + fragment.context->getFloat32(4))}; + case 3: + return {result.type, fragment.builder.createFDiv( + spirv::cast(result.type), + spirv::cast(result.value), + fragment.context->getFloat32(2))}; + + default: + case 0: + return result; + } + }; + + auto applyClamp = [&](Value result) -> Value { + if (inst.clmp) { + auto glslStd450 = fragment.context->getGlslStd450(); + result.value = fragment.builder.createExtInst( + result.type, glslStd450, GLSLstd450FClamp, + {{result.value, fragment.context->getFloat32(0), + fragment.context->getFloat32(1)}}); + } + + return result; + }; + + auto getSrc = [&](int index, TypeId type) -> Value { + std::uint32_t src = + index == 0 ? inst.src0 : (index == 1 ? inst.src1 : inst.src2); + + auto result = fragment.getScalarOperand(src, type); + + if (inst.abs & (1 << index)) { + auto glslStd450 = fragment.context->getGlslStd450(); + result.value = fragment.builder.createExtInst( + result.type, glslStd450, GLSLstd450FAbs, {{result.value}}); + } + + if (inst.neg & (1 << index)) { + result.value = fragment.builder.createFNegate( + spirv::cast(result.type), + spirv::cast(result.value)); + } + + return result; + }; + + auto getSdstSrc = [&](int index, TypeId type) -> Value { + std::uint32_t src = + index == 0 ? inst.src0 : (index == 1 ? inst.src1 : inst.src2); + + auto result = fragment.getScalarOperand(src, type); + + if (inst.neg & (1 << index)) { + result.value = fragment.builder.createFNegate( + spirv::cast(result.type), + spirv::cast(result.value)); + } + + return result; + }; + + auto cmpOp = [&](TypeId type, CmpKind kind, CmpFlags flags = CmpFlags::None) { + auto src0 = fragment.getScalarOperand(inst.src0, type).value; + auto src1 = fragment.getScalarOperand(inst.src1, type).value; + + auto result = doCmpOp(fragment, type, src0, src1, kind, flags); + fragment.setScalarOperand(inst.vdst, result); + fragment.setScalarOperand(inst.vdst + 1, {fragment.context->getUInt32Type(), + fragment.context->getUInt32(0)}); + }; + + switch (inst.op) { + case Vop3::Op::V3_CMP_F_F32: + cmpOp(TypeId::Float32, CmpKind::F); + break; + case Vop3::Op::V3_CMP_LT_F32: + cmpOp(TypeId::Float32, CmpKind::LT); + break; + case Vop3::Op::V3_CMP_EQ_F32: + cmpOp(TypeId::Float32, CmpKind::EQ); + break; + case Vop3::Op::V3_CMP_LE_F32: + cmpOp(TypeId::Float32, CmpKind::LE); + break; + case Vop3::Op::V3_CMP_GT_F32: + cmpOp(TypeId::Float32, CmpKind::GT); + break; + case Vop3::Op::V3_CMP_LG_F32: + cmpOp(TypeId::Float32, CmpKind::LG); + break; + case Vop3::Op::V3_CMP_GE_F32: + cmpOp(TypeId::Float32, CmpKind::GE); + break; + case Vop3::Op::V3_CMP_O_F32: + cmpOp(TypeId::Float32, CmpKind::O); + break; + case Vop3::Op::V3_CMP_U_F32: + cmpOp(TypeId::Float32, CmpKind::U); + break; + case Vop3::Op::V3_CMP_NGE_F32: + cmpOp(TypeId::Float32, CmpKind::NGE); + break; + case Vop3::Op::V3_CMP_NLG_F32: + cmpOp(TypeId::Float32, CmpKind::NLG); + break; + case Vop3::Op::V3_CMP_NGT_F32: + cmpOp(TypeId::Float32, CmpKind::NGT); + break; + case Vop3::Op::V3_CMP_NLE_F32: + cmpOp(TypeId::Float32, CmpKind::NLE); + break; + case Vop3::Op::V3_CMP_NEQ_F32: + cmpOp(TypeId::Float32, CmpKind::NEQ); + break; + case Vop3::Op::V3_CMP_NLT_F32: + cmpOp(TypeId::Float32, CmpKind::NLT); + break; + case Vop3::Op::V3_CMP_TRU_F32: + cmpOp(TypeId::Float32, CmpKind::TRU); + break; + case Vop3::Op::V3_CMPX_F_F32: + cmpOp(TypeId::Float32, CmpKind::F, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_LT_F32: + cmpOp(TypeId::Float32, CmpKind::LT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_EQ_F32: + cmpOp(TypeId::Float32, CmpKind::EQ, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_LE_F32: + cmpOp(TypeId::Float32, CmpKind::LE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_GT_F32: + cmpOp(TypeId::Float32, CmpKind::GT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_LG_F32: + cmpOp(TypeId::Float32, CmpKind::LG, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_GE_F32: + cmpOp(TypeId::Float32, CmpKind::GE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_O_F32: + cmpOp(TypeId::Float32, CmpKind::O, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_U_F32: + cmpOp(TypeId::Float32, CmpKind::U, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_NGE_F32: + cmpOp(TypeId::Float32, CmpKind::NGE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_NLG_F32: + cmpOp(TypeId::Float32, CmpKind::NLG, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_NGT_F32: + cmpOp(TypeId::Float32, CmpKind::NGT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_NLE_F32: + cmpOp(TypeId::Float32, CmpKind::NLE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_NEQ_F32: + cmpOp(TypeId::Float32, CmpKind::NEQ, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_NLT_F32: + cmpOp(TypeId::Float32, CmpKind::NLT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_TRU_F32: + cmpOp(TypeId::Float32, CmpKind::TRU, CmpFlags::X); + break; + case Vop3::Op::V3_CMP_F_F64: + cmpOp(TypeId::Float64, CmpKind::F); + break; + case Vop3::Op::V3_CMP_LT_F64: + cmpOp(TypeId::Float64, CmpKind::LT); + break; + case Vop3::Op::V3_CMP_EQ_F64: + cmpOp(TypeId::Float64, CmpKind::EQ); + break; + case Vop3::Op::V3_CMP_LE_F64: + cmpOp(TypeId::Float64, CmpKind::LE); + break; + case Vop3::Op::V3_CMP_GT_F64: + cmpOp(TypeId::Float64, CmpKind::GT); + break; + case Vop3::Op::V3_CMP_LG_F64: + cmpOp(TypeId::Float64, CmpKind::LG); + break; + case Vop3::Op::V3_CMP_GE_F64: + cmpOp(TypeId::Float64, CmpKind::GE); + break; + case Vop3::Op::V3_CMP_O_F64: + cmpOp(TypeId::Float64, CmpKind::O); + break; + case Vop3::Op::V3_CMP_U_F64: + cmpOp(TypeId::Float64, CmpKind::U); + break; + case Vop3::Op::V3_CMP_NGE_F64: + cmpOp(TypeId::Float64, CmpKind::NGE); + break; + case Vop3::Op::V3_CMP_NLG_F64: + cmpOp(TypeId::Float64, CmpKind::NLG); + break; + case Vop3::Op::V3_CMP_NGT_F64: + cmpOp(TypeId::Float64, CmpKind::NGT); + break; + case Vop3::Op::V3_CMP_NLE_F64: + cmpOp(TypeId::Float64, CmpKind::NLE); + break; + case Vop3::Op::V3_CMP_NEQ_F64: + cmpOp(TypeId::Float64, CmpKind::NEQ); + break; + case Vop3::Op::V3_CMP_NLT_F64: + cmpOp(TypeId::Float64, CmpKind::NLT); + break; + case Vop3::Op::V3_CMP_TRU_F64: + cmpOp(TypeId::Float64, CmpKind::TRU); + break; + case Vop3::Op::V3_CMPX_F_F64: + cmpOp(TypeId::Float64, CmpKind::F, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_LT_F64: + cmpOp(TypeId::Float64, CmpKind::LT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_EQ_F64: + cmpOp(TypeId::Float64, CmpKind::EQ, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_LE_F64: + cmpOp(TypeId::Float64, CmpKind::LE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_GT_F64: + cmpOp(TypeId::Float64, CmpKind::GT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_LG_F64: + cmpOp(TypeId::Float64, CmpKind::LG, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_GE_F64: + cmpOp(TypeId::Float64, CmpKind::GE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_O_F64: + cmpOp(TypeId::Float64, CmpKind::O, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_U_F64: + cmpOp(TypeId::Float64, CmpKind::U, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_NGE_F64: + cmpOp(TypeId::Float64, CmpKind::NGE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_NLG_F64: + cmpOp(TypeId::Float64, CmpKind::NLG, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_NGT_F64: + cmpOp(TypeId::Float64, CmpKind::NGT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_NLE_F64: + cmpOp(TypeId::Float64, CmpKind::NLE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_NEQ_F64: + cmpOp(TypeId::Float64, CmpKind::NEQ, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_NLT_F64: + cmpOp(TypeId::Float64, CmpKind::NLT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_TRU_F64: + cmpOp(TypeId::Float64, CmpKind::TRU, CmpFlags::X); + break; + case Vop3::Op::V3_CMPS_F_F32: + cmpOp(TypeId::Float32, CmpKind::F, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_LT_F32: + cmpOp(TypeId::Float32, CmpKind::LT, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_EQ_F32: + cmpOp(TypeId::Float32, CmpKind::EQ, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_LE_F32: + cmpOp(TypeId::Float32, CmpKind::LE, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_GT_F32: + cmpOp(TypeId::Float32, CmpKind::GT, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_LG_F32: + cmpOp(TypeId::Float32, CmpKind::LG, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_GE_F32: + cmpOp(TypeId::Float32, CmpKind::GE, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_O_F32: + cmpOp(TypeId::Float32, CmpKind::O, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_U_F32: + cmpOp(TypeId::Float32, CmpKind::U, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_NGE_F32: + cmpOp(TypeId::Float32, CmpKind::NGE, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_NLG_F32: + cmpOp(TypeId::Float32, CmpKind::NLG, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_NGT_F32: + cmpOp(TypeId::Float32, CmpKind::NGT, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_NLE_F32: + cmpOp(TypeId::Float32, CmpKind::NLE, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_NEQ_F32: + cmpOp(TypeId::Float32, CmpKind::NEQ, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_NLT_F32: + cmpOp(TypeId::Float32, CmpKind::NLT, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_TRU_F32: + cmpOp(TypeId::Float32, CmpKind::TRU, CmpFlags::S); + break; + case Vop3::Op::V3_CMPSX_F_F32: + cmpOp(TypeId::Float32, CmpKind::F, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_LT_F32: + cmpOp(TypeId::Float32, CmpKind::LT, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_EQ_F32: + cmpOp(TypeId::Float32, CmpKind::EQ, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_LE_F32: + cmpOp(TypeId::Float32, CmpKind::LE, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_GT_F32: + cmpOp(TypeId::Float32, CmpKind::GT, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_LG_F32: + cmpOp(TypeId::Float32, CmpKind::LG, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_GE_F32: + cmpOp(TypeId::Float32, CmpKind::GE, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_O_F32: + cmpOp(TypeId::Float32, CmpKind::O, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_U_F32: + cmpOp(TypeId::Float32, CmpKind::U, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_NGE_F32: + cmpOp(TypeId::Float32, CmpKind::NGE, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_NLG_F32: + cmpOp(TypeId::Float32, CmpKind::NLG, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_NGT_F32: + cmpOp(TypeId::Float32, CmpKind::NGT, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_NLE_F32: + cmpOp(TypeId::Float32, CmpKind::NLE, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_NEQ_F32: + cmpOp(TypeId::Float32, CmpKind::NEQ, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_NLT_F32: + cmpOp(TypeId::Float32, CmpKind::NLT, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_TRU_F32: + cmpOp(TypeId::Float32, CmpKind::TRU, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPS_F_F64: + cmpOp(TypeId::Float64, CmpKind::F, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_LT_F64: + cmpOp(TypeId::Float64, CmpKind::LT, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_EQ_F64: + cmpOp(TypeId::Float64, CmpKind::EQ, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_LE_F64: + cmpOp(TypeId::Float64, CmpKind::LE, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_GT_F64: + cmpOp(TypeId::Float64, CmpKind::GT, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_LG_F64: + cmpOp(TypeId::Float64, CmpKind::LG, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_GE_F64: + cmpOp(TypeId::Float64, CmpKind::GE, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_O_F64: + cmpOp(TypeId::Float64, CmpKind::O, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_U_F64: + cmpOp(TypeId::Float64, CmpKind::U, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_NGE_F64: + cmpOp(TypeId::Float64, CmpKind::NGE, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_NLG_F64: + cmpOp(TypeId::Float64, CmpKind::NLG, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_NGT_F64: + cmpOp(TypeId::Float64, CmpKind::NGT, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_NLE_F64: + cmpOp(TypeId::Float64, CmpKind::NLE, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_NEQ_F64: + cmpOp(TypeId::Float64, CmpKind::NEQ, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_NLT_F64: + cmpOp(TypeId::Float64, CmpKind::NLT, CmpFlags::S); + break; + case Vop3::Op::V3_CMPS_TRU_F64: + cmpOp(TypeId::Float64, CmpKind::TRU, CmpFlags::S); + break; + case Vop3::Op::V3_CMPSX_F_F64: + cmpOp(TypeId::Float64, CmpKind::F, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_LT_F64: + cmpOp(TypeId::Float64, CmpKind::LT, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_EQ_F64: + cmpOp(TypeId::Float64, CmpKind::EQ, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_LE_F64: + cmpOp(TypeId::Float64, CmpKind::LE, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_GT_F64: + cmpOp(TypeId::Float64, CmpKind::GT, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_LG_F64: + cmpOp(TypeId::Float64, CmpKind::LG, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_GE_F64: + cmpOp(TypeId::Float64, CmpKind::GE, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_O_F64: + cmpOp(TypeId::Float64, CmpKind::O, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_U_F64: + cmpOp(TypeId::Float64, CmpKind::U, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_NGE_F64: + cmpOp(TypeId::Float64, CmpKind::NGE, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_NLG_F64: + cmpOp(TypeId::Float64, CmpKind::NLG, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_NGT_F64: + cmpOp(TypeId::Float64, CmpKind::NGT, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_NLE_F64: + cmpOp(TypeId::Float64, CmpKind::NLE, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_NEQ_F64: + cmpOp(TypeId::Float64, CmpKind::NEQ, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_NLT_F64: + cmpOp(TypeId::Float64, CmpKind::NLT, CmpFlags::SX); + break; + case Vop3::Op::V3_CMPSX_TRU_F64: + cmpOp(TypeId::Float64, CmpKind::TRU, CmpFlags::SX); + break; + case Vop3::Op::V3_CMP_F_I32: + cmpOp(TypeId::SInt32, CmpKind::F); + break; + case Vop3::Op::V3_CMP_LT_I32: + cmpOp(TypeId::SInt32, CmpKind::LT); + break; + case Vop3::Op::V3_CMP_EQ_I32: + cmpOp(TypeId::SInt32, CmpKind::EQ); + break; + case Vop3::Op::V3_CMP_LE_I32: + cmpOp(TypeId::SInt32, CmpKind::LE); + break; + case Vop3::Op::V3_CMP_GT_I32: + cmpOp(TypeId::SInt32, CmpKind::GT); + break; + case Vop3::Op::V3_CMP_NE_I32: + cmpOp(TypeId::SInt32, CmpKind::NE); + break; + case Vop3::Op::V3_CMP_GE_I32: + cmpOp(TypeId::SInt32, CmpKind::GE); + break; + case Vop3::Op::V3_CMP_T_I32: + cmpOp(TypeId::SInt32, CmpKind::T); + break; + // case Vop3::Op::V3_CMP_CLASS_F32: cmpOp(TypeId::Float32, CmpKind::CLASS); + // break; + case Vop3::Op::V3_CMP_LT_I16: + cmpOp(TypeId::SInt16, CmpKind::LT); + break; + case Vop3::Op::V3_CMP_EQ_I16: + cmpOp(TypeId::SInt16, CmpKind::EQ); + break; + case Vop3::Op::V3_CMP_LE_I16: + cmpOp(TypeId::SInt16, CmpKind::LE); + break; + case Vop3::Op::V3_CMP_GT_I16: + cmpOp(TypeId::SInt16, CmpKind::GT); + break; + case Vop3::Op::V3_CMP_NE_I16: + cmpOp(TypeId::SInt16, CmpKind::NE); + break; + case Vop3::Op::V3_CMP_GE_I16: + cmpOp(TypeId::SInt16, CmpKind::GE); + break; + // case Vop3::Op::V3_CMP_CLASS_F16: cmpOp(TypeId::Float16, CmpKind::CLASS); + // break; + case Vop3::Op::V3_CMPX_F_I32: + cmpOp(TypeId::SInt32, CmpKind::F, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_LT_I32: + cmpOp(TypeId::SInt32, CmpKind::LT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_EQ_I32: + cmpOp(TypeId::SInt32, CmpKind::EQ, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_LE_I32: + cmpOp(TypeId::SInt32, CmpKind::LE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_GT_I32: + cmpOp(TypeId::SInt32, CmpKind::GT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_NE_I32: + cmpOp(TypeId::SInt32, CmpKind::NE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_GE_I32: + cmpOp(TypeId::SInt32, CmpKind::GE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_T_I32: + cmpOp(TypeId::SInt32, CmpKind::T, CmpFlags::X); + break; + // case Vop3::Op::V3_CMPX_CLASS_F32: cmpOp(TypeId::Float32, CmpKind::CLASS, + // CmpFlags::X); break; + case Vop3::Op::V3_CMPX_LT_I16: + cmpOp(TypeId::SInt16, CmpKind::LT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_EQ_I16: + cmpOp(TypeId::SInt16, CmpKind::EQ, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_LE_I16: + cmpOp(TypeId::SInt16, CmpKind::LE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_GT_I16: + cmpOp(TypeId::SInt16, CmpKind::GT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_NE_I16: + cmpOp(TypeId::SInt16, CmpKind::NE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_GE_I16: + cmpOp(TypeId::SInt16, CmpKind::GE, CmpFlags::X); + break; + // case Vop3::Op::V3_CMPX_CLASS_F16: cmpOp(TypeId::Float16, CmpKind::CLASS, + // CmpFlags::X); break; + case Vop3::Op::V3_CMP_F_I64: + cmpOp(TypeId::SInt64, CmpKind::F); + break; + case Vop3::Op::V3_CMP_LT_I64: + cmpOp(TypeId::SInt64, CmpKind::LT); + break; + case Vop3::Op::V3_CMP_EQ_I64: + cmpOp(TypeId::SInt64, CmpKind::EQ); + break; + case Vop3::Op::V3_CMP_LE_I64: + cmpOp(TypeId::SInt64, CmpKind::LE); + break; + case Vop3::Op::V3_CMP_GT_I64: + cmpOp(TypeId::SInt64, CmpKind::GT); + break; + case Vop3::Op::V3_CMP_NE_I64: + cmpOp(TypeId::SInt64, CmpKind::NE); + break; + case Vop3::Op::V3_CMP_GE_I64: + cmpOp(TypeId::SInt64, CmpKind::GE); + break; + case Vop3::Op::V3_CMP_T_I64: + cmpOp(TypeId::SInt64, CmpKind::T); + break; + // case Vop3::Op::V3_CMP_CLASS_F64: cmpOp(TypeId::Float64, CmpKind::CLASS); + // break; + case Vop3::Op::V3_CMP_LT_U16: + cmpOp(TypeId::UInt16, CmpKind::LT); + break; + case Vop3::Op::V3_CMP_EQ_U16: + cmpOp(TypeId::UInt16, CmpKind::EQ); + break; + case Vop3::Op::V3_CMP_LE_U16: + cmpOp(TypeId::UInt16, CmpKind::LE); + break; + case Vop3::Op::V3_CMP_GT_U16: + cmpOp(TypeId::UInt16, CmpKind::GT); + break; + case Vop3::Op::V3_CMP_NE_U16: + cmpOp(TypeId::UInt16, CmpKind::NE); + break; + case Vop3::Op::V3_CMP_GE_U16: + cmpOp(TypeId::UInt16, CmpKind::GE); + break; + case Vop3::Op::V3_CMPX_F_I64: + cmpOp(TypeId::SInt64, CmpKind::F, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_LT_I64: + cmpOp(TypeId::SInt64, CmpKind::LT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_EQ_I64: + cmpOp(TypeId::SInt64, CmpKind::EQ, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_LE_I64: + cmpOp(TypeId::SInt64, CmpKind::LE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_GT_I64: + cmpOp(TypeId::SInt64, CmpKind::GT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_NE_I64: + cmpOp(TypeId::SInt64, CmpKind::NE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_GE_I64: + cmpOp(TypeId::SInt64, CmpKind::GE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_T_I64: + cmpOp(TypeId::SInt64, CmpKind::T, CmpFlags::X); + break; + // case Vop3::Op::V3_CMPX_CLASS_F64: cmpOp(TypeId::Float64, CmpKind::CLASS, + // CmpFlags::X); break; + case Vop3::Op::V3_CMPX_LT_U16: + cmpOp(TypeId::UInt16, CmpKind::LT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_EQ_U16: + cmpOp(TypeId::UInt16, CmpKind::EQ, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_LE_U16: + cmpOp(TypeId::UInt16, CmpKind::LE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_GT_U16: + cmpOp(TypeId::UInt16, CmpKind::GT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_NE_U16: + cmpOp(TypeId::UInt16, CmpKind::NE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_GE_U16: + cmpOp(TypeId::UInt16, CmpKind::GE, CmpFlags::X); + break; + case Vop3::Op::V3_CMP_F_U32: + cmpOp(TypeId::UInt32, CmpKind::F); + break; + case Vop3::Op::V3_CMP_LT_U32: + cmpOp(TypeId::UInt32, CmpKind::LT); + break; + case Vop3::Op::V3_CMP_EQ_U32: + cmpOp(TypeId::UInt32, CmpKind::EQ); + break; + case Vop3::Op::V3_CMP_LE_U32: + cmpOp(TypeId::UInt32, CmpKind::LE); + break; + case Vop3::Op::V3_CMP_GT_U32: + cmpOp(TypeId::UInt32, CmpKind::GT); + break; + case Vop3::Op::V3_CMP_NE_U32: + cmpOp(TypeId::UInt32, CmpKind::NE); + break; + case Vop3::Op::V3_CMP_GE_U32: + cmpOp(TypeId::UInt32, CmpKind::GE); + break; + case Vop3::Op::V3_CMP_T_U32: + cmpOp(TypeId::UInt32, CmpKind::T); + break; + case Vop3::Op::V3_CMP_F_F16: + cmpOp(TypeId::Float16, CmpKind::F); + break; + case Vop3::Op::V3_CMP_LT_F16: + cmpOp(TypeId::Float16, CmpKind::LT); + break; + case Vop3::Op::V3_CMP_EQ_F16: + cmpOp(TypeId::Float16, CmpKind::EQ); + break; + case Vop3::Op::V3_CMP_LE_F16: + cmpOp(TypeId::Float16, CmpKind::LE); + break; + case Vop3::Op::V3_CMP_GT_F16: + cmpOp(TypeId::Float16, CmpKind::GT); + break; + case Vop3::Op::V3_CMP_LG_F16: + cmpOp(TypeId::Float16, CmpKind::LG); + break; + case Vop3::Op::V3_CMP_GE_F16: + cmpOp(TypeId::Float16, CmpKind::GE); + break; + case Vop3::Op::V3_CMP_O_F16: + cmpOp(TypeId::Float16, CmpKind::O); + break; + case Vop3::Op::V3_CMPX_F_U32: + cmpOp(TypeId::UInt32, CmpKind::F, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_LT_U32: + cmpOp(TypeId::UInt32, CmpKind::LT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_EQ_U32: + cmpOp(TypeId::UInt32, CmpKind::EQ, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_LE_U32: + cmpOp(TypeId::UInt32, CmpKind::LE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_GT_U32: + cmpOp(TypeId::UInt32, CmpKind::GT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_NE_U32: + cmpOp(TypeId::UInt32, CmpKind::NE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_GE_U32: + cmpOp(TypeId::UInt32, CmpKind::GE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_T_U32: + cmpOp(TypeId::UInt32, CmpKind::T, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_F_F16: + cmpOp(TypeId::Float16, CmpKind::F, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_LT_F16: + cmpOp(TypeId::Float16, CmpKind::LT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_EQ_F16: + cmpOp(TypeId::Float16, CmpKind::EQ, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_LE_F16: + cmpOp(TypeId::Float16, CmpKind::LE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_GT_F16: + cmpOp(TypeId::Float16, CmpKind::GT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_LG_F16: + cmpOp(TypeId::Float16, CmpKind::LG, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_GE_F16: + cmpOp(TypeId::Float16, CmpKind::GE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_O_F16: + cmpOp(TypeId::Float16, CmpKind::O, CmpFlags::X); + break; + case Vop3::Op::V3_CMP_F_U64: + cmpOp(TypeId::UInt64, CmpKind::F); + break; + case Vop3::Op::V3_CMP_LT_U64: + cmpOp(TypeId::UInt64, CmpKind::LT); + break; + case Vop3::Op::V3_CMP_EQ_U64: + cmpOp(TypeId::UInt64, CmpKind::EQ); + break; + case Vop3::Op::V3_CMP_LE_U64: + cmpOp(TypeId::UInt64, CmpKind::LE); + break; + case Vop3::Op::V3_CMP_GT_U64: + cmpOp(TypeId::UInt64, CmpKind::GT); + break; + case Vop3::Op::V3_CMP_NE_U64: + cmpOp(TypeId::UInt64, CmpKind::NE); + break; + case Vop3::Op::V3_CMP_GE_U64: + cmpOp(TypeId::UInt64, CmpKind::GE); + break; + case Vop3::Op::V3_CMP_T_U64: + cmpOp(TypeId::UInt64, CmpKind::T); + break; + case Vop3::Op::V3_CMP_U_F16: + cmpOp(TypeId::Float16, CmpKind::U); + break; + case Vop3::Op::V3_CMP_NGE_F16: + cmpOp(TypeId::Float16, CmpKind::NGE); + break; + case Vop3::Op::V3_CMP_NLG_F16: + cmpOp(TypeId::Float16, CmpKind::NLG); + break; + case Vop3::Op::V3_CMP_NGT_F16: + cmpOp(TypeId::Float16, CmpKind::NGT); + break; + case Vop3::Op::V3_CMP_NLE_F16: + cmpOp(TypeId::Float16, CmpKind::NLE); + break; + case Vop3::Op::V3_CMP_NEQ_F16: + cmpOp(TypeId::Float16, CmpKind::NEQ); + break; + case Vop3::Op::V3_CMP_NLT_F16: + cmpOp(TypeId::Float16, CmpKind::NLT); + break; + case Vop3::Op::V3_CMP_TRU_F16: + cmpOp(TypeId::Float16, CmpKind::TRU); + break; + case Vop3::Op::V3_CMPX_F_U64: + cmpOp(TypeId::UInt64, CmpKind::F, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_LT_U64: + cmpOp(TypeId::UInt64, CmpKind::LT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_EQ_U64: + cmpOp(TypeId::UInt64, CmpKind::EQ, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_LE_U64: + cmpOp(TypeId::UInt64, CmpKind::LE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_GT_U64: + cmpOp(TypeId::UInt64, CmpKind::GT, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_NE_U64: + cmpOp(TypeId::UInt64, CmpKind::NE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_GE_U64: + cmpOp(TypeId::UInt64, CmpKind::GE, CmpFlags::X); + break; + case Vop3::Op::V3_CMPX_T_U64: + cmpOp(TypeId::UInt64, CmpKind::T, CmpFlags::X); + break; + + case Vop3::Op::V3_RCP_F32: { + auto src = getSrc(0, TypeId::Float32); + auto floatT = fragment.context->getFloat32Type(); + auto float1 = fragment.context->getFloat32(1); + auto resultValue = fragment.builder.createFDiv( + floatT, float1, spirv::cast(src.value)); + auto result = applyClamp(applyOmod({floatT, resultValue})); + + fragment.setVectorOperand(inst.vdst, result); + break; + } + + case Vop3::Op::V3_ADD_I32: { + auto src0 = fragment.getScalarOperand(inst.src0, TypeId::UInt32).value; + auto src1 = fragment.getScalarOperand(inst.src1, TypeId::UInt32).value; + auto uintT = fragment.context->getType(TypeId::UInt32); + auto resultStruct = + fragment.context->getStructType(std::array{uintT, uintT}); + auto result = fragment.builder.createIAddCarry(resultStruct, src0, src1); + fragment.setVectorOperand( + inst.vdst, + {uintT, fragment.builder.createCompositeExtract( + uintT, result, std::array{static_cast(0)})}); + fragment.setScalarOperand( + inst.sdst, + {uintT, fragment.builder.createCompositeExtract( + uintT, result, std::array{static_cast(1)})}); + // TODO: update sdst + 1 + break; + } + + case Vop3::Op::V3_SUB_F32: { + auto floatT = fragment.context->getFloat32Type(); + auto src0 = getSrc(0, TypeId::Float32); + auto src1 = getSrc(1, TypeId::Float32); + auto resultValue = fragment.builder.createFSub( + floatT, spirv::cast(src0.value), + spirv::cast(src1.value)); + auto result = applyClamp(applyOmod({floatT, resultValue})); + + fragment.setVectorOperand(inst.vdst, result); + break; + } + + case Vop3::Op::V3_MUL_F32: { + auto floatT = fragment.context->getFloat32Type(); + auto src0 = getSrc(0, TypeId::Float32); + auto src1 = getSrc(1, TypeId::Float32); + auto resultValue = fragment.builder.createFMul( + floatT, spirv::cast(src0.value), + spirv::cast(src1.value)); + auto result = applyClamp(applyOmod({floatT, resultValue})); + + fragment.setVectorOperand(inst.vdst, result); + break; + } + case Vop3::Op::V3_MUL_LO_I32: { + auto resultT = fragment.context->getSint32Type(); + auto src0 = getSrc(0, TypeId::SInt32); + auto src1 = getSrc(1, TypeId::SInt32); + auto resultValue = fragment.builder.createIMul( + resultT, spirv::cast(src0.value), + spirv::cast(src1.value)); + auto result = applyClamp(applyOmod({resultT, resultValue})); + + fragment.setVectorOperand(inst.vdst, result); + break; + } + case Vop3::Op::V3_MUL_HI_I32: { + auto resultT = fragment.context->getSint32Type(); + auto src0 = getSrc(0, TypeId::SInt32); + auto src1 = getSrc(1, TypeId::SInt32); + + auto sint64T = fragment.context->getSint64Type(); + + auto src0_64 = fragment.builder.createSConvert( + sint64T, spirv::cast(src0.value)); + auto src1_64 = fragment.builder.createSConvert( + sint64T, spirv::cast(src1.value)); + + auto resultValue64 = fragment.builder.createIMul( + sint64T, spirv::cast(src0_64), + spirv::cast(src1_64)); + + resultValue64 = fragment.builder.createShiftRightLogical( + sint64T, resultValue64, fragment.context->getUInt32(32)); + auto resultValue = fragment.builder.createSConvert(resultT, resultValue64); + auto result = applyClamp(applyOmod({resultT, resultValue})); + + fragment.setVectorOperand(inst.vdst, result); + break; + } + case Vop3::Op::V3_MUL_HI_U32: { + auto resultT = fragment.context->getUInt32Type(); + auto src0 = spirv::cast(getSrc(0, TypeId::UInt32).value); + auto src1 = spirv::cast(getSrc(1, TypeId::UInt32).value); + + auto uint64T = fragment.context->getUInt64Type(); + + auto src0_64 = fragment.builder.createUConvert(uint64T, src0); + auto src1_64 = fragment.builder.createUConvert(uint64T, src1); + + auto resultValue64 = fragment.builder.createIMul(uint64T, src0_64, src1_64); + + resultValue64 = fragment.builder.createShiftRightLogical( + uint64T, resultValue64, fragment.context->getUInt32(32)); + auto resultValue = fragment.builder.createUConvert(resultT, resultValue64); + auto result = applyClamp(applyOmod({resultT, resultValue})); + + fragment.setVectorOperand(inst.vdst, result); + break; + } + + case Vop3::Op::V3_MAC_F32: { + auto floatT = fragment.context->getFloat32Type(); + auto src0 = getSrc(0, TypeId::Float32); + auto src1 = getSrc(1, TypeId::Float32); + + auto dst = spirv::cast( // FIXME: should use src2? + fragment.getVectorOperand(inst.vdst, TypeId::Float32).value); + + auto resultValue = fragment.builder.createFAdd( + floatT, + fragment.builder.createFMul(floatT, + spirv::cast(src0.value), + spirv::cast(src1.value)), + dst); + + auto result = applyClamp(applyOmod({floatT, resultValue})); + + fragment.setVectorOperand(inst.vdst, result); + break; + } + case Vop3::Op::V3_MAD_U32_U24: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::UInt32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.src1, TypeId::UInt32).value); + auto src2 = spirv::cast( + fragment.getScalarOperand(inst.src2, TypeId::UInt32).value); + auto operandT = fragment.context->getUInt32Type(); + + src0 = spirv::cast(fragment.builder.createBitwiseAnd( + operandT, src0, fragment.context->getUInt32((1 << 24) - 1))); + src1 = spirv::cast(fragment.builder.createBitwiseAnd( + operandT, src1, fragment.context->getUInt32((1 << 24) - 1))); + + auto result = fragment.builder.createIAdd( + operandT, fragment.builder.createIMul(operandT, src0, src1), src2); + + fragment.setVectorOperand(inst.vdst, {operandT, result}); + break; + } + case Vop3::Op::V3_MAD_I32_I24: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::SInt32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.src1, TypeId::SInt32).value); + auto src2 = spirv::cast( + fragment.getScalarOperand(inst.src2, TypeId::SInt32).value); + auto operandT = fragment.context->getSint32Type(); + + src0 = fragment.builder.createShiftLeftLogical( + operandT, src0, fragment.context->getUInt32(8)); + src0 = fragment.builder.createShiftRightArithmetic( + operandT, src0, fragment.context->getUInt32(8)); + src1 = fragment.builder.createShiftLeftLogical( + operandT, src1, fragment.context->getUInt32(8)); + src1 = fragment.builder.createShiftRightArithmetic( + operandT, src1, fragment.context->getUInt32(8)); + + auto result = fragment.builder.createIAdd( + operandT, fragment.builder.createIMul(operandT, src0, src1), src2); + + fragment.setVectorOperand(inst.vdst, {operandT, result}); + break; + } + case Vop3::Op::V3_MUL_U32_U24: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::UInt32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.src1, TypeId::UInt32).value); + auto operandT = fragment.context->getUInt32Type(); + + src0 = spirv::cast(fragment.builder.createBitwiseAnd( + operandT, src0, fragment.context->getUInt32((1 << 24) - 1))); + src1 = spirv::cast(fragment.builder.createBitwiseAnd( + operandT, src1, fragment.context->getUInt32((1 << 24) - 1))); + + auto result = fragment.builder.createIMul(operandT, src0, src1); + + fragment.setVectorOperand(inst.vdst, {operandT, result}); + break; + } + case Vop3::Op::V3_MUL_I32_I24: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::SInt32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.src1, TypeId::SInt32).value); + auto src2 = spirv::cast( + fragment.getScalarOperand(inst.src2, TypeId::SInt32).value); + auto operandT = fragment.context->getSint32Type(); + + src0 = fragment.builder.createShiftLeftLogical( + operandT, src0, fragment.context->getUInt32(8)); + src0 = fragment.builder.createShiftRightArithmetic( + operandT, src0, fragment.context->getUInt32(8)); + src1 = fragment.builder.createShiftLeftLogical( + operandT, src1, fragment.context->getUInt32(8)); + src1 = fragment.builder.createShiftRightArithmetic( + operandT, src1, fragment.context->getUInt32(8)); + + auto result = fragment.builder.createIMul(operandT, src0, src1); + + fragment.setVectorOperand(inst.vdst, {operandT, result}); + break; + } + case Vop3::Op::V3_MAD_F32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::Float32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.src1, TypeId::Float32).value); + auto src2 = spirv::cast( + fragment.getScalarOperand(inst.src2, TypeId::Float32).value); + auto floatT = fragment.context->getFloat32Type(); + + auto result = fragment.builder.createFAdd( + floatT, fragment.builder.createFMul(floatT, src0, src1), src2); + + fragment.setVectorOperand(inst.vdst, {floatT, result}); + break; + } + case Vop3::Op::V3_CNDMASK_B32: { + auto src0 = fragment.getScalarOperand(inst.src0, TypeId::UInt32).value; + auto src1 = fragment.getScalarOperand(inst.src1, TypeId::UInt32).value; + auto src2 = fragment.getScalarOperand(inst.src2, TypeId::UInt32).value; + + auto cmp = fragment.builder.createINotEqual( + fragment.context->getBoolType(), src2, fragment.context->getUInt32(0)); + + auto uint32T = fragment.context->getUInt32Type(); + auto result = fragment.builder.createSelect(uint32T, cmp, src1, src0); + fragment.setVectorOperand(inst.vdst, {uint32T, result}); + break; + } + + case Vop3::Op::V3_BFE_U32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::UInt32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.src1, TypeId::UInt32).value); + auto src2 = spirv::cast( + fragment.getScalarOperand(inst.src2, TypeId::UInt32).value); + + auto operandT = fragment.context->getUInt32Type(); + + auto voffset = + spirv::cast(fragment.builder.createBitwiseAnd( + operandT, src1, fragment.context->getUInt32(0x1f))); + auto vsize = + spirv::cast(fragment.builder.createBitwiseAnd( + operandT, src2, fragment.context->getUInt32(0x1f))); + auto field = + fragment.builder.createShiftRightLogical(operandT, src0, voffset); + auto mask = fragment.builder.createISub( + operandT, + fragment.builder.createShiftLeftLogical( + operandT, fragment.context->getUInt32(1), vsize), + fragment.context->getUInt32(1)); + + auto resultT = fragment.context->getUInt32Type(); + auto result = fragment.builder.createSelect( + operandT, + fragment.builder.createIEqual(fragment.context->getBoolType(), vsize, + fragment.context->getUInt32(0)), + fragment.context->getUInt32(0), + fragment.builder.createBitwiseAnd(operandT, field, mask)); + fragment.setVectorOperand(inst.vdst, {resultT, result}); + break; + } + + case Vop3::Op::V3_CVT_PKRTZ_F16_F32: { + auto float2T = fragment.context->getType(TypeId::Float32x2); + auto uintT = fragment.context->getType(TypeId::UInt32); + auto glslStd450 = fragment.context->getGlslStd450(); + + auto src0 = fragment.getScalarOperand(inst.src0, TypeId::Float32).value; + auto src1 = fragment.getScalarOperand(inst.src1, TypeId::Float32).value; + + auto src = fragment.builder.createCompositeConstruct( + float2T, std::array{src0, src1}); + auto dst = fragment.builder.createExtInst( + uintT, glslStd450, GLSLstd450PackHalf2x16, std::array{src}); + + fragment.setVectorOperand(inst.vdst, {uintT, dst}); + break; + } + + case Vop3::Op::V3_SAD_U32: { + auto src0 = spirv::cast(getSrc(0, TypeId::UInt32).value); + auto src1 = spirv::cast(getSrc(1, TypeId::UInt32).value); + auto src2 = spirv::cast(getSrc(2, TypeId::UInt32).value); + + auto uint32T = fragment.context->getUInt32Type(); + auto sint32T = fragment.context->getSint32Type(); + + auto diff = fragment.builder.createISub(uint32T, src0, src1); + auto sdiff = fragment.builder.createBitcast(sint32T, diff); + + auto glslStd450 = fragment.context->getGlslStd450(); + auto sabsdiff = fragment.builder.createExtInst(sint32T, glslStd450, + GLSLstd450SAbs, {{sdiff}}); + + auto absdiff = fragment.builder.createBitcast(uint32T, sabsdiff); + auto result = fragment.builder.createIAdd(uint32T, absdiff, src2); + fragment.setVectorOperand(inst.vdst, {uint32T, result}); + break; + } + + default: + inst.dump(); + util::unreachable(); + } +} + +void convertMubuf(Fragment &fragment, Mubuf inst) { + fragment.registers->pc += Mubuf::kMinInstSize * sizeof(std::uint32_t); + /* + printMubufOpcode(op); + printf(" "); + printVectorOperand(vdata, inst + instSize); + printf(", "); + printScalarOperand(srsrc << 2, inst + instSize); + printf(", "); + printScalarOperand(soffset, inst + instSize); + */ + + auto getSOffset = [&](std::int32_t adv = 0) -> spirv::UIntValue { + auto resultT = fragment.context->getUInt32Type(); + auto resultV = + fragment.getScalarOperand(inst.soffset, TypeId::UInt32).value; + auto result = spirv::cast(resultV); + + if (adv != 0) { + if (auto constVal = fragment.context->findSint32Value(result)) { + return fragment.context->getUInt32(*constVal + adv); + } + + result = fragment.builder.createIAdd(resultT, result, + fragment.context->getUInt32(adv)); + } + + return result; + }; + + auto getVBuffer = [&] { + auto vBuffer0 = + fragment.getScalarOperand((inst.srsrc << 2) + 0, TypeId::UInt32); + auto vBuffer1 = + fragment.getScalarOperand((inst.srsrc << 2) + 1, TypeId::UInt32); + auto vBuffer2 = + fragment.getScalarOperand((inst.srsrc << 2) + 2, TypeId::UInt32); + auto vBuffer3 = + fragment.getScalarOperand((inst.srsrc << 2) + 3, TypeId::UInt32); + + auto optVBuffer0Value = fragment.context->findUint32Value(vBuffer0.value); + auto optVBuffer1Value = fragment.context->findUint32Value(vBuffer1.value); + auto optVBuffer2Value = fragment.context->findUint32Value(vBuffer2.value); + auto optVBuffer3Value = fragment.context->findUint32Value(vBuffer3.value); + + if (optVBuffer0Value && optVBuffer1Value && optVBuffer2Value && + optVBuffer3Value) { + // V# buffer value is known, read the buffer now + std::array vBufferData = { + *optVBuffer0Value, *optVBuffer1Value, *optVBuffer2Value, + *optVBuffer3Value}; + + GnmVBuffer result; + std::memcpy(&result, vBufferData.data(), sizeof(result)); + return result; + } + + util::unreachable(); + }; + + auto getAddress = [&](GnmVBuffer *vbuffer) { + auto &builder = fragment.builder; + auto uint32T = fragment.context->getUInt32Type(); + + spirv::UIntValue index; + if (inst.idxen) { + index = spirv::cast( + fragment.getVectorOperand(inst.vaddr, TypeId::UInt32).value); + } + + // std::printf("vBuffer address = %lx\n", vbuffer->getAddress()); + + if (vbuffer->addtid_en) { + spirv::UIntValue threadId = + builder.createLoad(uint32T, fragment.context->getThreadId()); + + if (index) { + index = builder.createIAdd(uint32T, index, threadId); + } else { + index = threadId; + } + } + + auto offset = inst.offset ? fragment.context->getUInt32(inst.offset) + : spirv::UIntValue{}; + + if (inst.offen) { + auto off = spirv::cast( + fragment + .getVectorOperand(inst.vaddr + (inst.idxen ? 1 : 0), + TypeId::UInt32) + .value); + + if (offset) { + offset = builder.createIAdd(uint32T, off, offset); + } else { + offset = off; + } + } + + spirv::UIntValue address = getSOffset(); + + if (vbuffer->swizzle_en == 0) { + if (vbuffer->stride == 0 || !index) { + return address; + } + + auto offset = builder.createIMul( + uint32T, index, fragment.context->getUInt32(vbuffer->stride)); + if (address == fragment.context->getUInt32(0)) { + return offset; + } + + return builder.createIAdd(uint32T, address, offset); + } + + if (!index && !offset) { + return address; + } + + if (index && offset) { + auto indexStride = fragment.context->getUInt32(vbuffer->index_stride); + auto index_msb = builder.createUDiv(uint32T, index, indexStride); + auto index_lsb = builder.createUMod(uint32T, index, indexStride); + + auto elementSize = fragment.context->getUInt32(vbuffer->element_size); + auto offset_msb = builder.createUDiv(uint32T, offset, elementSize); + auto offset_lsb = builder.createUMod(uint32T, offset, elementSize); + + auto indexMsb = builder.createIMul( + uint32T, index_msb, fragment.context->getUInt32(vbuffer->stride)); + auto offsetMsb = builder.createIMul( + uint32T, offset_msb, + fragment.context->getUInt32(vbuffer->element_size)); + + address = builder.createIAdd( + uint32T, address, + builder.createIMul(uint32T, + builder.createIAdd(uint32T, indexMsb, offsetMsb), + indexStride)); + + address = builder.createIAdd( + uint32T, address, + builder.createIMul(uint32T, index_lsb, elementSize)); + + return builder.createIAdd(uint32T, address, offset_lsb); + } + + if (index) { + auto indexStride = fragment.context->getUInt32(vbuffer->index_stride); + auto index_msb = builder.createUDiv(uint32T, index, indexStride); + auto index_lsb = builder.createUMod(uint32T, index, indexStride); + + auto indexMsb = builder.createIMul( + uint32T, index_msb, fragment.context->getUInt32(vbuffer->stride)); + + return builder.createIAdd( + uint32T, address, builder.createIMul(uint32T, indexMsb, indexStride)); + } + + if (!offset) { + util::unreachable(); + } + + auto indexStride = fragment.context->getUInt32(vbuffer->index_stride); + auto elementSize = fragment.context->getUInt32(vbuffer->element_size); + auto offset_msb = builder.createUDiv(uint32T, offset, elementSize); + auto offset_lsb = builder.createUMod(uint32T, offset, elementSize); + + auto offsetMsb = + builder.createIMul(uint32T, offset_msb, + fragment.context->getUInt32(vbuffer->element_size)); + + address = builder.createIAdd( + uint32T, address, builder.createIMul(uint32T, offsetMsb, indexStride)); + + return builder.createIAdd(uint32T, address, offset_lsb); + }; + + switch (inst.op) { + case Mubuf::Op::BUFFER_LOAD_FORMAT_X: + case Mubuf::Op::BUFFER_LOAD_FORMAT_XY: + case Mubuf::Op::BUFFER_LOAD_FORMAT_XYZ: + case Mubuf::Op::BUFFER_LOAD_FORMAT_XYZW: { + std::uint32_t count = static_cast(inst.op) - + static_cast(Mubuf::Op::BUFFER_LOAD_FORMAT_X) + 1; + + auto vbuffer = getVBuffer(); + auto address = getAddress(&vbuffer); + + spirv::Value result[4]; + auto resultType = convertFromFormat( + result, count, fragment, reinterpret_cast(&vbuffer), + address, vbuffer.dfmt, vbuffer.nfmt); + + for (std::uint32_t i = 0; i < count; ++i) { + fragment.setVectorOperand(inst.vdata + i, {resultType, result[i]}); + } + break; + } + + case Mubuf::Op::BUFFER_STORE_FORMAT_X: + case Mubuf::Op::BUFFER_STORE_FORMAT_XY: + case Mubuf::Op::BUFFER_STORE_FORMAT_XYZ: + case Mubuf::Op::BUFFER_STORE_FORMAT_XYZW: { + std::uint32_t count = static_cast(inst.op) - + static_cast(Mubuf::Op::BUFFER_STORE_FORMAT_X) + + 1; + + auto vbuffer = getVBuffer(); + auto address = getAddress(&vbuffer); + + convertToFormat(RegisterId::Vector(inst.vdata), count, fragment, + reinterpret_cast(&vbuffer), address, + vbuffer.dfmt, vbuffer.nfmt); + break; + } + + case Mubuf::Op::BUFFER_LOAD_UBYTE: + case Mubuf::Op::BUFFER_LOAD_USHORT: + case Mubuf::Op::BUFFER_LOAD_SSHORT: + case Mubuf::Op::BUFFER_LOAD_SBYTE: + inst.dump(); + util::unreachable(); + + case Mubuf::Op::BUFFER_LOAD_DWORD: + case Mubuf::Op::BUFFER_LOAD_DWORDX2: + case Mubuf::Op::BUFFER_LOAD_DWORDX4: + case Mubuf::Op::BUFFER_LOAD_DWORDX3: { + std::uint32_t count = static_cast(inst.op) - + static_cast(Mubuf::Op::BUFFER_LOAD_DWORD) + 1; + + auto vbuffer = getVBuffer(); + auto address = getAddress(&vbuffer); + auto loadType = fragment.context->getType(TypeId::UInt32); + auto uniform = fragment.context->getOrCreateStorageBuffer( + reinterpret_cast(&vbuffer), TypeId::UInt32); + uniform->accessOp |= AccessOp::Load; + + auto uniformPointerType = fragment.context->getPointerType( + spv::StorageClass::StorageBuffer, TypeId::UInt32); + address = + fragment.builder.createUDiv(fragment.context->getUInt32Type(), address, + fragment.context->getUInt32(4)); + + for (int i = 0; i < count; ++i) { + auto channelOffset = address; + + if (i != 0) { + channelOffset = fragment.builder.createIAdd( + fragment.context->getUInt32Type(), channelOffset, + fragment.context->getUInt32(i)); + } + + auto uniformPointerValue = fragment.builder.createAccessChain( + uniformPointerType, uniform->variable, + {{fragment.context->getUInt32(0), channelOffset}}); + + auto value = fragment.builder.createLoad(loadType, uniformPointerValue); + fragment.setVectorOperand(inst.vdata + i, {loadType, value}); + } + break; + } + + case Mubuf::Op::BUFFER_STORE_BYTE: + case Mubuf::Op::BUFFER_STORE_SHORT: + inst.dump(); + util::unreachable(); + + case Mubuf::Op::BUFFER_STORE_DWORD: + case Mubuf::Op::BUFFER_STORE_DWORDX2: + case Mubuf::Op::BUFFER_STORE_DWORDX4: + case Mubuf::Op::BUFFER_STORE_DWORDX3: { + std::uint32_t count = static_cast(inst.op) - + static_cast(Mubuf::Op::BUFFER_STORE_DWORD) + 1; + + auto vbuffer = getVBuffer(); + auto address = getAddress(&vbuffer); + auto storeType = fragment.context->getType(TypeId::UInt32); + auto uniform = fragment.context->getOrCreateStorageBuffer( + reinterpret_cast(&vbuffer), TypeId::UInt32); + uniform->accessOp |= AccessOp::Store; + + auto uniformPointerType = fragment.context->getPointerType( + spv::StorageClass::StorageBuffer, TypeId::UInt32); + address = + fragment.builder.createUDiv(fragment.context->getUInt32Type(), address, + fragment.context->getUInt32(4)); + + for (int i = 0; i < count; ++i) { + auto channelOffset = address; + + if (i != 0) { + channelOffset = fragment.builder.createIAdd( + fragment.context->getUInt32Type(), channelOffset, + fragment.context->getUInt32(i)); + } + + auto uniformPointerValue = fragment.builder.createAccessChain( + uniformPointerType, uniform->variable, + {{fragment.context->getUInt32(0), channelOffset}}); + + fragment.builder.createStore( + uniformPointerValue, + fragment.getVectorOperand(inst.vdata + i, TypeId::UInt32).value); + } + } + + default: + inst.dump(); + util::unreachable(); + } +} + +void convertMtbuf(Fragment &fragment, Mtbuf inst) { + fragment.registers->pc += Mtbuf::kMinInstSize * sizeof(std::uint32_t); + + switch (inst.op) { + case Mtbuf::Op::TBUFFER_LOAD_FORMAT_X: + case Mtbuf::Op::TBUFFER_LOAD_FORMAT_XY: + case Mtbuf::Op::TBUFFER_LOAD_FORMAT_XYZ: + case Mtbuf::Op::TBUFFER_LOAD_FORMAT_XYZW: { + std::uint32_t count = static_cast(inst.op) - + static_cast(Mtbuf::Op::TBUFFER_LOAD_FORMAT_X) + + 1; + + auto &builder = fragment.builder; + + auto vBuffer0 = + fragment.getScalarOperand((inst.srsrc << 2) + 0, TypeId::UInt32); + auto vBuffer1 = + fragment.getScalarOperand((inst.srsrc << 2) + 1, TypeId::UInt32); + auto vBuffer2 = + fragment.getScalarOperand((inst.srsrc << 2) + 2, TypeId::UInt32); + auto vBuffer3 = + fragment.getScalarOperand((inst.srsrc << 2) + 3, TypeId::UInt32); + + auto optVBuffer0Value = fragment.context->findUint32Value(vBuffer0.value); + auto optVBuffer1Value = fragment.context->findUint32Value(vBuffer1.value); + auto optVBuffer2Value = fragment.context->findUint32Value(vBuffer2.value); + auto optVBuffer3Value = fragment.context->findUint32Value(vBuffer3.value); + + if (optVBuffer0Value && optVBuffer1Value && optVBuffer2Value && + optVBuffer3Value) { + // V# buffer value is known, read the buffer now + std::uint32_t vBufferData[] = {*optVBuffer0Value, *optVBuffer1Value, + *optVBuffer2Value, *optVBuffer3Value}; + + auto vbuffer = reinterpret_cast(vBufferData); + auto base = spirv::cast( + fragment.getScalarOperand(inst.soffset, TypeId::UInt32).value); + + auto uint32T = fragment.context->getUInt32Type(); + auto uint32_0 = fragment.context->getUInt32(0); + + if (inst.dfmt == kSurfaceFormatInvalid) { + util::unreachable("!! dfmt is invalid !!\n"); + + for (std::uint32_t i = 0; i < count; ++i) { + fragment.setVectorOperand(inst.vdata + i, {uint32T, uint32_0}); + } + + return; + } + + spirv::UIntValue index; + if (inst.idxen) { + index = spirv::cast( + fragment.getVectorOperand(inst.vaddr, TypeId::UInt32).value); + } + + // std::printf("vBuffer address = %lx\n", vbuffer->getAddress()); + + if (vbuffer->addtid_en) { + spirv::UIntValue threadId = + builder.createLoad(uint32T, fragment.context->getThreadId()); + + if (index) { + index = builder.createIAdd(uint32T, index, threadId); + } else { + index = threadId; + } + } + + auto offset = inst.offset ? fragment.context->getUInt32(inst.offset) + : spirv::UIntValue{}; + + if (inst.offen) { + auto off = spirv::cast( + fragment + .getVectorOperand(inst.vaddr + (inst.idxen ? 1 : 0), + TypeId::UInt32) + .value); + + if (offset) { + offset = builder.createIAdd(uint32T, off, offset); + } else { + offset = off; + } + } + + spirv::UIntValue address = base; + if (vbuffer->swizzle_en == 0) { + if (vbuffer->stride != 0 && index) { + auto offset = builder.createIMul( + uint32T, index, fragment.context->getUInt32(vbuffer->stride)); + if (address == uint32_0) { + address = offset; + } else { + address = builder.createIAdd(uint32T, address, offset); + } + } + } else { + if (index && offset) { + auto indexStride = fragment.context->getUInt32(vbuffer->index_stride); + auto index_msb = builder.createUDiv(uint32T, index, indexStride); + auto index_lsb = builder.createUMod(uint32T, index, indexStride); + + auto elementSize = fragment.context->getUInt32(vbuffer->element_size); + auto offset_msb = builder.createUDiv(uint32T, offset, elementSize); + auto offset_lsb = builder.createUMod(uint32T, offset, elementSize); + + auto indexMsb = builder.createIMul( + uint32T, index_msb, fragment.context->getUInt32(vbuffer->stride)); + auto offsetMsb = builder.createIMul( + uint32T, offset_msb, + fragment.context->getUInt32(vbuffer->element_size)); + + address = builder.createIAdd( + uint32T, address, + builder.createIMul( + uint32T, builder.createIAdd(uint32T, indexMsb, offsetMsb), + indexStride)); + + address = builder.createIAdd( + uint32T, address, + builder.createIMul(uint32T, index_lsb, elementSize)); + + address = builder.createIAdd(uint32T, address, offset_lsb); + } else if (index) { + auto indexStride = fragment.context->getUInt32(vbuffer->index_stride); + auto index_msb = builder.createUDiv(uint32T, index, indexStride); + auto index_lsb = builder.createUMod(uint32T, index, indexStride); + + auto indexMsb = builder.createIMul( + uint32T, index_msb, fragment.context->getUInt32(vbuffer->stride)); + + address = builder.createIAdd( + uint32T, address, + builder.createIMul(uint32T, indexMsb, indexStride)); + } else if (offset) { + auto indexStride = fragment.context->getUInt32(vbuffer->index_stride); + auto elementSize = fragment.context->getUInt32(vbuffer->element_size); + auto offset_msb = builder.createUDiv(uint32T, offset, elementSize); + auto offset_lsb = builder.createUMod(uint32T, offset, elementSize); + + auto offsetMsb = builder.createIMul( + uint32T, offset_msb, + fragment.context->getUInt32(vbuffer->element_size)); + + address = builder.createIAdd( + uint32T, address, + builder.createIMul(uint32T, offsetMsb, indexStride)); + + address = builder.createIAdd(uint32T, address, offset_lsb); + } + } + + spirv::Value result[4]; + auto resultType = convertFromFormat(result, count, fragment, vBufferData, + address, inst.dfmt, inst.nfmt); + + for (std::uint32_t i = 0; i < count; ++i) { + fragment.setVectorOperand(inst.vdata + i, {resultType, result[i]}); + } + break; + } else { + util::unreachable(); + } + } + + case Mtbuf::Op::TBUFFER_STORE_FORMAT_X: + case Mtbuf::Op::TBUFFER_STORE_FORMAT_XY: + case Mtbuf::Op::TBUFFER_STORE_FORMAT_XYZ: + case Mtbuf::Op::TBUFFER_STORE_FORMAT_XYZW: { + std::uint32_t count = static_cast(inst.op) - + static_cast(Mtbuf::Op::TBUFFER_STORE_FORMAT_X) + + 1; + auto &builder = fragment.builder; + + auto vBuffer0 = + fragment.getScalarOperand((inst.srsrc << 2) + 0, TypeId::UInt32); + auto vBuffer1 = + fragment.getScalarOperand((inst.srsrc << 2) + 1, TypeId::UInt32); + auto vBuffer2 = + fragment.getScalarOperand((inst.srsrc << 2) + 2, TypeId::UInt32); + auto vBuffer3 = + fragment.getScalarOperand((inst.srsrc << 2) + 3, TypeId::UInt32); + + auto optVBuffer0Value = fragment.context->findUint32Value(vBuffer0.value); + auto optVBuffer1Value = fragment.context->findUint32Value(vBuffer1.value); + auto optVBuffer2Value = fragment.context->findUint32Value(vBuffer2.value); + auto optVBuffer3Value = fragment.context->findUint32Value(vBuffer3.value); + + if (optVBuffer0Value && optVBuffer1Value && optVBuffer2Value && + optVBuffer3Value) { + // V# buffer value is known, read the buffer now + std::uint32_t vBufferData[] = {*optVBuffer0Value, *optVBuffer1Value, + *optVBuffer2Value, *optVBuffer3Value}; + + auto vbuffer = reinterpret_cast(vBufferData); + // std::printf("vBuffer address = %lx\n", vbuffer->getAddress()); + + auto base = spirv::cast( + fragment.getScalarOperand(inst.soffset, TypeId::UInt32).value); + + auto uint32T = fragment.context->getUInt32Type(); + auto uint32_0 = fragment.context->getUInt32(0); + + if (inst.dfmt == kSurfaceFormatInvalid) { + util::unreachable("!! dfmt is invalid !!\n"); + + for (std::uint32_t i = 0; i < count; ++i) { + fragment.setVectorOperand(inst.vdata + i, {uint32T, uint32_0}); + } + + return; + } + + spirv::UIntValue index; + if (inst.idxen) { + index = spirv::cast( + fragment.getVectorOperand(inst.vaddr, TypeId::UInt32).value); + } + + if (vbuffer->addtid_en) { + spirv::UIntValue threadId = + builder.createLoad(uint32T, fragment.context->getThreadId()); + + if (index) { + index = builder.createIAdd(uint32T, index, threadId); + } else { + index = threadId; + } + } + + auto offset = inst.offset ? fragment.context->getUInt32(inst.offset) + : spirv::UIntValue{}; + + if (inst.offen) { + auto off = spirv::cast( + fragment + .getVectorOperand(inst.vaddr + (inst.idxen ? 1 : 0), + TypeId::UInt32) + .value); + + if (offset) { + offset = builder.createIAdd(uint32T, off, offset); + } else { + offset = off; + } + } + + spirv::UIntValue address = base; + if (vbuffer->swizzle_en == 0) { + if (vbuffer->stride != 0 && index) { + auto offset = builder.createIMul( + uint32T, index, fragment.context->getUInt32(vbuffer->stride)); + if (address == uint32_0) { + address = offset; + } else { + address = builder.createIAdd(uint32T, address, offset); + } + } + } else { + if (index && offset) { + auto indexStride = fragment.context->getUInt32(vbuffer->index_stride); + auto index_msb = builder.createUDiv(uint32T, index, indexStride); + auto index_lsb = builder.createUMod(uint32T, index, indexStride); + + auto elementSize = fragment.context->getUInt32(vbuffer->element_size); + auto offset_msb = builder.createUDiv(uint32T, offset, elementSize); + auto offset_lsb = builder.createUMod(uint32T, offset, elementSize); + + auto indexMsb = builder.createIMul( + uint32T, index_msb, fragment.context->getUInt32(vbuffer->stride)); + auto offsetMsb = builder.createIMul( + uint32T, offset_msb, + fragment.context->getUInt32(vbuffer->element_size)); + + address = builder.createIAdd( + uint32T, address, + builder.createIMul( + uint32T, builder.createIAdd(uint32T, indexMsb, offsetMsb), + indexStride)); + + address = builder.createIAdd( + uint32T, address, + builder.createIMul(uint32T, index_lsb, elementSize)); + + address = builder.createIAdd(uint32T, address, offset_lsb); + } else if (index) { + auto indexStride = fragment.context->getUInt32(vbuffer->index_stride); + auto index_msb = builder.createUDiv(uint32T, index, indexStride); + auto index_lsb = builder.createUMod(uint32T, index, indexStride); + + auto indexMsb = builder.createIMul( + uint32T, index_msb, fragment.context->getUInt32(vbuffer->stride)); + + address = builder.createIAdd( + uint32T, address, + builder.createIMul(uint32T, indexMsb, indexStride)); + } else if (offset) { + auto indexStride = fragment.context->getUInt32(vbuffer->index_stride); + auto elementSize = fragment.context->getUInt32(vbuffer->element_size); + auto offset_msb = builder.createUDiv(uint32T, offset, elementSize); + auto offset_lsb = builder.createUMod(uint32T, offset, elementSize); + + auto offsetMsb = builder.createIMul( + uint32T, offset_msb, + fragment.context->getUInt32(vbuffer->element_size)); + + address = builder.createIAdd( + uint32T, address, + builder.createIMul(uint32T, offsetMsb, indexStride)); + + address = builder.createIAdd(uint32T, address, offset_lsb); + } + } + + convertToFormat(RegisterId::Vector(inst.vdata), count, fragment, + vBufferData, address, inst.dfmt, inst.nfmt); + } else { + util::unreachable(); + } + break; + } + + default: + inst.dump(); + util::unreachable(); + } +} +void convertMimg(Fragment &fragment, Mimg inst) { + fragment.registers->pc += Mimg::kMinInstSize * sizeof(std::uint32_t); + switch (inst.op) { + case Mimg::Op::IMAGE_GET_RESINFO: { + auto image = fragment.createImage(RegisterId::Raw(inst.srsrc << 2), inst.r128); + spirv::Value values[4]; + auto uint32T = fragment.context->getUInt32Type(); + + if (inst.dmask & 3) { + // query whd + // TODO: support other than 2D textures + auto uint32x2T = fragment.context->getUint32x2Type(); + auto lod = fragment.getScalarOperand(inst.vaddr, TypeId::UInt32); + auto sizeResult = + fragment.builder.createImageQuerySizeLod(uint32x2T, image, lod.value); + + values[0] = + fragment.builder.createCompositeExtract(uint32T, sizeResult, {{0}}); + values[1] = + fragment.builder.createCompositeExtract(uint32T, sizeResult, {{1}}); + values[2] = fragment.context->getUInt32(1); + } + + if (inst.dmask & (1 << 3)) { + // query total mip count + values[3] = fragment.builder.createImageQueryLevels(uint32T, image); + } + + for (std::size_t dstOffset = 0, i = 0; i < 4; ++i) { + if (inst.dmask & (1 << i)) { + fragment.setVectorOperand(inst.vdata + dstOffset++, {uint32T, values[i]}); + } + } + break; + } + case Mimg::Op::IMAGE_SAMPLE: { + auto image = fragment.createImage(RegisterId::Raw(inst.srsrc << 2), inst.r128); + auto sampler = fragment.createSampler(RegisterId::Raw(inst.ssamp << 2)); + auto coord0 = fragment.getVectorOperand(inst.vaddr, TypeId::Float32).value; + auto coord1 = + fragment.getVectorOperand(inst.vaddr + 1, TypeId::Float32).value; + auto coord2 = + fragment.getVectorOperand(inst.vaddr + 2, TypeId::Float32).value; + auto coords = fragment.builder.createCompositeConstruct( + fragment.context->getFloat32x3Type(), + {{coord0, coord1, coord2}}); // TODO + + auto sampledImage2dT = fragment.context->getSampledImage2DType(); + auto float4T = fragment.context->getFloat32x4Type(); + auto floatT = fragment.context->getFloat32Type(); + auto sampledImage = + fragment.builder.createSampledImage(sampledImage2dT, image, sampler); + auto value = fragment.builder.createImageSampleImplicitLod( + float4T, sampledImage, coords); + + for (std::uint32_t dstOffset = 0, i = 0; i < 4; ++i) { + if (inst.dmask & (1 << i)) { + fragment.setVectorOperand( + inst.vdata + dstOffset++, {floatT, fragment.builder.createCompositeExtract( + floatT, value, {{i}})}); + } + } + break; + } + + default: + inst.dump(); + util::unreachable(); + } +} +void convertDs(Fragment &fragment, Ds inst) { + fragment.registers->pc += Ds::kMinInstSize * sizeof(std::uint32_t); + switch (inst.op) { + + default: + inst.dump(); + util::unreachable(); + } +} +void convertVintrp(Fragment &fragment, Vintrp inst) { + fragment.registers->pc += Vintrp::kMinInstSize * sizeof(std::uint32_t); + switch (inst.op) { + case Vintrp::Op::V_INTERP_P1_F32: + // TODO: operation should read from LDS + // TODO: accurate emulation + + // In current inaccurate emulation we just ignore phase 1 and vsrc argument + // interpolated value stored in attr# + break; + + case Vintrp::Op::V_INTERP_P2_F32: { + // TODO: operation should read from LDS + // TODO: accurate emulation + + auto attr = fragment.getAttrOperand(inst.attr, TypeId::Float32x4); + auto channelType = fragment.context->getType(TypeId::Float32); + auto attrChan = fragment.builder.createCompositeExtract( + channelType, attr.value, + std::array{static_cast(inst.attrChan)}); + fragment.setVectorOperand(inst.vdst, {channelType, attrChan}); + break; + } + + default: + inst.dump(); + util::unreachable(); + } +} + +void convertExp(Fragment &fragment, Exp inst) { + fragment.registers->pc += Exp::kMinInstSize * sizeof(std::uint32_t); + + if (inst.en == 0) { + fragment.builder.createFunctionCall(fragment.context->getVoidType(), + fragment.context->getDiscardFn(), {}); + return; + } + + // spirv::Value value; + std::array exports; + + // TODO: handle vm + if (inst.compr) { + auto floatT = fragment.context->getType(TypeId::Float32); + auto float2T = fragment.context->getType(TypeId::Float32x2); + auto glslStd450 = fragment.context->getGlslStd450(); + + auto xyUint = fragment.getVectorOperand(inst.vsrc0, TypeId::UInt32).value; + auto zwUint = fragment.getVectorOperand(inst.vsrc1, TypeId::UInt32).value; + + auto xy = fragment.builder.createExtInst( + float2T, glslStd450, GLSLstd450UnpackHalf2x16, std::array{xyUint}); + auto zw = fragment.builder.createExtInst( + float2T, glslStd450, GLSLstd450UnpackHalf2x16, std::array{zwUint}); + exports[0] = fragment.builder.createCompositeExtract( + floatT, xy, std::array{static_cast(0)}); + exports[1] = fragment.builder.createCompositeExtract( + floatT, xy, std::array{static_cast(1)}); + exports[2] = fragment.builder.createCompositeExtract( + floatT, zw, std::array{static_cast(0)}); + exports[3] = fragment.builder.createCompositeExtract( + floatT, zw, std::array{static_cast(1)}); + // value = fragment.builder.createCompositeConstruct(type, std::array{x, y, + // z, w}); + } else { + exports[0] = fragment.getVectorOperand(inst.vsrc0, TypeId::Float32).value; + exports[1] = fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value; + exports[2] = fragment.getVectorOperand(inst.vsrc2, TypeId::Float32).value; + exports[3] = fragment.getVectorOperand(inst.vsrc3, TypeId::Float32).value; + /* + value = fragment.builder.createCompositeConstruct( + type, + std::array{ + fragment.getVectorOperand(inst.vsrc0, TypeId::Float32).value, + fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value, + fragment.getVectorOperand(inst.vsrc2, TypeId::Float32).value, + fragment.getVectorOperand(inst.vsrc3, TypeId::Float32).value}); + */ + } + + auto resultType = fragment.context->getFloat32x4Type(); + auto floatType = fragment.context->getFloat32Type(); +/* + if (inst.en != 0xf) { + auto prevValue = fragment.getExportTarget(inst.target, TypeId::Float32x4); + if (prevValue) { + for (std::uint32_t i = 0; i < 4; ++i) { + if (~inst.en & (1 << i)) { + exports[i] = fragment.builder.createCompositeExtract( + floatType, prevValue.value, {{i}}); + } + } + } + } +*/ + + auto value = fragment.builder.createCompositeConstruct(resultType, exports); + fragment.setExportTarget(inst.target, {resultType, value}); +} + +void convertVop1(Fragment &fragment, Vop1 inst) { + fragment.registers->pc += Vop1::kMinInstSize * sizeof(std::uint32_t); + switch (inst.op) { + case Vop1::Op::V_MOV_B32: + fragment.setVectorOperand( + inst.vdst, fragment.getScalarOperand(inst.src0, TypeId::UInt32, + OperandGetFlags::PreserveType)); + break; + + case Vop1::Op::V_RCP_F32: { + auto src = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::Float32).value); + auto floatT = fragment.context->getFloat32Type(); + auto float1 = fragment.context->getFloat32(1); + auto result = fragment.builder.createFDiv(floatT, float1, src); + + fragment.setVectorOperand(inst.vdst, {floatT, result}); + break; + } + + case Vop1::Op::V_RSQ_F32: { + auto src = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::Float32).value); + auto floatT = fragment.context->getFloat32Type(); + auto float1 = fragment.context->getFloat32(1); + + auto glslStd450 = fragment.context->getGlslStd450(); + auto result = fragment.builder.createExtInst( + floatT, glslStd450, GLSLstd450InverseSqrt, {{src}}); + + fragment.setVectorOperand(inst.vdst, {floatT, result}); + break; + } + + case Vop1::Op::V_SQRT_F32: { + auto src = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::Float32).value); + auto floatT = fragment.context->getFloat32Type(); + + auto glslStd450 = fragment.context->getGlslStd450(); + auto result = fragment.builder.createExtInst(floatT, glslStd450, + GLSLstd450Sqrt, {{src}}); + + fragment.setVectorOperand(inst.vdst, {floatT, result}); + break; + } + + case Vop1::Op::V_EXP_F32: { + auto src = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::Float32).value); + auto floatT = fragment.context->getFloat32Type(); + + auto glslStd450 = fragment.context->getGlslStd450(); + auto result = fragment.builder.createExtInst(floatT, glslStd450, + GLSLstd450Exp2, {{src}}); + + fragment.setVectorOperand(inst.vdst, {floatT, result}); + break; + } + + + case Vop1::Op::V_FRACT_F32: { + auto src = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::Float32).value); + auto floatT = fragment.context->getFloat32Type(); + + auto glslStd450 = fragment.context->getGlslStd450(); + auto result = fragment.builder.createExtInst(floatT, glslStd450, + GLSLstd450Fract, {{src}}); + + fragment.setVectorOperand(inst.vdst, {floatT, result}); + break; + } + + case Vop1::Op::V_CVT_I32_F32: { + auto src = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::Float32).value); + auto resultType = fragment.context->getType(TypeId::SInt32); + auto result = fragment.builder.createConvertFToS(resultType, src); + + fragment.setVectorOperand(inst.vdst, {resultType, result}); + break; + } + case Vop1::Op::V_CVT_F32_I32: { + auto src = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::SInt32).value); + auto resultType = fragment.context->getType(TypeId::Float32); + auto result = fragment.builder.createConvertSToF(resultType, src); + + fragment.setVectorOperand(inst.vdst, {resultType, result}); + break; + } + + case Vop1::Op::V_CVT_U32_F32: { + auto src = fragment.getScalarOperand(inst.src0, TypeId::Float32).value; + auto resultType = fragment.context->getType(TypeId::UInt32); + auto result = fragment.builder.createConvertFToU(resultType, src); + + fragment.setVectorOperand(inst.vdst, {resultType, result}); + break; + } + case Vop1::Op::V_CVT_F32_U32: { + auto src = fragment.getScalarOperand(inst.src0, TypeId::UInt32).value; + auto resultType = fragment.context->getFloat32Type(); + auto result = fragment.builder.createConvertUToF( + resultType, spirv::cast(src)); + + fragment.setVectorOperand(inst.vdst, {resultType, result}); + break; + } + + default: + inst.dump(); + util::unreachable(); + } +} + +void convertVopc(Fragment &fragment, Vopc inst) { + fragment.registers->pc += Vopc::kMinInstSize * sizeof(std::uint32_t); + + auto cmpOp = [&](TypeId type, CmpKind kind, CmpFlags flags = CmpFlags::None) { + auto src0 = fragment.getScalarOperand(inst.src0, type).value; + auto src1 = fragment.getVectorOperand(inst.vsrc1, type).value; + + auto result = doCmpOp(fragment, type, src0, src1, kind, flags); + fragment.setVcc(result); + }; + + switch (inst.op) { + case Vopc::Op::V_CMP_F_F32: + cmpOp(TypeId::Float32, CmpKind::F); + break; + case Vopc::Op::V_CMP_LT_F32: + cmpOp(TypeId::Float32, CmpKind::LT); + break; + case Vopc::Op::V_CMP_EQ_F32: + cmpOp(TypeId::Float32, CmpKind::EQ); + break; + case Vopc::Op::V_CMP_LE_F32: + cmpOp(TypeId::Float32, CmpKind::LE); + break; + case Vopc::Op::V_CMP_GT_F32: + cmpOp(TypeId::Float32, CmpKind::GT); + break; + case Vopc::Op::V_CMP_LG_F32: + cmpOp(TypeId::Float32, CmpKind::LG); + break; + case Vopc::Op::V_CMP_GE_F32: + cmpOp(TypeId::Float32, CmpKind::GE); + break; + case Vopc::Op::V_CMP_O_F32: + cmpOp(TypeId::Float32, CmpKind::O); + break; + case Vopc::Op::V_CMP_U_F32: + cmpOp(TypeId::Float32, CmpKind::U); + break; + case Vopc::Op::V_CMP_NGE_F32: + cmpOp(TypeId::Float32, CmpKind::NGE); + break; + case Vopc::Op::V_CMP_NLG_F32: + cmpOp(TypeId::Float32, CmpKind::NLG); + break; + case Vopc::Op::V_CMP_NGT_F32: + cmpOp(TypeId::Float32, CmpKind::NGT); + break; + case Vopc::Op::V_CMP_NLE_F32: + cmpOp(TypeId::Float32, CmpKind::NLE); + break; + case Vopc::Op::V_CMP_NEQ_F32: + cmpOp(TypeId::Float32, CmpKind::NEQ); + break; + case Vopc::Op::V_CMP_NLT_F32: + cmpOp(TypeId::Float32, CmpKind::NLT); + break; + case Vopc::Op::V_CMP_TRU_F32: + cmpOp(TypeId::Float32, CmpKind::TRU); + break; + case Vopc::Op::V_CMPX_F_F32: + cmpOp(TypeId::Float32, CmpKind::F, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_LT_F32: + cmpOp(TypeId::Float32, CmpKind::LT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_EQ_F32: + cmpOp(TypeId::Float32, CmpKind::EQ, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_LE_F32: + cmpOp(TypeId::Float32, CmpKind::LE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_GT_F32: + cmpOp(TypeId::Float32, CmpKind::GT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_LG_F32: + cmpOp(TypeId::Float32, CmpKind::LG, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_GE_F32: + cmpOp(TypeId::Float32, CmpKind::GE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_O_F32: + cmpOp(TypeId::Float32, CmpKind::O, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_U_F32: + cmpOp(TypeId::Float32, CmpKind::U, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NGE_F32: + cmpOp(TypeId::Float32, CmpKind::NGE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NLG_F32: + cmpOp(TypeId::Float32, CmpKind::NLG, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NGT_F32: + cmpOp(TypeId::Float32, CmpKind::NGT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NLE_F32: + cmpOp(TypeId::Float32, CmpKind::NLE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NEQ_F32: + cmpOp(TypeId::Float32, CmpKind::NEQ, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NLT_F32: + cmpOp(TypeId::Float32, CmpKind::NLT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_TRU_F32: + cmpOp(TypeId::Float32, CmpKind::TRU, CmpFlags::X); + break; + case Vopc::Op::V_CMP_F_F64: + cmpOp(TypeId::Float64, CmpKind::F); + break; + case Vopc::Op::V_CMP_LT_F64: + cmpOp(TypeId::Float64, CmpKind::LT); + break; + case Vopc::Op::V_CMP_EQ_F64: + cmpOp(TypeId::Float64, CmpKind::EQ); + break; + case Vopc::Op::V_CMP_LE_F64: + cmpOp(TypeId::Float64, CmpKind::LE); + break; + case Vopc::Op::V_CMP_GT_F64: + cmpOp(TypeId::Float64, CmpKind::GT); + break; + case Vopc::Op::V_CMP_LG_F64: + cmpOp(TypeId::Float64, CmpKind::LG); + break; + case Vopc::Op::V_CMP_GE_F64: + cmpOp(TypeId::Float64, CmpKind::GE); + break; + case Vopc::Op::V_CMP_O_F64: + cmpOp(TypeId::Float64, CmpKind::O); + break; + case Vopc::Op::V_CMP_U_F64: + cmpOp(TypeId::Float64, CmpKind::U); + break; + case Vopc::Op::V_CMP_NGE_F64: + cmpOp(TypeId::Float64, CmpKind::NGE); + break; + case Vopc::Op::V_CMP_NLG_F64: + cmpOp(TypeId::Float64, CmpKind::NLG); + break; + case Vopc::Op::V_CMP_NGT_F64: + cmpOp(TypeId::Float64, CmpKind::NGT); + break; + case Vopc::Op::V_CMP_NLE_F64: + cmpOp(TypeId::Float64, CmpKind::NLE); + break; + case Vopc::Op::V_CMP_NEQ_F64: + cmpOp(TypeId::Float64, CmpKind::NEQ); + break; + case Vopc::Op::V_CMP_NLT_F64: + cmpOp(TypeId::Float64, CmpKind::NLT); + break; + case Vopc::Op::V_CMP_TRU_F64: + cmpOp(TypeId::Float64, CmpKind::TRU); + break; + case Vopc::Op::V_CMPX_F_F64: + cmpOp(TypeId::Float64, CmpKind::F, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_LT_F64: + cmpOp(TypeId::Float64, CmpKind::LT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_EQ_F64: + cmpOp(TypeId::Float64, CmpKind::EQ, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_LE_F64: + cmpOp(TypeId::Float64, CmpKind::LE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_GT_F64: + cmpOp(TypeId::Float64, CmpKind::GT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_LG_F64: + cmpOp(TypeId::Float64, CmpKind::LG, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_GE_F64: + cmpOp(TypeId::Float64, CmpKind::GE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_O_F64: + cmpOp(TypeId::Float64, CmpKind::O, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_U_F64: + cmpOp(TypeId::Float64, CmpKind::U, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NGE_F64: + cmpOp(TypeId::Float64, CmpKind::NGE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NLG_F64: + cmpOp(TypeId::Float64, CmpKind::NLG, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NGT_F64: + cmpOp(TypeId::Float64, CmpKind::NGT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NLE_F64: + cmpOp(TypeId::Float64, CmpKind::NLE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NEQ_F64: + cmpOp(TypeId::Float64, CmpKind::NEQ, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NLT_F64: + cmpOp(TypeId::Float64, CmpKind::NLT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_TRU_F64: + cmpOp(TypeId::Float64, CmpKind::TRU, CmpFlags::X); + break; + case Vopc::Op::V_CMPS_F_F32: + cmpOp(TypeId::Float32, CmpKind::F, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_LT_F32: + cmpOp(TypeId::Float32, CmpKind::LT, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_EQ_F32: + cmpOp(TypeId::Float32, CmpKind::EQ, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_LE_F32: + cmpOp(TypeId::Float32, CmpKind::LE, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_GT_F32: + cmpOp(TypeId::Float32, CmpKind::GT, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_LG_F32: + cmpOp(TypeId::Float32, CmpKind::LG, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_GE_F32: + cmpOp(TypeId::Float32, CmpKind::GE, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_O_F32: + cmpOp(TypeId::Float32, CmpKind::O, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_U_F32: + cmpOp(TypeId::Float32, CmpKind::U, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_NGE_F32: + cmpOp(TypeId::Float32, CmpKind::NGE, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_NLG_F32: + cmpOp(TypeId::Float32, CmpKind::NLG, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_NGT_F32: + cmpOp(TypeId::Float32, CmpKind::NGT, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_NLE_F32: + cmpOp(TypeId::Float32, CmpKind::NLE, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_NEQ_F32: + cmpOp(TypeId::Float32, CmpKind::NEQ, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_NLT_F32: + cmpOp(TypeId::Float32, CmpKind::NLT, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_TRU_F32: + cmpOp(TypeId::Float32, CmpKind::TRU, CmpFlags::S); + break; + case Vopc::Op::V_CMPSX_F_F32: + cmpOp(TypeId::Float32, CmpKind::F, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_LT_F32: + cmpOp(TypeId::Float32, CmpKind::LT, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_EQ_F32: + cmpOp(TypeId::Float32, CmpKind::EQ, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_LE_F32: + cmpOp(TypeId::Float32, CmpKind::LE, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_GT_F32: + cmpOp(TypeId::Float32, CmpKind::GT, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_LG_F32: + cmpOp(TypeId::Float32, CmpKind::LG, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_GE_F32: + cmpOp(TypeId::Float32, CmpKind::GE, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_O_F32: + cmpOp(TypeId::Float32, CmpKind::O, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_U_F32: + cmpOp(TypeId::Float32, CmpKind::U, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_NGE_F32: + cmpOp(TypeId::Float32, CmpKind::NGE, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_NLG_F32: + cmpOp(TypeId::Float32, CmpKind::NLG, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_NGT_F32: + cmpOp(TypeId::Float32, CmpKind::NGT, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_NLE_F32: + cmpOp(TypeId::Float32, CmpKind::NLE, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_NEQ_F32: + cmpOp(TypeId::Float32, CmpKind::NEQ, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_NLT_F32: + cmpOp(TypeId::Float32, CmpKind::NLT, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_TRU_F32: + cmpOp(TypeId::Float32, CmpKind::TRU, CmpFlags::SX); + break; + case Vopc::Op::V_CMPS_F_F64: + cmpOp(TypeId::Float64, CmpKind::F, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_LT_F64: + cmpOp(TypeId::Float64, CmpKind::LT, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_EQ_F64: + cmpOp(TypeId::Float64, CmpKind::EQ, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_LE_F64: + cmpOp(TypeId::Float64, CmpKind::LE, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_GT_F64: + cmpOp(TypeId::Float64, CmpKind::GT, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_LG_F64: + cmpOp(TypeId::Float64, CmpKind::LG, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_GE_F64: + cmpOp(TypeId::Float64, CmpKind::GE, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_O_F64: + cmpOp(TypeId::Float64, CmpKind::O, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_U_F64: + cmpOp(TypeId::Float64, CmpKind::U, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_NGE_F64: + cmpOp(TypeId::Float64, CmpKind::NGE, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_NLG_F64: + cmpOp(TypeId::Float64, CmpKind::NLG, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_NGT_F64: + cmpOp(TypeId::Float64, CmpKind::NGT, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_NLE_F64: + cmpOp(TypeId::Float64, CmpKind::NLE, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_NEQ_F64: + cmpOp(TypeId::Float64, CmpKind::NEQ, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_NLT_F64: + cmpOp(TypeId::Float64, CmpKind::NLT, CmpFlags::S); + break; + case Vopc::Op::V_CMPS_TRU_F64: + cmpOp(TypeId::Float64, CmpKind::TRU, CmpFlags::S); + break; + case Vopc::Op::V_CMPSX_F_F64: + cmpOp(TypeId::Float64, CmpKind::F, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_LT_F64: + cmpOp(TypeId::Float64, CmpKind::LT, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_EQ_F64: + cmpOp(TypeId::Float64, CmpKind::EQ, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_LE_F64: + cmpOp(TypeId::Float64, CmpKind::LE, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_GT_F64: + cmpOp(TypeId::Float64, CmpKind::GT, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_LG_F64: + cmpOp(TypeId::Float64, CmpKind::LG, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_GE_F64: + cmpOp(TypeId::Float64, CmpKind::GE, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_O_F64: + cmpOp(TypeId::Float64, CmpKind::O, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_U_F64: + cmpOp(TypeId::Float64, CmpKind::U, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_NGE_F64: + cmpOp(TypeId::Float64, CmpKind::NGE, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_NLG_F64: + cmpOp(TypeId::Float64, CmpKind::NLG, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_NGT_F64: + cmpOp(TypeId::Float64, CmpKind::NGT, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_NLE_F64: + cmpOp(TypeId::Float64, CmpKind::NLE, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_NEQ_F64: + cmpOp(TypeId::Float64, CmpKind::NEQ, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_NLT_F64: + cmpOp(TypeId::Float64, CmpKind::NLT, CmpFlags::SX); + break; + case Vopc::Op::V_CMPSX_TRU_F64: + cmpOp(TypeId::Float64, CmpKind::TRU, CmpFlags::SX); + break; + case Vopc::Op::V_CMP_F_I32: + cmpOp(TypeId::SInt32, CmpKind::F); + break; + case Vopc::Op::V_CMP_LT_I32: + cmpOp(TypeId::SInt32, CmpKind::LT); + break; + case Vopc::Op::V_CMP_EQ_I32: + cmpOp(TypeId::SInt32, CmpKind::EQ); + break; + case Vopc::Op::V_CMP_LE_I32: + cmpOp(TypeId::SInt32, CmpKind::LE); + break; + case Vopc::Op::V_CMP_GT_I32: + cmpOp(TypeId::SInt32, CmpKind::GT); + break; + case Vopc::Op::V_CMP_NE_I32: + cmpOp(TypeId::SInt32, CmpKind::NE); + break; + case Vopc::Op::V_CMP_GE_I32: + cmpOp(TypeId::SInt32, CmpKind::GE); + break; + case Vopc::Op::V_CMP_T_I32: + cmpOp(TypeId::SInt32, CmpKind::T); + break; + // case Vopc::Op::V_CMP_CLASS_F32: cmpOp(TypeId::Float32, CmpKind::CLASS); + // break; + case Vopc::Op::V_CMP_LT_I16: + cmpOp(TypeId::SInt16, CmpKind::LT); + break; + case Vopc::Op::V_CMP_EQ_I16: + cmpOp(TypeId::SInt16, CmpKind::EQ); + break; + case Vopc::Op::V_CMP_LE_I16: + cmpOp(TypeId::SInt16, CmpKind::LE); + break; + case Vopc::Op::V_CMP_GT_I16: + cmpOp(TypeId::SInt16, CmpKind::GT); + break; + case Vopc::Op::V_CMP_NE_I16: + cmpOp(TypeId::SInt16, CmpKind::NE); + break; + case Vopc::Op::V_CMP_GE_I16: + cmpOp(TypeId::SInt16, CmpKind::GE); + break; + // case Vopc::Op::V_CMP_CLASS_F16: cmpOp(TypeId::Float16, CmpKind::CLASS); + // break; + case Vopc::Op::V_CMPX_F_I32: + cmpOp(TypeId::SInt32, CmpKind::F, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_LT_I32: + cmpOp(TypeId::SInt32, CmpKind::LT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_EQ_I32: + cmpOp(TypeId::SInt32, CmpKind::EQ, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_LE_I32: + cmpOp(TypeId::SInt32, CmpKind::LE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_GT_I32: + cmpOp(TypeId::SInt32, CmpKind::GT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NE_I32: + cmpOp(TypeId::SInt32, CmpKind::NE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_GE_I32: + cmpOp(TypeId::SInt32, CmpKind::GE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_T_I32: + cmpOp(TypeId::SInt32, CmpKind::T, CmpFlags::X); + break; + // case Vopc::Op::V_CMPX_CLASS_F32: cmpOp(TypeId::Float32, CmpKind::CLASS, + // CmpFlags::X); break; + case Vopc::Op::V_CMPX_LT_I16: + cmpOp(TypeId::SInt16, CmpKind::LT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_EQ_I16: + cmpOp(TypeId::SInt16, CmpKind::EQ, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_LE_I16: + cmpOp(TypeId::SInt16, CmpKind::LE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_GT_I16: + cmpOp(TypeId::SInt16, CmpKind::GT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NE_I16: + cmpOp(TypeId::SInt16, CmpKind::NE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_GE_I16: + cmpOp(TypeId::SInt16, CmpKind::GE, CmpFlags::X); + break; + // case Vopc::Op::V_CMPX_CLASS_F16: cmpOp(TypeId::Float16, CmpKind::CLASS, + // CmpFlags::X); break; + case Vopc::Op::V_CMP_F_I64: + cmpOp(TypeId::SInt64, CmpKind::F); + break; + case Vopc::Op::V_CMP_LT_I64: + cmpOp(TypeId::SInt64, CmpKind::LT); + break; + case Vopc::Op::V_CMP_EQ_I64: + cmpOp(TypeId::SInt64, CmpKind::EQ); + break; + case Vopc::Op::V_CMP_LE_I64: + cmpOp(TypeId::SInt64, CmpKind::LE); + break; + case Vopc::Op::V_CMP_GT_I64: + cmpOp(TypeId::SInt64, CmpKind::GT); + break; + case Vopc::Op::V_CMP_NE_I64: + cmpOp(TypeId::SInt64, CmpKind::NE); + break; + case Vopc::Op::V_CMP_GE_I64: + cmpOp(TypeId::SInt64, CmpKind::GE); + break; + case Vopc::Op::V_CMP_T_I64: + cmpOp(TypeId::SInt64, CmpKind::T); + break; + // case Vopc::Op::V_CMP_CLASS_F64: cmpOp(TypeId::Float64, CmpKind::CLASS); + // break; + case Vopc::Op::V_CMP_LT_U16: + cmpOp(TypeId::UInt16, CmpKind::LT); + break; + case Vopc::Op::V_CMP_EQ_U16: + cmpOp(TypeId::UInt16, CmpKind::EQ); + break; + case Vopc::Op::V_CMP_LE_U16: + cmpOp(TypeId::UInt16, CmpKind::LE); + break; + case Vopc::Op::V_CMP_GT_U16: + cmpOp(TypeId::UInt16, CmpKind::GT); + break; + case Vopc::Op::V_CMP_NE_U16: + cmpOp(TypeId::UInt16, CmpKind::NE); + break; + case Vopc::Op::V_CMP_GE_U16: + cmpOp(TypeId::UInt16, CmpKind::GE); + break; + case Vopc::Op::V_CMPX_F_I64: + cmpOp(TypeId::SInt64, CmpKind::F, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_LT_I64: + cmpOp(TypeId::SInt64, CmpKind::LT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_EQ_I64: + cmpOp(TypeId::SInt64, CmpKind::EQ, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_LE_I64: + cmpOp(TypeId::SInt64, CmpKind::LE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_GT_I64: + cmpOp(TypeId::SInt64, CmpKind::GT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NE_I64: + cmpOp(TypeId::SInt64, CmpKind::NE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_GE_I64: + cmpOp(TypeId::SInt64, CmpKind::GE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_T_I64: + cmpOp(TypeId::SInt64, CmpKind::T, CmpFlags::X); + break; + // case Vopc::Op::V_CMPX_CLASS_F64: cmpOp(TypeId::Float64, CmpKind::CLASS, + // CmpFlags::X); break; + case Vopc::Op::V_CMPX_LT_U16: + cmpOp(TypeId::UInt16, CmpKind::LT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_EQ_U16: + cmpOp(TypeId::UInt16, CmpKind::EQ, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_LE_U16: + cmpOp(TypeId::UInt16, CmpKind::LE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_GT_U16: + cmpOp(TypeId::UInt16, CmpKind::GT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NE_U16: + cmpOp(TypeId::UInt16, CmpKind::NE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_GE_U16: + cmpOp(TypeId::UInt16, CmpKind::GE, CmpFlags::X); + break; + case Vopc::Op::V_CMP_F_U32: + cmpOp(TypeId::UInt32, CmpKind::F); + break; + case Vopc::Op::V_CMP_LT_U32: + cmpOp(TypeId::UInt32, CmpKind::LT); + break; + case Vopc::Op::V_CMP_EQ_U32: + cmpOp(TypeId::UInt32, CmpKind::EQ); + break; + case Vopc::Op::V_CMP_LE_U32: + cmpOp(TypeId::UInt32, CmpKind::LE); + break; + case Vopc::Op::V_CMP_GT_U32: + cmpOp(TypeId::UInt32, CmpKind::GT); + break; + case Vopc::Op::V_CMP_NE_U32: + cmpOp(TypeId::UInt32, CmpKind::NE); + break; + case Vopc::Op::V_CMP_GE_U32: + cmpOp(TypeId::UInt32, CmpKind::GE); + break; + case Vopc::Op::V_CMP_T_U32: + cmpOp(TypeId::UInt32, CmpKind::T); + break; + case Vopc::Op::V_CMP_F_F16: + cmpOp(TypeId::Float16, CmpKind::F); + break; + case Vopc::Op::V_CMP_LT_F16: + cmpOp(TypeId::Float16, CmpKind::LT); + break; + case Vopc::Op::V_CMP_EQ_F16: + cmpOp(TypeId::Float16, CmpKind::EQ); + break; + case Vopc::Op::V_CMP_LE_F16: + cmpOp(TypeId::Float16, CmpKind::LE); + break; + case Vopc::Op::V_CMP_GT_F16: + cmpOp(TypeId::Float16, CmpKind::GT); + break; + case Vopc::Op::V_CMP_LG_F16: + cmpOp(TypeId::Float16, CmpKind::LG); + break; + case Vopc::Op::V_CMP_GE_F16: + cmpOp(TypeId::Float16, CmpKind::GE); + break; + case Vopc::Op::V_CMP_O_F16: + cmpOp(TypeId::Float16, CmpKind::O); + break; + case Vopc::Op::V_CMPX_F_U32: + cmpOp(TypeId::UInt32, CmpKind::F, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_LT_U32: + cmpOp(TypeId::UInt32, CmpKind::LT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_EQ_U32: + cmpOp(TypeId::UInt32, CmpKind::EQ, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_LE_U32: + cmpOp(TypeId::UInt32, CmpKind::LE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_GT_U32: + cmpOp(TypeId::UInt32, CmpKind::GT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NE_U32: + cmpOp(TypeId::UInt32, CmpKind::NE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_GE_U32: + cmpOp(TypeId::UInt32, CmpKind::GE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_T_U32: + cmpOp(TypeId::UInt32, CmpKind::T, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_F_F16: + cmpOp(TypeId::Float16, CmpKind::F, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_LT_F16: + cmpOp(TypeId::Float16, CmpKind::LT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_EQ_F16: + cmpOp(TypeId::Float16, CmpKind::EQ, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_LE_F16: + cmpOp(TypeId::Float16, CmpKind::LE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_GT_F16: + cmpOp(TypeId::Float16, CmpKind::GT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_LG_F16: + cmpOp(TypeId::Float16, CmpKind::LG, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_GE_F16: + cmpOp(TypeId::Float16, CmpKind::GE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_O_F16: + cmpOp(TypeId::Float16, CmpKind::O, CmpFlags::X); + break; + case Vopc::Op::V_CMP_F_U64: + cmpOp(TypeId::UInt64, CmpKind::F); + break; + case Vopc::Op::V_CMP_LT_U64: + cmpOp(TypeId::UInt64, CmpKind::LT); + break; + case Vopc::Op::V_CMP_EQ_U64: + cmpOp(TypeId::UInt64, CmpKind::EQ); + break; + case Vopc::Op::V_CMP_LE_U64: + cmpOp(TypeId::UInt64, CmpKind::LE); + break; + case Vopc::Op::V_CMP_GT_U64: + cmpOp(TypeId::UInt64, CmpKind::GT); + break; + case Vopc::Op::V_CMP_NE_U64: + cmpOp(TypeId::UInt64, CmpKind::NE); + break; + case Vopc::Op::V_CMP_GE_U64: + cmpOp(TypeId::UInt64, CmpKind::GE); + break; + case Vopc::Op::V_CMP_T_U64: + cmpOp(TypeId::UInt64, CmpKind::T); + break; + case Vopc::Op::V_CMP_U_F16: + cmpOp(TypeId::Float16, CmpKind::U); + break; + case Vopc::Op::V_CMP_NGE_F16: + cmpOp(TypeId::Float16, CmpKind::NGE); + break; + case Vopc::Op::V_CMP_NLG_F16: + cmpOp(TypeId::Float16, CmpKind::NLG); + break; + case Vopc::Op::V_CMP_NGT_F16: + cmpOp(TypeId::Float16, CmpKind::NGT); + break; + case Vopc::Op::V_CMP_NLE_F16: + cmpOp(TypeId::Float16, CmpKind::NLE); + break; + case Vopc::Op::V_CMP_NEQ_F16: + cmpOp(TypeId::Float16, CmpKind::NEQ); + break; + case Vopc::Op::V_CMP_NLT_F16: + cmpOp(TypeId::Float16, CmpKind::NLT); + break; + case Vopc::Op::V_CMP_TRU_F16: + cmpOp(TypeId::Float16, CmpKind::TRU); + break; + case Vopc::Op::V_CMPX_F_U64: + cmpOp(TypeId::UInt64, CmpKind::F, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_LT_U64: + cmpOp(TypeId::UInt64, CmpKind::LT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_EQ_U64: + cmpOp(TypeId::UInt64, CmpKind::EQ, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_LE_U64: + cmpOp(TypeId::UInt64, CmpKind::LE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_GT_U64: + cmpOp(TypeId::UInt64, CmpKind::GT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NE_U64: + cmpOp(TypeId::UInt64, CmpKind::NE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_GE_U64: + cmpOp(TypeId::UInt64, CmpKind::GE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_T_U64: + cmpOp(TypeId::UInt64, CmpKind::T, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_U_F16: + cmpOp(TypeId::Float16, CmpKind::U, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NGE_F16: + cmpOp(TypeId::Float16, CmpKind::NGE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NLG_F16: + cmpOp(TypeId::Float16, CmpKind::NLG, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NGT_F16: + cmpOp(TypeId::Float16, CmpKind::NGT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NLE_F16: + cmpOp(TypeId::Float16, CmpKind::NLE, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NEQ_F16: + cmpOp(TypeId::Float16, CmpKind::NEQ, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_NLT_F16: + cmpOp(TypeId::Float16, CmpKind::NLT, CmpFlags::X); + break; + case Vopc::Op::V_CMPX_TRU_F16: + cmpOp(TypeId::Float16, CmpKind::TRU, CmpFlags::X); + break; + + default: + inst.dump(); + util::unreachable(); + } +} +void convertSop1(Fragment &fragment, Sop1 inst) { + fragment.registers->pc += Sop1::kMinInstSize * sizeof(std::uint32_t); + + switch (inst.op) { + case Sop1::Op::S_MOV_B32: + fragment.setScalarOperand( + inst.sdst, fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32)); + break; + + case Sop1::Op::S_MOV_B64: + fragment.setScalarOperand( + inst.sdst, fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32)); + fragment.setScalarOperand( + inst.sdst + 1, + fragment.getScalarOperand(inst.ssrc0 + 1, TypeId::UInt32)); + break; + + case Sop1::Op::S_WQM_B32: { + // TODO: whole quad mode + break; + } + case Sop1::Op::S_WQM_B64: { + // TODO: whole quad mode + break; + } + case Sop1::Op::S_AND_SAVEEXEC_B64: { + auto execLo = fragment.getExecLo(); + auto execHi = fragment.getExecHi(); + + auto srcLo = fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32); + auto srcHi = fragment.getScalarOperand(inst.ssrc0 + 1, TypeId::UInt32); + + fragment.setOperand( + RegisterId::ExecLo, + {srcLo.type, fragment.builder.createBitwiseAnd(srcLo.type, srcLo.value, + execLo.value)}); + fragment.setOperand( + RegisterId::ExecHi, + {srcHi.type, fragment.builder.createBitwiseAnd(srcHi.type, srcHi.value, + execHi.value)}); + auto uint32_0 = fragment.context->getUInt32(0); + auto boolT = fragment.context->getBoolType(); + auto loIsNotZero = + fragment.builder.createINotEqual(boolT, execLo.value, uint32_0); + auto hiIsNotZero = + fragment.builder.createINotEqual(boolT, execHi.value, uint32_0); + fragment.setScc({boolT, fragment.builder.createLogicalAnd( + boolT, loIsNotZero, hiIsNotZero)}); + fragment.setScalarOperand(inst.sdst, execLo); + fragment.setScalarOperand(inst.sdst + 1, execHi); + break; + } + + case Sop1::Op::S_SETPC_B64: + if (auto ssrc0 = fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32), + ssrc1 = fragment.getScalarOperand(inst.ssrc0 + 1, TypeId::UInt32); + ssrc0 && ssrc1) { + auto ssrc0OptValue = fragment.context->findUint32Value(ssrc0.value); + auto ssrc1OptValue = fragment.context->findUint32Value(ssrc1.value); + + if (!ssrc0OptValue.has_value() || !ssrc1OptValue.has_value()) { + util::unreachable(); + } + + fragment.jumpAddress = + *ssrc0OptValue | (static_cast(*ssrc1OptValue) << 32); + } else { + util::unreachable(); + } + return; + + case Sop1::Op::S_SWAPPC_B64: { + if (auto ssrc0 = fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32), + ssrc1 = fragment.getScalarOperand(inst.ssrc0 + 1, TypeId::UInt32); + ssrc0 && ssrc1) { + auto ssrc0OptValue = fragment.context->findUint32Value(ssrc0.value); + auto ssrc1OptValue = fragment.context->findUint32Value(ssrc1.value); + + if (!ssrc0OptValue.has_value() || !ssrc1OptValue.has_value()) { + util::unreachable(); + } + + auto pc = fragment.registers->pc; + fragment.setScalarOperand(inst.sdst, {fragment.context->getUInt32Type(), + fragment.context->getUInt32(pc)}); + fragment.setScalarOperand(inst.sdst + 1, + {fragment.context->getUInt32Type(), + fragment.context->getUInt32(pc >> 32)}); + + fragment.jumpAddress = + *ssrc0OptValue | (static_cast(*ssrc1OptValue) << 32); + } else { + inst.dump(); + util::unreachable(); + } + return; + } + + default: + inst.dump(); + util::unreachable(); + } +} + +void convertSopc(Fragment &fragment, Sopc inst) { + fragment.registers->pc += Sopc::kMinInstSize * sizeof(std::uint32_t); + + auto cmpOp = [&](CmpKind kind, TypeId type) { + auto src0 = fragment.getScalarOperand(inst.ssrc0, type).value; + auto src1 = fragment.getScalarOperand(inst.ssrc1, type).value; + + auto result = doCmpOp(fragment, type, src0, src1, kind, CmpFlags::None); + fragment.setScc(result); + }; + + switch (inst.op) { + case Sopc::Op::S_CMP_EQ_I32: + cmpOp(CmpKind::EQ, TypeId::SInt32); + break; + case Sopc::Op::S_CMP_LG_I32: + cmpOp(CmpKind::LG, TypeId::SInt32); + break; + case Sopc::Op::S_CMP_GT_I32: + cmpOp(CmpKind::GT, TypeId::SInt32); + break; + case Sopc::Op::S_CMP_GE_I32: + cmpOp(CmpKind::GE, TypeId::SInt32); + break; + case Sopc::Op::S_CMP_LT_I32: + cmpOp(CmpKind::LT, TypeId::SInt32); + break; + case Sopc::Op::S_CMP_LE_I32: + cmpOp(CmpKind::LE, TypeId::SInt32); + break; + case Sopc::Op::S_CMP_EQ_U32: + cmpOp(CmpKind::EQ, TypeId::UInt32); + break; + case Sopc::Op::S_CMP_LG_U32: + cmpOp(CmpKind::LG, TypeId::UInt32); + break; + case Sopc::Op::S_CMP_GT_U32: + cmpOp(CmpKind::GT, TypeId::UInt32); + break; + case Sopc::Op::S_CMP_GE_U32: + cmpOp(CmpKind::GE, TypeId::UInt32); + break; + case Sopc::Op::S_CMP_LT_U32: + cmpOp(CmpKind::LT, TypeId::UInt32); + break; + case Sopc::Op::S_CMP_LE_U32: + cmpOp(CmpKind::LE, TypeId::UInt32); + break; + + case Sopc::Op::S_BITCMP0_B32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); + auto operandT = fragment.context->getUInt32Type(); + + src1 = spirv::cast(fragment.builder.createBitwiseAnd( + operandT, src1, fragment.context->getUInt32(0x1f))); + auto bit = fragment.builder.createBitwiseAnd( + operandT, + fragment.builder.createShiftRightLogical(operandT, src0, src1), + fragment.context->getUInt32(1)); + + auto boolT = fragment.context->getBoolType(); + fragment.setScc({boolT, fragment.builder.createIEqual( + boolT, bit, fragment.context->getUInt32(0))}); + break; + } + case Sopc::Op::S_BITCMP1_B32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); + auto operandT = fragment.context->getUInt32Type(); + + src1 = spirv::cast(fragment.builder.createBitwiseAnd( + operandT, src1, fragment.context->getUInt32(0x1f))); + auto bit = fragment.builder.createBitwiseAnd( + operandT, + fragment.builder.createShiftRightLogical(operandT, src0, src1), + fragment.context->getUInt32(1)); + + auto boolT = fragment.context->getBoolType(); + fragment.setScc({boolT, fragment.builder.createIEqual( + boolT, bit, fragment.context->getUInt32(1))}); + break; + } + case Sopc::Op::S_BITCMP0_B64: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt64).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); + auto operandT = fragment.context->getUInt64Type(); + + src1 = spirv::cast(fragment.builder.createBitwiseAnd( + operandT, src1, fragment.context->getUInt32(0x3f))); + auto bit = fragment.builder.createBitwiseAnd( + operandT, + fragment.builder.createShiftRightLogical(operandT, src0, src1), + fragment.context->getUInt64(1)); + + auto boolT = fragment.context->getBoolType(); + fragment.setScc({boolT, fragment.builder.createIEqual( + boolT, bit, fragment.context->getUInt64(0))}); + break; + } + case Sopc::Op::S_BITCMP1_B64: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.ssrc0, TypeId::UInt64).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); + auto operandT = fragment.context->getUInt64Type(); + + src1 = spirv::cast(fragment.builder.createBitwiseAnd( + operandT, src1, fragment.context->getUInt32(0x3f))); + auto bit = fragment.builder.createBitwiseAnd( + operandT, + fragment.builder.createShiftRightLogical(operandT, src0, src1), + fragment.context->getUInt64(1)); + + auto boolT = fragment.context->getBoolType(); + fragment.setScc({boolT, fragment.builder.createIEqual( + boolT, bit, fragment.context->getUInt64(1))}); + break; + } + default: + inst.dump(); + util::unreachable(); + } +} + +void convertSopp(Fragment &fragment, Sopp inst) { + fragment.registers->pc += Sopp::kMinInstSize * sizeof(std::uint32_t); + + auto createCondBranch = [&](spirv::BoolValue condition) { + fragment.branchCondition = condition; + /* + auto address = fragment.registers->pc + (inst.simm << 2); + + Fragment *ifTrueTarget = + fragment.context->getOrCreateFragment(address, 0x100); + Fragment *ifFalseTarget = + fragment.context->getOrCreateFragment(fragment.registers->pc, + 0x100); + + fragment.builder.createSelectionMerge(ifTrueTarget->entryBlockId, {}); + fragment.builder.createBranchConditional(condition, + ifTrueTarget->builder.id, ifFalseTarget->entryBlockId); + */ + }; + + switch (inst.op) { + case Sopp::Op::S_WAITCNT: + // TODO + break; + + case Sopp::Op::S_BRANCH: { + fragment.jumpAddress = fragment.registers->pc + (inst.simm << 2); + // auto address = fragment.registers->pc + (inst.simm << 2); + // Fragment *target = fragment.context->getOrCreateFragment(address, 0x100); + + // fragment.builder.createBranch(target->entryBlockId); + // fragment.terminator = FragmentTerminator::Branch; + // target->predecessors.insert(&fragment); + // fragment.successors.insert(target); + break; + } + + case Sopp::Op::S_CBRANCH_SCC0: { + createCondBranch(fragment.builder.createLogicalNot( + fragment.context->getBoolType(), fragment.getScc())); + break; + } + + case Sopp::Op::S_CBRANCH_SCC1: { + createCondBranch(fragment.getScc()); + break; + } + + case Sopp::Op::S_CBRANCH_VCCZ: { + auto loIsZero = fragment.builder.createIEqual( + fragment.context->getBoolType(), fragment.getVccLo().value, + fragment.context->getUInt32(0)); + auto hiIsZero = fragment.builder.createIEqual( + fragment.context->getBoolType(), fragment.getVccHi().value, + fragment.context->getUInt32(0)); + createCondBranch(fragment.builder.createLogicalAnd( + fragment.context->getBoolType(), loIsZero, hiIsZero)); + break; + } + + case Sopp::Op::S_CBRANCH_VCCNZ: { + auto loIsNotZero = fragment.builder.createINotEqual( + fragment.context->getBoolType(), fragment.getVccLo().value, + fragment.context->getUInt32(0)); + auto hiIsNotZero = fragment.builder.createINotEqual( + fragment.context->getBoolType(), fragment.getVccHi().value, + fragment.context->getUInt32(0)); + + createCondBranch(fragment.builder.createLogicalOr( + fragment.context->getBoolType(), loIsNotZero, hiIsNotZero)); + break; + } + + case Sopp::Op::S_CBRANCH_EXECZ: { + auto loIsZero = fragment.builder.createIEqual( + fragment.context->getBoolType(), fragment.getExecLo().value, + fragment.context->getUInt32(0)); + auto hiIsZero = fragment.builder.createIEqual( + fragment.context->getBoolType(), fragment.getExecHi().value, + fragment.context->getUInt32(0)); + createCondBranch(fragment.builder.createLogicalAnd( + fragment.context->getBoolType(), loIsZero, hiIsZero)); + break; + } + + case Sopp::Op::S_CBRANCH_EXECNZ: { + auto loIsNotZero = fragment.builder.createINotEqual( + fragment.context->getBoolType(), fragment.getExecLo().value, + fragment.context->getUInt32(0)); + auto hiIsNotZero = fragment.builder.createINotEqual( + fragment.context->getBoolType(), fragment.getExecHi().value, + fragment.context->getUInt32(0)); + + createCondBranch(fragment.builder.createLogicalOr( + fragment.context->getBoolType(), loIsNotZero, hiIsNotZero)); + break; + } + + case Sopp::Op::S_ENDPGM: + // fragment.terminator = FragmentTerminator::EndProgram; + return; + + case Sopp::Op::S_NOP: + break; + + default: + inst.dump(); + util::unreachable(); + } +} + +void convertInstruction(Fragment &fragment, Instruction inst) { + switch (inst.instClass) { + case InstructionClass::Vop2: + return convertVop2(fragment, Vop2(inst.inst)); + case InstructionClass::Sop2: + return convertSop2(fragment, Sop2(inst.inst)); + case InstructionClass::Sopk: + return convertSopk(fragment, Sopk(inst.inst)); + case InstructionClass::Smrd: + return convertSmrd(fragment, Smrd(inst.inst)); + case InstructionClass::Vop3: + return convertVop3(fragment, Vop3(inst.inst)); + case InstructionClass::Mubuf: + return convertMubuf(fragment, Mubuf(inst.inst)); + case InstructionClass::Mtbuf: + return convertMtbuf(fragment, Mtbuf(inst.inst)); + case InstructionClass::Mimg: + return convertMimg(fragment, Mimg(inst.inst)); + case InstructionClass::Ds: + return convertDs(fragment, Ds(inst.inst)); + case InstructionClass::Vintrp: + return convertVintrp(fragment, Vintrp(inst.inst)); + case InstructionClass::Exp: + return convertExp(fragment, Exp(inst.inst)); + case InstructionClass::Vop1: + return convertVop1(fragment, Vop1(inst.inst)); + case InstructionClass::Vopc: + return convertVopc(fragment, Vopc(inst.inst)); + case InstructionClass::Sop1: + return convertSop1(fragment, Sop1(inst.inst)); + case InstructionClass::Sopc: + return convertSopc(fragment, Sopc(inst.inst)); + case InstructionClass::Sopp: + return convertSopp(fragment, Sopp(inst.inst)); + + case InstructionClass::Invalid: + break; + } + + inst.dump(); + util::unreachable(); +} + +} // namespace + +void Fragment::injectValuesFromPreds() { + for (auto pred : predecessors) { + for (auto value : pred->values) { + values.insert(value); + } + + for (auto output : pred->outputs) { + outputs.insert(output); + } + } + + std::vector> predValues; + + // std::printf("injection values for bb%lx\n", registers->pc); + + // auto getRegName = [](RegisterId id) { + // if (id.isScalar()) { + // return "sgpr"; + // } + + // if (id.isVector()) { + // return "vgpr"; + // } + + // if (id.isExport()) { + // return "exp"; + // } + + // if (id.isAttr()) { + // return "attr"; + // } + + // return ""; + // }; + + auto setupRegisterValue = [&](RegisterId id) { + bool allSameValues = true; + predValues.clear(); + + spirv::Type type; + + for (auto pred : predecessors) { + Value value; + + if (type) { + value = pred->getRegister(id, type); + } else { + value = pred->getRegister(id); + type = value.type; + } + + if (allSameValues && !predValues.empty()) { + allSameValues = predValues.back().first == value.value; + } + + predValues.emplace_back(value.value, pred->builder.id); + } + + Value value; + + if (allSameValues) { + value = {type, predValues.back().first}; + // std::printf(" ** %s[%u] is value = %u\n", getRegName(id), + // id.getOffset(), + // predValues.back().first.id); + } else { + // std::printf(" ** %s[%u] is phi = { ", getRegName(id), id.getOffset()); + // for (bool isFirst = true; auto value : predValues) { + // if (isFirst) { + // isFirst = false; + // } else { + // std::printf(", "); + // } + // std::printf("%u", value.first.id); + // } + // std::printf(" }\n"); + value = {type, builder.createPhi(type, predValues)}; + } + + registers->setRegister(id, value); + }; + + for (auto id : values) { + setupRegisterValue(id); + } + for (auto id : outputs) { + setupRegisterValue(id); + } +} + +spirv::SamplerValue Fragment::createSampler(RegisterId base) { + auto sBuffer0 = getOperand(RegisterId::Raw(base + 0), TypeId::UInt32); + auto sBuffer1 = getOperand(RegisterId::Raw(base + 1), TypeId::UInt32); + auto sBuffer2 = getOperand(RegisterId::Raw(base + 2), TypeId::UInt32); + auto sBuffer3 = getOperand(RegisterId::Raw(base + 3), TypeId::UInt32); + + auto optSBuffer0Value = context->findUint32Value(sBuffer0.value); + auto optSBuffer1Value = context->findUint32Value(sBuffer1.value); + auto optSBuffer2Value = context->findUint32Value(sBuffer2.value); + auto optSBuffer3Value = context->findUint32Value(sBuffer3.value); + + if (optSBuffer0Value && optSBuffer1Value && optSBuffer2Value && + optSBuffer3Value) { + std::uint32_t sbuffer[] = { + *optSBuffer0Value, + *optSBuffer1Value, + *optSBuffer2Value, + *optSBuffer3Value, + }; + + auto uniform = context->getOrCreateUniformConstant( + sbuffer, std::size(sbuffer), TypeId::Sampler); + return builder.createLoad(context->getSamplerType(), uniform->variable); + } else { + util::unreachable(); + } +} + +spirv::ImageValue Fragment::createImage(RegisterId base, bool r128) { + auto tBuffer0 = getOperand(RegisterId::Raw(base + 0), TypeId::UInt32); + auto tBuffer1 = getOperand(RegisterId::Raw(base + 1), TypeId::UInt32); + auto tBuffer2 = getOperand(RegisterId::Raw(base + 2), TypeId::UInt32); + auto tBuffer3 = getOperand(RegisterId::Raw(base + 3), TypeId::UInt32); + + auto optTBuffer0Value = context->findUint32Value(tBuffer0.value); + auto optTBuffer1Value = context->findUint32Value(tBuffer1.value); + auto optTBuffer2Value = context->findUint32Value(tBuffer2.value); + auto optTBuffer3Value = context->findUint32Value(tBuffer3.value); + + if (!optTBuffer0Value || !optTBuffer1Value || !optTBuffer2Value || + !optTBuffer3Value) { + util::unreachable(); + } + + if (r128) { + std::uint32_t sbuffer[] = { + *optTBuffer0Value, + *optTBuffer1Value, + *optTBuffer2Value, + *optTBuffer3Value, + }; + + auto uniform = context->getOrCreateUniformConstant( + sbuffer, std::size(sbuffer), TypeId::Image2D); + return builder.createLoad(context->getImage2DType(), uniform->variable); + } + + auto tBuffer4 = getOperand(RegisterId::Raw(base + 4), TypeId::UInt32); + auto tBuffer5 = getOperand(RegisterId::Raw(base + 5), TypeId::UInt32); + auto tBuffer6 = getOperand(RegisterId::Raw(base + 6), TypeId::UInt32); + auto tBuffer7 = getOperand(RegisterId::Raw(base + 7), TypeId::UInt32); + + auto optTBuffer4Value = context->findUint32Value(tBuffer4.value); + auto optTBuffer5Value = context->findUint32Value(tBuffer5.value); + auto optTBuffer6Value = context->findUint32Value(tBuffer6.value); + auto optTBuffer7Value = context->findUint32Value(tBuffer7.value); + + if (!optTBuffer4Value || !optTBuffer5Value || !optTBuffer6Value || + !optTBuffer7Value) { + util::unreachable(); + } + + std::uint32_t sbuffer[] = { + *optTBuffer0Value, *optTBuffer1Value, *optTBuffer2Value, + *optTBuffer3Value, *optTBuffer4Value, *optTBuffer5Value, + *optTBuffer6Value, *optTBuffer7Value, + }; + + auto uniform = context->getOrCreateUniformConstant( + sbuffer, std::size(sbuffer), TypeId::Image2D); + return builder.createLoad(context->getImage2DType(), uniform->variable); +} + +Value Fragment::createCompositeExtract(Value composite, std::uint32_t member) { + auto optCompositeType = context->getTypeIdOf(composite.type); + if (!optCompositeType.has_value()) { + util::unreachable(); + } + + auto compositeType = *optCompositeType; + + TypeId baseType = compositeType.getBaseType(); + std::uint32_t memberCount = compositeType.getElementsCount(); + + if (member >= memberCount) { + util::unreachable(); + } + + auto resultType = context->getType(baseType); + spirv::Value resultValue; + + if (memberCount > 4) { + // stored in array + auto row = member / 4; + auto column = member % 4; + + auto rowType = context->getType( + static_cast(static_cast(baseType) + 3)); + + auto rowValue = + builder.createCompositeExtract(rowType, composite.value, {{row}}); + resultValue = + builder.createCompositeExtract(resultType, rowValue, {{column}}); + } else { + resultValue = + builder.createCompositeExtract(resultType, composite.value, {{member}}); + } + + return {resultType, resultValue}; +} + +spirv::Value Fragment::createBitcast(spirv::Type to, spirv::Type from, + spirv::Value value) { + if (from == to) { + return value; + } + + if (from == context->getFloat32Type()) { + if (auto origValue = context->findFloat32Value(value)) { + if (to == context->getUInt32Type()) { + return context->getUInt32(std::bit_cast(*origValue)); + } + + if (to == context->getSint32Type()) { + return context->getSInt32(std::bit_cast(*origValue)); + } + } + } else if (from == context->getUInt32Type()) { + if (auto origValue = context->findUint32Value(value)) { + if (to == context->getFloat32Type()) { + return context->getFloat32(std::bit_cast(*origValue)); + } + + if (to == context->getSint32Type()) { + return context->getSInt32(std::bit_cast(*origValue)); + } + } + } else if (from == context->getSint32Type()) { + if (auto origValue = context->findSint32Value(value)) { + if (to == context->getFloat32Type()) { + return context->getFloat32(std::bit_cast(*origValue)); + } + + if (to == context->getUInt32Type()) { + return context->getUInt32(std::bit_cast(*origValue)); + } + } + } + + if (from == context->getUInt64Type() && to == context->getUInt32Type()) { + util::unreachable(); + } + return builder.createBitcast(to, value); +} + +Value Fragment::getOperand(RegisterId id, TypeId type, OperandGetFlags flags) { + if (id == RegisterId::Scc) { + if (type != TypeId::Bool) { + util::unreachable(); + } + + return getRegister(id); + } + + auto elementsCount = type.getElementsCount(); + + if (elementsCount == 0) { + util::unreachable(); + } + + auto resultType = context->getType(type); + + auto baseTypeId = type.getBaseType(); + auto baseTypeSize = baseTypeId.getSize(); + auto registerCountPerElement = (baseTypeSize + 3) / 4; + auto registerElementsCount = elementsCount * registerCountPerElement; + + if (registerElementsCount == 1 || id.isExport() || id.isAttr()) { + if (flags == OperandGetFlags::PreserveType) { + return getRegister(id); + } else { + return getRegister(id, resultType); + } + } + + if (baseTypeSize < 4) { + util::unreachable(); + } + + auto baseType = context->getType(baseTypeId); + + if (registerCountPerElement == 1) { + std::vector members; + members.reserve(elementsCount); + spirv::Type preservedType; + + for (std::uint32_t i = 0; i < elementsCount; ++i) { + Value member; + + if (flags == OperandGetFlags::PreserveType) { + if (!preservedType) { + member = getRegister(RegisterId::Raw(id + i)); + preservedType = member.type; + } else { + member = getRegister(RegisterId::Raw(id + i), preservedType); + } + } else { + member = getRegister(RegisterId::Raw(id + i), baseType); + } + + members.push_back(member.value); + } + + return {resultType, builder.createCompositeConstruct(resultType, members)}; + } + + if (registerElementsCount != 2) { + util::unreachable(); + } + + TypeId registerType; + + switch (baseTypeId) { + case TypeId::UInt64: + registerType = TypeId::UInt32; + break; + case TypeId::SInt64: + registerType = TypeId::SInt32; + break; + case TypeId::Float64: + registerType = TypeId::Float32; + break; + + default: + util::unreachable(); + } + + if (registerCountPerElement != 2) { + util::unreachable(); + } + + auto uint64T = context->getUInt64Type(); + auto valueLo = builder.createUConvert( + uint64T, + spirv::cast(getOperand(id, TypeId::UInt32).value)); + auto valueHi = builder.createUConvert( + uint64T, spirv::cast( + getOperand(RegisterId::Raw(id + 1), TypeId::UInt32).value)); + valueHi = + builder.createShiftLeftLogical(uint64T, valueHi, context->getUInt32(32)); + auto value = builder.createBitwiseOr(uint64T, valueLo, valueHi); + + if (baseTypeId != TypeId::UInt64) { + value = createBitcast(baseType, context->getUInt64Type(), value); + } + + return {resultType, value}; +} + +void Fragment::setOperand(RegisterId id, Value value) { + if (id.isExport()) { + function->createExport(builder, id.getOffset(), value); + return; + } + + auto typeId = *context->getTypeIdOf(value.type); + auto elementsCount = typeId.getElementsCount(); + + if (elementsCount == 0) { + util::unreachable(); + } + + // if (id.isScalar()) { + // std::printf("update sgpr[%u]\n", id.getOffset()); + // } + + // TODO: handle half types + auto baseTypeId = typeId.getBaseType(); + auto baseTypeSize = baseTypeId.getSize(); + + auto registerCountPerElement = (baseTypeSize + 3) / 4; + auto registerElementsCount = elementsCount * registerCountPerElement; + + if (id == RegisterId::Scc) { + auto boolT = context->getBoolType(); + if (value.type != boolT) { + if (value.type == context->getUInt32Type()) { + value.value = + builder.createINotEqual(boolT, value.value, context->getUInt32(0)); + } else if (value.type == context->getSint32Type()) { + value.value = + builder.createINotEqual(boolT, value.value, context->getSInt32(0)); + } else if (value.type == context->getUInt64Type()) { + value.value = + builder.createINotEqual(boolT, value.value, context->getUInt64(0)); + } else { + util::unreachable(); + } + + value.type = boolT; + } + + setRegister(id, value); + return; + } + + if (registerElementsCount == 1 || id.isExport() || id.isAttr()) { + setRegister(id, value); + return; + } + + if (baseTypeSize < 4) { + util::unreachable(); + } + + if (registerCountPerElement == 1) { + for (std::uint32_t i = 0; i < elementsCount; ++i) { + auto element = createCompositeExtract(value, i); + auto regId = RegisterId::Raw(id + i); + setRegister(regId, element); + } + } else { + if (elementsCount != 1 || baseTypeId != typeId) { + util::unreachable(); + } + + TypeId registerType; + + switch (baseTypeId) { + case TypeId::UInt64: + registerType = TypeId::UInt32; + break; + case TypeId::SInt64: + registerType = TypeId::SInt32; + break; + case TypeId::Float64: + registerType = TypeId::Float32; + break; + + default: + util::unreachable(); + } + + if (registerCountPerElement != 2) { + util::unreachable(); + } + + auto uint64T = context->getUInt64Type(); + auto uint64_value = spirv::cast(value.value); + if (baseTypeId != TypeId::UInt64) { + uint64_value = spirv::cast( + createBitcast(uint64T, context->getType(baseTypeId), value.value)); + } + + auto uint32T = context->getUInt32Type(); + auto valueLo = builder.createUConvert(uint32T, uint64_value); + auto valueHi = builder.createUConvert( + uint32T, builder.createShiftRightLogical(uint64T, uint64_value, + context->getUInt32(32))); + + setOperand(id, {uint32T, valueLo}); + setOperand(RegisterId::Raw(id.raw + 1), {uint32T, valueHi}); + } +} + +void Fragment::setVcc(Value value) { + // TODO: update vcc hi if needed + // TODO: update vccz + + setOperand(RegisterId::VccLo, value); + setOperand(RegisterId::VccHi, + {context->getUInt32Type(), context->getUInt32(0)}); +} + +void Fragment::setScc(Value value) { + setOperand(RegisterId::Scc, value); + + if (value.type != context->getBoolType() && + value.type != context->getUInt32Type() && + value.type != context->getSint32Type() && + value.type != context->getUInt64Type()) { + util::unreachable(); + } +} + +spirv::BoolValue Fragment::getScc() { + auto result = + getOperand(RegisterId::Scc, TypeId::Bool, OperandGetFlags::PreserveType); + + if (result.type == context->getBoolType()) { + return spirv::cast(result.value); + } + + if (result.type == context->getUInt32Type()) { + return builder.createINotEqual(context->getBoolType(), result.value, + context->getUInt32(0)); + } + if (result.type == context->getSint32Type()) { + return builder.createINotEqual(context->getBoolType(), result.value, + context->getSInt32(0)); + } + if (result.type == context->getUInt64Type()) { + return builder.createINotEqual(context->getBoolType(), result.value, + context->getUInt64(0)); + } + + util::unreachable(); +} +/* +void Fragment::createCallTo(MaterializedFunction *materialized) { + std::vector args; + args.reserve(materialized->args.size()); + + for (auto input : materialized->args) { + auto value = getOperand(input.first, input.second); + args.push_back(value.value); + } + + auto callResultType = materialized->returnType; + + auto callResult = + builder.createFunctionCall(callResultType, materialized->function, args); + if (materialized->results.empty()) { + return; + } + + if (materialized->results.size() == 1) { + setOperand(materialized->results.begin()->first, + Value(callResultType, callResult)); + return; + } + + auto resultTypePointer = context->getBuilder().createTypePointer( + spv::StorageClass::Function, callResultType); + auto resultTypeVariable = + builder.createVariable(resultTypePointer, spv::StorageClass::Function); + builder.createStore(resultTypeVariable, callResult); + + std::uint32_t member = 0; + for (auto [output, typeId] : materialized->results) { + auto pointerType = + context->getPointerType(spv::StorageClass::Function, typeId); + auto valuePointer = builder.createAccessChain( + pointerType, resultTypeVariable, {{context->getUInt32(member++)}}); + + auto elementType = context->getType(typeId); + auto elementValue = builder.createLoad(elementType, valuePointer); + setOperand(output, Value(elementType, elementValue)); + } +} +*/ +void amdgpu::shader::Fragment::convert(std::uint64_t size) { + auto ptr = context->getMemory().getPointer(registers->pc); + auto endptr = ptr + size / sizeof(std::uint32_t); + + while (ptr < endptr) { + Instruction inst(ptr); + // auto startPoint = builder.bodyRegion.getCurrentPosition(); + + // std::printf("===============\n"); + // inst.dump(); + // std::printf("\n"); + convertInstruction(*this, inst); + + // std::printf("-------------->\n"); + // spirv::dump(builder.bodyRegion.getCurrentPosition() - startPoint); + + ptr += inst.size(); + } +} + +Value amdgpu::shader::Fragment::getRegister(RegisterId id) { + if (id.isScalar()) { + switch (id.getOffset()) { + case 128 ... 192: + return {context->getSint32Type(), context->getSInt32(id - 128)}; + case 193 ... 208: + return {context->getSint32Type(), + context->getSInt32(-static_cast(id - 192))}; + case 240: + return {context->getFloat32Type(), context->getFloat32(0.5f)}; + case 241: + return {context->getFloat32Type(), context->getFloat32(-0.5f)}; + case 242: + return {context->getFloat32Type(), context->getFloat32(1.0f)}; + case 243: + return {context->getFloat32Type(), context->getFloat32(-1.0f)}; + case 244: + return {context->getFloat32Type(), context->getFloat32(2.0f)}; + case 245: + return {context->getFloat32Type(), context->getFloat32(-2.0f)}; + case 246: + return {context->getFloat32Type(), context->getFloat32(4.0f)}; + case 247: + return {context->getFloat32Type(), context->getFloat32(-4.0f)}; + case 255: { + auto ptr = context->getMemory().getPointer(registers->pc); + registers->pc += sizeof(std::uint32_t); + return {context->getUInt32Type(), context->getUInt32(*ptr)}; + } + } + } + + if (auto result = registers->getRegister(id)) { + return result; + } + + if (id.isExport()) { + util::unreachable(); + } + + // std::printf("creation input %u\n", id.raw); + auto result = function->createInput(id); + assert(result); + values.insert(id); + registers->setRegister(id, result); + return result; +} + +Value amdgpu::shader::Fragment::getRegister(RegisterId id, + spirv::Type type) { + auto result = getRegister(id); + + if (!result) { + return result; + } + + if (type == context->getUInt64Type()) { + util::unreachable("%u is ulong\n", id.raw); + } + + return {type, createBitcast(type, result.type, result.value)}; +} + +void amdgpu::shader::Fragment::setRegister(RegisterId id, Value value) { + if (registers->getRegister(id) == value) { + return; + } + + assert(value); + + registers->setRegister(id, value); + outputs.insert(id); + // std::printf("creation output %u\n", id.raw); +} diff --git a/hw/amdgpu/shader/src/Function.cpp b/hw/amdgpu/shader/src/Function.cpp new file mode 100644 index 000000000..75d359320 --- /dev/null +++ b/hw/amdgpu/shader/src/Function.cpp @@ -0,0 +1,274 @@ +#include "Function.hpp" +#include "ConverterContext.hpp" +#include "RegisterId.hpp" + +using namespace amdgpu::shader; + +Value Function::createInput(RegisterId id) { + auto [it, inserted] = inputs.try_emplace(id); + + if (!inserted) { + assert(it->second); + return it->second; + } + + auto offset = id.getOffset(); + + if (id.isScalar()) { + auto uint32T = context->getUInt32Type(); + + if (userSgprs.size() > offset) { + return ((it->second = {uint32T, context->getUInt32(userSgprs[offset])})); + } + + if (stage == Stage::None) { + return ((it->second = + Value{uint32T, builder.createFunctionParameter(uint32T)})); + } + + switch (id.raw) { + case RegisterId::ExecLo: + return ((it->second = {uint32T, context->getUInt32(1)})); + case RegisterId::ExecHi: + return ((it->second = {uint32T, context->getUInt32(0)})); + + case RegisterId::Scc: + return ((it->second = {context->getBoolType(), context->getFalse()})); + + default: + break; + } + + if (stage == Stage::Vertex) { + return ((it->second = {uint32T, context->getUInt32(0)})); + } else if (stage == Stage::Fragment) { + return ((it->second = {uint32T, context->getUInt32(0)})); + } else if (stage == Stage::Compute) { + std::uint32_t offsetAfterSgprs = offset - userSgprs.size(); + if (offsetAfterSgprs < 3) { + auto workgroupIdVar = context->getWorkgroupId(); + auto workgroupId = entryFragment.builder.createLoad( + context->getUint32x3Type(), workgroupIdVar); + for (uint32_t i = 0; i < 3; ++i) { + auto input = entryFragment.builder.createCompositeExtract( + uint32T, workgroupId, {{i}}); + + inputs[RegisterId::Scalar(userSgprs.size() + i)] = {uint32T, input}; + } + + return inputs[id]; + } + + return ((it->second = {uint32T, context->getUInt32(0)})); + } + + util::unreachable(); + } + + if (stage == Stage::None) { + auto float32T = context->getFloat32Type(); + return ( + (it->second = {float32T, builder.createFunctionParameter(float32T)})); + } + + if (stage == Stage::Vertex) { + if (id.isVector()) { + auto uint32T = context->getUInt32Type(); + + if (id.getOffset() == 0) { + auto input = + entryFragment.builder.createLoad(uint32T, context->getThreadId()); + + return ((it->second = {uint32T, input})); + } + + return ((it->second = {uint32T, context->getUInt32(0)})); + } + + util::unreachable("Unexpected vertex input %u. user sgprs count=%zu", + id.raw, userSgprs.size()); + } + + if (stage == Stage::Fragment) { + if (id.isAttr()) { + auto float4T = context->getFloat32x4Type(); + auto input = entryFragment.builder.createLoad( + float4T, context->getIn(id.getOffset())); + return ((it->second = {float4T, input})); + } + + if (id.isVector()) { + switch (offset) { + case 2: + case 3: + case 4: + case 5: { + auto float4T = context->getFloat32x4Type(); + auto floatT = context->getFloat32Type(); + auto fragCoord = + entryFragment.builder.createLoad(float4T, context->getFragCoord()); + return ( + (it->second = {floatT, entryFragment.builder.createCompositeExtract( + floatT, fragCoord, {{offset - 2}})})); + } + } + } + + return ((it->second = {context->getUInt32Type(), context->getUInt32(0)})); + } + + if (stage == Stage::Compute) { + if (id.isVector() && offset < 3) { + auto uint32T = context->getUInt32Type(); + auto localInvocationIdVar = context->getLocalInvocationId(); + auto localInvocationId = entryFragment.builder.createLoad( + context->getUint32x3Type(), localInvocationIdVar); + + for (uint32_t i = 0; i < 3; ++i) { + auto input = entryFragment.builder.createCompositeExtract( + uint32T, localInvocationId, {{i}}); + + inputs[RegisterId::Vector(i)] = {uint32T, input}; + } + + return inputs[id]; + } + + return ((it->second = {context->getUInt32Type(), context->getUInt32(0)})); + } + + util::unreachable(); +} + +void Function::createExport(spirv::BlockBuilder &builder, unsigned index, + Value value) { + if (stage == Stage::Vertex) { + switch (index) { + case 12: { + auto float4OutPtrT = + context->getPointerType(spv::StorageClass::Output, TypeId::Float32x4); + + auto gl_PerVertexPosition = builder.createAccessChain( + float4OutPtrT, context->getPerVertex(), {{context->getSInt32(0)}}); + + if (value.type != context->getFloat32x4Type()) { + util::unreachable(); + } + + builder.createStore(gl_PerVertexPosition, value.value); + return; + } + + case 32 ... 64: { // paramN + if (value.type != context->getFloat32x4Type()) { + util::unreachable(); + } + + builder.createStore(context->getOut(index - 32), value.value); + return; + } + } + + util::unreachable("Unexpected vartex export target %u", index); + } + + if (stage == Stage::Fragment) { + switch (index) { + case 0 ... 7: { + if (value.type != context->getFloat32x4Type()) { + util::unreachable(); + } + + builder.createStore(context->getOut(index), value.value); + return; + } + } + + util::unreachable("Unexpected fragment export target %u", index); + } + + util::unreachable(); +} + +spirv::Type Function::getResultType() { + if (exitFragment.outputs.empty()) { + return context->getVoidType(); + } + + if (exitFragment.outputs.size() == 1) { + return exitFragment.registers->getRegister(*exitFragment.outputs.begin()) + .type; + } + + std::vector members; + members.reserve(exitFragment.outputs.size()); + + for (auto id : exitFragment.outputs) { + members.push_back(exitFragment.registers->getRegister(id).type); + } + + return context->getStructType(members); +} + +spirv::FunctionType Function::getFunctionType() { + if (stage != Stage::None) { + return context->getFunctionType(getResultType(), {}); + } + + std::vector params; + params.reserve(inputs.size()); + + for (auto inp : inputs) { + params.push_back(inp.second.type); + } + + return context->getFunctionType(getResultType(), params); +} + +Fragment *Function::createFragment() { + auto result = context->createFragment(0); + result->function = this; + fragments.push_back(result); + return result; +} + +void Function::insertReturn() { + if (exitFragment.outputs.empty()) { + exitFragment.builder.createReturn(); + return; + } + + if (exitFragment.outputs.size() == 1) { + auto value = + exitFragment.registers->getRegister(*exitFragment.outputs.begin()) + .value; + exitFragment.builder.createReturnValue(value); + return; + } + + auto resultType = getResultType(); + + auto resultTypePointer = context->getBuilder().createTypePointer( + spv::StorageClass::Function, resultType); + + auto resultVariable = entryFragment.builder.createVariable( + resultTypePointer, spv::StorageClass::Function); + + std::uint32_t member = 0; + for (auto regId : exitFragment.outputs) { + auto value = exitFragment.registers->getRegister(regId); + auto valueTypeId = context->getTypeIdOf(value.type); + + auto pointerType = + context->getPointerType(spv::StorageClass::Function, *valueTypeId); + auto valuePointer = exitFragment.builder.createAccessChain( + pointerType, resultVariable, + {{exitFragment.context->getUInt32(member++)}}); + + exitFragment.builder.createStore(valuePointer, value.value); + } + + auto resultValue = exitFragment.builder.createLoad(resultType, resultVariable); + + exitFragment.builder.createReturnValue(resultValue); +} diff --git a/hw/amdgpu/shader/src/Instruction.cpp b/hw/amdgpu/shader/src/Instruction.cpp new file mode 100644 index 000000000..e4a2b01c1 --- /dev/null +++ b/hw/amdgpu/shader/src/Instruction.cpp @@ -0,0 +1,3161 @@ +#include "Instruction.hpp" +#include + +namespace { +using namespace amdgpu::shader; + +int printScalarOperand(int id, const std::uint32_t *inst) { + switch (id) { + case 0 ... 103: + std::printf("sgpr[%d]", id); + return 0; + case 106: + std::printf("VCC_LO"); + return 0; + case 107: + std::printf("VCC_HI"); + return 0; + case 124: + std::printf("M0"); + return 0; + case 126: + std::printf("EXEC_LO"); + return 0; + case 127: + std::printf("EXEC_HI"); + return 0; + case 128 ... 192: + std::printf("%d", id - 128); + return 0; + case 193 ... 208: + std::printf("%d", -static_cast(id - 192)); + return 0; + case 240: + std::printf("0.5"); + return 0; + case 241: + std::printf("-0.5"); + return 0; + case 242: + std::printf("1.0"); + return 0; + case 243: + std::printf("-1.0"); + return 0; + case 244: + std::printf("2.0"); + return 0; + case 245: + std::printf("-2.0"); + return 0; + case 246: + std::printf("4.0"); + return 0; + case 247: + std::printf("-4.0"); + return 0; + case 251: + std::printf("VCCZ"); + return 0; + case 252: + std::printf("EXECZ"); + return 0; + case 253: + std::printf("SCC"); + return 0; + case 254: + std::printf("LDS_DIRECT"); + return 0; + case 255: + std::printf("%08x", *inst); + return 1; + case 256 ... 511: + std::printf("vgpr[%u]", id - 256); + return 0; + } + + std::printf("", id); + return 0; +} + +int printVectorOperand(int id, const std::uint32_t *inst) { + std::printf("vgpr[%u]", id); + return 0; +} + +void printExpTarget(int target) { + switch (target) { + case 0 ... 7: + std::printf("mrt%u", target); + break; + case 8: + std::printf("mrtz"); + break; + case 9: + std::printf("null"); + break; + case 12 ... 15: + std::printf("pos%u", target - 12); + break; + case 32 ... 63: + std::printf("param%u", target - 32); + break; + + default: + std::printf("", target); + break; + } +} + +void printSop1Opcode(Sop1::Op op) { + if (auto string = sop1OpcodeToString(op)) { + std::printf("%s", string); + } else { + std::printf("", static_cast(op)); + } +} + +void printSop2Opcode(Sop2::Op op) { + if (auto string = sop2OpcodeToString(op)) { + std::printf("%s", string); + } else { + std::printf("", static_cast(op)); + } +} + +void printSopkOpcode(Sopk::Op op) { + if (auto string = sopkOpcodeToString(op)) { + std::printf("%s", string); + } else { + std::printf("", static_cast(op)); + } +} + +void printSopcOpcode(Sopc::Op op) { + if (auto string = sopcOpcodeToString(op)) { + std::printf("%s", string); + } else { + std::printf("", static_cast(op)); + } +} + +void printSoppOpcode(Sopp::Op op) { + if (auto string = soppOpcodeToString(op)) { + std::printf("%s", string); + } else { + std::printf("", static_cast(op)); + } +} + +void printVop2Opcode(Vop2::Op op) { + if (auto string = vop2OpcodeToString(op)) { + std::printf("%s", string); + } else { + std::printf("", static_cast(op)); + } +} + +void printVop1Opcode(Vop1::Op op) { + if (auto string = vop1OpcodeToString(op)) { + std::printf("%s", string); + } else { + std::printf("", static_cast(op)); + } +} + +void printVopcOpcode(Vopc::Op op) { + if (auto string = vopcOpcodeToString(op)) { + std::printf("%s", string); + } else { + std::printf("", static_cast(op)); + } +} + +void printVop3Opcode(Vop3::Op op) { + if (auto string = vop3OpcodeToString(op)) { + std::printf("%s", string); + } else { + std::printf("", static_cast(op)); + } +} + +void printSmrdOpcode(Smrd::Op op) { + if (auto string = smrdOpcodeToString(op)) { + std::printf("%s", string); + } else { + std::printf("", static_cast(op)); + } +} + +void printMubufOpcode(Mubuf::Op op) { + if (auto string = mubufOpcodeToString(op)) { + std::printf("%s", string); + } else { + std::printf("", static_cast(op)); + } +} + +void printMtbufOpcode(Mtbuf::Op op) { + if (auto string = mtbufOpcodeToString(op)) { + std::printf("%s", string); + } else { + std::printf("", static_cast(op)); + } +} + +void printMimgOpcode(Mimg::Op op) { + if (auto string = mimgOpcodeToString(op)) { + std::printf("%s", string); + } else { + std::printf("", static_cast(op)); + } +} + +void printDsOpcode(Ds::Op op) { + if (auto string = dsOpcodeToString(op)) { + std::printf("%s", string); + } else { + std::printf("", static_cast(op)); + } +} + +void printVintrpOpcode(Vintrp::Op op) { + if (auto string = vintrpOpcodeToString(op)) { + std::printf("%s", string); + } else { + std::printf("", static_cast(op)); + } +} +} // namespace + +const char *amdgpu::shader::sop1OpcodeToString(Sop1::Op op) { + switch (op) { + case Sop1::Op::S_MOV_B32: + return "s_mov_b32"; + case Sop1::Op::S_MOV_B64: + return "s_mov_b64"; + case Sop1::Op::S_CMOV_B32: + return "s_cmov_b32"; + case Sop1::Op::S_CMOV_B64: + return "s_cmov_b64"; + case Sop1::Op::S_NOT_B32: + return "s_not_b32"; + case Sop1::Op::S_NOT_B64: + return "s_not_b64"; + case Sop1::Op::S_WQM_B32: + return "s_wqm_b32"; + case Sop1::Op::S_WQM_B64: + return "s_wqm_b64"; + case Sop1::Op::S_BREV_B32: + return "s_brev_b32"; + case Sop1::Op::S_BREV_B64: + return "s_brev_b64"; + case Sop1::Op::S_BCNT0_I32_B32: + return "s_bcnt0_i32_b32"; + case Sop1::Op::S_BCNT0_I32_B64: + return "s_bcnt0_i32_b64"; + case Sop1::Op::S_BCNT1_I32_B32: + return "s_bcnt1_i32_b32"; + case Sop1::Op::S_BCNT1_I32_B64: + return "s_bcnt1_i32_b64"; + case Sop1::Op::S_FF0_I32_B32: + return "s_ff0_i32_b32"; + case Sop1::Op::S_FF0_I32_B64: + return "s_ff0_i32_b64"; + case Sop1::Op::S_FF1_I32_B32: + return "s_ff1_i32_b32"; + case Sop1::Op::S_FF1_I32_B64: + return "s_ff1_i32_b64"; + case Sop1::Op::S_FLBIT_I32_B32: + return "s_flbit_i32_b32"; + case Sop1::Op::S_FLBIT_I32_B64: + return "s_flbit_i32_b64"; + case Sop1::Op::S_FLBIT_I32: + return "s_flbit_i32"; + case Sop1::Op::S_FLBIT_I32_I64: + return "s_flbit_i32_i64"; + case Sop1::Op::S_SEXT_I32_I8: + return "s_sext_i32_i8"; + case Sop1::Op::S_SEXT_I32_I16: + return "s_sext_i32_i16"; + case Sop1::Op::S_BITSET0_B32: + return "s_bitset0_b32"; + case Sop1::Op::S_BITSET0_B64: + return "s_bitset0_b64"; + case Sop1::Op::S_BITSET1_B32: + return "s_bitset1_b32"; + case Sop1::Op::S_BITSET1_B64: + return "s_bitset1_b64"; + case Sop1::Op::S_GETPC_B64: + return "s_getpc_b64"; + case Sop1::Op::S_SETPC_B64: + return "s_setpc_b64"; + case Sop1::Op::S_SWAPPC_B64: + return "s_swappc_b64"; + case Sop1::Op::S_RFE_B64: + return "s_rfe_b64"; + case Sop1::Op::S_AND_SAVEEXEC_B64: + return "s_and_saveexec_b64"; + case Sop1::Op::S_OR_SAVEEXEC_B64: + return "s_or_saveexec_b64"; + case Sop1::Op::S_XOR_SAVEEXEC_B64: + return "s_xor_saveexec_b64"; + case Sop1::Op::S_ANDN2_SAVEEXEC_B64: + return "s_andn2_saveexec_b64"; + case Sop1::Op::S_ORN2_SAVEEXEC_B64: + return "s_orn2_saveexec_b64"; + case Sop1::Op::S_NAND_SAVEEXEC_B64: + return "s_nand_saveexec_b64"; + case Sop1::Op::S_NOR_SAVEEXEC_B64: + return "s_nor_saveexec_b64"; + case Sop1::Op::S_XNOR_SAVEEXEC_B64: + return "s_xnor_saveexec_b64"; + case Sop1::Op::S_QUADMASK_B32: + return "s_quadmask_b32"; + case Sop1::Op::S_QUADMASK_B64: + return "s_quadmask_b64"; + case Sop1::Op::S_MOVRELS_B32: + return "s_movrels_b32"; + case Sop1::Op::S_MOVRELS_B64: + return "s_movrels_b64"; + case Sop1::Op::S_MOVRELD_B32: + return "s_movreld_b32"; + case Sop1::Op::S_MOVRELD_B64: + return "s_movreld_b64"; + case Sop1::Op::S_CBRANCH_JOIN: + return "s_cbranch_join"; + case Sop1::Op::S_ABS_I32: + return "s_abs_i32"; + case Sop1::Op::S_MOV_FED_B32: + return "s_mov_fed_b32"; + default: + return nullptr; + } +} + +const char *amdgpu::shader::sop2OpcodeToString(Sop2::Op op) { + switch (op) { + case Sop2::Op::S_ADD_U32: + return "s_add_u32"; + case Sop2::Op::S_SUB_U32: + return "s_sub_u32"; + case Sop2::Op::S_ADD_I32: + return "s_add_i32"; + case Sop2::Op::S_SUB_I32: + return "s_sub_i32"; + case Sop2::Op::S_ADDC_U32: + return "s_addc_u32"; + case Sop2::Op::S_SUBB_U32: + return "s_subb_u32"; + case Sop2::Op::S_MIN_I32: + return "s_min_i32"; + case Sop2::Op::S_MIN_U32: + return "s_min_u32"; + case Sop2::Op::S_MAX_I32: + return "s_max_i32"; + case Sop2::Op::S_MAX_U32: + return "s_max_u32"; + case Sop2::Op::S_CSELECT_B32: + return "s_cselect_b32"; + case Sop2::Op::S_CSELECT_B64: + return "s_cselect_b64"; + case Sop2::Op::S_AND_B32: + return "s_and_b32"; + case Sop2::Op::S_AND_B64: + return "s_and_b64"; + case Sop2::Op::S_OR_B32: + return "s_or_b32"; + case Sop2::Op::S_OR_B64: + return "s_or_b64"; + case Sop2::Op::S_XOR_B32: + return "s_xor_b32"; + case Sop2::Op::S_XOR_B64: + return "s_xor_b64"; + case Sop2::Op::S_ANDN2_B32: + return "s_andn2_b32"; + case Sop2::Op::S_ANDN2_B64: + return "s_andn2_b64"; + case Sop2::Op::S_ORN2_B32: + return "s_orn2_b32"; + case Sop2::Op::S_ORN2_B64: + return "s_orn2_b64"; + case Sop2::Op::S_NAND_B32: + return "s_nand_b32"; + case Sop2::Op::S_NAND_B64: + return "s_nand_b64"; + case Sop2::Op::S_NOR_B32: + return "s_nor_b32"; + case Sop2::Op::S_NOR_B64: + return "s_nor_b64"; + case Sop2::Op::S_XNOR_B32: + return "s_xnor_b32"; + case Sop2::Op::S_XNOR_B64: + return "s_xnor_b64"; + case Sop2::Op::S_LSHL_B32: + return "s_lshl_b32"; + case Sop2::Op::S_LSHL_B64: + return "s_lshl_b64"; + case Sop2::Op::S_LSHR_B32: + return "s_lshr_b32"; + case Sop2::Op::S_LSHR_B64: + return "s_lshr_b64"; + case Sop2::Op::S_ASHR_I32: + return "s_ashr_i32"; + case Sop2::Op::S_ASHR_I64: + return "s_ashr_i64"; + case Sop2::Op::S_BFM_B32: + return "s_bfm_b32"; + case Sop2::Op::S_BFM_B64: + return "s_bfm_b64"; + case Sop2::Op::S_MUL_I32: + return "s_mul_i32"; + case Sop2::Op::S_BFE_U32: + return "s_bfe_u32"; + case Sop2::Op::S_BFE_I32: + return "s_bfe_i32"; + case Sop2::Op::S_BFE_U64: + return "s_bfe_u64"; + case Sop2::Op::S_BFE_I64: + return "s_bfe_i64"; + case Sop2::Op::S_CBRANCH_G_FORK: + return "s_cbranch_g_fork"; + case Sop2::Op::S_ABSDIFF_I32: + return "s_absdiff_i32"; + default: + return nullptr; + } +} + +const char *amdgpu::shader::sopkOpcodeToString(Sopk::Op op) { + switch (op) { + case Sopk::Op::S_MOVK_I32: + return "s_movk_i32"; + case Sopk::Op::S_CMOVK_I32: + return "s_cmovk_i32"; + case Sopk::Op::S_CMPK_EQ_I32: + return "s_cmpk_eq_i32"; + case Sopk::Op::S_CMPK_LG_I32: + return "s_cmpk_lg_i32"; + case Sopk::Op::S_CMPK_GT_I32: + return "s_cmpk_gt_i32"; + case Sopk::Op::S_CMPK_GE_I32: + return "s_cmpk_ge_i32"; + case Sopk::Op::S_CMPK_LT_I32: + return "s_cmpk_lt_i32"; + case Sopk::Op::S_CMPK_LE_I32: + return "s_cmpk_le_i32"; + case Sopk::Op::S_CMPK_EQ_U32: + return "s_cmpk_eq_u32"; + case Sopk::Op::S_CMPK_LG_U32: + return "s_cmpk_lg_u32"; + case Sopk::Op::S_CMPK_GT_U32: + return "s_cmpk_gt_u32"; + case Sopk::Op::S_CMPK_GE_U32: + return "s_cmpk_ge_u32"; + case Sopk::Op::S_CMPK_LT_U32: + return "s_cmpk_lt_u32"; + case Sopk::Op::S_CMPK_LE_U32: + return "s_cmpk_le_u32"; + case Sopk::Op::S_ADDK_I32: + return "s_addk_i32"; + case Sopk::Op::S_MULK_I32: + return "s_mulk_i32"; + case Sopk::Op::S_CBRANCH_I_FORK: + return "s_cbranch_i_fork"; + case Sopk::Op::S_GETREG_B32: + return "s_getreg_b32"; + case Sopk::Op::S_SETREG_B32: + return "s_setreg_b32"; + case Sopk::Op::S_SETREG_IMM: + return "s_setreg_imm"; + default: + return nullptr; + } +} + +const char *amdgpu::shader::sopcOpcodeToString(Sopc::Op op) { + switch (op) { + case Sopc::Op::S_CMP_EQ_I32: + return "s_cmp_eq_i32"; + case Sopc::Op::S_CMP_LG_I32: + return "s_cmp_lg_i32"; + case Sopc::Op::S_CMP_GT_I32: + return "s_cmp_gt_i32"; + case Sopc::Op::S_CMP_GE_I32: + return "s_cmp_ge_i32"; + case Sopc::Op::S_CMP_LT_I32: + return "s_cmp_lt_i32"; + case Sopc::Op::S_CMP_LE_I32: + return "s_cmp_le_i32"; + case Sopc::Op::S_CMP_EQ_U32: + return "s_cmp_eq_u32"; + case Sopc::Op::S_CMP_LG_U32: + return "s_cmp_lg_u32"; + case Sopc::Op::S_CMP_GT_U32: + return "s_cmp_gt_u32"; + case Sopc::Op::S_CMP_GE_U32: + return "s_cmp_ge_u32"; + case Sopc::Op::S_CMP_LT_U32: + return "s_cmp_lt_u32"; + case Sopc::Op::S_CMP_LE_U32: + return "s_cmp_le_u32"; + case Sopc::Op::S_BITCMP0_B32: + return "s_bitcmp0_b32"; + case Sopc::Op::S_BITCMP1_B32: + return "s_bitcmp1_b32"; + case Sopc::Op::S_BITCMP0_B64: + return "s_bitcmp0_b64"; + case Sopc::Op::S_BITCMP1_B64: + return "s_bitcmp1_b64"; + case Sopc::Op::S_SETVSKIP: + return "s_setvskip"; + default: + return nullptr; + } +} + +const char *amdgpu::shader::soppOpcodeToString(Sopp::Op op) { + switch (op) { + case Sopp::Op::S_NOP: + return "s_nop"; + case Sopp::Op::S_ENDPGM: + return "s_endpgm"; + case Sopp::Op::S_BRANCH: + return "s_branch"; + case Sopp::Op::S_CBRANCH_SCC0: + return "s_cbranch_scc0"; + case Sopp::Op::S_CBRANCH_SCC1: + return "s_cbranch_scc1"; + case Sopp::Op::S_CBRANCH_VCCZ: + return "s_cbranch_vccz"; + case Sopp::Op::S_CBRANCH_VCCNZ: + return "s_cbranch_vccnz"; + case Sopp::Op::S_CBRANCH_EXECZ: + return "s_cbranch_execz"; + case Sopp::Op::S_CBRANCH_EXECNZ: + return "s_cbranch_execnz"; + case Sopp::Op::S_BARRIER: + return "s_barrier"; + case Sopp::Op::S_WAITCNT: + return "s_waitcnt"; + case Sopp::Op::S_SETHALT: + return "s_sethalt"; + case Sopp::Op::S_SLEEP: + return "s_sleep"; + case Sopp::Op::S_SETPRIO: + return "s_setprio"; + case Sopp::Op::S_SENDMSG: + return "s_sendmsg"; + case Sopp::Op::S_SENDMSGHALT: + return "s_sendmsghalt"; + case Sopp::Op::S_TRAP: + return "s_trap"; + case Sopp::Op::S_ICACHE_INV: + return "s_icache_inv"; + case Sopp::Op::S_INCPERFLEVEL: + return "s_incperflevel"; + case Sopp::Op::S_DECPERFLEVEL: + return "s_decperflevel"; + case Sopp::Op::S_TTRACEDATA: + return "s_ttracedata"; + case Sopp::Op::S_CBRANCH_CDBGSYS: + return "s_cbranch_cdbgsys"; + case Sopp::Op::S_CBRANCH_CDBGUSER: + return "s_cbranch_cdbguser"; + case Sopp::Op::S_CBRANCH_CDBGSYS_OR_USER: + return "s_cbranch_cdbgsys_or_user"; + case Sopp::Op::S_CBRANCH_CDBGSYS_AND_USER: + return "s_cbranch_cdbgsys_and_user"; + default: + return nullptr; + } +} + +const char *amdgpu::shader::vop2OpcodeToString(Vop2::Op op) { + switch (op) { + case Vop2::Op::V_CNDMASK_B32: + return "v_cndmask_b32"; + case Vop2::Op::V_READLANE_B32: + return "v_readlane_b32"; + case Vop2::Op::V_WRITELANE_B32: + return "v_writelane_b32"; + case Vop2::Op::V_ADD_F32: + return "v_add_f32"; + case Vop2::Op::V_SUB_F32: + return "v_sub_f32"; + case Vop2::Op::V_SUBREV_F32: + return "v_subrev_f32"; + case Vop2::Op::V_MAC_LEGACY_F32: + return "v_mac_legacy_f32"; + case Vop2::Op::V_MUL_LEGACY_F32: + return "v_mul_legacy_f32"; + case Vop2::Op::V_MUL_F32: + return "v_mul_f32"; + case Vop2::Op::V_MUL_I32_I24: + return "v_mul_i32_i24"; + case Vop2::Op::V_MUL_HI_I32_I24: + return "v_mul_hi_i32_i24"; + case Vop2::Op::V_MUL_U32_U24: + return "v_mul_u32_u24"; + case Vop2::Op::V_MUL_HI_U32_U24: + return "v_mul_hi_u32_u24"; + case Vop2::Op::V_MIN_LEGACY_F32: + return "v_min_legacy_f32"; + case Vop2::Op::V_MAX_LEGACY_F32: + return "v_max_legacy_f32"; + case Vop2::Op::V_MIN_F32: + return "v_min_f32"; + case Vop2::Op::V_MAX_F32: + return "v_max_f32"; + case Vop2::Op::V_MIN_I32: + return "v_min_i32"; + case Vop2::Op::V_MAX_I32: + return "v_max_i32"; + case Vop2::Op::V_MIN_U32: + return "v_min_u32"; + case Vop2::Op::V_MAX_U32: + return "v_max_u32"; + case Vop2::Op::V_LSHR_B32: + return "v_lshr_b32"; + case Vop2::Op::V_LSHRREV_B32: + return "v_lshrrev_b32"; + case Vop2::Op::V_ASHR_I32: + return "v_ashr_i32"; + case Vop2::Op::V_ASHRREV_I32: + return "v_ashrrev_i32"; + case Vop2::Op::V_LSHL_B32: + return "v_lshl_b32"; + case Vop2::Op::V_LSHLREV_B32: + return "v_lshlrev_b32"; + case Vop2::Op::V_AND_B32: + return "v_and_b32"; + case Vop2::Op::V_OR_B32: + return "v_or_b32"; + case Vop2::Op::V_XOR_B32: + return "v_xor_b32"; + case Vop2::Op::V_BFM_B32: + return "v_bfm_b32"; + case Vop2::Op::V_MAC_F32: + return "v_mac_f32"; + case Vop2::Op::V_MADMK_F32: + return "v_madmk_f32"; + case Vop2::Op::V_MADAK_F32: + return "v_madak_f32"; + case Vop2::Op::V_BCNT_U32_B32: + return "v_bcnt_u32_b32"; + case Vop2::Op::V_MBCNT_LO_U32_B32: + return "v_mbcnt_lo_u32_b32"; + case Vop2::Op::V_MBCNT_HI_U32_B32: + return "v_mbcnt_hi_u32_b32"; + case Vop2::Op::V_ADD_I32: + return "v_add_i32"; + case Vop2::Op::V_SUB_I32: + return "v_sub_i32"; + case Vop2::Op::V_SUBREV_I32: + return "v_subrev_i32"; + case Vop2::Op::V_ADDC_U32: + return "v_addc_u32"; + case Vop2::Op::V_SUBB_U32: + return "v_subb_u32"; + case Vop2::Op::V_SUBBREV_U32: + return "v_subbrev_u32"; + case Vop2::Op::V_LDEXP_F32: + return "v_ldexp_f32"; + case Vop2::Op::V_CVT_PKACCUM_U8_F32: + return "v_cvt_pkaccum_u8_f32"; + case Vop2::Op::V_CVT_PKNORM_I16_F32: + return "v_cvt_pknorm_i16_f32"; + case Vop2::Op::V_CVT_PKNORM_U16_F32: + return "v_cvt_pknorm_u16_f32"; + case Vop2::Op::V_CVT_PKRTZ_F16_F32: + return "v_cvt_pkrtz_f16_f32"; + case Vop2::Op::V_CVT_PK_U16_U32: + return "v_cvt_pk_u16_u32"; + case Vop2::Op::V_CVT_PK_I16_I32: + return "v_cvt_pk_i16_i32"; + default: + return nullptr; + } +} + +const char *amdgpu::shader::vop1OpcodeToString(Vop1::Op op) { + switch (op) { + case Vop1::Op::V_NOP: + return "v_nop"; + case Vop1::Op::V_MOV_B32: + return "v_mov_b32"; + case Vop1::Op::V_READFIRSTLANE_B32: + return "v_readfirstlane_b32"; + case Vop1::Op::V_CVT_I32_F64: + return "v_cvt_i32_f64"; + case Vop1::Op::V_CVT_F64_I32: + return "v_cvt_f64_i32"; + case Vop1::Op::V_CVT_F32_I32: + return "v_cvt_f32_i32"; + case Vop1::Op::V_CVT_F32_U32: + return "v_cvt_f32_u32"; + case Vop1::Op::V_CVT_U32_F32: + return "v_cvt_u32_f32"; + case Vop1::Op::V_CVT_I32_F32: + return "v_cvt_i32_f32"; + case Vop1::Op::V_MOV_FED_B32: + return "v_mov_fed_b32"; + case Vop1::Op::V_CVT_F16_F32: + return "v_cvt_f16_f32"; + case Vop1::Op::V_CVT_F32_F16: + return "v_cvt_f32_f16"; + case Vop1::Op::V_CVT_RPI_I32_F32: + return "v_cvt_rpi_i32_f32"; + case Vop1::Op::V_CVT_FLR_I32_F32: + return "v_cvt_flr_i32_f32"; + case Vop1::Op::V_CVT_OFF_F32_I4: + return "v_cvt_off_f32_i4"; + case Vop1::Op::V_CVT_F32_F64: + return "v_cvt_f32_f64"; + case Vop1::Op::V_CVT_F64_F32: + return "v_cvt_f64_f32"; + case Vop1::Op::V_CVT_F32_UBYTE0: + return "v_cvt_f32_ubyte0"; + case Vop1::Op::V_CVT_F32_UBYTE1: + return "v_cvt_f32_ubyte1"; + case Vop1::Op::V_CVT_F32_UBYTE2: + return "v_cvt_f32_ubyte2"; + case Vop1::Op::V_CVT_F32_UBYTE3: + return "v_cvt_f32_ubyte3"; + case Vop1::Op::V_CVT_U32_F64: + return "v_cvt_u32_f64"; + case Vop1::Op::V_CVT_F64_U32: + return "v_cvt_f64_u32"; + case Vop1::Op::V_FRACT_F32: + return "v_fract_f32"; + case Vop1::Op::V_TRUNC_F32: + return "v_trunc_f32"; + case Vop1::Op::V_CEIL_F32: + return "v_ceil_f32"; + case Vop1::Op::V_RNDNE_F32: + return "v_rndne_f32"; + case Vop1::Op::V_FLOOR_F32: + return "v_floor_f32"; + case Vop1::Op::V_EXP_F32: + return "v_exp_f32"; + case Vop1::Op::V_LOG_CLAMP_F32: + return "v_log_clamp_f32"; + case Vop1::Op::V_LOG_F32: + return "v_log_f32"; + case Vop1::Op::V_RCP_CLAMP_F32: + return "v_rcp_clamp_f32"; + case Vop1::Op::V_RCP_LEGACY_F32: + return "v_rcp_legacy_f32"; + case Vop1::Op::V_RCP_F32: + return "v_rcp_f32"; + case Vop1::Op::V_RCP_IFLAG_F32: + return "v_rcp_iflag_f32"; + case Vop1::Op::V_RSQ_CLAMP_F32: + return "v_rsq_clamp_f32"; + case Vop1::Op::V_RSQ_LEGACY_F32: + return "v_rsq_legacy_f32"; + case Vop1::Op::V_RSQ_F32: + return "v_rsq_f32"; + case Vop1::Op::V_RCP_F64: + return "v_rcp_f64"; + case Vop1::Op::V_RCP_CLAMP_F64: + return "v_rcp_clamp_f64"; + case Vop1::Op::V_RSQ_F64: + return "v_rsq_f64"; + case Vop1::Op::V_RSQ_CLAMP_F64: + return "v_rsq_clamp_f64"; + case Vop1::Op::V_SQRT_F32: + return "v_sqrt_f32"; + case Vop1::Op::V_SQRT_F64: + return "v_sqrt_f64"; + case Vop1::Op::V_SIN_F32: + return "v_sin_f32"; + case Vop1::Op::V_COS_F32: + return "v_cos_f32"; + case Vop1::Op::V_NOT_B32: + return "v_not_b32"; + case Vop1::Op::V_BFREV_B32: + return "v_bfrev_b32"; + case Vop1::Op::V_FFBH_U32: + return "v_ffbh_u32"; + case Vop1::Op::V_FFBL_B32: + return "v_ffbl_b32"; + case Vop1::Op::V_FFBH_I32: + return "v_ffbh_i32"; + case Vop1::Op::V_FREXP_EXP_I32_F64: + return "v_frexp_exp_i32_f64"; + case Vop1::Op::V_FREXP_MANT_F64: + return "v_frexp_mant_f64"; + case Vop1::Op::V_FRACT_F64: + return "v_fract_f64"; + case Vop1::Op::V_FREXP_EXP_I32_F32: + return "v_frexp_exp_i32_f32"; + case Vop1::Op::V_FREXP_MANT_F32: + return "v_frexp_mant_f32"; + case Vop1::Op::V_CLREXCP: + return "v_clrexcp"; + case Vop1::Op::V_MOVRELD_B32: + return "v_movreld_b32"; + case Vop1::Op::V_MOVRELS_B32: + return "v_movrels_b32"; + case Vop1::Op::V_MOVRELSD_B32: + return "v_movrelsd_b32"; + default: + return nullptr; + } +} + +const char *amdgpu::shader::vopcOpcodeToString(Vopc::Op op) { + switch (op) { + case Vopc::Op::V_CMP_F_F32: + return "v_cmp_f_f32"; + case Vopc::Op::V_CMP_LT_F32: + return "v_cmp_lt_f32"; + case Vopc::Op::V_CMP_EQ_F32: + return "v_cmp_eq_f32"; + case Vopc::Op::V_CMP_LE_F32: + return "v_cmp_le_f32"; + case Vopc::Op::V_CMP_GT_F32: + return "v_cmp_gt_f32"; + case Vopc::Op::V_CMP_LG_F32: + return "v_cmp_lg_f32"; + case Vopc::Op::V_CMP_GE_F32: + return "v_cmp_ge_f32"; + case Vopc::Op::V_CMP_O_F32: + return "v_cmp_o_f32"; + case Vopc::Op::V_CMP_U_F32: + return "v_cmp_u_f32"; + case Vopc::Op::V_CMP_NGE_F32: + return "v_cmp_nge_f32"; + case Vopc::Op::V_CMP_NLG_F32: + return "v_cmp_nlg_f32"; + case Vopc::Op::V_CMP_NGT_F32: + return "v_cmp_ngt_f32"; + case Vopc::Op::V_CMP_NLE_F32: + return "v_cmp_nle_f32"; + case Vopc::Op::V_CMP_NEQ_F32: + return "v_cmp_neq_f32"; + case Vopc::Op::V_CMP_NLT_F32: + return "v_cmp_nlt_f32"; + case Vopc::Op::V_CMP_TRU_F32: + return "v_cmp_tru_f32"; + case Vopc::Op::V_CMPX_F_F32: + return "v_cmpx_f_f32"; + case Vopc::Op::V_CMPX_LT_F32: + return "v_cmpx_lt_f32"; + case Vopc::Op::V_CMPX_EQ_F32: + return "v_cmpx_eq_f32"; + case Vopc::Op::V_CMPX_LE_F32: + return "v_cmpx_le_f32"; + case Vopc::Op::V_CMPX_GT_F32: + return "v_cmpx_gt_f32"; + case Vopc::Op::V_CMPX_LG_F32: + return "v_cmpx_lg_f32"; + case Vopc::Op::V_CMPX_GE_F32: + return "v_cmpx_ge_f32"; + case Vopc::Op::V_CMPX_O_F32: + return "v_cmpx_o_f32"; + case Vopc::Op::V_CMPX_U_F32: + return "v_cmpx_u_f32"; + case Vopc::Op::V_CMPX_NGE_F32: + return "v_cmpx_nge_f32"; + case Vopc::Op::V_CMPX_NLG_F32: + return "v_cmpx_nlg_f32"; + case Vopc::Op::V_CMPX_NGT_F32: + return "v_cmpx_ngt_f32"; + case Vopc::Op::V_CMPX_NLE_F32: + return "v_cmpx_nle_f32"; + case Vopc::Op::V_CMPX_NEQ_F32: + return "v_cmpx_neq_f32"; + case Vopc::Op::V_CMPX_NLT_F32: + return "v_cmpx_nlt_f32"; + case Vopc::Op::V_CMPX_TRU_F32: + return "v_cmpx_tru_f32"; + case Vopc::Op::V_CMP_F_F64: + return "v_cmp_f_f64"; + case Vopc::Op::V_CMP_LT_F64: + return "v_cmp_lt_f64"; + case Vopc::Op::V_CMP_EQ_F64: + return "v_cmp_eq_f64"; + case Vopc::Op::V_CMP_LE_F64: + return "v_cmp_le_f64"; + case Vopc::Op::V_CMP_GT_F64: + return "v_cmp_gt_f64"; + case Vopc::Op::V_CMP_LG_F64: + return "v_cmp_lg_f64"; + case Vopc::Op::V_CMP_GE_F64: + return "v_cmp_ge_f64"; + case Vopc::Op::V_CMP_O_F64: + return "v_cmp_o_f64"; + case Vopc::Op::V_CMP_U_F64: + return "v_cmp_u_f64"; + case Vopc::Op::V_CMP_NGE_F64: + return "v_cmp_nge_f64"; + case Vopc::Op::V_CMP_NLG_F64: + return "v_cmp_nlg_f64"; + case Vopc::Op::V_CMP_NGT_F64: + return "v_cmp_ngt_f64"; + case Vopc::Op::V_CMP_NLE_F64: + return "v_cmp_nle_f64"; + case Vopc::Op::V_CMP_NEQ_F64: + return "v_cmp_neq_f64"; + case Vopc::Op::V_CMP_NLT_F64: + return "v_cmp_nlt_f64"; + case Vopc::Op::V_CMP_TRU_F64: + return "v_cmp_tru_f64"; + case Vopc::Op::V_CMPX_F_F64: + return "v_cmpx_f_f64"; + case Vopc::Op::V_CMPX_LT_F64: + return "v_cmpx_lt_f64"; + case Vopc::Op::V_CMPX_EQ_F64: + return "v_cmpx_eq_f64"; + case Vopc::Op::V_CMPX_LE_F64: + return "v_cmpx_le_f64"; + case Vopc::Op::V_CMPX_GT_F64: + return "v_cmpx_gt_f64"; + case Vopc::Op::V_CMPX_LG_F64: + return "v_cmpx_lg_f64"; + case Vopc::Op::V_CMPX_GE_F64: + return "v_cmpx_ge_f64"; + case Vopc::Op::V_CMPX_O_F64: + return "v_cmpx_o_f64"; + case Vopc::Op::V_CMPX_U_F64: + return "v_cmpx_u_f64"; + case Vopc::Op::V_CMPX_NGE_F64: + return "v_cmpx_nge_f64"; + case Vopc::Op::V_CMPX_NLG_F64: + return "v_cmpx_nlg_f64"; + case Vopc::Op::V_CMPX_NGT_F64: + return "v_cmpx_ngt_f64"; + case Vopc::Op::V_CMPX_NLE_F64: + return "v_cmpx_nle_f64"; + case Vopc::Op::V_CMPX_NEQ_F64: + return "v_cmpx_neq_f64"; + case Vopc::Op::V_CMPX_NLT_F64: + return "v_cmpx_nlt_f64"; + case Vopc::Op::V_CMPX_TRU_F64: + return "v_cmpx_tru_f64"; + case Vopc::Op::V_CMPS_F_F32: + return "v_cmps_f_f32"; + case Vopc::Op::V_CMPS_LT_F32: + return "v_cmps_lt_f32"; + case Vopc::Op::V_CMPS_EQ_F32: + return "v_cmps_eq_f32"; + case Vopc::Op::V_CMPS_LE_F32: + return "v_cmps_le_f32"; + case Vopc::Op::V_CMPS_GT_F32: + return "v_cmps_gt_f32"; + case Vopc::Op::V_CMPS_LG_F32: + return "v_cmps_lg_f32"; + case Vopc::Op::V_CMPS_GE_F32: + return "v_cmps_ge_f32"; + case Vopc::Op::V_CMPS_O_F32: + return "v_cmps_o_f32"; + case Vopc::Op::V_CMPS_U_F32: + return "v_cmps_u_f32"; + case Vopc::Op::V_CMPS_NGE_F32: + return "v_cmps_nge_f32"; + case Vopc::Op::V_CMPS_NLG_F32: + return "v_cmps_nlg_f32"; + case Vopc::Op::V_CMPS_NGT_F32: + return "v_cmps_ngt_f32"; + case Vopc::Op::V_CMPS_NLE_F32: + return "v_cmps_nle_f32"; + case Vopc::Op::V_CMPS_NEQ_F32: + return "v_cmps_neq_f32"; + case Vopc::Op::V_CMPS_NLT_F32: + return "v_cmps_nlt_f32"; + case Vopc::Op::V_CMPS_TRU_F32: + return "v_cmps_tru_f32"; + case Vopc::Op::V_CMPSX_F_F32: + return "v_cmpsx_f_f32"; + case Vopc::Op::V_CMPSX_LT_F32: + return "v_cmpsx_lt_f32"; + case Vopc::Op::V_CMPSX_EQ_F32: + return "v_cmpsx_eq_f32"; + case Vopc::Op::V_CMPSX_LE_F32: + return "v_cmpsx_le_f32"; + case Vopc::Op::V_CMPSX_GT_F32: + return "v_cmpsx_gt_f32"; + case Vopc::Op::V_CMPSX_LG_F32: + return "v_cmpsx_lg_f32"; + case Vopc::Op::V_CMPSX_GE_F32: + return "v_cmpsx_ge_f32"; + case Vopc::Op::V_CMPSX_O_F32: + return "v_cmpsx_o_f32"; + case Vopc::Op::V_CMPSX_U_F32: + return "v_cmpsx_u_f32"; + case Vopc::Op::V_CMPSX_NGE_F32: + return "v_cmpsx_nge_f32"; + case Vopc::Op::V_CMPSX_NLG_F32: + return "v_cmpsx_nlg_f32"; + case Vopc::Op::V_CMPSX_NGT_F32: + return "v_cmpsx_ngt_f32"; + case Vopc::Op::V_CMPSX_NLE_F32: + return "v_cmpsx_nle_f32"; + case Vopc::Op::V_CMPSX_NEQ_F32: + return "v_cmpsx_neq_f32"; + case Vopc::Op::V_CMPSX_NLT_F32: + return "v_cmpsx_nlt_f32"; + case Vopc::Op::V_CMPSX_TRU_F32: + return "v_cmpsx_tru_f32"; + case Vopc::Op::V_CMPS_F_F64: + return "v_cmps_f_f64"; + case Vopc::Op::V_CMPS_LT_F64: + return "v_cmps_lt_f64"; + case Vopc::Op::V_CMPS_EQ_F64: + return "v_cmps_eq_f64"; + case Vopc::Op::V_CMPS_LE_F64: + return "v_cmps_le_f64"; + case Vopc::Op::V_CMPS_GT_F64: + return "v_cmps_gt_f64"; + case Vopc::Op::V_CMPS_LG_F64: + return "v_cmps_lg_f64"; + case Vopc::Op::V_CMPS_GE_F64: + return "v_cmps_ge_f64"; + case Vopc::Op::V_CMPS_O_F64: + return "v_cmps_o_f64"; + case Vopc::Op::V_CMPS_U_F64: + return "v_cmps_u_f64"; + case Vopc::Op::V_CMPS_NGE_F64: + return "v_cmps_nge_f64"; + case Vopc::Op::V_CMPS_NLG_F64: + return "v_cmps_nlg_f64"; + case Vopc::Op::V_CMPS_NGT_F64: + return "v_cmps_ngt_f64"; + case Vopc::Op::V_CMPS_NLE_F64: + return "v_cmps_nle_f64"; + case Vopc::Op::V_CMPS_NEQ_F64: + return "v_cmps_neq_f64"; + case Vopc::Op::V_CMPS_NLT_F64: + return "v_cmps_nlt_f64"; + case Vopc::Op::V_CMPS_TRU_F64: + return "v_cmps_tru_f64"; + case Vopc::Op::V_CMPSX_F_F64: + return "v_cmpsx_f_f64"; + case Vopc::Op::V_CMPSX_LT_F64: + return "v_cmpsx_lt_f64"; + case Vopc::Op::V_CMPSX_EQ_F64: + return "v_cmpsx_eq_f64"; + case Vopc::Op::V_CMPSX_LE_F64: + return "v_cmpsx_le_f64"; + case Vopc::Op::V_CMPSX_GT_F64: + return "v_cmpsx_gt_f64"; + case Vopc::Op::V_CMPSX_LG_F64: + return "v_cmpsx_lg_f64"; + case Vopc::Op::V_CMPSX_GE_F64: + return "v_cmpsx_ge_f64"; + case Vopc::Op::V_CMPSX_O_F64: + return "v_cmpsx_o_f64"; + case Vopc::Op::V_CMPSX_U_F64: + return "v_cmpsx_u_f64"; + case Vopc::Op::V_CMPSX_NGE_F64: + return "v_cmpsx_nge_f64"; + case Vopc::Op::V_CMPSX_NLG_F64: + return "v_cmpsx_nlg_f64"; + case Vopc::Op::V_CMPSX_NGT_F64: + return "v_cmpsx_ngt_f64"; + case Vopc::Op::V_CMPSX_NLE_F64: + return "v_cmpsx_nle_f64"; + case Vopc::Op::V_CMPSX_NEQ_F64: + return "v_cmpsx_neq_f64"; + case Vopc::Op::V_CMPSX_NLT_F64: + return "v_cmpsx_nlt_f64"; + case Vopc::Op::V_CMPSX_TRU_F64: + return "v_cmpsx_tru_f64"; + case Vopc::Op::V_CMP_F_I32: + return "v_cmp_f_i32"; + case Vopc::Op::V_CMP_LT_I32: + return "v_cmp_lt_i32"; + case Vopc::Op::V_CMP_EQ_I32: + return "v_cmp_eq_i32"; + case Vopc::Op::V_CMP_LE_I32: + return "v_cmp_le_i32"; + case Vopc::Op::V_CMP_GT_I32: + return "v_cmp_gt_i32"; + case Vopc::Op::V_CMP_NE_I32: + return "v_cmp_ne_i32"; + case Vopc::Op::V_CMP_GE_I32: + return "v_cmp_ge_i32"; + case Vopc::Op::V_CMP_T_I32: + return "v_cmp_t_i32"; + case Vopc::Op::V_CMP_CLASS_F32: + return "v_cmp_class_f32"; + case Vopc::Op::V_CMP_LT_I16: + return "v_cmp_lt_i16"; + case Vopc::Op::V_CMP_EQ_I16: + return "v_cmp_eq_i16"; + case Vopc::Op::V_CMP_LE_I16: + return "v_cmp_le_i16"; + case Vopc::Op::V_CMP_GT_I16: + return "v_cmp_gt_i16"; + case Vopc::Op::V_CMP_NE_I16: + return "v_cmp_ne_i16"; + case Vopc::Op::V_CMP_GE_I16: + return "v_cmp_ge_i16"; + case Vopc::Op::V_CMP_CLASS_F16: + return "v_cmp_class_f16"; + case Vopc::Op::V_CMPX_F_I32: + return "v_cmpx_f_i32"; + case Vopc::Op::V_CMPX_LT_I32: + return "v_cmpx_lt_i32"; + case Vopc::Op::V_CMPX_EQ_I32: + return "v_cmpx_eq_i32"; + case Vopc::Op::V_CMPX_LE_I32: + return "v_cmpx_le_i32"; + case Vopc::Op::V_CMPX_GT_I32: + return "v_cmpx_gt_i32"; + case Vopc::Op::V_CMPX_NE_I32: + return "v_cmpx_ne_i32"; + case Vopc::Op::V_CMPX_GE_I32: + return "v_cmpx_ge_i32"; + case Vopc::Op::V_CMPX_T_I32: + return "v_cmpx_t_i32"; + case Vopc::Op::V_CMPX_CLASS_F32: + return "v_cmpx_class_f32"; + case Vopc::Op::V_CMPX_LT_I16: + return "v_cmpx_lt_i16"; + case Vopc::Op::V_CMPX_EQ_I16: + return "v_cmpx_eq_i16"; + case Vopc::Op::V_CMPX_LE_I16: + return "v_cmpx_le_i16"; + case Vopc::Op::V_CMPX_GT_I16: + return "v_cmpx_gt_i16"; + case Vopc::Op::V_CMPX_NE_I16: + return "v_cmpx_ne_i16"; + case Vopc::Op::V_CMPX_GE_I16: + return "v_cmpx_ge_i16"; + case Vopc::Op::V_CMPX_CLASS_F16: + return "v_cmpx_class_f16"; + case Vopc::Op::V_CMP_F_I64: + return "v_cmp_f_i64"; + case Vopc::Op::V_CMP_LT_I64: + return "v_cmp_lt_i64"; + case Vopc::Op::V_CMP_EQ_I64: + return "v_cmp_eq_i64"; + case Vopc::Op::V_CMP_LE_I64: + return "v_cmp_le_i64"; + case Vopc::Op::V_CMP_GT_I64: + return "v_cmp_gt_i64"; + case Vopc::Op::V_CMP_NE_I64: + return "v_cmp_ne_i64"; + case Vopc::Op::V_CMP_GE_I64: + return "v_cmp_ge_i64"; + case Vopc::Op::V_CMP_T_I64: + return "v_cmp_t_i64"; + case Vopc::Op::V_CMP_CLASS_F64: + return "v_cmp_class_f64"; + case Vopc::Op::V_CMP_LT_U16: + return "v_cmp_lt_u16"; + case Vopc::Op::V_CMP_EQ_U16: + return "v_cmp_eq_u16"; + case Vopc::Op::V_CMP_LE_U16: + return "v_cmp_le_u16"; + case Vopc::Op::V_CMP_GT_U16: + return "v_cmp_gt_u16"; + case Vopc::Op::V_CMP_NE_U16: + return "v_cmp_ne_u16"; + case Vopc::Op::V_CMP_GE_U16: + return "v_cmp_ge_u16"; + case Vopc::Op::V_CMPX_F_I64: + return "v_cmpx_f_i64"; + case Vopc::Op::V_CMPX_LT_I64: + return "v_cmpx_lt_i64"; + case Vopc::Op::V_CMPX_EQ_I64: + return "v_cmpx_eq_i64"; + case Vopc::Op::V_CMPX_LE_I64: + return "v_cmpx_le_i64"; + case Vopc::Op::V_CMPX_GT_I64: + return "v_cmpx_gt_i64"; + case Vopc::Op::V_CMPX_NE_I64: + return "v_cmpx_ne_i64"; + case Vopc::Op::V_CMPX_GE_I64: + return "v_cmpx_ge_i64"; + case Vopc::Op::V_CMPX_T_I64: + return "v_cmpx_t_i64"; + case Vopc::Op::V_CMPX_CLASS_F64: + return "v_cmpx_class_f64"; + case Vopc::Op::V_CMPX_LT_U16: + return "v_cmpx_lt_u16"; + case Vopc::Op::V_CMPX_EQ_U16: + return "v_cmpx_eq_u16"; + case Vopc::Op::V_CMPX_LE_U16: + return "v_cmpx_le_u16"; + case Vopc::Op::V_CMPX_GT_U16: + return "v_cmpx_gt_u16"; + case Vopc::Op::V_CMPX_NE_U16: + return "v_cmpx_ne_u16"; + case Vopc::Op::V_CMPX_GE_U16: + return "v_cmpx_ge_u16"; + case Vopc::Op::V_CMP_F_U32: + return "v_cmp_f_u32"; + case Vopc::Op::V_CMP_LT_U32: + return "v_cmp_lt_u32"; + case Vopc::Op::V_CMP_EQ_U32: + return "v_cmp_eq_u32"; + case Vopc::Op::V_CMP_LE_U32: + return "v_cmp_le_u32"; + case Vopc::Op::V_CMP_GT_U32: + return "v_cmp_gt_u32"; + case Vopc::Op::V_CMP_NE_U32: + return "v_cmp_ne_u32"; + case Vopc::Op::V_CMP_GE_U32: + return "v_cmp_ge_u32"; + case Vopc::Op::V_CMP_T_U32: + return "v_cmp_t_u32"; + case Vopc::Op::V_CMP_F_F16: + return "v_cmp_f_f16"; + case Vopc::Op::V_CMP_LT_F16: + return "v_cmp_lt_f16"; + case Vopc::Op::V_CMP_EQ_F16: + return "v_cmp_eq_f16"; + case Vopc::Op::V_CMP_LE_F16: + return "v_cmp_le_f16"; + case Vopc::Op::V_CMP_GT_F16: + return "v_cmp_gt_f16"; + case Vopc::Op::V_CMP_LG_F16: + return "v_cmp_lg_f16"; + case Vopc::Op::V_CMP_GE_F16: + return "v_cmp_ge_f16"; + case Vopc::Op::V_CMP_O_F16: + return "v_cmp_o_f16"; + case Vopc::Op::V_CMPX_F_U32: + return "v_cmpx_f_u32"; + case Vopc::Op::V_CMPX_LT_U32: + return "v_cmpx_lt_u32"; + case Vopc::Op::V_CMPX_EQ_U32: + return "v_cmpx_eq_u32"; + case Vopc::Op::V_CMPX_LE_U32: + return "v_cmpx_le_u32"; + case Vopc::Op::V_CMPX_GT_U32: + return "v_cmpx_gt_u32"; + case Vopc::Op::V_CMPX_NE_U32: + return "v_cmpx_ne_u32"; + case Vopc::Op::V_CMPX_GE_U32: + return "v_cmpx_ge_u32"; + case Vopc::Op::V_CMPX_T_U32: + return "v_cmpx_t_u32"; + case Vopc::Op::V_CMPX_F_F16: + return "v_cmpx_f_f16"; + case Vopc::Op::V_CMPX_LT_F16: + return "v_cmpx_lt_f16"; + case Vopc::Op::V_CMPX_EQ_F16: + return "v_cmpx_eq_f16"; + case Vopc::Op::V_CMPX_LE_F16: + return "v_cmpx_le_f16"; + case Vopc::Op::V_CMPX_GT_F16: + return "v_cmpx_gt_f16"; + case Vopc::Op::V_CMPX_LG_F16: + return "v_cmpx_lg_f16"; + case Vopc::Op::V_CMPX_GE_F16: + return "v_cmpx_ge_f16"; + case Vopc::Op::V_CMPX_O_F16: + return "v_cmpx_o_f16"; + case Vopc::Op::V_CMP_F_U64: + return "v_cmp_f_u64"; + case Vopc::Op::V_CMP_LT_U64: + return "v_cmp_lt_u64"; + case Vopc::Op::V_CMP_EQ_U64: + return "v_cmp_eq_u64"; + case Vopc::Op::V_CMP_LE_U64: + return "v_cmp_le_u64"; + case Vopc::Op::V_CMP_GT_U64: + return "v_cmp_gt_u64"; + case Vopc::Op::V_CMP_NE_U64: + return "v_cmp_ne_u64"; + case Vopc::Op::V_CMP_GE_U64: + return "v_cmp_ge_u64"; + case Vopc::Op::V_CMP_T_U64: + return "v_cmp_t_u64"; + case Vopc::Op::V_CMP_U_F16: + return "v_cmp_u_f16"; + case Vopc::Op::V_CMP_NGE_F16: + return "v_cmp_nge_f16"; + case Vopc::Op::V_CMP_NLG_F16: + return "v_cmp_nlg_f16"; + case Vopc::Op::V_CMP_NGT_F16: + return "v_cmp_ngt_f16"; + case Vopc::Op::V_CMP_NLE_F16: + return "v_cmp_nle_f16"; + case Vopc::Op::V_CMP_NEQ_F16: + return "v_cmp_neq_f16"; + case Vopc::Op::V_CMP_NLT_F16: + return "v_cmp_nlt_f16"; + case Vopc::Op::V_CMP_TRU_F16: + return "v_cmp_tru_f16"; + case Vopc::Op::V_CMPX_F_U64: + return "v_cmpx_f_u64"; + case Vopc::Op::V_CMPX_LT_U64: + return "v_cmpx_lt_u64"; + case Vopc::Op::V_CMPX_EQ_U64: + return "v_cmpx_eq_u64"; + case Vopc::Op::V_CMPX_LE_U64: + return "v_cmpx_le_u64"; + case Vopc::Op::V_CMPX_GT_U64: + return "v_cmpx_gt_u64"; + case Vopc::Op::V_CMPX_NE_U64: + return "v_cmpx_ne_u64"; + case Vopc::Op::V_CMPX_GE_U64: + return "v_cmpx_ge_u64"; + case Vopc::Op::V_CMPX_T_U64: + return "v_cmpx_t_u64"; + case Vopc::Op::V_CMPX_U_F16: + return "v_cmpx_u_f16"; + case Vopc::Op::V_CMPX_NGE_F16: + return "v_cmpx_nge_f16"; + case Vopc::Op::V_CMPX_NLG_F16: + return "v_cmpx_nlg_f16"; + case Vopc::Op::V_CMPX_NGT_F16: + return "v_cmpx_ngt_f16"; + case Vopc::Op::V_CMPX_NLE_F16: + return "v_cmpx_nle_f16"; + case Vopc::Op::V_CMPX_NEQ_F16: + return "v_cmpx_neq_f16"; + case Vopc::Op::V_CMPX_NLT_F16: + return "v_cmpx_nlt_f16"; + case Vopc::Op::V_CMPX_TRU_F16: + return "v_cmpx_tru_f16"; + + default: + return nullptr; + } +} + +const char *amdgpu::shader::vop3OpcodeToString(Vop3::Op op) { + switch (op) { + case Vop3::Op::V3_CMP_F_F32: + return "v3_cmp_f_f32"; + case Vop3::Op::V3_CMP_LT_F32: + return "v3_cmp_lt_f32"; + case Vop3::Op::V3_CMP_EQ_F32: + return "v3_cmp_eq_f32"; + case Vop3::Op::V3_CMP_LE_F32: + return "v3_cmp_le_f32"; + case Vop3::Op::V3_CMP_GT_F32: + return "v3_cmp_gt_f32"; + case Vop3::Op::V3_CMP_LG_F32: + return "v3_cmp_lg_f32"; + case Vop3::Op::V3_CMP_GE_F32: + return "v3_cmp_ge_f32"; + case Vop3::Op::V3_CMP_O_F32: + return "v3_cmp_o_f32"; + case Vop3::Op::V3_CMP_U_F32: + return "v3_cmp_u_f32"; + case Vop3::Op::V3_CMP_NGE_F32: + return "v3_cmp_nge_f32"; + case Vop3::Op::V3_CMP_NLG_F32: + return "v3_cmp_nlg_f32"; + case Vop3::Op::V3_CMP_NGT_F32: + return "v3_cmp_ngt_f32"; + case Vop3::Op::V3_CMP_NLE_F32: + return "v3_cmp_nle_f32"; + case Vop3::Op::V3_CMP_NEQ_F32: + return "v3_cmp_neq_f32"; + case Vop3::Op::V3_CMP_NLT_F32: + return "v3_cmp_nlt_f32"; + case Vop3::Op::V3_CMP_TRU_F32: + return "v3_cmp_tru_f32"; + case Vop3::Op::V3_CMPX_F_F32: + return "v3_cmpx_f_f32"; + case Vop3::Op::V3_CMPX_LT_F32: + return "v3_cmpx_lt_f32"; + case Vop3::Op::V3_CMPX_EQ_F32: + return "v3_cmpx_eq_f32"; + case Vop3::Op::V3_CMPX_LE_F32: + return "v3_cmpx_le_f32"; + case Vop3::Op::V3_CMPX_GT_F32: + return "v3_cmpx_gt_f32"; + case Vop3::Op::V3_CMPX_LG_F32: + return "v3_cmpx_lg_f32"; + case Vop3::Op::V3_CMPX_GE_F32: + return "v3_cmpx_ge_f32"; + case Vop3::Op::V3_CMPX_O_F32: + return "v3_cmpx_o_f32"; + case Vop3::Op::V3_CMPX_U_F32: + return "v3_cmpx_u_f32"; + case Vop3::Op::V3_CMPX_NGE_F32: + return "v3_cmpx_nge_f32"; + case Vop3::Op::V3_CMPX_NLG_F32: + return "v3_cmpx_nlg_f32"; + case Vop3::Op::V3_CMPX_NGT_F32: + return "v3_cmpx_ngt_f32"; + case Vop3::Op::V3_CMPX_NLE_F32: + return "v3_cmpx_nle_f32"; + case Vop3::Op::V3_CMPX_NEQ_F32: + return "v3_cmpx_neq_f32"; + case Vop3::Op::V3_CMPX_NLT_F32: + return "v3_cmpx_nlt_f32"; + case Vop3::Op::V3_CMPX_TRU_F32: + return "v3_cmpx_tru_f32"; + case Vop3::Op::V3_CMP_F_F64: + return "v3_cmp_f_f64"; + case Vop3::Op::V3_CMP_LT_F64: + return "v3_cmp_lt_f64"; + case Vop3::Op::V3_CMP_EQ_F64: + return "v3_cmp_eq_f64"; + case Vop3::Op::V3_CMP_LE_F64: + return "v3_cmp_le_f64"; + case Vop3::Op::V3_CMP_GT_F64: + return "v3_cmp_gt_f64"; + case Vop3::Op::V3_CMP_LG_F64: + return "v3_cmp_lg_f64"; + case Vop3::Op::V3_CMP_GE_F64: + return "v3_cmp_ge_f64"; + case Vop3::Op::V3_CMP_O_F64: + return "v3_cmp_o_f64"; + case Vop3::Op::V3_CMP_U_F64: + return "v3_cmp_u_f64"; + case Vop3::Op::V3_CMP_NGE_F64: + return "v3_cmp_nge_f64"; + case Vop3::Op::V3_CMP_NLG_F64: + return "v3_cmp_nlg_f64"; + case Vop3::Op::V3_CMP_NGT_F64: + return "v3_cmp_ngt_f64"; + case Vop3::Op::V3_CMP_NLE_F64: + return "v3_cmp_nle_f64"; + case Vop3::Op::V3_CMP_NEQ_F64: + return "v3_cmp_neq_f64"; + case Vop3::Op::V3_CMP_NLT_F64: + return "v3_cmp_nlt_f64"; + case Vop3::Op::V3_CMP_TRU_F64: + return "v3_cmp_tru_f64"; + case Vop3::Op::V3_CMPX_F_F64: + return "v3_cmpx_f_f64"; + case Vop3::Op::V3_CMPX_LT_F64: + return "v3_cmpx_lt_f64"; + case Vop3::Op::V3_CMPX_EQ_F64: + return "v3_cmpx_eq_f64"; + case Vop3::Op::V3_CMPX_LE_F64: + return "v3_cmpx_le_f64"; + case Vop3::Op::V3_CMPX_GT_F64: + return "v3_cmpx_gt_f64"; + case Vop3::Op::V3_CMPX_LG_F64: + return "v3_cmpx_lg_f64"; + case Vop3::Op::V3_CMPX_GE_F64: + return "v3_cmpx_ge_f64"; + case Vop3::Op::V3_CMPX_O_F64: + return "v3_cmpx_o_f64"; + case Vop3::Op::V3_CMPX_U_F64: + return "v3_cmpx_u_f64"; + case Vop3::Op::V3_CMPX_NGE_F64: + return "v3_cmpx_nge_f64"; + case Vop3::Op::V3_CMPX_NLG_F64: + return "v3_cmpx_nlg_f64"; + case Vop3::Op::V3_CMPX_NGT_F64: + return "v3_cmpx_ngt_f64"; + case Vop3::Op::V3_CMPX_NLE_F64: + return "v3_cmpx_nle_f64"; + case Vop3::Op::V3_CMPX_NEQ_F64: + return "v3_cmpx_neq_f64"; + case Vop3::Op::V3_CMPX_NLT_F64: + return "v3_cmpx_nlt_f64"; + case Vop3::Op::V3_CMPX_TRU_F64: + return "v3_cmpx_tru_f64"; + case Vop3::Op::V3_CMPS_F_F32: + return "v3_cmps_f_f32"; + case Vop3::Op::V3_CMPS_LT_F32: + return "v3_cmps_lt_f32"; + case Vop3::Op::V3_CMPS_EQ_F32: + return "v3_cmps_eq_f32"; + case Vop3::Op::V3_CMPS_LE_F32: + return "v3_cmps_le_f32"; + case Vop3::Op::V3_CMPS_GT_F32: + return "v3_cmps_gt_f32"; + case Vop3::Op::V3_CMPS_LG_F32: + return "v3_cmps_lg_f32"; + case Vop3::Op::V3_CMPS_GE_F32: + return "v3_cmps_ge_f32"; + case Vop3::Op::V3_CMPS_O_F32: + return "v3_cmps_o_f32"; + case Vop3::Op::V3_CMPS_U_F32: + return "v3_cmps_u_f32"; + case Vop3::Op::V3_CMPS_NGE_F32: + return "v3_cmps_nge_f32"; + case Vop3::Op::V3_CMPS_NLG_F32: + return "v3_cmps_nlg_f32"; + case Vop3::Op::V3_CMPS_NGT_F32: + return "v3_cmps_ngt_f32"; + case Vop3::Op::V3_CMPS_NLE_F32: + return "v3_cmps_nle_f32"; + case Vop3::Op::V3_CMPS_NEQ_F32: + return "v3_cmps_neq_f32"; + case Vop3::Op::V3_CMPS_NLT_F32: + return "v3_cmps_nlt_f32"; + case Vop3::Op::V3_CMPS_TRU_F32: + return "v3_cmps_tru_f32"; + case Vop3::Op::V3_CMPSX_F_F32: + return "v3_cmpsx_f_f32"; + case Vop3::Op::V3_CMPSX_LT_F32: + return "v3_cmpsx_lt_f32"; + case Vop3::Op::V3_CMPSX_EQ_F32: + return "v3_cmpsx_eq_f32"; + case Vop3::Op::V3_CMPSX_LE_F32: + return "v3_cmpsx_le_f32"; + case Vop3::Op::V3_CMPSX_GT_F32: + return "v3_cmpsx_gt_f32"; + case Vop3::Op::V3_CMPSX_LG_F32: + return "v3_cmpsx_lg_f32"; + case Vop3::Op::V3_CMPSX_GE_F32: + return "v3_cmpsx_ge_f32"; + case Vop3::Op::V3_CMPSX_O_F32: + return "v3_cmpsx_o_f32"; + case Vop3::Op::V3_CMPSX_U_F32: + return "v3_cmpsx_u_f32"; + case Vop3::Op::V3_CMPSX_NGE_F32: + return "v3_cmpsx_nge_f32"; + case Vop3::Op::V3_CMPSX_NLG_F32: + return "v3_cmpsx_nlg_f32"; + case Vop3::Op::V3_CMPSX_NGT_F32: + return "v3_cmpsx_ngt_f32"; + case Vop3::Op::V3_CMPSX_NLE_F32: + return "v3_cmpsx_nle_f32"; + case Vop3::Op::V3_CMPSX_NEQ_F32: + return "v3_cmpsx_neq_f32"; + case Vop3::Op::V3_CMPSX_NLT_F32: + return "v3_cmpsx_nlt_f32"; + case Vop3::Op::V3_CMPSX_TRU_F32: + return "v3_cmpsx_tru_f32"; + case Vop3::Op::V3_CMPS_F_F64: + return "v3_cmps_f_f64"; + case Vop3::Op::V3_CMPS_LT_F64: + return "v3_cmps_lt_f64"; + case Vop3::Op::V3_CMPS_EQ_F64: + return "v3_cmps_eq_f64"; + case Vop3::Op::V3_CMPS_LE_F64: + return "v3_cmps_le_f64"; + case Vop3::Op::V3_CMPS_GT_F64: + return "v3_cmps_gt_f64"; + case Vop3::Op::V3_CMPS_LG_F64: + return "v3_cmps_lg_f64"; + case Vop3::Op::V3_CMPS_GE_F64: + return "v3_cmps_ge_f64"; + case Vop3::Op::V3_CMPS_O_F64: + return "v3_cmps_o_f64"; + case Vop3::Op::V3_CMPS_U_F64: + return "v3_cmps_u_f64"; + case Vop3::Op::V3_CMPS_NGE_F64: + return "v3_cmps_nge_f64"; + case Vop3::Op::V3_CMPS_NLG_F64: + return "v3_cmps_nlg_f64"; + case Vop3::Op::V3_CMPS_NGT_F64: + return "v3_cmps_ngt_f64"; + case Vop3::Op::V3_CMPS_NLE_F64: + return "v3_cmps_nle_f64"; + case Vop3::Op::V3_CMPS_NEQ_F64: + return "v3_cmps_neq_f64"; + case Vop3::Op::V3_CMPS_NLT_F64: + return "v3_cmps_nlt_f64"; + case Vop3::Op::V3_CMPS_TRU_F64: + return "v3_cmps_tru_f64"; + case Vop3::Op::V3_CMPSX_F_F64: + return "v3_cmpsx_f_f64"; + case Vop3::Op::V3_CMPSX_LT_F64: + return "v3_cmpsx_lt_f64"; + case Vop3::Op::V3_CMPSX_EQ_F64: + return "v3_cmpsx_eq_f64"; + case Vop3::Op::V3_CMPSX_LE_F64: + return "v3_cmpsx_le_f64"; + case Vop3::Op::V3_CMPSX_GT_F64: + return "v3_cmpsx_gt_f64"; + case Vop3::Op::V3_CMPSX_LG_F64: + return "v3_cmpsx_lg_f64"; + case Vop3::Op::V3_CMPSX_GE_F64: + return "v3_cmpsx_ge_f64"; + case Vop3::Op::V3_CMPSX_O_F64: + return "v3_cmpsx_o_f64"; + case Vop3::Op::V3_CMPSX_U_F64: + return "v3_cmpsx_u_f64"; + case Vop3::Op::V3_CMPSX_NGE_F64: + return "v3_cmpsx_nge_f64"; + case Vop3::Op::V3_CMPSX_NLG_F64: + return "v3_cmpsx_nlg_f64"; + case Vop3::Op::V3_CMPSX_NGT_F64: + return "v3_cmpsx_ngt_f64"; + case Vop3::Op::V3_CMPSX_NLE_F64: + return "v3_cmpsx_nle_f64"; + case Vop3::Op::V3_CMPSX_NEQ_F64: + return "v3_cmpsx_neq_f64"; + case Vop3::Op::V3_CMPSX_NLT_F64: + return "v3_cmpsx_nlt_f64"; + case Vop3::Op::V3_CMPSX_TRU_F64: + return "v3_cmpsx_tru_f64"; + case Vop3::Op::V3_CMP_F_I32: + return "v3_cmp_f_i32"; + case Vop3::Op::V3_CMP_LT_I32: + return "v3_cmp_lt_i32"; + case Vop3::Op::V3_CMP_EQ_I32: + return "v3_cmp_eq_i32"; + case Vop3::Op::V3_CMP_LE_I32: + return "v3_cmp_le_i32"; + case Vop3::Op::V3_CMP_GT_I32: + return "v3_cmp_gt_i32"; + case Vop3::Op::V3_CMP_NE_I32: + return "v3_cmp_ne_i32"; + case Vop3::Op::V3_CMP_GE_I32: + return "v3_cmp_ge_i32"; + case Vop3::Op::V3_CMP_T_I32: + return "v3_cmp_t_i32"; + case Vop3::Op::V3_CMP_CLASS_F32: + return "v3_cmp_class_f32"; + case Vop3::Op::V3_CMP_LT_I16: + return "v3_cmp_lt_i16"; + case Vop3::Op::V3_CMP_EQ_I16: + return "v3_cmp_eq_i16"; + case Vop3::Op::V3_CMP_LE_I16: + return "v3_cmp_le_i16"; + case Vop3::Op::V3_CMP_GT_I16: + return "v3_cmp_gt_i16"; + case Vop3::Op::V3_CMP_NE_I16: + return "v3_cmp_ne_i16"; + case Vop3::Op::V3_CMP_GE_I16: + return "v3_cmp_ge_i16"; + case Vop3::Op::V3_CMP_CLASS_F16: + return "v3_cmp_class_f16"; + case Vop3::Op::V3_CMPX_F_I32: + return "v3_cmpx_f_i32"; + case Vop3::Op::V3_CMPX_LT_I32: + return "v3_cmpx_lt_i32"; + case Vop3::Op::V3_CMPX_EQ_I32: + return "v3_cmpx_eq_i32"; + case Vop3::Op::V3_CMPX_LE_I32: + return "v3_cmpx_le_i32"; + case Vop3::Op::V3_CMPX_GT_I32: + return "v3_cmpx_gt_i32"; + case Vop3::Op::V3_CMPX_NE_I32: + return "v3_cmpx_ne_i32"; + case Vop3::Op::V3_CMPX_GE_I32: + return "v3_cmpx_ge_i32"; + case Vop3::Op::V3_CMPX_T_I32: + return "v3_cmpx_t_i32"; + case Vop3::Op::V3_CMPX_CLASS_F32: + return "v3_cmpx_class_f32"; + case Vop3::Op::V3_CMPX_LT_I16: + return "v3_cmpx_lt_i16"; + case Vop3::Op::V3_CMPX_EQ_I16: + return "v3_cmpx_eq_i16"; + case Vop3::Op::V3_CMPX_LE_I16: + return "v3_cmpx_le_i16"; + case Vop3::Op::V3_CMPX_GT_I16: + return "v3_cmpx_gt_i16"; + case Vop3::Op::V3_CMPX_NE_I16: + return "v3_cmpx_ne_i16"; + case Vop3::Op::V3_CMPX_GE_I16: + return "v3_cmpx_ge_i16"; + case Vop3::Op::V3_CMPX_CLASS_F16: + return "v3_cmpx_class_f16"; + case Vop3::Op::V3_CMP_F_I64: + return "v3_cmp_f_i64"; + case Vop3::Op::V3_CMP_LT_I64: + return "v3_cmp_lt_i64"; + case Vop3::Op::V3_CMP_EQ_I64: + return "v3_cmp_eq_i64"; + case Vop3::Op::V3_CMP_LE_I64: + return "v3_cmp_le_i64"; + case Vop3::Op::V3_CMP_GT_I64: + return "v3_cmp_gt_i64"; + case Vop3::Op::V3_CMP_NE_I64: + return "v3_cmp_ne_i64"; + case Vop3::Op::V3_CMP_GE_I64: + return "v3_cmp_ge_i64"; + case Vop3::Op::V3_CMP_T_I64: + return "v3_cmp_t_i64"; + case Vop3::Op::V3_CMP_CLASS_F64: + return "v3_cmp_class_f64"; + case Vop3::Op::V3_CMP_LT_U16: + return "v3_cmp_lt_u16"; + case Vop3::Op::V3_CMP_EQ_U16: + return "v3_cmp_eq_u16"; + case Vop3::Op::V3_CMP_LE_U16: + return "v3_cmp_le_u16"; + case Vop3::Op::V3_CMP_GT_U16: + return "v3_cmp_gt_u16"; + case Vop3::Op::V3_CMP_NE_U16: + return "v3_cmp_ne_u16"; + case Vop3::Op::V3_CMP_GE_U16: + return "v3_cmp_ge_u16"; + case Vop3::Op::V3_CMPX_F_I64: + return "v3_cmpx_f_i64"; + case Vop3::Op::V3_CMPX_LT_I64: + return "v3_cmpx_lt_i64"; + case Vop3::Op::V3_CMPX_EQ_I64: + return "v3_cmpx_eq_i64"; + case Vop3::Op::V3_CMPX_LE_I64: + return "v3_cmpx_le_i64"; + case Vop3::Op::V3_CMPX_GT_I64: + return "v3_cmpx_gt_i64"; + case Vop3::Op::V3_CMPX_NE_I64: + return "v3_cmpx_ne_i64"; + case Vop3::Op::V3_CMPX_GE_I64: + return "v3_cmpx_ge_i64"; + case Vop3::Op::V3_CMPX_T_I64: + return "v3_cmpx_t_i64"; + case Vop3::Op::V3_CMPX_CLASS_F64: + return "v3_cmpx_class_f64"; + case Vop3::Op::V3_CMPX_LT_U16: + return "v3_cmpx_lt_u16"; + case Vop3::Op::V3_CMPX_EQ_U16: + return "v3_cmpx_eq_u16"; + case Vop3::Op::V3_CMPX_LE_U16: + return "v3_cmpx_le_u16"; + case Vop3::Op::V3_CMPX_GT_U16: + return "v3_cmpx_gt_u16"; + case Vop3::Op::V3_CMPX_NE_U16: + return "v3_cmpx_ne_u16"; + case Vop3::Op::V3_CMPX_GE_U16: + return "v3_cmpx_ge_u16"; + case Vop3::Op::V3_CMP_F_U32: + return "v3_cmp_f_u32"; + case Vop3::Op::V3_CMP_LT_U32: + return "v3_cmp_lt_u32"; + case Vop3::Op::V3_CMP_EQ_U32: + return "v3_cmp_eq_u32"; + case Vop3::Op::V3_CMP_LE_U32: + return "v3_cmp_le_u32"; + case Vop3::Op::V3_CMP_GT_U32: + return "v3_cmp_gt_u32"; + case Vop3::Op::V3_CMP_NE_U32: + return "v3_cmp_ne_u32"; + case Vop3::Op::V3_CMP_GE_U32: + return "v3_cmp_ge_u32"; + case Vop3::Op::V3_CMP_T_U32: + return "v3_cmp_t_u32"; + case Vop3::Op::V3_CMP_F_F16: + return "v3_cmp_f_f16"; + case Vop3::Op::V3_CMP_LT_F16: + return "v3_cmp_lt_f16"; + case Vop3::Op::V3_CMP_EQ_F16: + return "v3_cmp_eq_f16"; + case Vop3::Op::V3_CMP_LE_F16: + return "v3_cmp_le_f16"; + case Vop3::Op::V3_CMP_GT_F16: + return "v3_cmp_gt_f16"; + case Vop3::Op::V3_CMP_LG_F16: + return "v3_cmp_lg_f16"; + case Vop3::Op::V3_CMP_GE_F16: + return "v3_cmp_ge_f16"; + case Vop3::Op::V3_CMP_O_F16: + return "v3_cmp_o_f16"; + case Vop3::Op::V3_CMPX_F_U32: + return "v3_cmpx_f_u32"; + case Vop3::Op::V3_CMPX_LT_U32: + return "v3_cmpx_lt_u32"; + case Vop3::Op::V3_CMPX_EQ_U32: + return "v3_cmpx_eq_u32"; + case Vop3::Op::V3_CMPX_LE_U32: + return "v3_cmpx_le_u32"; + case Vop3::Op::V3_CMPX_GT_U32: + return "v3_cmpx_gt_u32"; + case Vop3::Op::V3_CMPX_NE_U32: + return "v3_cmpx_ne_u32"; + case Vop3::Op::V3_CMPX_GE_U32: + return "v3_cmpx_ge_u32"; + case Vop3::Op::V3_CMPX_T_U32: + return "v3_cmpx_t_u32"; + case Vop3::Op::V3_CMPX_F_F16: + return "v3_cmpx_f_f16"; + case Vop3::Op::V3_CMPX_LT_F16: + return "v3_cmpx_lt_f16"; + case Vop3::Op::V3_CMPX_EQ_F16: + return "v3_cmpx_eq_f16"; + case Vop3::Op::V3_CMPX_LE_F16: + return "v3_cmpx_le_f16"; + case Vop3::Op::V3_CMPX_GT_F16: + return "v3_cmpx_gt_f16"; + case Vop3::Op::V3_CMPX_LG_F16: + return "v3_cmpx_lg_f16"; + case Vop3::Op::V3_CMPX_GE_F16: + return "v3_cmpx_ge_f16"; + case Vop3::Op::V3_CMPX_O_F16: + return "v3_cmpx_o_f16"; + case Vop3::Op::V3_CMP_F_U64: + return "v3_cmp_f_u64"; + case Vop3::Op::V3_CMP_LT_U64: + return "v3_cmp_lt_u64"; + case Vop3::Op::V3_CMP_EQ_U64: + return "v3_cmp_eq_u64"; + case Vop3::Op::V3_CMP_LE_U64: + return "v3_cmp_le_u64"; + case Vop3::Op::V3_CMP_GT_U64: + return "v3_cmp_gt_u64"; + case Vop3::Op::V3_CMP_NE_U64: + return "v3_cmp_ne_u64"; + case Vop3::Op::V3_CMP_GE_U64: + return "v3_cmp_ge_u64"; + case Vop3::Op::V3_CMP_T_U64: + return "v3_cmp_t_u64"; + case Vop3::Op::V3_CMP_U_F16: + return "v3_cmp_u_f16"; + case Vop3::Op::V3_CMP_NGE_F16: + return "v3_cmp_nge_f16"; + case Vop3::Op::V3_CMP_NLG_F16: + return "v3_cmp_nlg_f16"; + case Vop3::Op::V3_CMP_NGT_F16: + return "v3_cmp_ngt_f16"; + case Vop3::Op::V3_CMP_NLE_F16: + return "v3_cmp_nle_f16"; + case Vop3::Op::V3_CMP_NEQ_F16: + return "v3_cmp_neq_f16"; + case Vop3::Op::V3_CMP_NLT_F16: + return "v3_cmp_nlt_f16"; + case Vop3::Op::V3_CMP_TRU_F16: + return "v3_cmp_tru_f16"; + case Vop3::Op::V3_CMPX_F_U64: + return "v3_cmpx_f_u64"; + case Vop3::Op::V3_CMPX_LT_U64: + return "v3_cmpx_lt_u64"; + case Vop3::Op::V3_CMPX_EQ_U64: + return "v3_cmpx_eq_u64"; + case Vop3::Op::V3_CMPX_LE_U64: + return "v3_cmpx_le_u64"; + case Vop3::Op::V3_CMPX_GT_U64: + return "v3_cmpx_gt_u64"; + case Vop3::Op::V3_CMPX_NE_U64: + return "v3_cmpx_ne_u64"; + case Vop3::Op::V3_CMPX_GE_U64: + return "v3_cmpx_ge_u64"; + case Vop3::Op::V3_CMPX_T_U64: + return "v3_cmpx_t_u64"; + case Vop3::Op::V3_CNDMASK_B32: + return "v3_cndmask_b32"; + case Vop3::Op::V3_READLANE_B32: + return "v3_readlane_b32"; + case Vop3::Op::V3_WRITELANE_B32: + return "v3_writelane_b32"; + case Vop3::Op::V3_ADD_F32: + return "v3_add_f32"; + case Vop3::Op::V3_SUB_F32: + return "v3_sub_f32"; + case Vop3::Op::V3_SUBREV_F32: + return "v3_subrev_f32"; + case Vop3::Op::V3_MAC_LEGACY_F32: + return "v3_mac_legacy_f32"; + case Vop3::Op::V3_MUL_LEGACY_F32: + return "v3_mul_legacy_f32"; + case Vop3::Op::V3_MUL_F32: + return "v3_mul_f32"; + case Vop3::Op::V3_MUL_I32_I24: + return "v3_mul_i32_i24"; + case Vop3::Op::V3_MUL_HI_I32_I24: + return "v3_mul_hi_i32_i24"; + case Vop3::Op::V3_MUL_U32_U24: + return "v3_mul_u32_u24"; + case Vop3::Op::V3_MUL_HI_U32_U24: + return "v3_mul_hi_u32_u24"; + case Vop3::Op::V3_MIN_LEGACY_F32: + return "v3_min_legacy_f32"; + case Vop3::Op::V3_MAX_LEGACY_F32: + return "v3_max_legacy_f32"; + case Vop3::Op::V3_MIN_F32: + return "v3_min_f32"; + case Vop3::Op::V3_MAX_F32: + return "v3_max_f32"; + case Vop3::Op::V3_MIN_I32: + return "v3_min_i32"; + case Vop3::Op::V3_MAX_I32: + return "v3_max_i32"; + case Vop3::Op::V3_MIN_U32: + return "v3_min_u32"; + case Vop3::Op::V3_MAX_U32: + return "v3_max_u32"; + case Vop3::Op::V3_LSHR_B32: + return "v3_lshr_b32"; + case Vop3::Op::V3_LSHRREV_B32: + return "v3_lshrrev_b32"; + case Vop3::Op::V3_ASHR_I32: + return "v3_ashr_i32"; + case Vop3::Op::V3_ASHRREV_I32: + return "v3_ashrrev_i32"; + case Vop3::Op::V3_LSHL_B32: + return "v3_lshl_b32"; + case Vop3::Op::V3_LSHLREV_B32: + return "v3_lshlrev_b32"; + case Vop3::Op::V3_AND_B32: + return "v3_and_b32"; + case Vop3::Op::V3_OR_B32: + return "v3_or_b32"; + case Vop3::Op::V3_XOR_B32: + return "v3_xor_b32"; + case Vop3::Op::V3_BFM_B32: + return "v3_bfm_b32"; + case Vop3::Op::V3_MAC_F32: + return "v3_mac_f32"; + case Vop3::Op::V3_MADMK_F32: + return "v3_madmk_f32"; + case Vop3::Op::V3_MADAK_F32: + return "v3_madak_f32"; + case Vop3::Op::V3_BCNT_U32_B32: + return "v3_bcnt_u32_b32"; + case Vop3::Op::V3_MBCNT_LO_U32_B32: + return "v3_mbcnt_lo_u32_b32"; + case Vop3::Op::V3_MBCNT_HI_U32_B32: + return "v3_mbcnt_hi_u32_b32"; + case Vop3::Op::V3_ADD_I32: + return "v3_add_i32"; + case Vop3::Op::V3_SUB_I32: + return "v3_sub_i32"; + case Vop3::Op::V3_SUBREV_I32: + return "v3_subrev_i32"; + case Vop3::Op::V3_ADDC_U32: + return "v3_addc_u32"; + case Vop3::Op::V3_SUBB_U32: + return "v3_subb_u32"; + case Vop3::Op::V3_SUBBREV_U32: + return "v3_subbrev_u32"; + case Vop3::Op::V3_LDEXP_F32: + return "v3_ldexp_f32"; + case Vop3::Op::V3_CVT_PKACCUM_U8_F32: + return "v3_cvt_pkaccum_u8_f32"; + case Vop3::Op::V3_CVT_PKNORM_I16_F32: + return "v3_cvt_pknorm_i16_f32"; + case Vop3::Op::V3_CVT_PKNORM_U16_F32: + return "v3_cvt_pknorm_u16_f32"; + case Vop3::Op::V3_CVT_PKRTZ_F16_F32: + return "v3_cvt_pkrtz_f16_f32"; + case Vop3::Op::V3_CVT_PK_U16_U32: + return "v3_cvt_pk_u16_u32"; + case Vop3::Op::V3_CVT_PK_I16_I32: + return "v3_cvt_pk_i16_i32"; + case Vop3::Op::V3_MAD_LEGACY_F32: + return "v3_mad_legacy_f32"; + case Vop3::Op::V3_MAD_F32: + return "v3_mad_f32"; + case Vop3::Op::V3_MAD_I32_I24: + return "v3_mad_i32_i24"; + case Vop3::Op::V3_MAD_U32_U24: + return "v3_mad_u32_u24"; + case Vop3::Op::V3_CUBEID_F32: + return "v3_cubeid_f32"; + case Vop3::Op::V3_CUBESC_F32: + return "v3_cubesc_f32"; + case Vop3::Op::V3_CUBETC_F32: + return "v3_cubetc_f32"; + case Vop3::Op::V3_CUBEMA_F32: + return "v3_cubema_f32"; + case Vop3::Op::V3_BFE_U32: + return "v3_bfe_u32"; + case Vop3::Op::V3_BFE_I32: + return "v3_bfe_i32"; + case Vop3::Op::V3_BFI_B32: + return "v3_bfi_b32"; + case Vop3::Op::V3_FMA_F32: + return "v3_fma_f32"; + case Vop3::Op::V3_FMA_F64: + return "v3_fma_f64"; + case Vop3::Op::V3_LERP_U8: + return "v3_lerp_u8"; + case Vop3::Op::V3_ALIGNBIT_B32: + return "v3_alignbit_b32"; + case Vop3::Op::V3_ALIGNBYTE_B32: + return "v3_alignbyte_b32"; + case Vop3::Op::V3_MULLIT_F32: + return "v3_mullit_f32"; + case Vop3::Op::V3_MIN3_F32: + return "v3_min3_f32"; + case Vop3::Op::V3_MIN3_I32: + return "v3_min3_i32"; + case Vop3::Op::V3_MIN3_U32: + return "v3_min3_u32"; + case Vop3::Op::V3_MAX3_F32: + return "v3_max3_f32"; + case Vop3::Op::V3_MAX3_I32: + return "v3_max3_i32"; + case Vop3::Op::V3_MAX3_U32: + return "v3_max3_u32"; + case Vop3::Op::V3_MED3_F32: + return "v3_med3_f32"; + case Vop3::Op::V3_MED3_I32: + return "v3_med3_i32"; + case Vop3::Op::V3_MED3_U32: + return "v3_med3_u32"; + case Vop3::Op::V3_SAD_U8: + return "v3_sad_u8"; + case Vop3::Op::V3_SAD_HI_U8: + return "v3_sad_hi_u8"; + case Vop3::Op::V3_SAD_U16: + return "v3_sad_u16"; + case Vop3::Op::V3_SAD_U32: + return "v3_sad_u32"; + case Vop3::Op::V3_CVT_PK_U8_F32: + return "v3_cvt_pk_u8_f32"; + case Vop3::Op::V3_DIV_FIXUP_F32: + return "v3_div_fixup_f32"; + case Vop3::Op::V3_DIV_FIXUP_F64: + return "v3_div_fixup_f64"; + case Vop3::Op::V3_LSHL_B64: + return "v3_lshl_b64"; + case Vop3::Op::V3_LSHR_B64: + return "v3_lshr_b64"; + case Vop3::Op::V3_ASHR_I64: + return "v3_ashr_i64"; + case Vop3::Op::V3_ADD_F64: + return "v3_add_f64"; + case Vop3::Op::V3_MUL_F64: + return "v3_mul_f64"; + case Vop3::Op::V3_MIN_F64: + return "v3_min_f64"; + case Vop3::Op::V3_MAX_F64: + return "v3_max_f64"; + case Vop3::Op::V3_LDEXP_F64: + return "v3_ldexp_f64"; + case Vop3::Op::V3_MUL_LO_U32: + return "v3_mul_lo_u32"; + case Vop3::Op::V3_MUL_HI_U32: + return "v3_mul_hi_u32"; + case Vop3::Op::V3_MUL_LO_I32: + return "v3_mul_lo_i32"; + case Vop3::Op::V3_MUL_HI_I32: + return "v3_mul_hi_i32"; + case Vop3::Op::V3_DIV_SCALE_F32: + return "v3_div_scale_f32"; + case Vop3::Op::V3_DIV_SCALE_F64: + return "v3_div_scale_f64"; + case Vop3::Op::V3_DIV_FMAS_F32: + return "v3_div_fmas_f32"; + case Vop3::Op::V3_DIV_FMAS_F64: + return "v3_div_fmas_f64"; + case Vop3::Op::V3_MSAD_U8: + return "v3_msad_u8"; + case Vop3::Op::V3_QSAD_U8: + return "v3_qsad_u8"; + case Vop3::Op::V3_MQSAD_U8: + return "v3_mqsad_u8"; + case Vop3::Op::V3_TRIG_PREOP_F64: + return "v3_trig_preop_f64"; + case Vop3::Op::V3_NOP: + return "v3_nop"; + case Vop3::Op::V3_MOV_B32: + return "v3_mov_b32"; + case Vop3::Op::V3_READFIRSTLANE_B32: + return "v3_readfirstlane_b32"; + case Vop3::Op::V3_CVT_I32_F64: + return "v3_cvt_i32_f64"; + case Vop3::Op::V3_CVT_F64_I32: + return "v3_cvt_f64_i32"; + case Vop3::Op::V3_CVT_F32_I32: + return "v3_cvt_f32_i32"; + case Vop3::Op::V3_CVT_F32_U32: + return "v3_cvt_f32_u32"; + case Vop3::Op::V3_CVT_U32_F32: + return "v3_cvt_u32_f32"; + case Vop3::Op::V3_CVT_I32_F32: + return "v3_cvt_i32_f32"; + case Vop3::Op::V3_MOV_FED_B32: + return "v3_mov_fed_b32"; + case Vop3::Op::V3_CVT_F16_F32: + return "v3_cvt_f16_f32"; + case Vop3::Op::V3_CVT_F32_F16: + return "v3_cvt_f32_f16"; + case Vop3::Op::V3_CVT_RPI_I32_F32: + return "v3_cvt_rpi_i32_f32"; + case Vop3::Op::V3_CVT_FLR_I32_F32: + return "v3_cvt_flr_i32_f32"; + case Vop3::Op::V3_CVT_OFF_F32_I4: + return "v3_cvt_off_f32_i4"; + case Vop3::Op::V3_CVT_F32_F64: + return "v3_cvt_f32_f64"; + case Vop3::Op::V3_CVT_F64_F32: + return "v3_cvt_f64_f32"; + case Vop3::Op::V3_CVT_F32_UBYTE0: + return "v3_cvt_f32_ubyte0"; + case Vop3::Op::V3_CVT_F32_UBYTE1: + return "v3_cvt_f32_ubyte1"; + case Vop3::Op::V3_CVT_F32_UBYTE2: + return "v3_cvt_f32_ubyte2"; + case Vop3::Op::V3_CVT_F32_UBYTE3: + return "v3_cvt_f32_ubyte3"; + case Vop3::Op::V3_CVT_U32_F64: + return "v3_cvt_u32_f64"; + case Vop3::Op::V3_CVT_F64_U32: + return "v3_cvt_f64_u32"; + case Vop3::Op::V3_FRACT_F32: + return "v3_fract_f32"; + case Vop3::Op::V3_TRUNC_F32: + return "v3_trunc_f32"; + case Vop3::Op::V3_CEIL_F32: + return "v3_ceil_f32"; + case Vop3::Op::V3_RNDNE_F32: + return "v3_rndne_f32"; + case Vop3::Op::V3_FLOOR_F32: + return "v3_floor_f32"; + case Vop3::Op::V3_EXP_F32: + return "v3_exp_f32"; + case Vop3::Op::V3_LOG_CLAMP_F32: + return "v3_log_clamp_f32"; + case Vop3::Op::V3_LOG_F32: + return "v3_log_f32"; + case Vop3::Op::V3_RCP_CLAMP_F32: + return "v3_rcp_clamp_f32"; + case Vop3::Op::V3_RCP_LEGACY_F32: + return "v3_rcp_legacy_f32"; + case Vop3::Op::V3_RCP_F32: + return "v3_rcp_f32"; + case Vop3::Op::V3_RCP_IFLAG_F32: + return "v3_rcp_iflag_f32"; + case Vop3::Op::V3_RSQ_CLAMP_F32: + return "v3_rsq_clamp_f32"; + case Vop3::Op::V3_RSQ_LEGACY_F32: + return "v3_rsq_legacy_f32"; + case Vop3::Op::V3_RSQ_F32: + return "v3_rsq_f32"; + case Vop3::Op::V3_RCP_F64: + return "v3_rcp_f64"; + case Vop3::Op::V3_RCP_CLAMP_F64: + return "v3_rcp_clamp_f64"; + case Vop3::Op::V3_RSQ_F64: + return "v3_rsq_f64"; + case Vop3::Op::V3_RSQ_CLAMP_F64: + return "v3_rsq_clamp_f64"; + case Vop3::Op::V3_SQRT_F32: + return "v3_sqrt_f32"; + case Vop3::Op::V3_SQRT_F64: + return "v3_sqrt_f64"; + case Vop3::Op::V3_SIN_F32: + return "v3_sin_f32"; + case Vop3::Op::V3_COS_F32: + return "v3_cos_f32"; + case Vop3::Op::V3_NOT_B32: + return "v3_not_b32"; + case Vop3::Op::V3_BFREV_B32: + return "v3_bfrev_b32"; + case Vop3::Op::V3_FFBH_U32: + return "v3_ffbh_u32"; + case Vop3::Op::V3_FFBL_B32: + return "v3_ffbl_b32"; + case Vop3::Op::V3_FFBH_I32: + return "v3_ffbh_i32"; + case Vop3::Op::V3_FREXP_EXP_I32_F64: + return "v3_frexp_exp_i32_f64"; + case Vop3::Op::V3_FREXP_MANT_F64: + return "v3_frexp_mant_f64"; + case Vop3::Op::V3_FRACT_F64: + return "v3_fract_f64"; + case Vop3::Op::V3_FREXP_EXP_I32_F32: + return "v3_frexp_exp_i32_f32"; + case Vop3::Op::V3_FREXP_MANT_F32: + return "v3_frexp_mant_f32"; + case Vop3::Op::V3_CLREXCP: + return "v3_clrexcp"; + case Vop3::Op::V3_MOVRELD_B32: + return "v3_movreld_b32"; + case Vop3::Op::V3_MOVRELS_B32: + return "v3_movrels_b32"; + case Vop3::Op::V3_MOVRELSD_B32: + return "v3_movrelsd_b32"; + default: + return nullptr; + } +} + +const char *amdgpu::shader::smrdOpcodeToString(Smrd::Op op) { + switch (op) { + case Smrd::Op::S_LOAD_DWORD: + return "s_load_dword"; + case Smrd::Op::S_LOAD_DWORDX2: + return "s_load_dwordx2"; + case Smrd::Op::S_LOAD_DWORDX4: + return "s_load_dwordx4"; + case Smrd::Op::S_LOAD_DWORDX8: + return "s_load_dwordx8"; + case Smrd::Op::S_LOAD_DWORDX16: + return "s_load_dwordx16"; + case Smrd::Op::S_BUFFER_LOAD_DWORD: + return "s_buffer_load_dword"; + case Smrd::Op::S_BUFFER_LOAD_DWORDX2: + return "s_buffer_load_dwordx2"; + case Smrd::Op::S_BUFFER_LOAD_DWORDX4: + return "s_buffer_load_dwordx4"; + case Smrd::Op::S_BUFFER_LOAD_DWORDX8: + return "s_buffer_load_dwordx8"; + case Smrd::Op::S_BUFFER_LOAD_DWORDX16: + return "s_buffer_load_dwordx16"; + case Smrd::Op::S_DCACHE_INV_VOL: + return "s_dcache_inv_vol"; + case Smrd::Op::S_MEMTIME: + return "s_memtime"; + case Smrd::Op::S_DCACHE_INV: + return "s_dcache_inv"; + default: + return nullptr; + } +} + +const char *amdgpu::shader::mubufOpcodeToString(Mubuf::Op op) { + switch (op) { + case Mubuf::Op::BUFFER_LOAD_FORMAT_X: + return "buffer_load_format_x"; + case Mubuf::Op::BUFFER_LOAD_FORMAT_XY: + return "buffer_load_format_xy"; + case Mubuf::Op::BUFFER_LOAD_FORMAT_XYZ: + return "buffer_load_format_xyz"; + case Mubuf::Op::BUFFER_LOAD_FORMAT_XYZW: + return "buffer_load_format_xyzw"; + case Mubuf::Op::BUFFER_STORE_FORMAT_X: + return "buffer_store_format_x"; + case Mubuf::Op::BUFFER_STORE_FORMAT_XY: + return "buffer_store_format_xy"; + case Mubuf::Op::BUFFER_STORE_FORMAT_XYZ: + return "buffer_store_format_xyz"; + case Mubuf::Op::BUFFER_STORE_FORMAT_XYZW: + return "buffer_store_format_xyzw"; + case Mubuf::Op::BUFFER_LOAD_UBYTE: + return "buffer_load_ubyte"; + case Mubuf::Op::BUFFER_LOAD_SBYTE: + return "buffer_load_sbyte"; + case Mubuf::Op::BUFFER_LOAD_USHORT: + return "buffer_load_ushort"; + case Mubuf::Op::BUFFER_LOAD_SSHORT: + return "buffer_load_sshort"; + case Mubuf::Op::BUFFER_LOAD_DWORD: + return "buffer_load_dword"; + case Mubuf::Op::BUFFER_LOAD_DWORDX2: + return "buffer_load_dwordx2"; + case Mubuf::Op::BUFFER_LOAD_DWORDX4: + return "buffer_load_dwordx4"; + case Mubuf::Op::BUFFER_LOAD_DWORDX3: + return "buffer_load_dwordx3"; + case Mubuf::Op::BUFFER_STORE_BYTE: + return "buffer_store_byte"; + case Mubuf::Op::BUFFER_STORE_SHORT: + return "buffer_store_short"; + case Mubuf::Op::BUFFER_STORE_DWORD: + return "buffer_store_dword"; + case Mubuf::Op::BUFFER_STORE_DWORDX2: + return "buffer_store_dwordx2"; + case Mubuf::Op::BUFFER_STORE_DWORDX4: + return "buffer_store_dwordx4"; + case Mubuf::Op::BUFFER_STORE_DWORDX3: + return "buffer_store_dwordx3"; + case Mubuf::Op::BUFFER_ATOMIC_SWAP: + return "buffer_atomic_swap"; + case Mubuf::Op::BUFFER_ATOMIC_CMPSWAP: + return "buffer_atomic_cmpswap"; + case Mubuf::Op::BUFFER_ATOMIC_ADD: + return "buffer_atomic_add"; + case Mubuf::Op::BUFFER_ATOMIC_SUB: + return "buffer_atomic_sub"; + case Mubuf::Op::BUFFER_ATOMIC_RSUB: + return "buffer_atomic_rsub"; + case Mubuf::Op::BUFFER_ATOMIC_SMIN: + return "buffer_atomic_smin"; + case Mubuf::Op::BUFFER_ATOMIC_UMIN: + return "buffer_atomic_umin"; + case Mubuf::Op::BUFFER_ATOMIC_SMAX: + return "buffer_atomic_smax"; + case Mubuf::Op::BUFFER_ATOMIC_UMAX: + return "buffer_atomic_umax"; + case Mubuf::Op::BUFFER_ATOMIC_AND: + return "buffer_atomic_and"; + case Mubuf::Op::BUFFER_ATOMIC_OR: + return "buffer_atomic_or"; + case Mubuf::Op::BUFFER_ATOMIC_XOR: + return "buffer_atomic_xor"; + case Mubuf::Op::BUFFER_ATOMIC_INC: + return "buffer_atomic_inc"; + case Mubuf::Op::BUFFER_ATOMIC_DEC: + return "buffer_atomic_dec"; + case Mubuf::Op::BUFFER_ATOMIC_FCMPSWAP: + return "buffer_atomic_fcmpswap"; + case Mubuf::Op::BUFFER_ATOMIC_FMIN: + return "buffer_atomic_fmin"; + case Mubuf::Op::BUFFER_ATOMIC_FMAX: + return "buffer_atomic_fmax"; + case Mubuf::Op::BUFFER_ATOMIC_SWAP_X2: + return "buffer_atomic_swap_x2"; + case Mubuf::Op::BUFFER_ATOMIC_CMPSWAP_X2: + return "buffer_atomic_cmpswap_x2"; + case Mubuf::Op::BUFFER_ATOMIC_ADD_X2: + return "buffer_atomic_add_x2"; + case Mubuf::Op::BUFFER_ATOMIC_SUB_X2: + return "buffer_atomic_sub_x2"; + case Mubuf::Op::BUFFER_ATOMIC_RSUB_X2: + return "buffer_atomic_rsub_x2"; + case Mubuf::Op::BUFFER_ATOMIC_SMIN_X2: + return "buffer_atomic_smin_x2"; + case Mubuf::Op::BUFFER_ATOMIC_UMIN_X2: + return "buffer_atomic_umin_x2"; + case Mubuf::Op::BUFFER_ATOMIC_SMAX_X2: + return "buffer_atomic_smax_x2"; + case Mubuf::Op::BUFFER_ATOMIC_UMAX_X2: + return "buffer_atomic_umax_x2"; + case Mubuf::Op::BUFFER_ATOMIC_AND_X2: + return "buffer_atomic_and_x2"; + case Mubuf::Op::BUFFER_ATOMIC_OR_X2: + return "buffer_atomic_or_x2"; + case Mubuf::Op::BUFFER_ATOMIC_XOR_X2: + return "buffer_atomic_xor_x2"; + case Mubuf::Op::BUFFER_ATOMIC_INC_X2: + return "buffer_atomic_inc_x2"; + case Mubuf::Op::BUFFER_ATOMIC_DEC_X2: + return "buffer_atomic_dec_x2"; + case Mubuf::Op::BUFFER_ATOMIC_FCMPSWAP_X2: + return "buffer_atomic_fcmpswap_x2"; + case Mubuf::Op::BUFFER_ATOMIC_FMIN_X2: + return "buffer_atomic_fmin_x2"; + case Mubuf::Op::BUFFER_ATOMIC_FMAX_X2: + return "buffer_atomic_fmax_x2"; + case Mubuf::Op::BUFFER_WBINVL1_SC_VOL: + return "buffer_wbinvl1_sc/vol"; + case Mubuf::Op::BUFFER_WBINVL1: + return "buffer_wbinvl1"; + default: + return nullptr; + } +} + +const char *amdgpu::shader::mtbufOpcodeToString(Mtbuf::Op op) { + switch (op) { + case Mtbuf::Op::TBUFFER_LOAD_FORMAT_X: + return "tbuffer_load_format_x"; + case Mtbuf::Op::TBUFFER_LOAD_FORMAT_XY: + return "tbuffer_load_format_xy"; + case Mtbuf::Op::TBUFFER_LOAD_FORMAT_XYZ: + return "tbuffer_load_format_xyz"; + case Mtbuf::Op::TBUFFER_LOAD_FORMAT_XYZW: + return "tbuffer_load_format_xyzw"; + case Mtbuf::Op::TBUFFER_STORE_FORMAT_X: + return "tbuffer_store_format_x"; + case Mtbuf::Op::TBUFFER_STORE_FORMAT_XY: + return "tbuffer_store_format_xy"; + case Mtbuf::Op::TBUFFER_STORE_FORMAT_XYZ: + return "tbuffer_store_format_xyz"; + case Mtbuf::Op::TBUFFER_STORE_FORMAT_XYZW: + return "tbuffer_store_format_xyzw"; + default: + return nullptr; + } +} + +const char *amdgpu::shader::mimgOpcodeToString(Mimg::Op op) { + switch (op) { + case Mimg::Op::IMAGE_LOAD: + return "image_load"; + case Mimg::Op::IMAGE_LOAD_MIP: + return "image_load_mip"; + case Mimg::Op::IMAGE_LOAD_PCK: + return "image_load_pck"; + case Mimg::Op::IMAGE_LOAD_PCK_SGN: + return "image_load_pck_sgn"; + case Mimg::Op::IMAGE_LOAD_MIP_PCK: + return "image_load_mip_pck"; + case Mimg::Op::IMAGE_LOAD_MIP_PCK_SGN: + return "image_load_mip_pck_sgn"; + case Mimg::Op::IMAGE_STORE: + return "image_store"; + case Mimg::Op::IMAGE_STORE_MIP: + return "image_store_mip"; + case Mimg::Op::IMAGE_STORE_PCK: + return "image_store_pck"; + case Mimg::Op::IMAGE_STORE_MIP_PCK: + return "image_store_mip_pck"; + case Mimg::Op::IMAGE_GET_RESINFO: + return "image_get_resinfo"; + case Mimg::Op::IMAGE_ATOMIC_SWAP: + return "image_atomic_swap"; + case Mimg::Op::IMAGE_ATOMIC_CMPSWAP: + return "image_atomic_cmpswap"; + case Mimg::Op::IMAGE_ATOMIC_ADD: + return "image_atomic_add"; + case Mimg::Op::IMAGE_ATOMIC_SUB: + return "image_atomic_sub"; + case Mimg::Op::IMAGE_ATOMIC_RSUB: + return "image_atomic_rsub"; + case Mimg::Op::IMAGE_ATOMIC_SMIN: + return "image_atomic_smin"; + case Mimg::Op::IMAGE_ATOMIC_UMIN: + return "image_atomic_umin"; + case Mimg::Op::IMAGE_ATOMIC_SMAX: + return "image_atomic_smax"; + case Mimg::Op::IMAGE_ATOMIC_UMAX: + return "image_atomic_umax"; + case Mimg::Op::IMAGE_ATOMIC_AND: + return "image_atomic_and"; + case Mimg::Op::IMAGE_ATOMIC_OR: + return "image_atomic_or"; + case Mimg::Op::IMAGE_ATOMIC_XOR: + return "image_atomic_xor"; + case Mimg::Op::IMAGE_ATOMIC_INC: + return "image_atomic_inc"; + case Mimg::Op::IMAGE_ATOMIC_DEC: + return "image_atomic_dec"; + case Mimg::Op::IMAGE_ATOMIC_FCMPSWAP: + return "image_atomic_fcmpswap"; + case Mimg::Op::IMAGE_ATOMIC_FMIN: + return "image_atomic_fmin"; + case Mimg::Op::IMAGE_ATOMIC_FMAX: + return "image_atomic_fmax"; + case Mimg::Op::IMAGE_SAMPLE: + return "image_sample"; + case Mimg::Op::IMAGE_SAMPLE_CL: + return "image_sample_cl"; + case Mimg::Op::IMAGE_SAMPLE_D: + return "image_sample_d"; + case Mimg::Op::IMAGE_SAMPLE_D_CL: + return "image_sample_d_cl"; + case Mimg::Op::IMAGE_SAMPLE_L: + return "image_sample_l"; + case Mimg::Op::IMAGE_SAMPLE_B: + return "image_sample_b"; + case Mimg::Op::IMAGE_SAMPLE_B_CL: + return "image_sample_b_cl"; + case Mimg::Op::IMAGE_SAMPLE_LZ: + return "image_sample_lz"; + case Mimg::Op::IMAGE_SAMPLE_C: + return "image_sample_c"; + case Mimg::Op::IMAGE_SAMPLE_C_CL: + return "image_sample_c_cl"; + case Mimg::Op::IMAGE_SAMPLE_C_D: + return "image_sample_c_d"; + case Mimg::Op::IMAGE_SAMPLE_C_D_CL: + return "image_sample_c_d_cl"; + case Mimg::Op::IMAGE_SAMPLE_C_L: + return "image_sample_c_l"; + case Mimg::Op::IMAGE_SAMPLE_C_B: + return "image_sample_c_b"; + case Mimg::Op::IMAGE_SAMPLE_C_B_CL: + return "image_sample_c_b_cl"; + case Mimg::Op::IMAGE_SAMPLE_C_LZ: + return "image_sample_c_lz"; + case Mimg::Op::IMAGE_SAMPLE_O: + return "image_sample_o"; + case Mimg::Op::IMAGE_SAMPLE_CL_O: + return "image_sample_cl_o"; + case Mimg::Op::IMAGE_SAMPLE_D_O: + return "image_sample_d_o"; + case Mimg::Op::IMAGE_SAMPLE_D_CL_O: + return "image_sample_d_cl_o"; + case Mimg::Op::IMAGE_SAMPLE_L_O: + return "image_sample_l_o"; + case Mimg::Op::IMAGE_SAMPLE_B_O: + return "image_sample_b_o"; + case Mimg::Op::IMAGE_SAMPLE_B_CL_O: + return "image_sample_b_cl_o"; + case Mimg::Op::IMAGE_SAMPLE_LZ_O: + return "image_sample_lz_o"; + case Mimg::Op::IMAGE_SAMPLE_C_O: + return "image_sample_c_o"; + case Mimg::Op::IMAGE_SAMPLE_C_CL_O: + return "image_sample_c_cl_o"; + case Mimg::Op::IMAGE_SAMPLE_C_D_O: + return "image_sample_c_d_o"; + case Mimg::Op::IMAGE_SAMPLE_C_D_CL_O: + return "image_sample_c_d_cl_o"; + case Mimg::Op::IMAGE_SAMPLE_C_L_O: + return "image_sample_c_l_o"; + case Mimg::Op::IMAGE_SAMPLE_C_B_O: + return "image_sample_c_b_o"; + case Mimg::Op::IMAGE_SAMPLE_C_B_CL_O: + return "image_sample_c_b_cl_o"; + case Mimg::Op::IMAGE_SAMPLE_C_LZ_O: + return "image_sample_c_lz_o"; + case Mimg::Op::IMAGE_GATHER4: + return "image_gather4"; + case Mimg::Op::IMAGE_GATHER4_CL: + return "image_gather4_cl"; + case Mimg::Op::IMAGE_GATHER4_L: + return "image_gather4_l"; + case Mimg::Op::IMAGE_GATHER4_B: + return "image_gather4_b"; + case Mimg::Op::IMAGE_GATHER4_B_CL: + return "image_gather4_b_cl"; + case Mimg::Op::IMAGE_GATHER4_LZ: + return "image_gather4_lz"; + case Mimg::Op::IMAGE_GATHER4_C: + return "image_gather4_c"; + case Mimg::Op::IMAGE_GATHER4_C_CL: + return "image_gather4_c_cl"; + case Mimg::Op::IMAGE_GATHER4_C_L: + return "image_gather4_c_l"; + case Mimg::Op::IMAGE_GATHER4_C_B: + return "image_gather4_c_b"; + case Mimg::Op::IMAGE_GATHER4_C_B_CL: + return "image_gather4_c_b_cl"; + case Mimg::Op::IMAGE_GATHER4_C_LZ: + return "image_gather4_c_lz"; + case Mimg::Op::IMAGE_GATHER4_O: + return "image_gather4_o"; + case Mimg::Op::IMAGE_GATHER4_CL_O: + return "image_gather4_cl_o"; + case Mimg::Op::IMAGE_GATHER4_L_O: + return "image_gather4_l_o"; + case Mimg::Op::IMAGE_GATHER4_B_O: + return "image_gather4_b_o"; + case Mimg::Op::IMAGE_GATHER4_B_CL_O: + return "image_gather4_b_cl_o"; + case Mimg::Op::IMAGE_GATHER4_LZ_O: + return "image_gather4_lz_o"; + case Mimg::Op::IMAGE_GATHER4_C_O: + return "image_gather4_c_o"; + case Mimg::Op::IMAGE_GATHER4_C_CL_O: + return "image_gather4_c_cl_o"; + case Mimg::Op::IMAGE_GATHER4_C_L_O: + return "image_gather4_c_l_o"; + case Mimg::Op::IMAGE_GATHER4_C_B_O: + return "image_gather4_c_b_o"; + case Mimg::Op::IMAGE_GATHER4_C_B_CL_O: + return "image_gather4_c_b_cl_o"; + case Mimg::Op::IMAGE_GATHER4_C_LZ_O: + return "image_gather4_c_lz_o"; + case Mimg::Op::IMAGE_GET_LOD: + return "image_get_lod"; + case Mimg::Op::IMAGE_SAMPLE_CD: + return "image_sample_cd"; + case Mimg::Op::IMAGE_SAMPLE_CD_CL: + return "image_sample_cd_cl"; + case Mimg::Op::IMAGE_SAMPLE_C_CD: + return "image_sample_c_cd"; + case Mimg::Op::IMAGE_SAMPLE_C_CD_CL: + return "image_sample_c_cd_cl"; + case Mimg::Op::IMAGE_SAMPLE_CD_O: + return "image_sample_cd_o"; + case Mimg::Op::IMAGE_SAMPLE_CD_CL_O: + return "image_sample_cd_cl_o"; + case Mimg::Op::IMAGE_SAMPLE_C_CD_O: + return "image_sample_c_cd_o"; + case Mimg::Op::IMAGE_SAMPLE_C_CD_CL_O: + return "image_sample_c_cd_cl_o"; + default: + return nullptr; + } +} + +const char *amdgpu::shader::dsOpcodeToString(Ds::Op op) { + switch (op) { + case Ds::Op::DS_ADD_U32: + return "ds_add_u32"; + case Ds::Op::DS_SUB_U32: + return "ds_sub_u32"; + case Ds::Op::DS_RSUB_U32: + return "ds_rsub_u32"; + case Ds::Op::DS_INC_U32: + return "ds_inc_u32"; + case Ds::Op::DS_DEC_U32: + return "ds_dec_u32"; + case Ds::Op::DS_MIN_I32: + return "ds_min_i32"; + case Ds::Op::DS_MAX_I32: + return "ds_max_i32"; + case Ds::Op::DS_MIN_U32: + return "ds_min_u32"; + case Ds::Op::DS_MAX_U32: + return "ds_max_u32"; + case Ds::Op::DS_AND_B32: + return "ds_and_b32"; + case Ds::Op::DS_OR_B32: + return "ds_or_b32"; + case Ds::Op::DS_XOR_B32: + return "ds_xor_b32"; + case Ds::Op::DS_MSKOR_B32: + return "ds_mskor_b32"; + case Ds::Op::DS_WRITE_B32: + return "ds_write_b32"; + case Ds::Op::DS_WRITE2_B32: + return "ds_write2_b32"; + case Ds::Op::DS_WRITE2ST64_B32: + return "ds_write2st64_b32"; + case Ds::Op::DS_CMPST_B32: + return "ds_cmpst_b32"; + case Ds::Op::DS_CMPST_F32: + return "ds_cmpst_f32"; + case Ds::Op::DS_MIN_F32: + return "ds_min_f32"; + case Ds::Op::DS_MAX_F32: + return "ds_max_f32"; + case Ds::Op::DS_NOP: + return "ds_nop"; + case Ds::Op::DS_GWS_SEMA_RELEASE_ALL: + return "ds_gws_sema_release_all"; + case Ds::Op::DS_GWS_INIT: + return "ds_gws_init"; + case Ds::Op::DS_GWS_SEMA_V: + return "ds_gws_sema_v"; + case Ds::Op::DS_GWS_SEMA_BR: + return "ds_gws_sema_br"; + case Ds::Op::DS_GWS_SEMA_P: + return "ds_gws_sema_p"; + case Ds::Op::DS_GWS_BARRIER: + return "ds_gws_barrier"; + case Ds::Op::DS_WRITE_B8: + return "ds_write_b8"; + case Ds::Op::DS_WRITE_B16: + return "ds_write_b16"; + case Ds::Op::DS_ADD_RTN_U32: + return "ds_add_rtn_u32"; + case Ds::Op::DS_SUB_RTN_U32: + return "ds_sub_rtn_u32"; + case Ds::Op::DS_RSUB_RTN_U32: + return "ds_rsub_rtn_u32"; + case Ds::Op::DS_INC_RTN_U32: + return "ds_inc_rtn_u32"; + case Ds::Op::DS_DEC_RTN_U32: + return "ds_dec_rtn_u32"; + case Ds::Op::DS_MIN_RTN_I32: + return "ds_min_rtn_i32"; + case Ds::Op::DS_MAX_RTN_I32: + return "ds_max_rtn_i32"; + case Ds::Op::DS_MIN_RTN_U32: + return "ds_min_rtn_u32"; + case Ds::Op::DS_MAX_RTN_U32: + return "ds_max_rtn_u32"; + case Ds::Op::DS_AND_RTN_B32: + return "ds_and_rtn_b32"; + case Ds::Op::DS_OR_RTN_B32: + return "ds_or_rtn_b32"; + case Ds::Op::DS_XOR_RTN_B32: + return "ds_xor_rtn_b32"; + case Ds::Op::DS_MSKOR_RTN_B32: + return "ds_mskor_rtn_b32"; + case Ds::Op::DS_WRXCHG_RTN_B32: + return "ds_wrxchg_rtn_b32"; + case Ds::Op::DS_WRXCHG2_RTN_B32: + return "ds_wrxchg2_rtn_b32"; + case Ds::Op::DS_WRXCHG2ST64_RTN_B32: + return "ds_wrxchg2st64_rtn_b32"; + case Ds::Op::DS_CMPST_RTN_B32: + return "ds_cmpst_rtn_b32"; + case Ds::Op::DS_CMPST_RTN_F32: + return "ds_cmpst_rtn_f32"; + case Ds::Op::DS_MIN_RTN_F32: + return "ds_min_rtn_f32"; + case Ds::Op::DS_MAX_RTN_F32: + return "ds_max_rtn_f32"; + case Ds::Op::DS_WRAP_RTN_B32: + return "ds_wrap_rtn_b32"; + case Ds::Op::DS_SWIZZLE_B32: + return "ds_swizzle_b32"; + case Ds::Op::DS_READ_B32: + return "ds_read_b32"; + case Ds::Op::DS_READ2_B32: + return "ds_read2_b32"; + case Ds::Op::DS_READ2ST64_B32: + return "ds_read2st64_b32"; + case Ds::Op::DS_READ_I8: + return "ds_read_i8"; + case Ds::Op::DS_READ_U8: + return "ds_read_u8"; + case Ds::Op::DS_READ_I16: + return "ds_read_i16"; + case Ds::Op::DS_READ_U16: + return "ds_read_u16"; + case Ds::Op::DS_CONSUME: + return "ds_consume"; + case Ds::Op::DS_APPEND: + return "ds_append"; + case Ds::Op::DS_ORDERED_COUNT: + return "ds_ordered_count"; + case Ds::Op::DS_ADD_U64: + return "ds_add_u64"; + case Ds::Op::DS_SUB_U64: + return "ds_sub_u64"; + case Ds::Op::DS_RSUB_U64: + return "ds_rsub_u64"; + case Ds::Op::DS_INC_U64: + return "ds_inc_u64"; + case Ds::Op::DS_DEC_U64: + return "ds_dec_u64"; + case Ds::Op::DS_MIN_I64: + return "ds_min_i64"; + case Ds::Op::DS_MAX_I64: + return "ds_max_i64"; + case Ds::Op::DS_MIN_U64: + return "ds_min_u64"; + case Ds::Op::DS_MAX_U64: + return "ds_max_u64"; + case Ds::Op::DS_AND_B64: + return "ds_and_b64"; + case Ds::Op::DS_OR_B64: + return "ds_or_b64"; + case Ds::Op::DS_XOR_B64: + return "ds_xor_b64"; + case Ds::Op::DS_MSKOR_B64: + return "ds_mskor_b64"; + case Ds::Op::DS_WRITE_B64: + return "ds_write_b64"; + case Ds::Op::DS_WRITE2_B64: + return "ds_write2_b64"; + case Ds::Op::DS_WRITE2ST64_B64: + return "ds_write2st64_b64"; + case Ds::Op::DS_CMPST_B64: + return "ds_cmpst_b64"; + case Ds::Op::DS_CMPST_F64: + return "ds_cmpst_f64"; + case Ds::Op::DS_MIN_F64: + return "ds_min_f64"; + case Ds::Op::DS_MAX_F64: + return "ds_max_f64"; + case Ds::Op::DS_ADD_RTN_U64: + return "ds_add_rtn_u64"; + case Ds::Op::DS_SUB_RTN_U64: + return "ds_sub_rtn_u64"; + case Ds::Op::DS_RSUB_RTN_U64: + return "ds_rsub_rtn_u64"; + case Ds::Op::DS_INC_RTN_U64: + return "ds_inc_rtn_u64"; + case Ds::Op::DS_DEC_RTN_U64: + return "ds_dec_rtn_u64"; + case Ds::Op::DS_MIN_RTN_I64: + return "ds_min_rtn_i64"; + case Ds::Op::DS_MAX_RTN_I64: + return "ds_max_rtn_i64"; + case Ds::Op::DS_MIN_RTN_U64: + return "ds_min_rtn_u64"; + case Ds::Op::DS_MAX_RTN_U64: + return "ds_max_rtn_u64"; + case Ds::Op::DS_AND_RTN_B64: + return "ds_and_rtn_b64"; + case Ds::Op::DS_OR_RTN_B64: + return "ds_or_rtn_b64"; + case Ds::Op::DS_XOR_RTN_B64: + return "ds_xor_rtn_b64"; + case Ds::Op::DS_MSKOR_RTN_B64: + return "ds_mskor_rtn_b64"; + case Ds::Op::DS_WRXCHG_RTN_B64: + return "ds_wrxchg_rtn_b64"; + case Ds::Op::DS_WRXCHG2_RTN_B64: + return "ds_wrxchg2_rtn_b64"; + case Ds::Op::DS_WRXCHG2ST64_RTN_B64: + return "ds_wrxchg2st64_rtn_b64"; + case Ds::Op::DS_CMPST_RTN_B64: + return "ds_cmpst_rtn_b64"; + case Ds::Op::DS_CMPST_RTN_F64: + return "ds_cmpst_rtn_f64"; + case Ds::Op::DS_MIN_RTN_F64: + return "ds_min_rtn_f64"; + case Ds::Op::DS_MAX_RTN_F64: + return "ds_max_rtn_f64"; + case Ds::Op::DS_READ_B64: + return "ds_read_b64"; + case Ds::Op::DS_READ2_B64: + return "ds_read2_b64"; + case Ds::Op::DS_READ2ST64_B64: + return "ds_read2st64_b64"; + case Ds::Op::DS_CONDXCHG32_RTN_B64: + return "ds_condxchg32_rtn_b64"; + case Ds::Op::DS_ADD_SRC2_U32: + return "ds_add_src2_u32"; + case Ds::Op::DS_SUB_SRC2_U32: + return "ds_sub_src2_u32"; + case Ds::Op::DS_RSUB_SRC2_U32: + return "ds_rsub_src2_u32"; + case Ds::Op::DS_INC_SRC2_U32: + return "ds_inc_src2_u32"; + case Ds::Op::DS_DEC_SRC2_U32: + return "ds_dec_src2_u32"; + case Ds::Op::DS_MIN_SRC2_I32: + return "ds_min_src2_i32"; + case Ds::Op::DS_MAX_SRC2_I32: + return "ds_max_src2_i32"; + case Ds::Op::DS_MIN_SRC2_U32: + return "ds_min_src2_u32"; + case Ds::Op::DS_MAX_SRC2_U32: + return "ds_max_src2_u32"; + case Ds::Op::DS_AND_SRC2_B32: + return "ds_and_src2_b32"; + case Ds::Op::DS_OR_SRC2_B32: + return "ds_or_src2_b32"; + case Ds::Op::DS_XOR_SRC2_B32: + return "ds_xor_src2_b32"; + case Ds::Op::DS_WRITE_SRC2_B32: + return "ds_write_src2_b32"; + case Ds::Op::DS_MIN_SRC2_F32: + return "ds_min_src2_f32"; + case Ds::Op::DS_MAX_SRC2_F32: + return "ds_max_src2_f32"; + case Ds::Op::DS_ADD_SRC2_U64: + return "ds_add_src2_u64"; + case Ds::Op::DS_SUB_SRC2_U64: + return "ds_sub_src2_u64"; + case Ds::Op::DS_RSUB_SRC2_U64: + return "ds_rsub_src2_u64"; + case Ds::Op::DS_INC_SRC2_U64: + return "ds_inc_src2_u64"; + case Ds::Op::DS_DEC_SRC2_U64: + return "ds_dec_src2_u64"; + case Ds::Op::DS_MIN_SRC2_I64: + return "ds_min_src2_i64"; + case Ds::Op::DS_MAX_SRC2_I64: + return "ds_max_src2_i64"; + case Ds::Op::DS_MIN_SRC2_U64: + return "ds_min_src2_u64"; + case Ds::Op::DS_MAX_SRC2_U64: + return "ds_max_src2_u64"; + case Ds::Op::DS_AND_SRC2_B64: + return "ds_and_src2_b64"; + case Ds::Op::DS_OR_SRC2_B64: + return "ds_or_src2_b64"; + case Ds::Op::DS_XOR_SRC2_B64: + return "ds_xor_src2_b64"; + case Ds::Op::DS_WRITE_SRC2_B64: + return "ds_write_src2_b64"; + case Ds::Op::DS_MIN_SRC2_F64: + return "ds_min_src2_f64"; + case Ds::Op::DS_MAX_SRC2_F64: + return "ds_max_src2_f64"; + case Ds::Op::DS_WRITE_B96: + return "ds_write_b96"; + case Ds::Op::DS_WRITE_B128: + return "ds_write_b128"; + case Ds::Op::DS_CONDXCHG32_RTN_B128: + return "ds_condxchg32_rtn_b128"; + case Ds::Op::DS_READ_B96: + return "ds_read_b96"; + case Ds::Op::DS_READ_B128: + return "ds_read_b128"; + default: + return nullptr; + } +} + +const char *amdgpu::shader::vintrpOpcodeToString(Vintrp::Op op) { + switch (op) { + case Vintrp::Op::V_INTERP_P1_F32: + return "v_interp_p1_f32"; + case Vintrp::Op::V_INTERP_P2_F32: + return "v_interp_p2_f32"; + case Vintrp::Op::V_INTERP_MOV_F32: + return "v_interp_mov_f32"; + default: + return nullptr; + } +} + +const char *amdgpu::shader::opcodeToString(InstructionClass instClass, int op) { + switch (instClass) { + case InstructionClass::Vop2: + return vop2OpcodeToString(static_cast(op)); + case InstructionClass::Sop2: + return sop2OpcodeToString(static_cast(op)); + case InstructionClass::Sopk: + return sopkOpcodeToString(static_cast(op)); + case InstructionClass::Smrd: + return smrdOpcodeToString(static_cast(op)); + case InstructionClass::Vop3: + return vop3OpcodeToString(static_cast(op)); + case InstructionClass::Mubuf: + return mubufOpcodeToString(static_cast(op)); + case InstructionClass::Mtbuf: + return mtbufOpcodeToString(static_cast(op)); + case InstructionClass::Mimg: + return mimgOpcodeToString(static_cast(op)); + case InstructionClass::Ds: + return dsOpcodeToString(static_cast(op)); + case InstructionClass::Vintrp: + return vintrpOpcodeToString(static_cast(op)); + case InstructionClass::Exp: + return nullptr; + case InstructionClass::Vop1: + return vop1OpcodeToString(static_cast(op)); + case InstructionClass::Vopc: + return vopcOpcodeToString(static_cast(op)); + case InstructionClass::Sop1: + return sop1OpcodeToString(static_cast(op)); + case InstructionClass::Sopc: + return sopcOpcodeToString(static_cast(op)); + case InstructionClass::Sopp: + return soppOpcodeToString(static_cast(op)); + + default: + return nullptr; + } +} + +void amdgpu::shader::Sop1::dump() const { + int instSize = kMinInstSize; + printSop1Opcode(op); + std::printf(" "); + instSize += printScalarOperand(sdst, inst + instSize); + std::printf(", "); + instSize += printScalarOperand(ssrc0, inst + instSize); +} + +void amdgpu::shader::Sopk::dump() const { + int instSize = kMinInstSize; + printSopkOpcode(op); + std::printf(" "); + instSize += printScalarOperand(sdst, inst + instSize); + std::printf(", %d", simm); +} + +void amdgpu::shader::Sopc::dump() const { + int instSize = kMinInstSize; + printSopcOpcode(op); + std::printf(" "); + instSize += printScalarOperand(ssrc0, inst + instSize); + std::printf(", "); + instSize += printScalarOperand(ssrc1, inst + instSize); +} + +void amdgpu::shader::Sop2::dump() const { + int instSize = kMinInstSize; + printSop2Opcode(op); + std::printf(" "); + instSize += printScalarOperand(sdst, inst + instSize); + std::printf(", "); + instSize += printScalarOperand(ssrc0, inst + instSize); + std::printf(", "); + instSize += printScalarOperand(ssrc1, inst + instSize); +} + +void amdgpu::shader::Sopp::dump() const { + int instSize = kMinInstSize; + printSoppOpcode(op); + std::printf(" "); + instSize += printScalarOperand(simm, inst + instSize); +} + +void amdgpu::shader::Vop1::dump() const { + int instSize = kMinInstSize; + printVop1Opcode(op); + std::printf(" "); + instSize += printVectorOperand(vdst, inst + instSize); + std::printf(", "); + instSize += printScalarOperand(src0, inst + instSize); +} + +void amdgpu::shader::Vop2::dump() const { + int instSize = kMinInstSize; + printVop2Opcode(op); + std::printf(" "); + instSize += printVectorOperand(vdst, inst + instSize); + std::printf(", "); + instSize += printScalarOperand(src0, inst + instSize); + std::printf(", "); + instSize += printVectorOperand(vsrc1, inst + instSize); + + if (op == Vop2::Op::V_MADMK_F32 || op == Vop2::Op::V_MADAK_F32) { + std::printf(", "); + instSize += printScalarOperand(255, inst + instSize); + } +} + +void amdgpu::shader::Vop3::dump() const { + + /* + v_add_i32 + v_addc_u32 + v_sub_i32 + v_subb_u32, + v_subbrev_u32 + v_subrev_i32 + v_div_scale_f32 + v_div_scale_f64 + */ + + int instSize = kMinInstSize; + printVop3Opcode(op); + std::printf(" "); + instSize += printVectorOperand(vdst, inst + instSize); + std::printf(", "); + instSize += printScalarOperand(src0, inst + instSize); + std::printf(", "); + instSize += printScalarOperand(src1, inst + instSize); + std::printf(", "); + instSize += printScalarOperand(src2, inst + instSize); + + std::printf(" #abs=%x, clmp=%x, neg=%x, omod=%x, ", abs, clmp, neg, omod); + instSize += printScalarOperand(sdst, inst + instSize); +} + +void amdgpu::shader::Vopc::dump() const { + int instSize = kMinInstSize; + + printVopcOpcode(op); + std::printf(" "); + instSize += printScalarOperand(src0, inst + instSize); + std::printf(", "); + instSize += printVectorOperand(vsrc1, inst + instSize); +} + +void amdgpu::shader::Smrd::dump() const { + int instSize = kMinInstSize; + + printSmrdOpcode(op); + printf(" "); + printScalarOperand(sdst, inst + instSize); + printf(", "); + printScalarOperand(sbase << 1, inst + instSize); + printf(", "); + + if (imm) { + printf("%u", offset << 2); + } else { + printScalarOperand(offset, inst + instSize); + } + + std::printf(" #sdst=%x,sbase=%x,imm=%x,offset=%x", sdst, sbase, imm, offset); +} +void amdgpu::shader::Mubuf::dump() const { + int instSize = kMinInstSize; + + printMubufOpcode(op); + printf(" "); + printVectorOperand(vdata, inst + instSize); + printf(", "); + printVectorOperand(vaddr, inst + instSize); + printf(", "); + printScalarOperand(srsrc << 2, inst + instSize); + printf(", "); + printScalarOperand(soffset, inst + instSize); + printf(" #offset=%x, " + "offen=%x,idxen=%x,glc=%x,lds=%x,vaddr=%x,vdata=%x,srsrc=%x,slc=%x," + "tfe=%x,soffset=%d", + offset, offen, idxen, glc, lds, vaddr, vdata, srsrc, slc, tfe, + soffset); +} +void amdgpu::shader::Mtbuf::dump() const { + int instSize = kMinInstSize; + + printMtbufOpcode(op); + printf(" "); + printVectorOperand(vdata, inst + instSize); + printf(", "); + printScalarOperand(srsrc << 2, inst + instSize); + printf(", "); + printScalarOperand(soffset, inst + instSize); + printf(" #offset=%x,offen=%x,idxen=%x,glc=%x,op=%x,dfmt=%x,nfmt=%x,vaddr=%x," + "vdata=%x,srsrc=%x,slc=%x,tfe=%x,soffset=%x", + offset, offen, idxen, glc, (unsigned)op, dfmt, nfmt, vaddr, vdata, srsrc, slc, + tfe, soffset); +} +void amdgpu::shader::Mimg::dump() const { + int instSize = kMinInstSize; + + printMimgOpcode(op); + + printf(" #dmask=%x,unrm=%x,glc=%x,da=%x,r128=%x,tfe=%x,lwe=%x,slc=%x," + "vaddr=%x,vdata=%x,srsrc=%x,ssamp=%x", + dmask, unrm, glc, da, r128, tfe, lwe, slc, vaddr, vdata, srsrc, ssamp); +} + +void amdgpu::shader::Ds::dump() const { + int instSize = kMinInstSize; + + printDsOpcode(op); +} + +void amdgpu::shader::Vintrp::dump() const { + int instSize = kMinInstSize; + + printVintrpOpcode(op); + printf(" "); + instSize += printVectorOperand(vdst, inst + instSize); + printf(", "); + instSize += printVectorOperand(vsrc, inst + instSize); + const char channels[] = {'x', 'y', 'z', 'w'}; + + printf(", attr%d.%c", attr, channels[attrChan]); +} +void amdgpu::shader::Exp::dump() const { + int instSize = kMinInstSize; + + printExpTarget(target); + printf(" "); + instSize += printVectorOperand(vsrc0, inst + instSize); + printf(", "); + instSize += printVectorOperand(vsrc1, inst + instSize); + printf(", "); + instSize += printVectorOperand(vsrc2, inst + instSize); + printf(", "); + instSize += printVectorOperand(vsrc3, inst + instSize); + printf(" #en=%x, compr=%x, done=%x, vm=%x", en, compr, done, vm); +} + +void amdgpu::shader::Instruction::dump() const { + printf("%-6s ", instructionClassToString(instClass)); + + switch (instClass) { + case InstructionClass::Invalid: + break; + case InstructionClass::Vop2: + Vop2(inst).dump(); + return; + case InstructionClass::Sop2: + Sop2(inst).dump(); + return; + case InstructionClass::Sopk: + Sopk(inst).dump(); + return; + case InstructionClass::Smrd: + Smrd(inst).dump(); + return; + case InstructionClass::Vop3: + Vop3(inst).dump(); + return; + case InstructionClass::Mubuf: + Mubuf(inst).dump(); + return; + case InstructionClass::Mtbuf: + Mtbuf(inst).dump(); + return; + case InstructionClass::Mimg: + Mimg(inst).dump(); + return; + case InstructionClass::Ds: + Ds(inst).dump(); + return; + case InstructionClass::Vintrp: + Vintrp(inst).dump(); + return; + case InstructionClass::Exp: + Exp(inst).dump(); + return; + case InstructionClass::Vop1: + Vop1(inst).dump(); + return; + case InstructionClass::Vopc: + Vopc(inst).dump(); + return; + case InstructionClass::Sop1: + Sop1(inst).dump(); + return; + case InstructionClass::Sopc: + Sopc(inst).dump(); + return; + case InstructionClass::Sopp: + Sopp(inst).dump(); + return; + } + + printf(""); +} + +const char * +amdgpu::shader::instructionClassToString(InstructionClass instrClass) { + switch (instrClass) { + case InstructionClass::Invalid: + return "INVALID"; + case InstructionClass::Vop2: + return "VOP2"; + case InstructionClass::Sop2: + return "SOP2"; + case InstructionClass::Sopk: + return "SOPK"; + case InstructionClass::Smrd: + return "SMRD"; + case InstructionClass::Vop3: + return "VOP3"; + case InstructionClass::Mubuf: + return "MUBUF"; + case InstructionClass::Mtbuf: + return "MTBUF"; + case InstructionClass::Mimg: + return "MIMG"; + case InstructionClass::Ds: + return "DS"; + case InstructionClass::Vintrp: + return "VINTRP"; + case InstructionClass::Exp: + return "EXP"; + case InstructionClass::Vop1: + return "VOP1"; + case InstructionClass::Vopc: + return "VOPC"; + case InstructionClass::Sop1: + return "SOP1"; + case InstructionClass::Sopc: + return "SOPC"; + case InstructionClass::Sopp: + return "SOPP"; + } + + __builtin_trap(); +} diff --git a/hw/amdgpu/shader/src/RegisterState.cpp b/hw/amdgpu/shader/src/RegisterState.cpp new file mode 100644 index 000000000..b2dc221af --- /dev/null +++ b/hw/amdgpu/shader/src/RegisterState.cpp @@ -0,0 +1,72 @@ +#include "RegisterState.hpp" +#include "util/unreachable.hpp" + +amdgpu::shader::Value +amdgpu::shader::RegisterState::getRegister(RegisterId regId) { + auto offset = regId.getOffset(); + + if (regId.isScalar()) { + switch (offset) { + case 0 ... 103: + return sgprs[offset]; + case 106: + return vccLo; + case 107: + return vccHi; + case 124: + return m0; + case 126: + return execLo; + case 127: + return execHi; + case 253: + return scc; + case 254: + return ldsDirect; + } + + util::unreachable(); + } + + if (regId.isVector()) { + return vgprs[offset]; + } + + if (regId.isAttr()) { + return attrs[offset]; + } + + util::unreachable(); +} + +void amdgpu::shader::RegisterState::setRegister(RegisterId regId, + Value value) { + auto offset = regId.getOffset(); + + if (regId.isScalar()) { + switch (offset) { + case 0 ... 103: sgprs[offset] = value; return; + case 106: vccLo = value; return; + case 107: vccHi = value; return; + case 124: m0 = value; return; + case 126: execLo = value; return; + case 127: execHi = value; return; + case 253: scc = value; return; + case 254: ldsDirect = value; return; + } + + util::unreachable(); + } + + if (regId.isVector()) { + vgprs[offset] = value; + return; + } + + if (regId.isAttr()) { + attrs[offset] = value; + return; + } + + util::unreachable(); +} diff --git a/hw/amdgpu/shader/src/TypeId.cpp b/hw/amdgpu/shader/src/TypeId.cpp new file mode 100644 index 000000000..4e1ea954d --- /dev/null +++ b/hw/amdgpu/shader/src/TypeId.cpp @@ -0,0 +1,132 @@ +#include "TypeId.hpp" +#include "util/unreachable.hpp" +#include + +amdgpu::shader::TypeId amdgpu::shader::TypeId::getBaseType() const { + switch (raw) { + case TypeId::Void: + case TypeId::Bool: + case TypeId::SInt8: + case TypeId::UInt8: + case TypeId::SInt16: + case TypeId::UInt16: + case TypeId::SInt32: + case TypeId::UInt32: + case TypeId::SInt64: + case TypeId::UInt64: + case TypeId::Float16: + case TypeId::Float32: + case TypeId::Float64: + case TypeId::Sampler: + case TypeId::Image2D: + case TypeId::SampledImage2D: + return raw; + + case TypeId::UInt32x2: + case TypeId::UInt32x3: + case TypeId::UInt32x4: + case TypeId::ArrayUInt32x8: + case TypeId::ArrayUInt32x16: + return TypeId::UInt32; + + case TypeId::Float32x2: + case TypeId::Float32x3: + case TypeId::Float32x4: + case TypeId::ArrayFloat32x8: + case TypeId::ArrayFloat32x16: + return TypeId::Float32; + } + + util::unreachable(); +} + +std::size_t amdgpu::shader::TypeId::getSize() const { + switch (raw) { + case TypeId::Void: + case TypeId::Sampler: + case TypeId::Image2D: + case TypeId::SampledImage2D: + return 0; + case TypeId::Bool: + return 1; + case TypeId::SInt8: + case TypeId::UInt8: + return 1; + case TypeId::SInt16: + case TypeId::UInt16: + return 2; + case TypeId::SInt32: + case TypeId::UInt32: + return 4; + case TypeId::SInt64: + case TypeId::UInt64: + return 8; + case TypeId::Float16: + return 2; + case TypeId::Float32: + return 4; + case TypeId::Float64: + return 8; + + case TypeId::UInt32x2: + case TypeId::UInt32x3: + case TypeId::UInt32x4: + case TypeId::ArrayUInt32x8: + case TypeId::ArrayUInt32x16: + case TypeId::Float32x2: + case TypeId::Float32x3: + case TypeId::Float32x4: + case TypeId::ArrayFloat32x8: + case TypeId::ArrayFloat32x16: + return getElementsCount() * getBaseType().getSize(); + } + + util::unreachable(); +} + +std::size_t amdgpu::shader::TypeId::getElementsCount() const { + switch (raw) { + case TypeId::Bool: + case TypeId::SInt8: + case TypeId::UInt8: + case TypeId::SInt16: + case TypeId::UInt16: + case TypeId::SInt32: + case TypeId::UInt32: + case TypeId::SInt64: + case TypeId::UInt64: + case TypeId::Float16: + case TypeId::Float32: + case TypeId::Float64: + return 1; + + case TypeId::UInt32x2: + return 2; + case TypeId::UInt32x3: + return 3; + case TypeId::UInt32x4: + return 4; + case TypeId::ArrayUInt32x8: + return 8; + case TypeId::ArrayUInt32x16: + return 16; + case TypeId::Float32x2: + return 2; + case TypeId::Float32x3: + return 3; + case TypeId::Float32x4: + return 4; + case TypeId::ArrayFloat32x8: + return 8; + case TypeId::ArrayFloat32x16: + return 16; + + case TypeId::Void: + case TypeId::Sampler: + case TypeId::Image2D: + case TypeId::SampledImage2D: + return 0; + } + + util::unreachable(); +} diff --git a/hw/amdgpu/shader/src/cf.cpp b/hw/amdgpu/shader/src/cf.cpp new file mode 100644 index 000000000..c0aa78eec --- /dev/null +++ b/hw/amdgpu/shader/src/cf.cpp @@ -0,0 +1,117 @@ +#include "cf.hpp" +#include +#include +#include + +void cf::BasicBlock::split(BasicBlock *target) { + assert(target->address > address); + target->size = size - (target->address - address); + size = target->address - address; + + for (std::size_t i = 0, count = getSuccessorsCount(); i < count; ++i) { + auto succ = getSuccessor(i); + succ->predecessors.erase(this); + succ->predecessors.insert(target); + target->successors[i] = successors[i]; + successors[i] = nullptr; + } + + target->terminator = terminator; + terminator = TerminatorKind::None; + + createBranch(target); +} + +void cf::BasicBlock::createConditionalBranch(BasicBlock *ifTrue, + BasicBlock *ifFalse) { + assert(terminator == TerminatorKind::None); + assert(getSuccessorsCount() == 0); + ifTrue->predecessors.insert(this); + ifFalse->predecessors.insert(this); + + successors[0] = ifTrue; + successors[1] = ifFalse; + + terminator = TerminatorKind::Branch; +} + +void cf::BasicBlock::createBranch(BasicBlock *target) { + assert(terminator == TerminatorKind::None); + assert(getSuccessorsCount() == 0); + + target->predecessors.insert(this); + successors[0] = target; + + terminator = TerminatorKind::Branch; +} + +void cf::BasicBlock::createBranchToUnknown() { + assert(terminator == TerminatorKind::None); + assert(getSuccessorsCount() == 0); + + terminator = TerminatorKind::BranchToUnknown; +} + +void cf::BasicBlock::createReturn() { + assert(terminator == TerminatorKind::None); + assert(getSuccessorsCount() == 0); + + terminator = TerminatorKind::Return; +} + +void cf::BasicBlock::replaceSuccessor(BasicBlock *origBB, BasicBlock *newBB) { + origBB->predecessors.erase(this); + newBB->predecessors.insert(this); + + if (origBB == successors[0]) { + successors[0] = newBB; + return; + } + + if (origBB == successors[1]) { + successors[1] = newBB; + return; + } + + std::abort(); +} + +bool cf::BasicBlock::hasDirectPredecessor(const BasicBlock &block) const { + for (auto pred : predecessors) { + if (pred == &block) { + return true; + } + } + + return false; +} + +bool cf::BasicBlock::hasPredecessor(const BasicBlock &block) const { + if (&block == this) { + return hasDirectPredecessor(block); + } + + std::vector workList; + std::unordered_set visited; + workList.push_back(this); + visited.insert(this); + + while (!workList.empty()) { + auto node = workList.back(); + + if (node == &block) { + return true; + } + + workList.pop_back(); + workList.reserve(workList.size() + predecessors.size()); + + for (auto pred : predecessors) { + if (visited.insert(pred).second) { + workList.push_back(pred); + } + } + } + + return false; +} diff --git a/hw/amdgpu/shader/src/scf.cpp b/hw/amdgpu/shader/src/scf.cpp new file mode 100644 index 000000000..b11fd92c3 --- /dev/null +++ b/hw/amdgpu/shader/src/scf.cpp @@ -0,0 +1,252 @@ +#include "scf.hpp" +#include "cf.hpp" +#include +#include +#include +#include + +void scf::Block::eraseFrom(Node *endBefore) { + mEnd = endBefore->getPrev(); + if (mEnd != nullptr) { + mEnd->mNext = nullptr; + } else { + mBegin = nullptr; + } +} + +void scf::Block::splitInto(Block *target, Node *splitPoint) { + auto targetEnd = std::exchange(mEnd, splitPoint->mPrev); + + if (mEnd != nullptr) { + mEnd->mNext = nullptr; + } else { + mBegin = nullptr; + } + + for (auto node = splitPoint; node != nullptr; node = node->getNext()) { + node->mParent = target; + } + + if (target->mEnd != nullptr) { + target->mEnd->mNext = splitPoint; + } + + splitPoint->mPrev = target->mEnd; + target->mEnd = targetEnd; + + if (target->mBegin == nullptr) { + target->mBegin = splitPoint; + } +} + +scf::Block *scf::Block::split(Context &context, Node *splitPoint) { + auto result = context.create(); + splitInto(result, splitPoint); + return result; +} + +static scf::BasicBlock *findJumpTargetIn(scf::Block *parentBlock, + scf::Block *testBlock) { + auto jumpNode = dynCast(testBlock->getLastNode()); + + if (jumpNode == nullptr || jumpNode->target->getParent() != parentBlock) { + return nullptr; + } + + return jumpNode->target; +} + +static bool transformJumpToLoop(scf::Context &ctxt, scf::Block *block) { + // bb0 + // bb1 + // if true { + // bb2 + // jump bb1 + // } else { + // bb3 + // } + // + // --> + // + // bb0 + // loop { + // bb1 + // if false { + // break + // } + // bb2 + // } + // bb3 + + if (block->isEmpty()) { + return false; + } + + auto ifElse = dynCast(block->getLastNode()); + + if (ifElse == nullptr) { + return false; + } + + auto loopTarget = findJumpTargetIn(block, ifElse->ifTrue); + auto loopBlock = ifElse->ifTrue; + auto invariantBlock = ifElse->ifFalse; + + if (loopTarget == nullptr) { + loopTarget = findJumpTargetIn(block, ifElse->ifFalse); + loopBlock = ifElse->ifFalse; + invariantBlock = ifElse->ifTrue; + + if (loopTarget == nullptr) { + return false; + } + } + + auto loopBody = block->split(ctxt, loopTarget); + auto loop = ctxt.create(loopBody); + block->append(loop); + + for (auto node = invariantBlock->getRootNode(); node != nullptr;) { + auto nextNode = node->getNext(); + invariantBlock->detachNode(node); + block->append(node); + node = nextNode; + } + + loopBlock->detachNode(loopBlock->getLastNode()); + + for (auto node = loopBlock->getRootNode(); node != nullptr;) { + auto nextNode = node->getNext(); + loopBlock->detachNode(node); + loopBody->append(node); + node = nextNode; + } + + invariantBlock->append(ctxt.create()); + + return true; +} + +static bool moveSameLastBlocksTo(scf::IfElse *ifElse, scf::Block *block) { + if (ifElse->ifTrue->isEmpty() || ifElse->ifFalse->isEmpty()) { + return false; + } + + auto ifTrueIt = ifElse->ifTrue->getLastNode(); + auto ifFalseIt = ifElse->ifFalse->getLastNode(); + + while (ifTrueIt != nullptr && ifFalseIt != nullptr) { + if (!ifTrueIt->isEqual(*ifFalseIt)) { + break; + } + + ifTrueIt = ifTrueIt->getPrev(); + ifFalseIt = ifFalseIt->getPrev(); + } + + if (ifTrueIt == ifElse->ifTrue->getLastNode()) { + return false; + } + + if (ifTrueIt == nullptr) { + ifTrueIt = ifElse->ifTrue->getRootNode(); + } else { + ifTrueIt = ifTrueIt->getNext(); + } + + if (ifFalseIt == nullptr) { + ifFalseIt = ifElse->ifFalse->getRootNode(); + } else { + ifFalseIt = ifFalseIt->getNext(); + } + + ifElse->ifTrue->splitInto(block, ifTrueIt); + ifElse->ifFalse->eraseFrom(ifFalseIt); + return true; +} + +class Structurizer { + scf::Context &context; + +public: + Structurizer(scf::Context &context) : context(context) {} + + scf::Block *structurize(cf::BasicBlock *bb) { + return structurizeBlock(bb, {}); + } + +public: + scf::IfElse *structurizeIfElse( + cf::BasicBlock *ifTrue, cf::BasicBlock *ifFalse, + std::unordered_map &visited) { + auto ifTrueBlock = structurizeBlock(ifTrue, visited); + auto ifFalseBlock = structurizeBlock(ifFalse, visited); + + return context.create(ifTrueBlock, ifFalseBlock); + } + + scf::Block *structurizeBlock( + cf::BasicBlock *bb, + std::unordered_map visited) { + auto result = context.create(); + std::vector workList; + workList.push_back(bb); + + while (!workList.empty()) { + auto block = workList.back(); + workList.pop_back(); + + auto [it, inserted] = visited.try_emplace(block, nullptr); + if (!inserted) { + result->append(context.create(it->second)); + continue; + } + + auto scfBlock = context.create(block->getAddress(), + block->getSize()); + it->second = scfBlock; + result->append(scfBlock); + + switch (block->getTerminator()) { + case cf::TerminatorKind::None: + std::abort(); + break; + + case cf::TerminatorKind::Branch: + switch (block->getSuccessorsCount()) { + case 1: + workList.push_back(block->getSuccessor(0)); + break; + + case 2: { + auto ifElse = structurizeIfElse(block->getSuccessor(0), + block->getSuccessor(1), visited); + result->append(ifElse); + + while (moveSameLastBlocksTo(ifElse, result) || + transformJumpToLoop(context, result)) { + ; + } + + break; + } + } + break; + + case cf::TerminatorKind::BranchToUnknown: + result->append(context.create()); + break; + + case cf::TerminatorKind::Return: + result->append(context.create()); + break; + } + } + + return result; + } +}; + +scf::Block *scf::structurize(Context &ctxt, cf::BasicBlock *bb) { + return Structurizer{ctxt}.structurize(bb); +} diff --git a/orbis-kernel b/orbis-kernel index 05d35b714..6a093985c 160000 --- a/orbis-kernel +++ b/orbis-kernel @@ -1 +1 @@ -Subproject commit 05d35b71483880246bc4c1a28f857e9046af7c36 +Subproject commit 6a093985c4a331661fd47ff9f1c06e4b9b102002