Added amdgpu hw project

2026-02-08 08:44:39 +01:00 · 2023-06-24 15:59:27 +03:00 · 2023-06-24 15:59:27 +03:00 · a8af9198bf
parent 1fdadaaee9
commit a8af9198bf
49 changed files with 28342 additions and 1 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -7,3 +7,4 @@ set(CMAKE_CXX_STANDARD 23)
 add_subdirectory(3rdparty/crypto)
 add_subdirectory(orbis-kernel)
 add_subdirectory(rpcsx-os)
+add_subdirectory(hw/amdgpu)
--- a/hw/amdgpu/CMakeLists.txt
+++ b/hw/amdgpu/CMakeLists.txt
@ -0,0 +1,17 @@
+cmake_minimum_required(VERSION 3.10)
+
+set(CMAKE_CXX_STANDARD 23)
+set(CMAKE_CXX_EXTENSIONS off)
+
+add_subdirectory(bridge)
+add_subdirectory(device)
+add_subdirectory(shader)
+add_subdirectory(lib/libspirv)
+
+project(amdgpu)
+
+add_library(${PROJECT_NAME} INTERFACE)
+target_include_directories(${PROJECT_NAME} INTERFACE include)
+
+add_library(amdgpu::base ALIAS ${PROJECT_NAME})
+
--- a/hw/amdgpu/bridge/CMakeLists.txt
+++ b/hw/amdgpu/bridge/CMakeLists.txt
@ -0,0 +1,16 @@
+project(libamdgpu-bridge)
+set(PROJECT_PATH amdgpu/bridge)
+
+set(INCLUDE
+    include/${PROJECT_PATH}/bridge.hpp
+)
+
+set(SRC
+    src/bridge.cpp
+)
+
+add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC})
+target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH})
+set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "")
+add_library(amdgpu::bridge ALIAS ${PROJECT_NAME})
+set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON)
--- a/hw/amdgpu/bridge/include/amdgpu/bridge/bridge.hpp
+++ b/hw/amdgpu/bridge/include/amdgpu/bridge/bridge.hpp
@ -0,0 +1,256 @@
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+#include <initializer_list>
+
+namespace amdgpu::bridge {
+enum class CommandId : std::uint32_t {
+  Nop,
+  SetUpSharedMemory,
+  ProtectMemory,
+  CommandBuffer,
+  Flip,
+  DoFlip,
+  SetBuffer
+};
+
+struct CmdMemoryProt {
+  std::uint64_t address;
+  std::uint64_t size;
+  std::uint32_t prot;
+};
+
+struct CmdCommandBuffer {
+  std::uint64_t queue;
+  std::uint64_t address;
+  std::uint64_t size;
+};
+
+struct CmdBuffer {
+  std::uint32_t bufferIndex;
+  std::uint32_t width;
+  std::uint32_t height;
+  std::uint32_t pitch;
+  std::uint64_t address;
+  std::uint32_t pixelFormat;
+  std::uint32_t tilingMode;
+};
+
+struct CmdFlip {
+  std::uint32_t bufferIndex;
+  std::uint64_t arg;
+};
+
+struct BridgeHeader {
+  std::uint64_t size;
+  std::uint64_t info;
+  std::uint32_t pullerPid;
+  std::uint32_t pusherPid;
+  volatile std::uint64_t flags;
+  std::uint64_t vmAddress;
+  std::uint64_t vmSize;
+  char vmName[32];
+  volatile std::uint32_t flipBuffer;
+  volatile std::uint64_t flipArg;
+  volatile std::uint64_t flipCount;
+  std::uint32_t memoryAreaCount;
+  std::uint32_t commandBufferCount;
+  std::uint32_t bufferCount;
+  CmdMemoryProt memoryAreas[128];
+  CmdCommandBuffer commandBuffers[32];
+  CmdBuffer buffers[8];
+
+  volatile std::uint64_t pull;
+  volatile std::uint64_t push;
+  std::uint64_t commands[];
+};
+
+struct Command {
+  CommandId id;
+
+  union {
+    CmdMemoryProt memoryProt;
+    CmdCommandBuffer commandBuffer;
+    CmdBuffer buffer;
+    CmdFlip flip;
+  };
+};
+
+enum class BridgeFlags {
+  VmConfigured = 1 << 0,
+  PushLock = 1 << 1,
+  PullLock = 1 << 2,
+};
+
+class BridgePusher {
+  BridgeHeader *buffer = nullptr;
+
+public:
+  BridgePusher() = default;
+  BridgePusher(BridgeHeader *buffer) : buffer(buffer) {}
+
+  void setVm(std::uint64_t address, std::uint64_t size, const char *name) {
+    buffer->vmAddress = address;
+    buffer->vmSize = size;
+    std::strncpy(buffer->vmName, name, sizeof(buffer->vmName));
+    buffer->flags |= static_cast<std::uint64_t>(BridgeFlags::VmConfigured);
+  }
+
+  void sendMemoryProtect(std::uint64_t address, std::uint64_t size,
+                         std::uint32_t prot) {
+    sendCommand(CommandId::ProtectMemory, {address, size, prot});
+  }
+
+  void sendCommandBuffer(std::uint64_t queue, std::uint64_t address,
+                         std::uint64_t size) {
+    sendCommand(CommandId::CommandBuffer, {queue, address, size});
+  }
+
+  void sendSetBuffer(std::uint32_t bufferIndex, std::uint64_t address,
+                     std::uint32_t width, std::uint32_t height,
+                     std::uint32_t pitch, std::uint32_t pixelFormat,
+                     std::uint32_t tilingMode) {
+    sendCommand(CommandId::SetBuffer,
+                {static_cast<std::uint64_t>(bufferIndex) << 32 | tilingMode,
+                 address, static_cast<std::uint64_t>(width) << 32 | height,
+                 static_cast<std::uint64_t>(pitch) << 32 | pixelFormat});
+  }
+
+  void sendFlip(std::uint32_t bufferIndex, std::uint64_t arg) {
+    sendCommand(CommandId::Flip, {bufferIndex, arg});
+  }
+
+  void sendDoFlip() { sendCommand(CommandId::DoFlip, {}); }
+
+  void wait() {
+    while (buffer->pull != buffer->push)
+      ;
+  }
+
+private:
+  static std::uint64_t makeCommandHeader(CommandId id, std::size_t cmdSize) {
+    return static_cast<std::uint64_t>(id) |
+           (static_cast<std::uint64_t>(cmdSize - 1) << 32);
+  }
+
+  void sendCommand(CommandId id, std::initializer_list<std::uint64_t> args) {
+    std::size_t cmdSize = args.size() + 1;
+    std::uint64_t pos = getPushPosition(cmdSize);
+
+    buffer->commands[pos++] = makeCommandHeader(CommandId::Flip, cmdSize);
+    for (auto arg : args) {
+      buffer->commands[pos++] = arg;
+    }
+    buffer->push = pos;
+  }
+
+  std::uint64_t getPushPosition(std::uint64_t cmdSize) {
+    std::uint64_t position = buffer->push;
+
+    if (position + cmdSize > buffer->size) {
+      if (position < buffer->size) {
+        buffer->commands[position] =
+            static_cast<std::uint64_t>(CommandId::Nop) |
+            ((buffer->size - position - 1) << 32);
+      }
+
+      position = 0;
+      waitPuller(cmdSize);
+    }
+
+    return position;
+  }
+  void waitPuller(std::uint64_t pullValue) {
+    while (buffer->pull < pullValue) {
+      ;
+    }
+  }
+};
+
+class BridgePuller {
+  BridgeHeader *buffer = nullptr;
+
+public:
+  BridgePuller() = default;
+  BridgePuller(BridgeHeader *buffer) : buffer(buffer) {}
+
+  std::size_t pullCommands(Command *commands, std::size_t maxCount) {
+    std::size_t processed = 0;
+
+    while (processed < maxCount) {
+      if (buffer->pull == buffer->push) {
+        break;
+      }
+
+      auto pos = buffer->pull;
+      auto cmd = buffer->commands[pos];
+      CommandId cmdId = static_cast<CommandId>(cmd);
+      std::uint32_t argsCount = cmd >> 32;
+
+      if (cmdId != CommandId::Nop) {
+        commands[processed++] =
+            unpackCommand(cmdId, buffer->commands + pos + 1, argsCount);
+      }
+
+      auto newPull = pos + argsCount + 1;
+
+      if (newPull >= buffer->size) {
+        newPull = 0;
+      }
+
+      buffer->pull = newPull;
+    }
+
+    return processed;
+  }
+
+private:
+  Command unpackCommand(CommandId command, const std::uint64_t *args,
+                        std::uint32_t argsCount) {
+    Command result;
+    result.id = command;
+
+    switch (command) {
+    case CommandId::Nop:
+    case CommandId::SetUpSharedMemory:
+    case CommandId::DoFlip:
+      return result;
+
+    case CommandId::ProtectMemory:
+      result.memoryProt.address = args[0];
+      result.memoryProt.size = args[1];
+      result.memoryProt.prot = args[2];
+      return result;
+
+    case CommandId::CommandBuffer:
+      result.commandBuffer.queue = args[0];
+      result.commandBuffer.address = args[1];
+      result.commandBuffer.size = args[2];
+      return result;
+
+    case CommandId::Flip:
+      result.flip.bufferIndex = args[0];
+      result.flip.arg = args[1];
+      return result;
+
+    case CommandId::SetBuffer:
+      result.buffer.bufferIndex = static_cast<std::uint32_t>(args[0] >> 32);
+      result.buffer.address = args[1];
+      result.buffer.width = static_cast<std::uint32_t>(args[2] >> 32);
+      result.buffer.height = static_cast<std::uint32_t>(args[2]);
+      result.buffer.pitch = static_cast<std::uint32_t>(args[3] >> 32);
+      result.buffer.pixelFormat = static_cast<std::uint32_t>(args[3]);
+      result.buffer.tilingMode = static_cast<std::uint32_t>(args[0]);
+      return result;
+    }
+
+    __builtin_trap();
+  }
+};
+
+BridgeHeader *createShmCommandBuffer(const char *name);
+BridgeHeader *openShmCommandBuffer(const char *name);
+void destroyShmCommandBuffer(BridgeHeader *buffer);
+void unlinkShm(const char *name);
+} // namespace amdgpu::bridge
--- a/hw/amdgpu/bridge/src/bridge.cpp
+++ b/hw/amdgpu/bridge/src/bridge.cpp
@ -0,0 +1,81 @@
+#include "bridge.hpp"
+
+#include <fcntl.h>
+#include <new>
+#include <sys/mman.h>
+#include <unistd.h>
+
+static int gShmFd = -1;
+static constexpr std::size_t kShmSize = sizeof(amdgpu::bridge::BridgeHeader) +
+                                        (sizeof(std::uint64_t) * (1024 * 1024));
+amdgpu::bridge::BridgeHeader *
+amdgpu::bridge::createShmCommandBuffer(const char *name) {
+  if (gShmFd != -1) {
+    return nullptr;
+  }
+
+  unlinkShm(name);
+
+  int fd = ::shm_open(name, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
+
+  if (fd == -1) {
+    return nullptr;
+  }
+
+  if (ftruncate(fd, kShmSize) < 0) {
+    ::close(fd);
+    return nullptr;
+  }
+
+  void *memory =
+      ::mmap(nullptr, kShmSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+  if (memory == MAP_FAILED) {
+    ::close(fd);
+    return nullptr;
+  }
+
+  gShmFd = fd;
+  auto result = new (memory) amdgpu::bridge::BridgeHeader();
+  result->size = (kShmSize - sizeof(amdgpu::bridge::BridgeHeader)) /
+                 sizeof(std::uint64_t);
+  return result;
+}
+
+amdgpu::bridge::BridgeHeader *
+amdgpu::bridge::openShmCommandBuffer(const char *name) {
+  if (gShmFd != -1) {
+    return nullptr;
+  }
+
+  int fd = ::shm_open(name, O_RDWR, S_IRUSR | S_IWUSR);
+
+  if (fd == -1) {
+    return nullptr;
+  }
+
+  void *memory =
+      ::mmap(nullptr, kShmSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+  if (memory == MAP_FAILED) {
+    ::close(fd);
+    return nullptr;
+  }
+
+  gShmFd = fd;
+  return new (memory) amdgpu::bridge::BridgeHeader;
+}
+
+void amdgpu::bridge::destroyShmCommandBuffer(
+    amdgpu::bridge::BridgeHeader *buffer) {
+  if (gShmFd == -1) {
+    __builtin_trap();
+  }
+
+  buffer->~BridgeHeader();
+  ::close(gShmFd);
+  gShmFd = -1;
+  ::munmap(buffer, kShmSize);
+}
+
+void amdgpu::bridge::unlinkShm(const char *name) { ::shm_unlink(name); }
--- a/hw/amdgpu/device/CMakeLists.txt
+++ b/hw/amdgpu/device/CMakeLists.txt
@ -0,0 +1,66 @@
+project(libamdgpu-device)
+set(PROJECT_PATH amdgpu/device)
+
+set(SRC
+    src/device.cpp
+)
+
+function(add_precompiled_vulkan_spirv target)
+    add_library(${target} INTERFACE)
+    set(SPIRV_GEN_ROOT_DIR "spirv-gen/include/")
+    set(SPIRV_GEN_DIR "${SPIRV_GEN_ROOT_DIR}/shaders")
+
+    cmake_path(ABSOLUTE_PATH SPIRV_GEN_ROOT_DIR BASE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} NORMALIZE OUTPUT_VARIABLE outputrootdir)
+    cmake_path(ABSOLUTE_PATH SPIRV_GEN_DIR BASE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} NORMALIZE OUTPUT_VARIABLE outputdir)
+    file(MAKE_DIRECTORY ${outputrootdir})
+    file(MAKE_DIRECTORY ${outputdir})
+    target_include_directories(${target} INTERFACE ${outputrootdir})
+
+    foreach(input IN LISTS ARGN)
+        cmake_path(GET input FILENAME inputname)
+        cmake_path(REPLACE_EXTENSION inputname LAST_ONLY .h OUTPUT_VARIABLE outputname)
+        cmake_path(APPEND outputdir ${outputname} OUTPUT_VARIABLE outputpath)
+        cmake_path(REMOVE_EXTENSION inputname LAST_ONLY OUTPUT_VARIABLE varname)
+
+        string(REPLACE "." "_" varname ${varname})
+        string(PREPEND varname "spirv_")
+
+        add_custom_command(
+            OUTPUT ${outputpath}
+            COMMAND glslangValidator -V --vn "${varname}" -o "${outputpath}" "${CMAKE_CURRENT_SOURCE_DIR}/${input}"
+            DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${input}"
+            COMMENT "Generating ${outputname}..."
+        )
+
+        set(subtarget ".${target}-subtarget-${outputname}")
+        add_custom_target(${subtarget} DEPENDS ${outputpath})
+        add_dependencies(${target} ${subtarget})
+    endforeach()
+endfunction()
+
+add_precompiled_vulkan_spirv(${PROJECT_NAME}-shaders
+    src/rect_list.geom.glsl
+)
+
+find_package(SPIRV-Tools REQUIRED CONFIG)
+find_package(SPIRV-Tools-opt REQUIRED CONFIG)
+
+add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC})
+target_link_libraries(${PROJECT_NAME}
+PUBLIC
+    spirv
+    amdgpu::base
+    amdgpu::bridge
+    amdgpu::shader
+    util
+    SPIRV-Tools
+    SPIRV-Tools-opt
+
+PRIVATE
+    ${PROJECT_NAME}-shaders
+)
+
+target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH})
+set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "")
+add_library(amdgpu::device ALIAS ${PROJECT_NAME})
+set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON)
--- a/hw/amdgpu/device/include/amdgpu/device/device.hpp
+++ b/hw/amdgpu/device/include/amdgpu/device/device.hpp
--- a/hw/amdgpu/device/include/amdgpu/device/pm4.hpp
+++ b/hw/amdgpu/device/include/amdgpu/device/pm4.hpp
@ -0,0 +1,102 @@
+#pragma once
+
+namespace amdgpu {
+enum PM4Opcodes {
+  NOP = 0x10,
+  SET_BASE = 0x11,
+  CLEAR_STATE = 0x12,
+  INDEX_BUFFER_SIZE = 0x13,
+  DISPATCH_DIRECT = 0x15,
+  DISPATCH_INDIRECT = 0x16,
+  INDIRECT_BUFFER_END = 0x17,
+  MODE_CONTROL = 0x18,
+  ATOMIC_GDS = 0x1D,
+  ATOMIC_MEM = 0x1E,
+  OCCLUSION_QUERY = 0x1F,
+  SET_PREDICATION = 0x20,
+  REG_RMW = 0x21,
+  COND_EXEC = 0x22,
+  PRED_EXEC = 0x23,
+  DRAW_INDIRECT = 0x24,
+  DRAW_INDEX_INDIRECT = 0x25,
+  INDEX_BASE = 0x26,
+  DRAW_INDEX_2 = 0x27,
+  CONTEXT_CONTROL = 0x28,
+  DRAW_INDEX_OFFSET = 0x29,
+  INDEX_TYPE = 0x2A,
+  DRAW_INDEX = 0x2B,
+  DRAW_INDIRECT_MULTI = 0x2C,
+  DRAW_INDEX_AUTO = 0x2D,
+  DRAW_INDEX_IMMD = 0x2E,
+  NUM_INSTANCES = 0x2F,
+  DRAW_INDEX_MULTI_AUTO = 0x30,
+  INDIRECT_BUFFER_32 = 0x32,
+  INDIRECT_BUFFER_CONST = 0x33,
+  STRMOUT_BUFFER_UPDATE = 0x34,
+  DRAW_INDEX_OFFSET_2 = 0x35,
+  DRAW_PREAMBLE = 0x36,
+  WRITE_DATA = 0x37,
+  DRAW_INDEX_INDIRECT_MULTI = 0x38,
+  MEM_SEMAPHORE = 0x39,
+  MPEG_INDEX = 0x3A,
+  COPY_DW = 0x3B,
+  WAIT_REG_MEM = 0x3C,
+  MEM_WRITE = 0x3D,
+  INDIRECT_BUFFER_3F = 0x3F,
+  COPY_DATA = 0x40,
+  CP_DMA = 0x41,
+  PFP_SYNC_ME = 0x42,
+  SURFACE_SYNC = 0x43,
+  ME_INITIALIZE = 0x44,
+  COND_WRITE = 0x45,
+  EVENT_WRITE = 0x46,
+  EVENT_WRITE_EOP = 0x47,
+  EVENT_WRITE_EOS = 0x48,
+  RELEASE_MEM = 0x49,
+  PREAMBLE_CNTL = 0x4A,
+  RB_OFFSET = 0x4B,
+  ALU_PS_CONST_BUFFER_COPY = 0x4C,
+  ALU_VS_CONST_BUFFER_COPY = 0x4D,
+  ALU_PS_CONST_UPDATE = 0x4E,
+  ALU_VS_CONST_UPDATE = 0x4F,
+  DMA_DATA = 0x50,
+  ONE_REG_WRITE = 0x57,
+  AQUIRE_MEM = 0x58,
+  REWIND = 0x59,
+  LOAD_UCONFIG_REG = 0x5E,
+  LOAD_SH_REG = 0x5F,
+  LOAD_CONFIG_REG = 0x60,
+  LOAD_CONTEXT_REG = 0x61,
+  SET_CONFIG_REG = 0x68,
+  SET_CONTEXT_REG = 0x69,
+  SET_ALU_CONST = 0x6A,
+  SET_BOOL_CONST = 0x6B,
+  SET_LOOP_CONST = 0x6C,
+  SET_RESOURCE = 0x6D,
+  SET_SAMPLER = 0x6E,
+  SET_CTL_CONST = 0x6F,
+  SET_RESOURCE_OFFSET = 0x70,
+  SET_ALU_CONST_VS = 0x71,
+  SET_ALU_CONST_DI = 0x72,
+  SET_CONTEXT_REG_INDIRECT = 0x73,
+  SET_RESOURCE_INDIRECT = 0x74,
+  SET_APPEND_CNT = 0x75,
+  SET_SH_REG = 0x76,
+  SET_SH_REG_OFFSET = 0x77,
+  SET_QUEUE_REG = 0x78,
+  SET_UCONFIG_REG = 0x79,
+  SCRATCH_RAM_WRITE = 0x7D,
+  SCRATCH_RAM_READ = 0x7E,
+  LOAD_CONST_RAM = 0x80,
+  WRITE_CONST_RAM = 0x81,
+  DUMP_CONST_RAM = 0x83,
+  INCREMENT_CE_COUNTER = 0x84,
+  INCREMENT_DE_COUNTER = 0x85,
+  WAIT_ON_CE_COUNTER = 0x86,
+  WAIT_ON_DE_COUNTER_DIFF = 0x88,
+  SWITCH_BUFFER = 0x8B,
+};
+
+const char *pm4OpcodeToString(int opcode);
+} // namespace amdgpu::device
+
--- a/hw/amdgpu/device/include/amdgpu/device/tiler.hpp
+++ b/hw/amdgpu/device/include/amdgpu/device/tiler.hpp
@ -0,0 +1,681 @@
+#pragma once
+#include "device.hpp"
+#include <algorithm>
+#include <cstdint>
+#include <cstdio>
+
+namespace amdgpu::device {
+namespace Gnm {
+enum GpuMode { kGpuModeBase = 0, kGpuModeNeo = 1 };
+enum TileMode {
+  kTileModeDepth_2dThin_64 = 0x00000000,
+  kTileModeDepth_2dThin_128 = 0x00000001,
+  kTileModeDepth_2dThin_256 = 0x00000002,
+  kTileModeDepth_2dThin_512 = 0x00000003,
+  kTileModeDepth_2dThin_1K = 0x00000004,
+  kTileModeDepth_2dThinPrt_256 = 0x00000006,
+
+  kTileModeDisplay_LinearAligned = 0x00000008,
+  kTileModeDisplay_2dThin = 0x0000000A,
+  kTileModeDisplay_ThinPrt = 0x0000000B,
+  kTileModeDisplay_2dThinPrt = 0x0000000C,
+
+  kTileModeThin_1dThin = 0x0000000D,
+  kTileModeThin_2dThin = 0x0000000E,
+  kTileModeThin_ThinPrt = 0x00000010,
+  kTileModeThin_2dThinPrt = 0x00000011,
+  kTileModeThin_3dThinPrt = 0x00000012,
+
+  kTileModeThick_1dThick = 0x00000013,
+  kTileModeThick_2dThick = 0x00000014,
+  kTileModeThick_ThickPrt = 0x00000016,
+  kTileModeThick_2dThickPrt = 0x00000017,
+  kTileModeThick_3dThickPrt = 0x00000018,
+  kTileModeThick_2dXThick = 0x00000019,
+};
+
+enum MicroTileMode {
+  kMicroTileModeDisplay = 0x00000000,
+  kMicroTileModeThin = 0x00000001,
+  kMicroTileModeDepth = 0x00000002,
+  kMicroTileModeRotated = 0x00000003,
+  kMicroTileModeThick = 0x00000004,
+};
+
+enum ArrayMode {
+  kArrayModeLinearGeneral = 0x00000000,
+  kArrayModeLinearAligned = 0x00000001,
+  kArrayMode1dTiledThin = 0x00000002,
+  kArrayMode1dTiledThick = 0x00000003,
+  kArrayMode2dTiledThin = 0x00000004,
+  kArrayModeTiledThinPrt = 0x00000005,
+  kArrayMode2dTiledThinPrt = 0x00000006,
+  kArrayMode2dTiledThick = 0x00000007,
+  kArrayMode2dTiledXThick = 0x00000008,
+  kArrayModeTiledThickPrt = 0x00000009,
+  kArrayMode2dTiledThickPrt = 0x0000000a,
+  kArrayMode3dTiledThinPrt = 0x0000000b,
+  kArrayMode3dTiledThin = 0x0000000c,
+  kArrayMode3dTiledThick = 0x0000000d,
+  kArrayMode3dTiledXThick = 0x0000000e,
+  kArrayMode3dTiledThickPrt = 0x0000000f,
+};
+
+enum PipeConfig {
+  kPipeConfigP8_32x32_8x16 = 0x0000000a,
+  kPipeConfigP8_32x32_16x16 = 0x0000000c,
+  kPipeConfigP16 = 0x00000012,
+};
+} // namespace Gnm
+
+#define GNM_ERROR(msg, ...)                                                    \
+  //std::fprintf(stderr, msg, __VA_ARGS__);                                      \
+  //std::abort() \
+  __builtin_trap();
+
+static constexpr uint32_t kMicroTileWidth = 8;
+static constexpr uint32_t kMicroTileHeight = 8;
+
+static constexpr uint32_t getElementIndex(uint32_t x, uint32_t y, uint32_t z,
+                                          uint32_t bitsPerElement,
+                                          Gnm::MicroTileMode microTileMode,
+                                          Gnm::ArrayMode arrayMode) {
+  uint32_t elem = 0;
+
+  if (microTileMode == Gnm::kMicroTileModeDisplay) {
+    switch (bitsPerElement) {
+    case 8:
+      elem |= ((x >> 0) & 0x1) << 0;
+      elem |= ((x >> 1) & 0x1) << 1;
+      elem |= ((x >> 2) & 0x1) << 2;
+      elem |= ((y >> 1) & 0x1) << 3;
+      elem |= ((y >> 0) & 0x1) << 4;
+      elem |= ((y >> 2) & 0x1) << 5;
+      break;
+    case 16:
+      elem |= ((x >> 0) & 0x1) << 0;
+      elem |= ((x >> 1) & 0x1) << 1;
+      elem |= ((x >> 2) & 0x1) << 2;
+      elem |= ((y >> 0) & 0x1) << 3;
+      elem |= ((y >> 1) & 0x1) << 4;
+      elem |= ((y >> 2) & 0x1) << 5;
+      break;
+    case 32:
+      elem |= ((x >> 0) & 0x1) << 0;
+      elem |= ((x >> 1) & 0x1) << 1;
+      elem |= ((y >> 0) & 0x1) << 2;
+      elem |= ((x >> 2) & 0x1) << 3;
+      elem |= ((y >> 1) & 0x1) << 4;
+      elem |= ((y >> 2) & 0x1) << 5;
+      break;
+    case 64:
+      elem |= ((x >> 0) & 0x1) << 0;
+      elem |= ((y >> 0) & 0x1) << 1;
+      elem |= ((x >> 1) & 0x1) << 2;
+      elem |= ((x >> 2) & 0x1) << 3;
+      elem |= ((y >> 1) & 0x1) << 4;
+      elem |= ((y >> 2) & 0x1) << 5;
+      break;
+    default:
+      GNM_ERROR("Unsupported bitsPerElement (%u) for displayable surface.",
+                bitsPerElement);
+    }
+  } else if (microTileMode == Gnm::kMicroTileModeThin ||
+             microTileMode == Gnm::kMicroTileModeDepth) {
+    elem |= ((x >> 0) & 0x1) << 0;
+    elem |= ((y >> 0) & 0x1) << 1;
+    elem |= ((x >> 1) & 0x1) << 2;
+    elem |= ((y >> 1) & 0x1) << 3;
+    elem |= ((x >> 2) & 0x1) << 4;
+    elem |= ((y >> 2) & 0x1) << 5;
+    // Use Z too, if the array mode is Thick/XThick
+    switch (arrayMode) {
+    case Gnm::kArrayMode2dTiledXThick:
+    case Gnm::kArrayMode3dTiledXThick:
+      elem |= ((z >> 2) & 0x1) << 8;
+      // Intentional fall-through
+    case Gnm::kArrayMode1dTiledThick:
+    case Gnm::kArrayMode2dTiledThick:
+    case Gnm::kArrayMode3dTiledThick:
+    case Gnm::kArrayModeTiledThickPrt:
+    case Gnm::kArrayMode2dTiledThickPrt:
+    case Gnm::kArrayMode3dTiledThickPrt:
+      elem |= ((z >> 0) & 0x1) << 6;
+      elem |= ((z >> 1) & 0x1) << 7;
+    default:
+      break; // no other thick modes
+    }
+  } else if (microTileMode == Gnm::kMicroTileModeThick) // thick/xthick
+  {
+    switch (arrayMode) {
+    case Gnm::kArrayMode2dTiledXThick:
+    case Gnm::kArrayMode3dTiledXThick:
+      elem |= ((z >> 2) & 0x1) << 8;
+      // intentional fall-through
+    case Gnm::kArrayMode1dTiledThick:
+    case Gnm::kArrayMode2dTiledThick:
+    case Gnm::kArrayMode3dTiledThick:
+    case Gnm::kArrayModeTiledThickPrt:
+    case Gnm::kArrayMode2dTiledThickPrt:
+    case Gnm::kArrayMode3dTiledThickPrt:
+      if (bitsPerElement == 8 || bitsPerElement == 16) {
+        elem |= ((x >> 0) & 0x1) << 0;
+        elem |= ((y >> 0) & 0x1) << 1;
+        elem |= ((x >> 1) & 0x1) << 2;
+        elem |= ((y >> 1) & 0x1) << 3;
+        elem |= ((z >> 0) & 0x1) << 4;
+        elem |= ((z >> 1) & 0x1) << 5;
+        elem |= ((x >> 2) & 0x1) << 6;
+        elem |= ((y >> 2) & 0x1) << 7;
+      } else if (bitsPerElement == 32) {
+        elem |= ((x >> 0) & 0x1) << 0;
+        elem |= ((y >> 0) & 0x1) << 1;
+        elem |= ((x >> 1) & 0x1) << 2;
+        elem |= ((z >> 0) & 0x1) << 3;
+        elem |= ((y >> 1) & 0x1) << 4;
+        elem |= ((z >> 1) & 0x1) << 5;
+        elem |= ((x >> 2) & 0x1) << 6;
+        elem |= ((y >> 2) & 0x1) << 7;
+      } else if (bitsPerElement == 64 || bitsPerElement == 128) {
+        elem |= ((x >> 0) & 0x1) << 0;
+        elem |= ((y >> 0) & 0x1) << 1;
+        elem |= ((z >> 0) & 0x1) << 2;
+        elem |= ((x >> 1) & 0x1) << 3;
+        elem |= ((y >> 1) & 0x1) << 4;
+        elem |= ((z >> 1) & 0x1) << 5;
+        elem |= ((x >> 2) & 0x1) << 6;
+        elem |= ((y >> 2) & 0x1) << 7;
+      } else {
+        GNM_ERROR("Invalid bitsPerElement (%u) for "
+                  "microTileMode=kMicroTileModeThick.",
+                  bitsPerElement);
+      }
+      break;
+    default:
+      GNM_ERROR("Invalid arrayMode (0x%02X) for thick/xthick "
+                "microTileMode=kMicroTileModeThick.",
+                arrayMode);
+    }
+  }
+  // TODO: rotated
+
+  return elem;
+}
+static constexpr uint32_t getPipeIndex(uint32_t x, uint32_t y,
+                                       Gnm::PipeConfig pipeCfg) {
+  uint32_t pipe = 0;
+  switch (pipeCfg) {
+  case Gnm::kPipeConfigP8_32x32_8x16:
+    pipe |= (((x >> 4) ^ (y >> 3) ^ (x >> 5)) & 0x1) << 0;
+    pipe |= (((x >> 3) ^ (y >> 4)) & 0x1) << 1;
+    pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
+    break;
+  case Gnm::kPipeConfigP8_32x32_16x16:
+    pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0;
+    pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1;
+    pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
+    break;
+  case Gnm::kPipeConfigP16:
+    pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0;
+    pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1;
+    pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
+    pipe |= (((x >> 6) ^ (y >> 5)) & 0x1) << 3;
+    break;
+  default:
+    GNM_ERROR("Unsupported pipeCfg (0x%02X).", pipeCfg);
+  }
+  return pipe;
+}
+
+inline constexpr uint32_t fastIntLog2(uint32_t i) {
+  return 31 - __builtin_clz(i | 1);
+}
+
+static constexpr uint32_t getBankIndex(uint32_t x, uint32_t y,
+                                       uint32_t bank_width,
+                                       uint32_t bank_height, uint32_t num_banks,
+                                       uint32_t num_pipes) {
+
+  // bank_width=1, bank_height=1, num_banks = 16, num_pipes=8
+  const uint32_t x_shift_offset = fastIntLog2(bank_width * num_pipes);
+  const uint32_t y_shift_offset = fastIntLog2(bank_height);
+  const uint32_t xs = x >> x_shift_offset;
+  const uint32_t ys = y >> y_shift_offset;
+
+  uint32_t bank = 0;
+  switch (num_banks) {
+  case 2:
+    bank |= (((xs >> 3) ^ (ys >> 3)) & 0x1) << 0;
+    break;
+  case 4:
+    bank |= (((xs >> 3) ^ (ys >> 4)) & 0x1) << 0;
+    bank |= (((xs >> 4) ^ (ys >> 3)) & 0x1) << 1;
+    break;
+  case 8:
+    bank |= (((xs >> 3) ^ (ys >> 5)) & 0x1) << 0;
+    bank |= (((xs >> 4) ^ (ys >> 4) ^ (ys >> 5)) & 0x1) << 1;
+    bank |= (((xs >> 5) ^ (ys >> 3)) & 0x1) << 2;
+    break;
+  case 16:
+    bank |= (((xs >> 3) ^ (ys >> 6)) & 0x1) << 0;
+    bank |= (((xs >> 4) ^ (ys >> 5) ^ (ys >> 6)) & 0x1) << 1;
+    bank |= (((xs >> 5) ^ (ys >> 4)) & 0x1) << 2;
+    bank |= (((xs >> 6) ^ (ys >> 3)) & 0x1) << 3;
+    break;
+  default:
+    GNM_ERROR("invalid num_banks (%u) -- must be 2, 4, 8, or 16.", num_banks);
+  }
+
+  return bank;
+}
+
+inline std::uint32_t getTexelsPerElement(SurfaceFormat format) {
+  if (format >= kSurfaceFormatBc1 && format <= kSurfaceFormatBc7) {
+    return 16;
+  }
+
+  if (format >= kSurfaceFormat1) {
+    return 8;
+  }
+
+  return 1;
+}
+
+inline std::uint32_t getBitsPerElement(SurfaceFormat format) {
+  static constexpr int bitsPerElement[] = {
+      0,  8,  16, 16, 32, 32, 32, 32, 32, 32, 32, 64, 64, 96, 128, -1,
+      16, 16, 16, 16, 32, 32, 64, -1, -1, -1, -1, -1, -1, -1, -1,  -1,
+      16, 16, 32, 4,  8,  8,  4,  8,  8,  8,  -1, -1, 8,  8,  8,   8,
+      8,  8,  16, 16, 32, 32, 32, 64, 64, 8,  16, 1,  1};
+
+  auto rawFormat = static_cast<unsigned>(format);
+
+  if (rawFormat >= sizeof(bitsPerElement)) {
+    return 0;
+  }
+
+  return bitsPerElement[rawFormat];
+}
+
+struct Tiler1d {
+  Gnm::ArrayMode m_arrayMode;
+  uint32_t m_bitsPerElement;
+
+  Gnm::MicroTileMode m_microTileMode;
+  uint32_t m_tileThickness;
+  uint32_t m_tileBytes;
+  uint32_t m_tilesPerRow;
+  uint32_t m_tilesPerSlice;
+
+  Tiler1d(const GnmTBuffer *texture) {
+    /*
+    m_arrayMode = Gnm::ArrayMode::kArrayMode1dTiledThin;
+    m_bitsPerElement = 128;// getBitsPerElement(texture->dfmt);
+    m_microTileMode = Gnm::MicroTileMode::kMicroTileModeThin;
+    m_tileThickness = (m_arrayMode == Gnm::kArrayMode1dTiledThick) ? 4 : 1;
+    m_tileBytes     = (kMicroTileWidth * kMicroTileHeight * m_tileThickness * m_bitsPerElement + 7) / 8;
+
+    auto width = texture->width + 1;
+    auto height = texture->height + 1;
+    width = (width + 3) / 4;
+    height = (height + 3) / 4;
+    m_tilesPerRow   = width / kMicroTileWidth;
+    m_tilesPerSlice = std::max(m_tilesPerRow * (height / kMicroTileHeight), 1U);
+    */
+
+    m_arrayMode = (Gnm::ArrayMode)2;
+    m_bitsPerElement = 128;
+    m_microTileMode = (Gnm::MicroTileMode)1;
+    m_tileThickness= 1;
+    m_tileBytes= 1024;
+    m_tilesPerRow = 16;
+    m_tilesPerSlice = 256;
+  }
+
+  uint64_t getTiledElementBitOffset(uint32_t x, uint32_t y, uint32_t z) const {
+    uint64_t element_index = getElementIndex(x, y, z, m_bitsPerElement,
+                                             m_microTileMode, m_arrayMode);
+
+    uint64_t slice_offset =
+        (z / m_tileThickness) * m_tilesPerSlice * m_tileBytes;
+
+    uint64_t tile_row_index = y / kMicroTileHeight;
+    uint64_t tile_column_index = x / kMicroTileWidth;
+    uint64_t tile_offset =
+        ((tile_row_index * m_tilesPerRow) + tile_column_index) * m_tileBytes;
+
+    uint64_t element_offset = element_index * m_bitsPerElement;
+
+    return (slice_offset + tile_offset) * 8 + element_offset;
+  }
+
+  int32_t getTiledElementByteOffset(uint32_t x, uint32_t y, uint32_t z) const {
+    return getTiledElementBitOffset(x, y, z) / 8;
+  }
+};
+
+struct Tiler2d {
+  static constexpr int m_bitsPerElement = 32;
+  static constexpr Gnm::MicroTileMode m_microTileMode =
+      Gnm::kMicroTileModeDisplay;
+  static constexpr Gnm::ArrayMode m_arrayMode = Gnm::kArrayMode2dTiledThin;
+  static constexpr uint32_t m_macroTileWidth = 128;
+  static constexpr uint32_t m_macroTileHeight = 64;
+  static constexpr Gnm::PipeConfig m_pipeConfig =
+      Gnm::kPipeConfigP8_32x32_16x16;
+  static constexpr uint32_t m_bankWidth = 1;
+  static constexpr uint32_t m_bankHeight = 1;
+  static constexpr uint32_t m_numBanks = 16;
+  static constexpr uint32_t m_numPipes = 8;
+  static constexpr uint32_t m_tileThickness = 1;
+  static constexpr uint32_t m_numFragmentsPerPixel = 1;
+  static constexpr uint32_t m_tileSplitBytes = 512;
+  static constexpr uint32_t m_pipeInterleaveBytes = 256;
+  static constexpr uint32_t m_macroTileAspect = 2;
+  static constexpr uint32_t m_paddedWidth = 1280;
+  static constexpr uint32_t m_paddedHeight = 768;
+  static constexpr uint32_t m_arraySlice = 0;
+  static constexpr uint64_t m_bankSwizzleMask = 0;
+  static constexpr uint64_t m_pipeSwizzleMask = 0;
+  static constexpr uint64_t m_pipeInterleaveMask = 255;
+  static constexpr uint64_t m_pipeInterleaveBits = 8;
+  static constexpr uint64_t m_pipeBits = 3;
+  static constexpr uint64_t m_bankBits = 4;
+
+  static constexpr uint32_t kDramRowSize = 0x400;
+  static constexpr uint32_t kNumLogicalBanks = 16;
+  static constexpr uint32_t kPipeInterleaveBytes = 256;
+  static constexpr uint32_t kBankInterleave = 1;
+  static constexpr uint32_t kMicroTileWidth = 8;
+  static constexpr uint32_t kMicroTileHeight = 8;
+  static constexpr uint32_t kNumMicroTilePixels =
+      kMicroTileWidth * kMicroTileHeight;
+  static constexpr uint32_t kCmaskCacheBits = 0x400;
+  static constexpr uint32_t kHtileCacheBits = 0x4000;
+
+  int32_t getTiledElementBitOffset(uint64_t *outTiledBitOffset, uint32_t x,
+                                   uint32_t y, uint32_t z,
+                                   uint32_t fragmentIndex, bool log = false);
+
+  int32_t getTiledElementByteOffset(uint64_t *outTiledByteOffset, uint32_t x,
+                                    uint32_t y, uint32_t z,
+                                    uint32_t fragmentIndex, bool log = false) {
+    uint64_t bitOffset = 0;
+    int32_t status =
+        getTiledElementBitOffset(&bitOffset, x, y, z, fragmentIndex, log);
+    *outTiledByteOffset = bitOffset / 8;
+    return status;
+  }
+};
+
+inline int32_t Tiler2d::getTiledElementBitOffset(uint64_t *outTiledBitOffset,
+                                                 uint32_t x, uint32_t y,
+                                                 uint32_t z,
+                                                 uint32_t fragmentIndex,
+                                                 bool log) {
+  uint64_t element_index =
+      getElementIndex(x, y, z, m_bitsPerElement, m_microTileMode, m_arrayMode);
+
+  uint32_t xh = x, yh = y;
+  if (m_arrayMode == Gnm::kArrayModeTiledThinPrt ||
+      m_arrayMode == Gnm::kArrayModeTiledThickPrt) {
+    xh %= m_macroTileWidth;
+    yh %= m_macroTileHeight;
+  }
+  uint64_t pipe = getPipeIndex(xh, yh, m_pipeConfig);
+  uint64_t bank =
+      getBankIndex(xh, yh, m_bankWidth, m_bankHeight, m_numBanks, m_numPipes);
+
+  constexpr uint32_t tile_bytes =
+      (kMicroTileWidth * kMicroTileHeight * m_tileThickness * m_bitsPerElement *
+           m_numFragmentsPerPixel +
+       7) /
+      8;
+
+  uint64_t element_offset = 0;
+  if (m_microTileMode == Gnm::kMicroTileModeDepth) {
+    uint64_t pixel_offset =
+        element_index * m_bitsPerElement * m_numFragmentsPerPixel;
+    element_offset = pixel_offset + (fragmentIndex * m_bitsPerElement);
+  } else {
+    uint64_t fragment_offset =
+        fragmentIndex * (tile_bytes / m_numFragmentsPerPixel) * 8;
+    element_offset = fragment_offset + (element_index * m_bitsPerElement);
+  }
+
+  uint64_t slices_per_tile = 1;
+  uint64_t tile_split_slice = 0;
+
+  uint64_t macro_tile_bytes = (m_macroTileWidth / kMicroTileWidth) *
+                              (m_macroTileHeight / kMicroTileHeight) *
+                              tile_bytes / (m_numPipes * m_numBanks);
+  uint64_t macro_tiles_per_row = m_paddedWidth / m_macroTileWidth;
+  uint64_t macro_tile_row_index = y / m_macroTileHeight;
+  uint64_t macro_tile_column_index = x / m_macroTileWidth;
+  uint64_t macro_tile_index =
+      (macro_tile_row_index * macro_tiles_per_row) + macro_tile_column_index;
+  uint64_t macro_tile_offset = macro_tile_index * macro_tile_bytes;
+  uint64_t macro_tiles_per_slice =
+      macro_tiles_per_row * (m_paddedHeight / m_macroTileHeight);
+  uint64_t slice_bytes = macro_tiles_per_slice * macro_tile_bytes;
+
+  uint32_t slice = z;
+
+  uint64_t slice_offset =
+      (tile_split_slice + slices_per_tile * slice / m_tileThickness) *
+      slice_bytes;
+  if (m_arraySlice != 0) {
+    slice = m_arraySlice;
+  }
+
+  uint64_t tile_row_index = (y / kMicroTileHeight) % m_bankHeight;
+  uint64_t tile_column_index =
+      ((x / kMicroTileWidth) / m_numPipes) % m_bankWidth;
+  uint64_t tile_index = (tile_row_index * m_bankWidth) + tile_column_index;
+  uint64_t tile_offset = tile_index * tile_bytes;
+
+  // Bank and pipe rotation/swizzling.
+  uint64_t bank_swizzle = m_bankSwizzleMask;
+  uint64_t pipe_swizzle = m_pipeSwizzleMask;
+
+  uint64_t pipe_slice_rotation = 0;
+  switch (m_arrayMode) {
+  case Gnm::kArrayMode3dTiledThin:
+  case Gnm::kArrayMode3dTiledThick:
+  case Gnm::kArrayMode3dTiledXThick:
+    pipe_slice_rotation =
+        std::max(1UL, (m_numPipes / 2UL) - 1UL) * (slice / m_tileThickness);
+    break;
+  default:
+    break;
+  }
+  pipe_swizzle += pipe_slice_rotation;
+  pipe_swizzle &= (m_numPipes - 1);
+  pipe = pipe ^ pipe_swizzle;
+
+  uint32_t slice_rotation = 0;
+  switch (m_arrayMode) {
+  case Gnm::kArrayMode2dTiledThin:
+  case Gnm::kArrayMode2dTiledThick:
+  case Gnm::kArrayMode2dTiledXThick:
+    slice_rotation = ((m_numBanks / 2) - 1) * (slice / m_tileThickness);
+    break;
+  case Gnm::kArrayMode3dTiledThin:
+  case Gnm::kArrayMode3dTiledThick:
+  case Gnm::kArrayMode3dTiledXThick:
+    slice_rotation = std::max(1UL, (m_numPipes / 2UL) - 1UL) *
+                     (slice / m_tileThickness) / m_numPipes;
+    break;
+  default:
+    break;
+  }
+  uint64_t tile_split_slice_rotation = 0;
+  switch (m_arrayMode) {
+  case Gnm::kArrayMode2dTiledThin:
+  case Gnm::kArrayMode3dTiledThin:
+  case Gnm::kArrayMode2dTiledThinPrt:
+  case Gnm::kArrayMode3dTiledThinPrt:
+    tile_split_slice_rotation = ((m_numBanks / 2) + 1) * tile_split_slice;
+    break;
+  default:
+    break;
+  }
+
+  bank ^= bank_swizzle + slice_rotation;
+  bank ^= tile_split_slice_rotation;
+  bank &= (m_numBanks - 1);
+
+  uint64_t total_offset =
+      (slice_offset + macro_tile_offset + tile_offset) * 8 + element_offset;
+  uint64_t bitOffset = total_offset & 0x7;
+  total_offset /= 8;
+
+  uint64_t pipe_interleave_offset = total_offset & m_pipeInterleaveMask;
+  uint64_t offset = total_offset >> m_pipeInterleaveBits;
+
+  uint64_t finalByteOffset =
+      pipe_interleave_offset | (pipe << (m_pipeInterleaveBits)) |
+      (bank << (m_pipeInterleaveBits + m_pipeBits)) |
+      (offset << (m_pipeInterleaveBits + m_pipeBits + m_bankBits));
+  *outTiledBitOffset = (finalByteOffset << 3) | bitOffset;
+  return 0;
+}
+
+namespace surfaceTiler {
+constexpr std::uint32_t getElementIndex(std::uint32_t x, std::uint32_t y) {
+  std::uint32_t elem = 0;
+
+  elem |= ((x >> 0) & 0x1) << 0;
+  elem |= ((x >> 1) & 0x1) << 1;
+  elem |= ((y >> 0) & 0x1) << 2;
+  elem |= ((x >> 2) & 0x1) << 3;
+  elem |= ((y >> 1) & 0x1) << 4;
+  elem |= ((y >> 2) & 0x1) << 5;
+
+  return elem;
+}
+
+constexpr std::uint32_t getPipeIndex(std::uint32_t x, std::uint32_t y) {
+  std::uint32_t pipe = 0;
+
+  pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0;
+  pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1;
+  pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
+
+  return pipe;
+}
+
+constexpr std::uint32_t getBankIndex(std::uint32_t x, std::uint32_t y) {
+  std::uint32_t bank = 0;
+
+  bank |= (((x >> 6) ^ (y >> 6)) & 0x1) << 0;
+  bank |= (((x >> 7) ^ (y >> 5) ^ (y >> 6)) & 0x1) << 1;
+  bank |= (((x >> 8) ^ (y >> 4)) & 0x1) << 2;
+  bank |= (((x >> 9) ^ (y >> 3)) & 0x1) << 3;
+
+  return bank;
+}
+
+inline std::uint64_t getTiledElementByteOffsetImpl(std::uint32_t x,
+                                                   std::uint32_t y,
+                                                   std::uint32_t width) {
+  std::uint32_t elementIndex = getElementIndex(x, y);
+  std::uint32_t pipe = getPipeIndex(x, y);
+  std::uint32_t bank = getBankIndex(x, y);
+
+  uint64_t macroTileIndex =
+      (static_cast<std::uint64_t>(y / 64) * (width / 128)) + x / 128;
+  uint64_t macroTileOffset = macroTileIndex * 256;
+
+  std::uint64_t totalOffset = macroTileOffset + elementIndex * 4;
+
+  std::uint64_t pipeInterleaveOffset = totalOffset & 255;
+  std::uint64_t offset = totalOffset >> 8;
+
+  return pipeInterleaveOffset | (pipe << 8) | (bank << 11) | (offset << 15);
+}
+
+static constexpr std::uint32_t kMaxPrecalculatedCount = 8;
+static constexpr std::uint32_t kMaxPrecalculatedWidth = 2048;
+static constexpr std::uint32_t kMaxPrecalculatedHeight = 2048;
+
+static std::uint64_t gPrecalculatedTiledOffsets[kMaxPrecalculatedCount]
+                                               [kMaxPrecalculatedWidth *
+                                                kMaxPrecalculatedHeight];
+
+struct PrecalculatedTiler {
+  std::uint32_t width;
+  std::uint32_t height;
+  std::uint32_t stride;
+  int index;
+};
+
+static PrecalculatedTiler gPrecalculatedTilers[kMaxPrecalculatedCount];
+static int gPrecalculatedCount;
+
+static int findPrecalculatedTile(std::uint32_t width, std::uint32_t height) {
+  for (int i = 0; i < gPrecalculatedCount; ++i) {
+    if (gPrecalculatedTilers[i].width == width &&
+        gPrecalculatedTilers[i].height == height) {
+      return i;
+    }
+  }
+
+  return -1;
+}
+
+inline int precalculateTiles(std::uint32_t width, std::uint32_t height) {
+  int index = findPrecalculatedTile(width, height);
+  if (index >= 0) {
+    if (index >= kMaxPrecalculatedCount / 2 &&
+        gPrecalculatedCount > kMaxPrecalculatedCount / 2) {
+      auto tmp = gPrecalculatedTilers[index];
+
+      for (int i = index; i > 0; --i) {
+        gPrecalculatedTilers[i] = gPrecalculatedTilers[i - 1];
+      }
+
+      gPrecalculatedTilers[0] = tmp;
+      return 0;
+    }
+
+    return index;
+  }
+
+  PrecalculatedTiler tiler;
+  tiler.width = width;
+  tiler.height = height;
+  tiler.stride = std::min(width, kMaxPrecalculatedWidth);
+
+  if (gPrecalculatedCount >= kMaxPrecalculatedCount) {
+    // TODO: insert in the middle?
+    tiler.index = gPrecalculatedTilers[kMaxPrecalculatedCount - 1].index;
+    index = kMaxPrecalculatedCount - 1;
+  } else {
+    tiler.index = gPrecalculatedCount++;
+    index = tiler.index;
+  }
+
+  gPrecalculatedTilers[index - 1] = tiler;
+
+  for (std::uint32_t y = 0; y < height; ++y) {
+    for (std::uint32_t x = 0; x < width; ++x) {
+      gPrecalculatedTiledOffsets[index][y * tiler.stride + x] =
+          getTiledElementByteOffsetImpl(x, y, tiler.width);
+    }
+  }
+
+  return index;
+}
+
+inline std::uint64_t getTiledElementByteOffset(int index, std::uint32_t x,
+                                               std::uint32_t y) {
+  auto tiler = gPrecalculatedTilers[index];
+  if (x < kMaxPrecalculatedWidth && y < kMaxPrecalculatedHeight) [[likely]] {
+    return gPrecalculatedTiledOffsets[index][x + y * tiler.stride];
+  }
+
+  return getTiledElementByteOffsetImpl(x, y, tiler.width);
+}
+} // namespace surfaceTiler
+} // namespace amdgpu::device
--- a/hw/amdgpu/device/src/device.cpp
+++ b/hw/amdgpu/device/src/device.cpp
--- a/hw/amdgpu/device/src/rect_list.geom.glsl
+++ b/hw/amdgpu/device/src/rect_list.geom.glsl
@ -0,0 +1,40 @@
+#version 450
+
+layout (triangles) in;
+layout (triangle_strip, max_vertices = 4) out;
+
+void main(void)
+{
+  vec4 topLeft = gl_in[0].gl_Position;
+  vec4 right = gl_in[1].gl_Position;
+  vec4 bottomLeft = gl_in[2].gl_Position;
+
+  vec4 topRight = vec4(
+      right.x,
+      topLeft.y,
+      topLeft.z,
+      topLeft.w
+  );
+
+  vec4 bottomRight = vec4(
+      right.x,
+      bottomLeft.y,
+      topLeft.z,
+      topLeft.w
+  );
+
+
+  gl_Position = topLeft;
+  EmitVertex();
+
+  gl_Position = bottomLeft;
+  EmitVertex();
+
+  gl_Position = topRight;
+  EmitVertex();
+
+  gl_Position = bottomRight;
+  EmitVertex();
+
+  EndPrimitive();
+}
--- a/hw/amdgpu/include/amdgpu/RemoteMemory.hpp
+++ b/hw/amdgpu/include/amdgpu/RemoteMemory.hpp
@ -0,0 +1,12 @@
+#pragma once
+#include <cstdint>
+
+namespace amdgpu {
+struct RemoteMemory {
+  char *shmPointer;
+
+  template <typename T = void> T *getPointer(std::uint64_t address) const {
+    return address ? reinterpret_cast<T *>(shmPointer + address) : nullptr;
+  }
+};
+} // namespace amdgpu
--- a/hw/amdgpu/include/util/SourceLocation.hpp
+++ b/hw/amdgpu/include/util/SourceLocation.hpp
@ -0,0 +1,31 @@
+#pragma once
+
+namespace util {
+class SourceLocation {
+public:
+  const char *mFileName = {};
+  const char *mFunctionName = {};
+  unsigned mLine = 0;
+  unsigned mColumn = 0;
+
+public:
+  constexpr SourceLocation(const char *fileName = __builtin_FILE(),
+                           const char *functionName = __builtin_FUNCTION(),
+                           unsigned line = __builtin_LINE(),
+                           unsigned column =
+#if __has_builtin(__builtin_COLUMN)
+                               __builtin_COLUMN()
+#else
+                               0
+#endif
+                               ) noexcept
+      : mFileName(fileName), mFunctionName(functionName), mLine(line),
+        mColumn(column) {
+  }
+
+  constexpr unsigned line() const noexcept { return mLine; }
+  constexpr unsigned column() const noexcept { return mColumn; }
+  constexpr const char *file_name() const noexcept { return mFileName; }
+  constexpr const char *function_name() const noexcept { return mFunctionName; }
+};
+} // namespace util
--- a/hw/amdgpu/include/util/Verify.hpp
+++ b/hw/amdgpu/include/util/Verify.hpp
@ -0,0 +1,26 @@
+#pragma once
+
+#include "SourceLocation.hpp"
+#include "unreachable.hpp"
+
+class Verify {
+  util::SourceLocation mLocation;
+
+public:
+  util::SourceLocation location() const {
+    return mLocation;
+  }
+
+  Verify(util::SourceLocation location = util::SourceLocation())
+      : mLocation(location) {}
+
+  Verify &operator<<(bool result) {
+    if (!result) {
+      util::unreachable("Verification failed at %s: %s:%u:%u",
+                        mLocation.function_name(), mLocation.file_name(),
+                        mLocation.line(), mLocation.column());
+    }
+
+    return *this;
+  }
+};
--- a/hw/amdgpu/include/util/unreachable.hpp
+++ b/hw/amdgpu/include/util/unreachable.hpp
@ -0,0 +1,29 @@
+#pragma once
+
+#include "SourceLocation.hpp"
+#include <cstdio>
+#include <cstdarg>
+
+namespace util {
+[[noreturn]] inline void unreachable_impl() { std::fflush(stdout); __builtin_trap(); }
+
+[[noreturn]] inline void unreachable(SourceLocation location = {}) {
+  std::printf("\n");
+  std::fflush(stdout);
+  std::fprintf(stderr, "Unreachable at %s:%u:%u %s\n", location.file_name(),
+               location.line(), location.column(), location.function_name());
+  unreachable_impl();
+}
+
+[[noreturn]] inline void unreachable(const char *fmt, ...) {
+  std::printf("\n");
+  std::fflush(stdout);
+  va_list list;
+  va_start(list, fmt);
+  std::vfprintf(stderr, fmt, list);
+  va_end(list);
+  std::fprintf(stderr, "\n");
+
+  unreachable_impl();
+}
+} // namespace util
--- a/hw/amdgpu/lib/libspirv/CMakeLists.txt
+++ b/hw/amdgpu/lib/libspirv/CMakeLists.txt
@ -0,0 +1,4 @@
+project(spirv)
+
+add_library(${PROJECT_NAME} INTERFACE)
+target_include_directories(${PROJECT_NAME} INTERFACE include)
--- a/hw/amdgpu/lib/libspirv/include/spirv/GLSL.std.450.h
+++ b/hw/amdgpu/lib/libspirv/include/spirv/GLSL.std.450.h
@ -0,0 +1,131 @@
+/*
+** Copyright (c) 2014-2016 The Khronos Group Inc.
+**
+** Permission is hereby granted, free of charge, to any person obtaining a copy
+** of this software and/or associated documentation files (the "Materials"),
+** to deal in the Materials without restriction, including without limitation
+** the rights to use, copy, modify, merge, publish, distribute, sublicense,
+** and/or sell copies of the Materials, and to permit persons to whom the
+** Materials are furnished to do so, subject to the following conditions:
+**
+** The above copyright notice and this permission notice shall be included in
+** all copies or substantial portions of the Materials.
+**
+** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
+** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
+** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ 
+**
+** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
+** IN THE MATERIALS.
+*/
+
+#ifndef GLSLstd450_H
+#define GLSLstd450_H
+
+static const int GLSLstd450Version = 100;
+static const int GLSLstd450Revision = 3;
+
+enum GLSLstd450 {
+    GLSLstd450Bad = 0,              // Don't use
+
+    GLSLstd450Round = 1,
+    GLSLstd450RoundEven = 2,
+    GLSLstd450Trunc = 3,
+    GLSLstd450FAbs = 4,
+    GLSLstd450SAbs = 5,
+    GLSLstd450FSign = 6,
+    GLSLstd450SSign = 7,
+    GLSLstd450Floor = 8,
+    GLSLstd450Ceil = 9,
+    GLSLstd450Fract = 10,
+
+    GLSLstd450Radians = 11,
+    GLSLstd450Degrees = 12,
+    GLSLstd450Sin = 13,
+    GLSLstd450Cos = 14,
+    GLSLstd450Tan = 15,
+    GLSLstd450Asin = 16,
+    GLSLstd450Acos = 17,
+    GLSLstd450Atan = 18,
+    GLSLstd450Sinh = 19,
+    GLSLstd450Cosh = 20,
+    GLSLstd450Tanh = 21,
+    GLSLstd450Asinh = 22,
+    GLSLstd450Acosh = 23,
+    GLSLstd450Atanh = 24,
+    GLSLstd450Atan2 = 25,
+
+    GLSLstd450Pow = 26,
+    GLSLstd450Exp = 27,
+    GLSLstd450Log = 28,
+    GLSLstd450Exp2 = 29,
+    GLSLstd450Log2 = 30,
+    GLSLstd450Sqrt = 31,
+    GLSLstd450InverseSqrt = 32,
+
+    GLSLstd450Determinant = 33,
+    GLSLstd450MatrixInverse = 34,
+
+    GLSLstd450Modf = 35,            // second operand needs an OpVariable to write to
+    GLSLstd450ModfStruct = 36,      // no OpVariable operand
+    GLSLstd450FMin = 37,
+    GLSLstd450UMin = 38,
+    GLSLstd450SMin = 39,
+    GLSLstd450FMax = 40,
+    GLSLstd450UMax = 41,
+    GLSLstd450SMax = 42,
+    GLSLstd450FClamp = 43,
+    GLSLstd450UClamp = 44,
+    GLSLstd450SClamp = 45,
+    GLSLstd450FMix = 46,
+    GLSLstd450IMix = 47,            // Reserved
+    GLSLstd450Step = 48,
+    GLSLstd450SmoothStep = 49,
+
+    GLSLstd450Fma = 50,
+    GLSLstd450Frexp = 51,            // second operand needs an OpVariable to write to
+    GLSLstd450FrexpStruct = 52,      // no OpVariable operand
+    GLSLstd450Ldexp = 53,
+
+    GLSLstd450PackSnorm4x8 = 54,
+    GLSLstd450PackUnorm4x8 = 55,
+    GLSLstd450PackSnorm2x16 = 56,
+    GLSLstd450PackUnorm2x16 = 57,
+    GLSLstd450PackHalf2x16 = 58,
+    GLSLstd450PackDouble2x32 = 59,
+    GLSLstd450UnpackSnorm2x16 = 60,
+    GLSLstd450UnpackUnorm2x16 = 61,
+    GLSLstd450UnpackHalf2x16 = 62,
+    GLSLstd450UnpackSnorm4x8 = 63,
+    GLSLstd450UnpackUnorm4x8 = 64,
+    GLSLstd450UnpackDouble2x32 = 65,
+
+    GLSLstd450Length = 66,
+    GLSLstd450Distance = 67,
+    GLSLstd450Cross = 68,
+    GLSLstd450Normalize = 69,
+    GLSLstd450FaceForward = 70,
+    GLSLstd450Reflect = 71,
+    GLSLstd450Refract = 72,
+
+    GLSLstd450FindILsb = 73,
+    GLSLstd450FindSMsb = 74,
+    GLSLstd450FindUMsb = 75,
+
+    GLSLstd450InterpolateAtCentroid = 76,
+    GLSLstd450InterpolateAtSample = 77,
+    GLSLstd450InterpolateAtOffset = 78,
+
+    GLSLstd450NMin = 79,
+    GLSLstd450NMax = 80,
+    GLSLstd450NClamp = 81,
+
+    GLSLstd450Count
+};
+
+#endif  // #ifndef GLSLstd450_H
--- a/hw/amdgpu/lib/libspirv/include/spirv/spirv-builder.hpp
+++ b/hw/amdgpu/lib/libspirv/include/spirv/spirv-builder.hpp
--- a/hw/amdgpu/lib/libspirv/include/spirv/spirv-instruction.hpp
+++ b/hw/amdgpu/lib/libspirv/include/spirv/spirv-instruction.hpp
--- a/hw/amdgpu/lib/libspirv/include/spirv/spirv.hpp
+++ b/hw/amdgpu/lib/libspirv/include/spirv/spirv.hpp
--- a/hw/amdgpu/shader/CMakeLists.txt
+++ b/hw/amdgpu/shader/CMakeLists.txt
@ -0,0 +1,22 @@
+project(libamdgpu-shader)
+set(PROJECT_PATH amdgpu/shader)
+
+set(SRC
+    src/cf.cpp
+    src/scf.cpp
+    src/CfBuilder.cpp
+    src/Converter.cpp
+    src/ConverterContext.cpp
+    src/Fragment.cpp
+    src/Function.cpp
+    src/Instruction.cpp
+    src/RegisterState.cpp
+    src/TypeId.cpp
+)
+
+add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC})
+target_link_libraries(${PROJECT_NAME} PUBLIC spirv amdgpu::base)
+target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH})
+set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "")
+add_library(amdgpu::shader ALIAS ${PROJECT_NAME})
+set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON)
--- a/hw/amdgpu/shader/include/amdgpu/shader/AccessOp.hpp
+++ b/hw/amdgpu/shader/include/amdgpu/shader/AccessOp.hpp
@ -0,0 +1,18 @@
+#pragma once
+
+namespace amdgpu::shader {
+enum class AccessOp { None = 0, Load = 1 << 0, Store = 1 << 1 };
+
+constexpr AccessOp operator|(AccessOp lhs, AccessOp rhs) {
+  return static_cast<AccessOp>(static_cast<int>(lhs) | static_cast<int>(rhs));
+}
+constexpr AccessOp operator&(AccessOp lhs, AccessOp rhs) {
+  return static_cast<AccessOp>(static_cast<int>(lhs) & static_cast<int>(rhs));
+}
+constexpr AccessOp &operator|=(AccessOp &lhs, AccessOp rhs) {
+  return ((lhs = lhs | rhs));
+}
+constexpr AccessOp &operator&=(AccessOp &lhs, AccessOp rhs) {
+  return ((lhs = lhs & rhs));
+}
+} // namespace amdgpu::shader
--- a/hw/amdgpu/shader/include/amdgpu/shader/BufferKind.hpp
+++ b/hw/amdgpu/shader/include/amdgpu/shader/BufferKind.hpp
@ -0,0 +1,5 @@
+#pragma once
+
+namespace amdgpu::shader {
+enum class BufferKind { VBuffer, TBuffer };
+}
--- a/hw/amdgpu/shader/include/amdgpu/shader/CfBuilder.hpp
+++ b/hw/amdgpu/shader/include/amdgpu/shader/CfBuilder.hpp
@ -0,0 +1,8 @@
+#pragma once
+#include "cf.hpp"
+#include <amdgpu/RemoteMemory.hpp>
+
+namespace amdgpu::shader {
+cf::BasicBlock *buildCf(cf::Context &ctxt, RemoteMemory memory,
+                        std::uint64_t entryPoint);
+} // namespace amdgpu::shader
--- a/hw/amdgpu/shader/include/amdgpu/shader/Converter.hpp
+++ b/hw/amdgpu/shader/include/amdgpu/shader/Converter.hpp
@ -0,0 +1,35 @@
+#pragma once
+
+#include "Stage.hpp"
+#include "AccessOp.hpp"
+
+#include <amdgpu/RemoteMemory.hpp>
+
+#include <cstdint>
+#include <span>
+#include <vector>
+
+namespace amdgpu::shader {
+struct Shader {
+  enum class UniformKind {
+    Buffer,
+    Sampler,
+    Image
+  };
+
+  struct UniformInfo {
+    std::uint32_t binding;
+    std::uint32_t buffer[8];
+    UniformKind kind;
+    AccessOp accessOp;
+  };
+
+  std::vector<UniformInfo> uniforms;
+  std::vector<std::uint32_t> spirv;
+};
+
+Shader convert(RemoteMemory memory, Stage stage, std::uint64_t entry,
+               std::span<const std::uint32_t> userSpgrs, int bindingOffset,
+               std::uint32_t dimX = 1, std::uint32_t dimY = 1,
+               std::uint32_t dimZ = 1);
+} // namespace amdgpu::shader
--- a/hw/amdgpu/shader/include/amdgpu/shader/ConverterContext.hpp
+++ b/hw/amdgpu/shader/include/amdgpu/shader/ConverterContext.hpp
@ -0,0 +1,257 @@
+#pragma once
+
+#include "Fragment.hpp"
+#include "Function.hpp"
+#include "RegisterId.hpp"
+#include "Stage.hpp"
+#include "TypeId.hpp"
+#include "Uniform.hpp"
+#include "Value.hpp"
+#include "scf.hpp"
+
+#include <forward_list>
+#include <amdgpu/RemoteMemory.hpp>
+#include <spirv/spirv-builder.hpp>
+#include <unordered_map>
+#include <util/unreachable.hpp>
+
+#include <bit>
+#include <span>
+#include <cassert>
+#include <cstdint>
+#include <vector>
+#include <map>
+
+namespace amdgpu::shader {
+/*
+struct MaterializedFunction {
+  spirv::Function function;
+  spirv::FunctionType type;
+  spirv::Type returnType;
+
+  std::vector<std::pair<RegisterId, TypeId>> args;
+  std::vector<std::pair<RegisterId, TypeId>> results;
+};
+*/
+
+class ConverterContext {
+  Stage mStage;
+  RemoteMemory mMemory;
+  spirv::IdGenerator mGenerator;
+  spirv::SpirvBuilder mBuilder{mGenerator, 1024};
+  static constexpr auto kGenericTypesCount =
+      static_cast<std::size_t>(TypeId::Void) + 1;
+  spirv::Type mTypes[kGenericTypesCount];
+  spirv::PointerType mPtrTypes[13][kGenericTypesCount];
+  spirv::RuntimeArrayType mRuntimeArrayTypes[kGenericTypesCount];
+  spirv::VariableValue mThreadId;
+  spirv::VariableValue mWorkgroupId;
+  spirv::VariableValue mLocalInvocationId;
+  spirv::VariableValue mPerVertex;
+  spirv::VariableValue mFragCoord;
+  std::vector<spirv::VariableValue> mInterfaces;
+  std::map<unsigned, spirv::VariableValue> mIns;
+  std::map<unsigned, spirv::VariableValue> mOuts;
+
+  std::map<std::uint32_t, spirv::ConstantFloat> mConstantFloat32Map;
+  std::map<std::uint32_t, spirv::ConstantUInt> mConstantUint32Map;
+  std::map<std::uint32_t, spirv::ConstantSInt> mConstantSint32Map;
+  std::map<std::uint64_t, spirv::ConstantUInt> mConstantUint64Map;
+
+  struct FunctionType {
+    spirv::Type resultType;
+    std::vector<spirv::Type> params;
+    spirv::FunctionType id;
+  };
+
+  std::vector<FunctionType> mFunctionTypes;
+
+  struct StructTypeEntry {
+    spirv::StructType id;
+    std::vector<spirv::Type> members;
+    spirv::PointerType ptrTypes[13];
+
+    bool match(std::span<const spirv::Type> other) {
+      if (members.size() != other.size()) {
+        return false;
+      }
+
+      for (std::size_t i = 0; i < other.size(); ++i) {
+        if (members[i] != other[i]) {
+          return false;
+        }
+      }
+
+      return true;
+    }
+  };
+
+  std::vector<StructTypeEntry> mStructTypes;
+
+  std::forward_list<Fragment> mFragments;
+  std::forward_list<Function> mFunctions;
+
+  spirv::ConstantBool mTrue;
+  spirv::ConstantBool mFalse;
+
+  std::vector<UniformInfo> mUniforms;
+  spirv::ExtInstSet mGlslStd450;
+  spirv::Function mDiscardFn;
+
+public:
+  ConverterContext(RemoteMemory memory, Stage stage) : mMemory(memory), mStage(stage) {
+    mGlslStd450 = mBuilder.createExtInstImport("GLSL.std.450");
+  }
+
+  const decltype(mInterfaces) &getInterfaces() const {
+    return mInterfaces;
+  }
+
+  spirv::SpirvBuilder &getBuilder() { return mBuilder; }
+  RemoteMemory getMemory() const { return mMemory; }
+  spirv::ExtInstSet getGlslStd450() const { return mGlslStd450; }
+  std::optional<TypeId> getTypeIdOf(spirv::Type type) const;
+
+  spirv::StructType findStructType(std::span<const spirv::Type> members);
+  spirv::StructType getStructType(std::span<const spirv::Type> members);
+  spirv::PointerType getStructPointerType(spv::StorageClass storageClass,
+                                          spirv::StructType structType);
+  spirv::Type getType(TypeId id);
+
+  spirv::PointerType getPointerType(spv::StorageClass storageClass, TypeId id) {
+    assert(static_cast<unsigned>(storageClass) < 13);
+    auto &type = mPtrTypes[static_cast<unsigned>(storageClass)]
+                          [static_cast<std::uint32_t>(id)];
+
+    if (!type) {
+      type = mBuilder.createTypePointer(storageClass, getType(id));
+    }
+
+    return type;
+  }
+
+  spirv::RuntimeArrayType getRuntimeArrayType(TypeId id);
+
+  spirv::UIntType getUInt32Type() {
+    return spirv::cast<spirv::UIntType>(getType(TypeId::UInt32));
+  }
+  spirv::UIntType getUInt64Type() {
+    return spirv::cast<spirv::UIntType>(getType(TypeId::UInt64));
+  }
+
+  spirv::VectorOfType<spirv::UIntType> getUint32x2Type() {
+    return spirv::cast<spirv::VectorOfType<spirv::UIntType>>(
+        getType(TypeId::UInt32x2));
+  }
+
+  spirv::VectorOfType<spirv::UIntType> getUint32x3Type() {
+    return spirv::cast<spirv::VectorOfType<spirv::UIntType>>(
+        getType(TypeId::UInt32x3));
+  }
+
+  spirv::VectorOfType<spirv::UIntType> getUint32x4Type() {
+    return spirv::cast<spirv::VectorOfType<spirv::UIntType>>(
+        getType(TypeId::UInt32x4));
+  }
+
+  spirv::ArrayOfType<spirv::UIntType> getArrayUint32x8Type() {
+    return spirv::cast<spirv::ArrayOfType<spirv::UIntType>>(getType(TypeId::ArrayUInt32x8));
+  }
+
+  spirv::ArrayOfType<spirv::UIntType> getArrayUint32x16Type() {
+    return spirv::cast<spirv::ArrayOfType<spirv::UIntType>>(getType(TypeId::ArrayUInt32x16));
+  }
+
+  spirv::SIntType getSint32Type() {
+    return spirv::cast<spirv::SIntType>(getType(TypeId::SInt32));
+  }
+  spirv::SIntType getSint64Type() {
+    return spirv::cast<spirv::SIntType>(getType(TypeId::SInt64));
+  }
+
+  spirv::FloatType getFloat32Type() {
+    return spirv::cast<spirv::FloatType>(getType(TypeId::Float32));
+  }
+
+  spirv::VectorOfType<spirv::FloatType> getFloat32x4Type() {
+    return spirv::cast<spirv::VectorOfType<spirv::FloatType>>(
+        getType(TypeId::Float32x4));
+  }
+
+  spirv::VectorOfType<spirv::FloatType> getFloat32x3Type() {
+    return spirv::cast<spirv::VectorOfType<spirv::FloatType>>(
+        getType(TypeId::Float32x3));
+  }
+
+  spirv::VectorOfType<spirv::FloatType> getFloat32x2Type() {
+    return spirv::cast<spirv::VectorOfType<spirv::FloatType>>(
+        getType(TypeId::Float32x2));
+  }
+
+  spirv::BoolType getBoolType() {
+    return spirv::cast<spirv::BoolType>(getType(TypeId::Bool));
+  }
+
+  spirv::VoidType getVoidType() {
+    return spirv::cast<spirv::VoidType>(getType(TypeId::Void));
+  }
+
+  spirv::ConstantBool getTrue() {
+    if (!mTrue) {
+      mTrue = mBuilder.createConstantTrue(getBoolType());
+    }
+    return mTrue;
+  }
+  spirv::ConstantBool getFalse() {
+    if (!mFalse) {
+      mFalse = mBuilder.createConstantFalse(getBoolType());
+    }
+    return mFalse;
+  }
+
+  spirv::ConstantUInt getUInt64(std::uint64_t value);
+  spirv::ConstantUInt getUInt32(std::uint32_t value);
+  spirv::ConstantSInt getSInt32(std::uint32_t value);
+  spirv::ConstantFloat getFloat32Raw(std::uint32_t value);
+
+  spirv::ConstantFloat getFloat32(float id) {
+    return getFloat32Raw(std::bit_cast<std::uint32_t>(id));
+  }
+
+  spirv::SamplerType getSamplerType() {
+    return spirv::cast<spirv::SamplerType>(getType(TypeId::Sampler));
+  }
+  spirv::ImageType getImage2DType() {
+    return spirv::cast<spirv::ImageType>(getType(TypeId::Image2D));
+  }
+  spirv::SampledImageType getSampledImage2DType() {
+    return spirv::cast<spirv::SampledImageType>(getType(TypeId::SampledImage2D));
+  }
+
+  UniformInfo *createStorageBuffer(TypeId type);
+  UniformInfo *getOrCreateStorageBuffer(std::uint32_t *vbuffer, TypeId type);
+  UniformInfo *getOrCreateUniformConstant(std::uint32_t *buffer, std::size_t size, TypeId type);
+  spirv::VariableValue getThreadId();
+  spirv::VariableValue getWorkgroupId();
+  spirv::VariableValue getLocalInvocationId();
+  spirv::VariableValue getPerVertex();
+  spirv::VariableValue getFragCoord();
+  spirv::VariableValue getIn(unsigned location);
+  spirv::VariableValue getOut(unsigned location);
+
+  spirv::Function getDiscardFn();
+
+  std::optional<std::uint32_t> findUint32Value(spirv::Value id) const;
+  std::optional<std::int32_t> findSint32Value(spirv::Value id) const;
+  std::optional<float> findFloat32Value(spirv::Value id) const;
+  spirv::FunctionType getFunctionType(spirv::Type resultType,
+                                      std::span<const spirv::Type> params);
+
+  Function *createFunction(std::size_t expectedSize);
+  Fragment *createFragment(std::size_t expectedSize);
+
+  std::vector<UniformInfo> &getUniforms() {
+    return mUniforms;
+  }
+};
+} // namespace amdgpu::shader
--- a/hw/amdgpu/shader/include/amdgpu/shader/Fragment.hpp
+++ b/hw/amdgpu/shader/include/amdgpu/shader/Fragment.hpp
@ -0,0 +1,95 @@
+#pragma once
+
+#include "FragmentTerminator.hpp"
+#include "Instruction.hpp"
+#include "RegisterId.hpp"
+#include "RegisterState.hpp"
+#include "Stage.hpp"
+#include "TypeId.hpp"
+#include "Uniform.hpp"
+#include "scf.hpp"
+
+#include <map>
+#include <optional>
+#include <spirv/spirv-builder.hpp>
+
+namespace amdgpu::shader {
+enum class OperandGetFlags {
+  None,
+  PreserveType = 1 << 0
+};
+
+struct Function;
+class ConverterContext;
+
+struct Fragment {
+  ConverterContext *context = nullptr;
+  Function *function = nullptr;
+  spirv::Block entryBlockId;
+  spirv::BlockBuilder builder;
+  RegisterState *registers = nullptr;
+
+  std::set<RegisterId> values;
+  std::set<RegisterId> outputs;
+
+  std::vector<Fragment *> predecessors;
+  std::uint64_t jumpAddress = 0;
+  spirv::BoolValue branchCondition;
+
+  void appendBranch(Fragment &other) {
+    other.predecessors.push_back(this);
+  }
+
+  void injectValuesFromPreds();
+
+  // std::optional<RegisterId> findInput(spirv::Value value);
+  // Value addInput(RegisterId id, spirv::Type type);
+  spirv::SamplerValue createSampler(RegisterId base);
+  spirv::ImageValue createImage(RegisterId base, bool r128); // TODO: params
+  Value createCompositeExtract(Value composite, std::uint32_t member);
+  Value getOperand(RegisterId id, TypeId type, OperandGetFlags flags = OperandGetFlags::None);
+  void setOperand(RegisterId id, Value value);
+  void setVcc(Value value);
+  void setScc(Value value);
+  spirv::BoolValue getScc();
+  spirv::Value createBitcast(spirv::Type to, spirv::Type from, spirv::Value value);
+
+  Value getScalarOperand(int id, TypeId type, OperandGetFlags flags = OperandGetFlags::None) {
+    return getOperand(RegisterId::Scalar(id), type, flags);
+  }
+  Value getVectorOperand(int id, TypeId type, OperandGetFlags flags = OperandGetFlags::None) {
+    return getOperand(RegisterId::Vector(id), type, flags);
+  }
+  Value getAttrOperand(int id, TypeId type, OperandGetFlags flags = OperandGetFlags::None) {
+    return getOperand(RegisterId::Attr(id), type, flags);
+  }
+  Value getVccLo() {
+    return getOperand(RegisterId::VccLo, TypeId::UInt32);
+  }
+  Value getVccHi() {
+    return getOperand(RegisterId::VccHi, TypeId::UInt32);
+  }
+  Value getExecLo() {
+    return getOperand(RegisterId::ExecLo, TypeId::UInt32);
+  }
+  Value getExecHi() {
+    return getOperand(RegisterId::ExecHi, TypeId::UInt32);
+  }
+  void setScalarOperand(int id, Value value) {
+    setOperand(RegisterId::Scalar(id), value);
+  }
+  void setVectorOperand(int id, Value value) {
+    setOperand(RegisterId::Vector(id), value);
+  }
+  void setExportTarget(int id, Value value) {
+    setOperand(RegisterId::Export(id), value);
+  }
+  // void createCallTo(MaterializedFunction *other);
+  void convert(std::uint64_t size);
+
+private:
+  Value getRegister(RegisterId id);
+  Value getRegister(RegisterId id, spirv::Type type);
+  void setRegister(RegisterId id, Value value);
+};
+} // namespace amdgpu::shader
--- a/hw/amdgpu/shader/include/amdgpu/shader/FragmentTerminator.hpp
+++ b/hw/amdgpu/shader/include/amdgpu/shader/FragmentTerminator.hpp
@ -0,0 +1,11 @@
+#pragma once
+
+namespace amdgpu::shader {
+enum class FragmentTerminator {
+  None,
+  EndProgram,
+  CallToReg,
+  BranchToReg,
+  Branch,
+};
+}
--- a/hw/amdgpu/shader/include/amdgpu/shader/Function.hpp
+++ b/hw/amdgpu/shader/include/amdgpu/shader/Function.hpp
@ -0,0 +1,31 @@
+#pragma once
+#include "Fragment.hpp"
+#include "RegisterId.hpp"
+#include "spirv/spirv-builder.hpp"
+#include <span>
+
+namespace amdgpu::shader {
+class ConverterContext;
+
+struct Function {
+  ConverterContext *context = nullptr;
+  Stage stage = Stage::None;
+  std::span<const std::uint32_t> userSgprs;
+  std::span<const std::uint32_t> userVgprs;
+  Fragment entryFragment;
+  Fragment exitFragment;
+  std::map<RegisterId, Value> inputs;
+  spirv::FunctionBuilder builder;
+  std::vector<Fragment *> fragments;
+
+  Value getInput(RegisterId id);
+  Value createInput(RegisterId id);
+  void createExport(spirv::BlockBuilder &builder, unsigned index, Value value);
+  spirv::Type getResultType();
+  spirv::FunctionType getFunctionType();
+
+  Fragment *createFragment();
+
+  void insertReturn();
+};
+} // namespace amdgpu::shader
--- a/hw/amdgpu/shader/include/amdgpu/shader/Instruction.hpp
+++ b/hw/amdgpu/shader/include/amdgpu/shader/Instruction.hpp
--- a/hw/amdgpu/shader/include/amdgpu/shader/RegisterId.hpp
+++ b/hw/amdgpu/shader/include/amdgpu/shader/RegisterId.hpp
@ -0,0 +1,102 @@
+#pragma once
+
+#include <cstdint>
+
+namespace amdgpu::shader {
+class RegisterId {
+  static constexpr std::uint32_t kScalarOperandsOffset = 0;
+  static constexpr std::uint32_t kScalarOperandsCount = 256;
+  static constexpr std::uint32_t kVectorOperandsOffset =
+      kScalarOperandsOffset + kScalarOperandsCount;
+  static constexpr std::uint32_t kVectorOperandsCount = 512;
+  static constexpr std::uint32_t kExportOperandsOffset =
+      kVectorOperandsOffset + kVectorOperandsCount;
+  static constexpr std::uint32_t kExportOperandsCount = 64;
+  static constexpr std::uint32_t kAttrOperandsOffset =
+      kExportOperandsOffset + kExportOperandsCount;
+  static constexpr std::uint32_t kAttrOperandsCount = 32;
+  static constexpr std::uint32_t kOperandsCount =
+      kAttrOperandsOffset + kAttrOperandsCount;
+
+  static constexpr std::uint32_t kRegisterVccLoId = kScalarOperandsOffset + 106;
+  static constexpr std::uint32_t kRegisterVccHiId = kScalarOperandsOffset + 107;
+  static constexpr std::uint32_t kRegisterM0Id = kScalarOperandsOffset + 124;
+  static constexpr std::uint32_t kRegisterExecLoId =
+      kScalarOperandsOffset + 126;
+  static constexpr std::uint32_t kRegisterExecHiId =
+      kScalarOperandsOffset + 127;
+  static constexpr std::uint32_t kRegisterSccId = kScalarOperandsOffset + 253;
+  static constexpr std::uint32_t kRegisterLdsDirect =
+      kScalarOperandsOffset + 254;
+
+public:
+  enum enum_type : std::uint32_t {
+    Invalid = ~static_cast<std::uint32_t>(0),
+
+    VccLo = kRegisterVccLoId,
+    VccHi = kRegisterVccHiId,
+    M0 = kRegisterM0Id,
+    ExecLo = kRegisterExecLoId,
+    ExecHi = kRegisterExecHiId,
+    Scc = kRegisterSccId,
+    LdsDirect = kRegisterLdsDirect,
+  } raw = Invalid;
+
+  RegisterId(enum_type value) : raw(value) {}
+
+  operator enum_type() const { return raw; }
+
+  static RegisterId Raw(std::uint32_t index) {
+    return static_cast<enum_type>(index);
+  }
+  static RegisterId Scalar(std::uint32_t index) {
+    return static_cast<enum_type>(index + kScalarOperandsOffset);
+  }
+  static RegisterId Vector(std::uint32_t index) {
+    return static_cast<enum_type>(index + kVectorOperandsOffset);
+  }
+  static RegisterId Export(std::uint32_t index) {
+    return static_cast<enum_type>(index + kExportOperandsOffset);
+  }
+  static RegisterId Attr(std::uint32_t index) {
+    return static_cast<enum_type>(index + kAttrOperandsOffset);
+  }
+
+  bool isScalar() const {
+    return raw >= kScalarOperandsOffset &&
+           raw < kScalarOperandsOffset + kScalarOperandsCount;
+  }
+  bool isVector() const {
+    return raw >= kVectorOperandsOffset &&
+           raw < kVectorOperandsOffset + kVectorOperandsCount;
+  }
+  bool isExport() const {
+    return raw >= kExportOperandsOffset &&
+           raw < kExportOperandsOffset + kExportOperandsCount;
+  }
+  bool isAttr() const {
+    return raw >= kAttrOperandsOffset &&
+           raw < kAttrOperandsOffset + kAttrOperandsCount;
+  }
+
+  unsigned getOffset() const {
+    if (isScalar()) {
+      return raw - kScalarOperandsOffset;
+    }
+
+    if (isVector()) {
+      return raw - kVectorOperandsOffset;
+    }
+
+    if (isExport()) {
+      return raw - kExportOperandsOffset;
+    }
+
+    if (isAttr()) {
+      return raw - kAttrOperandsOffset;
+    }
+
+    return raw;
+  }
+};
+} // namespace amdgpu::shader
--- a/hw/amdgpu/shader/include/amdgpu/shader/RegisterState.hpp
+++ b/hw/amdgpu/shader/include/amdgpu/shader/RegisterState.hpp
@ -0,0 +1,27 @@
+#pragma once
+#include "RegisterId.hpp"
+#include "Value.hpp"
+#include <cstdint>
+
+namespace amdgpu::shader {
+struct RegisterState {
+  std::uint64_t pc;
+
+  Value sgprs[104];
+  Value vccLo;
+  Value vccHi;
+  Value m0;
+  Value execLo;
+  Value execHi;
+  Value scc;
+  Value ldsDirect;
+  Value vgprs[512];
+  Value attrs[32];
+
+  Value getRegister(RegisterId regId);
+  void setRegister(RegisterId regId, Value value);
+
+private:
+  Value getRegisterImpl(RegisterId regId);
+};
+} // namespace amdgpu::shader
--- a/hw/amdgpu/shader/include/amdgpu/shader/Stage.hpp
+++ b/hw/amdgpu/shader/include/amdgpu/shader/Stage.hpp
@ -0,0 +1,5 @@
+#pragma once
+
+namespace amdgpu::shader {
+enum class Stage { None, Vertex, Fragment, Geometry, Compute };
+}
--- a/hw/amdgpu/shader/include/amdgpu/shader/TypeId.hpp
+++ b/hw/amdgpu/shader/include/amdgpu/shader/TypeId.hpp
@ -0,0 +1,57 @@
+#pragma once
+
+#include <cstddef>
+
+namespace amdgpu::shader {
+struct TypeId {
+  enum {
+    Bool,
+    SInt8,
+    UInt8,
+    SInt16,
+    UInt16,
+    SInt32,
+    UInt32,
+    UInt32x2,
+    UInt32x3,
+    UInt32x4,
+    UInt64,
+    SInt64,
+    ArrayUInt32x8,
+    ArrayUInt32x16,
+    Float16,
+    Float32,
+    Float32x2,
+    Float32x3,
+    Float32x4,
+    Float64,
+    ArrayFloat32x8,
+    ArrayFloat32x16,
+    Sampler,
+    Image2D,
+    SampledImage2D,
+
+    Void // should be last
+  } raw = Void;
+
+  using enum_type = decltype(raw);
+
+  TypeId() = default;
+  TypeId(enum_type value) : raw(value) {}
+  operator enum_type() const { return raw; }
+
+  TypeId getBaseType() const;
+  std::size_t getSize() const;
+  std::size_t getElementsCount() const;
+
+  bool isSignedInt() const {
+    return raw == TypeId::SInt8 || raw == TypeId::SInt16 ||
+           raw == TypeId::SInt32 || raw == TypeId::SInt64;
+  }
+
+  bool isFloatPoint() const {
+    return raw == TypeId::Float16 || raw == TypeId::Float32 ||
+           raw == TypeId::Float64;
+  }
+};
+} // namespace amdgpu::shader
--- a/hw/amdgpu/shader/include/amdgpu/shader/Uniform.hpp
+++ b/hw/amdgpu/shader/include/amdgpu/shader/Uniform.hpp
@ -0,0 +1,20 @@
+#pragma once
+
+#include "AccessOp.hpp"
+#include "TypeId.hpp"
+#include "spirv/spirv-builder.hpp"
+
+#include <cstdint>
+#include <set>
+
+namespace amdgpu::shader {
+struct UniformInfo {
+  std::uint32_t buffer[8];
+  int index;
+  TypeId typeId;
+  spirv::PointerType type;
+  spirv::VariableValue variable;
+  AccessOp accessOp = AccessOp::None;
+  bool isBuffer;
+};
+} // namespace amdgpu::shader
--- a/hw/amdgpu/shader/include/amdgpu/shader/Value.hpp
+++ b/hw/amdgpu/shader/include/amdgpu/shader/Value.hpp
@ -0,0 +1,15 @@
+#pragma once
+#include <spirv/spirv-builder.hpp>
+
+namespace amdgpu::shader {
+struct Value {
+  spirv::Type type;
+  spirv::Value value;
+
+  Value() = default;
+  Value(spirv::Type type, spirv::Value value) : type(type), value(value) {}
+
+  explicit operator bool() const { return static_cast<bool>(value); }
+  bool operator==(Value other) const { return value == other.value; }
+};
+} // namespace amdgpu::shader
--- a/hw/amdgpu/shader/include/amdgpu/shader/cf.hpp
+++ b/hw/amdgpu/shader/include/amdgpu/shader/cf.hpp
@ -0,0 +1,146 @@
+#pragma once
+
+#include <cstdint>
+#include <map>
+#include <set>
+#include <vector>
+
+namespace cf {
+enum class TerminatorKind {
+  None,
+  Branch,
+  BranchToUnknown,
+  Return,
+};
+
+class BasicBlock {
+  std::uint64_t address;
+  std::uint64_t size = 0;
+
+  std::set<BasicBlock *> predecessors;
+  BasicBlock *successors[2]{};
+  TerminatorKind terminator = TerminatorKind::None;
+
+public:
+  explicit BasicBlock(std::uint64_t address, std::uint64_t size = 0)
+      : address(address), size(size) {}
+
+  BasicBlock(const BasicBlock &) = delete;
+
+  void setSize(std::uint64_t newSize) { size = newSize; }
+  std::uint64_t getSize() const { return size; }
+  std::uint64_t getAddress() const { return address; }
+  TerminatorKind getTerminator() const { return terminator; }
+
+  void createConditionalBranch(BasicBlock *ifTrue, BasicBlock *ifFalse);
+  void createBranch(BasicBlock *target);
+  void createBranchToUnknown();
+  void createReturn();
+
+  void replaceSuccessor(BasicBlock *origBB, BasicBlock *newBB);
+  void replacePredecessor(BasicBlock *origBB, BasicBlock *newBB) {
+    origBB->replaceSuccessor(this, newBB);
+  }
+
+  template <std::invocable<BasicBlock &> T> void walk(T &&cb) {
+    std::vector<BasicBlock *> workStack;
+    std::set<BasicBlock *> processed;
+
+    workStack.push_back(this);
+    processed.insert(this);
+
+    while (!workStack.empty()) {
+      auto block = workStack.back();
+      workStack.pop_back();
+
+      block->walkSuccessors([&](BasicBlock *successor) {
+        if (processed.insert(successor).second) {
+          workStack.push_back(successor);
+        }
+      });
+
+      cb(*block);
+    }
+  }
+
+  template <std::invocable<BasicBlock *> T> void walkSuccessors(T &&cb) const {
+    if (successors[0]) {
+      cb(successors[0]);
+
+      if (successors[1]) {
+        cb(successors[1]);
+      }
+    }
+  }
+
+  template <std::invocable<BasicBlock *> T> void walkPredecessors(T &&cb) const {
+    for (auto pred : predecessors) {
+      cb(pred);
+    }
+  }
+
+  std::size_t getPredecessorsCount() const { return predecessors.size(); }
+
+  bool hasDirectPredecessor(const BasicBlock &block) const;
+  bool hasPredecessor(const BasicBlock &block) const;
+
+  std::size_t getSuccessorsCount() const {
+    if (successors[0] == nullptr) {
+      return 0;
+    }
+
+    return successors[1] != nullptr ? 2 : 1;
+  }
+
+  BasicBlock *getSuccessor(std::size_t index) const { return successors[index]; }
+
+  void split(BasicBlock *target);
+};
+
+class Context {
+  std::map<std::uint64_t, BasicBlock, std::greater<>> basicBlocks;
+
+public:
+  BasicBlock *getBasicBlockAt(std::uint64_t address) {
+    if (auto it = basicBlocks.find(address); it != basicBlocks.end()) {
+      return &it->second;
+    }
+
+    return nullptr;
+  }
+  
+  BasicBlock *getBasicBlock(std::uint64_t address) {
+    if (auto it = basicBlocks.lower_bound(address); it != basicBlocks.end()) {
+      auto bb = &it->second;
+
+      if (bb->getAddress() <= address &&
+          bb->getAddress() + bb->getSize() > address) {
+        return bb;
+      }
+    }
+
+    return nullptr;
+  }
+
+  BasicBlock *getOrCreateBasicBlock(std::uint64_t address, bool split = true) {
+    auto it = basicBlocks.lower_bound(address);
+
+    if (it != basicBlocks.end()) {
+      auto bb = &it->second;
+
+      if (bb->getAddress() <= address &&
+          bb->getAddress() + bb->getSize() > address) {
+        if (split && bb->getAddress() != address) {
+          auto result = &basicBlocks.emplace_hint(it, address, address)->second;
+          bb->split(result);
+          return result;
+        }
+
+        return bb;
+      }
+    }
+
+    return &basicBlocks.emplace_hint(it, address, address)->second;
+  }
+};
+} // namespace cf
--- a/hw/amdgpu/shader/include/amdgpu/shader/scf.hpp
+++ b/hw/amdgpu/shader/include/amdgpu/shader/scf.hpp
@ -0,0 +1,371 @@
+#pragma once
+
+#include <cassert>
+#include <cstdint>
+#include <forward_list>
+#include <functional>
+#include <memory>
+
+namespace cf {
+class BasicBlock;
+}
+
+namespace scf {
+class BasicBlock;
+struct PrintOptions {
+  unsigned char identCount = 2;
+  char identChar = ' ';
+  std::function<void(const PrintOptions &, unsigned depth, BasicBlock *)>
+      blockPrinter;
+
+  std::string makeIdent(unsigned depth) const {
+    return std::string(depth * identCount, identChar);
+  }
+};
+
+class Node {
+  Node *mParent = nullptr;
+  Node *mNext = nullptr;
+  Node *mPrev = nullptr;
+
+public:
+  virtual ~Node() = default;
+  virtual void print(const PrintOptions &options, unsigned depth) = 0;
+  virtual bool isEqual(const Node &other) const {
+    return this == &other;
+  }
+
+  void dump() {
+    print({}, 0);
+  }
+
+  void setParent(Node *parent) {
+    mParent = parent;
+  }
+
+  Node *getParent() const {
+    return mParent;
+  }
+
+  template<typename T> requires(std::is_base_of_v<Node, T>)
+  auto getParent() const -> decltype(dynCast<T>(mParent)) {
+    return dynCast<T>(mParent);
+  }
+
+  Node *getNext() const {
+    return mNext;
+  }
+
+  Node *getPrev() const {
+    return mPrev; 
+  }
+
+  friend class Block;
+};
+
+template <typename T, typename ST>
+  requires(std::is_base_of_v<Node, T> && std::is_base_of_v<Node, ST>) &&
+          requires(ST *s) { dynamic_cast<T *>(s); }
+T *dynCast(ST *s) {
+  return dynamic_cast<T *>(s);
+}
+
+template <typename T, typename ST>
+  requires(std::is_base_of_v<Node, T> && std::is_base_of_v<Node, ST>) &&
+          requires(const ST *s) { dynamic_cast<const T *>(s); }
+const T *dynCast(const ST *s) {
+  return dynamic_cast<const T *>(s);
+}
+
+inline bool isNodeEqual(const Node *lhs, const Node *rhs) {
+  if (lhs == rhs) {
+    return true;
+  }
+
+  return lhs != nullptr && rhs != nullptr && lhs->isEqual(*rhs);
+}
+
+struct UnknownBlock final : Node {
+  void print(const PrintOptions &options, unsigned depth) override {
+    std::printf("%sunknown\n", options.makeIdent(depth).c_str());
+  }
+
+  bool isEqual(const Node &other) const override {
+    return this == &other || dynCast<UnknownBlock>(&other) != nullptr;
+  }
+};
+
+struct Return final : Node {
+  void print(const PrintOptions &options, unsigned depth) override {
+    std::printf("%sreturn\n", options.makeIdent(depth).c_str());
+  }
+
+  bool isEqual(const Node &other) const override {
+    return this == &other || dynCast<Return>(&other) != nullptr;
+  }
+};
+
+
+class Context;
+
+class Block final : public Node {
+  Node *mBegin = nullptr;
+  Node *mEnd = nullptr;
+
+  void *mUserData = nullptr;
+
+public:
+  void print(const PrintOptions &options, unsigned depth) override {
+    std::printf("%s{\n", options.makeIdent(depth).c_str());
+
+    for (auto node = mBegin; node != nullptr; node = node->getNext()) {
+      node->print(options, depth + 1);
+    }
+    std::printf("%s}\n", options.makeIdent(depth).c_str());
+  }
+
+  bool isEmpty() const {
+    return mBegin == nullptr;
+  }
+
+  Node *getRootNode() const {
+    return mBegin;
+  }
+  Node *getLastNode() const {
+    return mEnd;
+  }
+
+  void setUserData(void *data) {
+    mUserData = data;
+  }
+  void* getUserData() const {
+    return mUserData;
+  }
+  template<typename T>
+  T* getUserData() const {
+    return static_cast<T *>(mUserData);
+  }
+
+  void eraseFrom(Node *endBefore);
+  void splitInto(Block *target, Node *splitPoint);
+  Block *split(Context &context, Node *splitPoint);
+
+  void append(Node *node) {
+    assert(node->mParent == nullptr);
+    assert(node->mPrev == nullptr);
+    assert(node->mNext == nullptr);
+
+    node->mParent = this;
+    node->mPrev = mEnd;
+
+    if (mEnd != nullptr) {
+      mEnd->mNext = node;
+    }
+
+    if (mBegin == nullptr) {
+      mBegin = node;
+    }
+
+    mEnd = node;
+  }
+
+  void detachNode(Node *node) {
+    if (node->mPrev != nullptr) {
+      node->mPrev->mNext = node->mNext;
+    }
+
+    if (node->mNext != nullptr) {
+      node->mNext->mPrev = node->mPrev;
+    }
+
+    if (mBegin == node) {
+      mBegin = node->mNext;
+    }
+
+    if (mEnd == node) {
+      mEnd = node->mPrev;
+    }
+
+    node->mNext = nullptr;
+    node->mPrev = nullptr;
+    node->mParent = nullptr;
+  }
+
+  bool isEqual(const Node &other) const override {
+    if (this == &other) {
+      return true;
+    }
+
+    auto otherBlock = dynCast<Block>(&other);
+
+    if (otherBlock == nullptr) {
+      return false;
+    }
+
+    auto thisIt = mBegin;
+    auto otherIt = otherBlock->mBegin;
+
+    while (thisIt != nullptr && otherIt != nullptr) {
+      if (!thisIt->isEqual(*otherIt)) {
+        return false;
+      }
+
+      thisIt = thisIt->mNext;
+      otherIt = otherIt->mNext;
+    }
+
+    return thisIt == otherIt;
+  }
+};
+
+class BasicBlock final : public Node {
+  std::uint64_t address;
+  std::uint64_t size = 0;
+
+public:
+  explicit BasicBlock(std::uint64_t address, std::uint64_t size = 0)
+      : address(address), size(size) {}
+
+  std::uint64_t getSize() const { return size; }
+  std::uint64_t getAddress() const { return address; }
+
+  void print(const PrintOptions &options, unsigned depth) override {
+    std::printf(
+        "%sbb%lx\n",
+        std::string(depth * options.identCount, options.identChar).c_str(),
+        getAddress());
+    if (depth != 0 && options.blockPrinter) {
+      options.blockPrinter(options, depth + 1, this);
+    }
+  }
+
+  Block *getBlock() const {
+    return dynCast<Block>(getParent());
+  }
+
+  bool isEqual(const Node &other) const override {
+    if (this == &other) {
+      return true;
+    }
+
+    if (auto otherBlock = dynCast<BasicBlock>(&other)) {
+      return address == otherBlock->address;
+    }
+
+    return false;
+  }
+};
+
+struct IfElse final : Node {
+  Block *ifTrue;
+  Block *ifFalse;
+
+  IfElse(Block *ifTrue, Block *ifFalse) : ifTrue(ifTrue), ifFalse(ifFalse) {
+    ifTrue->setParent(this);
+    ifFalse->setParent(this);
+  }
+
+  void print(const PrintOptions &options, unsigned depth) override {
+    if (ifTrue->isEmpty()) {
+      std::printf("%sif false\n", options.makeIdent(depth).c_str());
+      ifFalse->print(options, depth);
+      return;
+    }
+
+    std::printf("%sif true\n", options.makeIdent(depth).c_str());
+    ifTrue->print(options, depth);
+    if (!ifFalse->isEmpty()) {
+      std::printf("%selse\n", options.makeIdent(depth).c_str());
+      ifFalse->print(options, depth);
+    }
+  }
+
+  bool isEqual(const Node &other) const override {
+    if (this == &other) {
+      return true;
+    }
+
+    if (auto otherBlock = dynCast<IfElse>(&other)) {
+      return ifTrue->isEqual(*otherBlock->ifTrue) &&
+             ifFalse->isEqual(*otherBlock->ifFalse);
+    }
+
+    return false;
+  }
+};
+
+struct Jump final : Node {
+  BasicBlock *target;
+
+  Jump(BasicBlock *target) : target(target) {}
+
+  bool isEqual(const Node &other) const override {
+    if (this == &other) {
+      return true;
+    }
+
+    if (auto otherJump = dynCast<Jump>(&other)) {
+      return target == otherJump->target;
+    }
+
+    return false;
+  }
+
+  void print(const PrintOptions &options, unsigned depth) override {
+    std::printf("%sjump ", options.makeIdent(depth).c_str());
+    target->print(options, 0);
+  }
+};
+
+struct Loop final : Node {
+  Block *body;
+
+  Loop(Block *body) : body(body) {
+    body->setParent(this);
+  }
+
+  bool isEqual(const Node &other) const override {
+    if (this == &other) {
+      return true;
+    }
+
+    if (auto otherLoop = dynCast<Loop>(&other)) {
+      return body->isEqual(*otherLoop->body);
+    }
+
+    return false;
+  }
+
+  void print(const PrintOptions &options, unsigned depth) override {
+    std::printf("%sloop {\n", options.makeIdent(depth).c_str());
+    body->print(options, depth + 1);
+    std::printf("%s}\n", options.makeIdent(depth).c_str());
+  }
+};
+
+struct Break final : Node {
+  bool isEqual(const Node &other) const override {
+    return this == &other || dynCast<Break>(&other) != nullptr;
+  }
+
+  void print(const PrintOptions &options, unsigned depth) override {
+    std::printf("%sbreak\n", options.makeIdent(depth).c_str());
+  }
+};
+
+class Context {
+  std::forward_list<std::unique_ptr<Node>> mNodes;
+
+public:
+  template <typename T, typename... ArgsT>
+    requires(std::is_constructible_v<T, ArgsT...>)
+  T *create(ArgsT &&...args) {
+    auto result = new T(std::forward<ArgsT>(args)...);
+    mNodes.push_front(std::unique_ptr<Node>{result});
+    return result;
+  }
+};
+
+scf::Block *structurize(Context &ctxt, cf::BasicBlock *bb);
+void makeUniqueBasicBlocks(Context &ctxt, Block *block);
+} // namespace scf
--- a/hw/amdgpu/shader/src/CfBuilder.cpp
+++ b/hw/amdgpu/shader/src/CfBuilder.cpp
@ -0,0 +1,187 @@
+#include "CfBuilder.hpp"
+#include "Instruction.hpp"
+#include <cassert>
+#include <amdgpu/RemoteMemory.hpp>
+#include <unordered_set>
+
+using namespace amdgpu;
+using namespace amdgpu::shader;
+
+struct CfgBuilder {
+  cf::Context *context;
+  RemoteMemory memory;
+
+  std::size_t analyzeBb(cf::BasicBlock *bb, std::uint64_t *successors,
+                        std::size_t *successorsCount, auto pushWork) {
+    auto address = bb->getAddress();
+    auto instBegin = memory.getPointer<std::uint32_t>(address);
+    auto instHex = instBegin;
+
+    while (true) {
+      auto instruction = Instruction(instHex);
+      auto size = instruction.size();
+      auto pc = address + ((instHex - instBegin) << 2);
+      instHex += size;
+
+      if (instruction.instClass == InstructionClass::Sop1) {
+        Sop1 sop1{instHex - size};
+
+        if (sop1.op == Sop1::Op::S_SETPC_B64 ||
+            sop1.op == Sop1::Op::S_SWAPPC_B64) {
+          bb->createBranchToUnknown();
+          break;
+        }
+
+        continue;
+      }
+
+      if (instruction.instClass == InstructionClass::Sopp) {
+        Sopp sopp{instHex - size};
+
+        if (sopp.op == Sopp::Op::S_ENDPGM) {
+          bb->createReturn();
+          break;
+        }
+
+        bool isEnd = false;
+        switch (sopp.op) {
+        case Sopp::Op::S_BRANCH:
+          successors[0] = pc + ((size + sopp.simm) << 2);
+          *successorsCount = 1;
+
+          isEnd = true;
+          break;
+
+        case Sopp::Op::S_CBRANCH_SCC0:
+        case Sopp::Op::S_CBRANCH_SCC1:
+        case Sopp::Op::S_CBRANCH_VCCZ:
+        case Sopp::Op::S_CBRANCH_VCCNZ:
+        case Sopp::Op::S_CBRANCH_EXECZ:
+        case Sopp::Op::S_CBRANCH_EXECNZ:
+          successors[0] = pc + ((size + sopp.simm) << 2);
+          successors[1] = pc + (size << 2);
+          *successorsCount = 2;
+          isEnd = true;
+          break;
+
+        default:
+          break;
+        }
+
+        if (isEnd) {
+          break;
+        }
+        continue;
+      }
+
+      // move instruction that requires EXEC test to separate bb
+      if (instruction.instClass == InstructionClass::Vop2 ||
+          instruction.instClass == InstructionClass::Vop3 ||
+          instruction.instClass == InstructionClass::Mubuf ||
+          instruction.instClass == InstructionClass::Mtbuf ||
+          instruction.instClass == InstructionClass::Mimg ||
+          instruction.instClass == InstructionClass::Ds ||
+          instruction.instClass == InstructionClass::Vintrp ||
+          instruction.instClass == InstructionClass::Exp ||
+          instruction.instClass == InstructionClass::Vop1 ||
+          instruction.instClass == InstructionClass::Vopc ||
+          instruction.instClass == InstructionClass::Smrd) {
+        *successorsCount = 1;
+
+        if (instBegin != instHex - size) {
+          // if it is not first instruction in block, move end to prev
+          // instruction, successor is current instruction
+          instHex -= size;
+          successors[0] = pc;
+          break;
+        }
+
+        successors[0] = pc + (size << 2);
+        break;
+      }
+    }
+
+    return (instHex - instBegin) << 2;
+  }
+
+  cf::BasicBlock *buildCfg(std::uint64_t entryPoint) {
+    std::vector<std::uint64_t> workList;
+    workList.push_back(entryPoint);
+    std::unordered_set<std::uint64_t> processed;
+    processed.insert(entryPoint);
+
+    struct BranchInfo {
+      std::uint64_t source;
+      std::size_t count;
+      std::uint64_t targets[2];
+    };
+
+    std::vector<BranchInfo> branches;
+
+    while (!workList.empty()) {
+      auto address = workList.back();
+      workList.pop_back();
+
+      auto bb = context->getOrCreateBasicBlock(address);
+
+      if (bb->getSize() != 0) {
+        continue;
+      }
+
+      std::uint64_t successors[2];
+      std::size_t successorsCount = 0;
+      std::size_t size = analyzeBb(bb, successors, &successorsCount,
+                                   [&](std::uint64_t address) {
+                                     if (processed.insert(address).second) {
+                                       workList.push_back(address);
+                                     }
+                                   });
+      bb->setSize(size);
+
+      if (successorsCount == 2) {
+        auto succ0Address = successors[0];
+        auto succ1Address = successors[1];
+
+        branches.push_back(
+            {address + size - 4, 2, {successors[0], successors[1]}});
+
+        if (processed.insert(successors[0]).second) {
+          workList.push_back(successors[0]);
+        }
+        if (processed.insert(successors[1]).second) {
+          workList.push_back(successors[1]);
+        }
+      } else if (successorsCount == 1) {
+        branches.push_back({address + size - 4, 1, {successors[0]}});
+
+        if (processed.insert(successors[0]).second) {
+          workList.push_back(successors[0]);
+        }
+      }
+    }
+
+    for (auto branch : branches) {
+      auto bb = context->getBasicBlock(branch.source);
+      assert(bb);
+      if (branch.count == 2) {
+        bb->createConditionalBranch(
+            context->getBasicBlockAt(branch.targets[0]),
+            context->getBasicBlockAt(branch.targets[1]));
+      } else {
+        bb->createBranch(context->getBasicBlockAt(branch.targets[0]));
+      }
+    }
+
+    return context->getBasicBlockAt(entryPoint);
+  }
+};
+
+cf::BasicBlock *amdgpu::shader::buildCf(cf::Context &ctxt,
+                                           RemoteMemory memory,
+                                           std::uint64_t entryPoint) {
+  CfgBuilder builder;
+  builder.context = &ctxt;
+  builder.memory = memory;
+
+  return builder.buildCfg(entryPoint);
+}
--- a/hw/amdgpu/shader/src/Converter.cpp
+++ b/hw/amdgpu/shader/src/Converter.cpp
@ -0,0 +1,389 @@
+#include "Converter.hpp"
+#include "CfBuilder.hpp"
+#include "ConverterContext.hpp"
+#include "Fragment.hpp"
+#include "FragmentTerminator.hpp"
+#include "Instruction.hpp"
+#include "RegisterId.hpp"
+#include "RegisterState.hpp"
+#include "cf.hpp"
+#include "amdgpu/RemoteMemory.hpp"
+#include "scf.hpp"
+#include "util/unreachable.hpp"
+#include <compare>
+#include <cstddef>
+#include <forward_list>
+#include <memory>
+#include <spirv/spirv.hpp>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+static void printInstructions(const scf::PrintOptions &options, unsigned depth,
+                              std::uint32_t *instBegin, std::size_t size) {
+  auto instHex = instBegin;
+  auto instEnd = instBegin + size / sizeof(std::uint32_t);
+
+  while (instHex < instEnd) {
+    auto instruction = amdgpu::shader::Instruction(instHex);
+    std::printf("%s", options.makeIdent(depth).c_str());
+    instruction.dump();
+    std::printf("\n");
+    instHex += instruction.size();
+  }
+}
+
+namespace amdgpu::shader {
+class Converter {
+  scf::Context *scfContext;
+  cf::Context cfContext;
+  RemoteMemory memory;
+  Function *function = nullptr;
+  std::forward_list<RegisterState> states;
+  std::vector<RegisterState *> freeStates;
+
+public:
+  void convertFunction(RemoteMemory mem, scf::Context *scfCtxt,
+                       scf::Block *block, Function *fn) {
+    scfContext = scfCtxt;
+    function = fn;
+    memory = mem;
+
+    auto lastFragment = convertBlock(block, &function->entryFragment);
+
+    if (lastFragment != nullptr) {
+      lastFragment->builder.createBranch(fn->exitFragment.entryBlockId);
+      lastFragment->appendBranch(fn->exitFragment);
+    }
+
+    initState(&fn->exitFragment);
+  }
+
+private:
+  RegisterState *allocateState() {
+    if (freeStates.empty()) {
+      return &states.emplace_front();
+    }
+
+    auto result = freeStates.back();
+    freeStates.pop_back();
+    *result = {};
+    return result;
+  }
+
+  void releaseState(RegisterState *state) {
+    assert(state != nullptr);
+    freeStates.push_back(state);
+  }
+
+  void initState(Fragment *fragment, std::uint64_t address = 0) {
+    if (fragment->registers == nullptr) {
+      fragment->registers = allocateState();
+    }
+
+    if (address != 0) {
+      fragment->registers->pc = address;
+    }
+
+    fragment->injectValuesFromPreds();
+    fragment->predecessors.clear();
+  }
+
+  void releaseStateOf(Fragment *frag) {
+    releaseState(frag->registers);
+    frag->registers = nullptr;
+    frag->values = {};
+    frag->outputs = {};
+  }
+
+  bool needInjectExecTest(Fragment *fragment) {
+    auto inst = memory.getPointer<std::uint32_t>(fragment->registers->pc);
+    auto instClass = getInstructionClass(*inst);
+    return instClass == InstructionClass::Vop2 ||
+           instClass == InstructionClass::Vop3 ||
+           instClass == InstructionClass::Mubuf ||
+           instClass == InstructionClass::Mtbuf ||
+           instClass == InstructionClass::Mimg ||
+           instClass == InstructionClass::Ds ||
+           instClass == InstructionClass::Vintrp ||
+           instClass == InstructionClass::Exp ||
+           instClass == InstructionClass::Vop1 ||
+           instClass == InstructionClass::Vopc/* ||
+           instClass == InstructionClass::Smrd*/;
+  }
+
+  spirv::BoolValue createExecTest(Fragment *fragment) {
+    auto context = fragment->context;
+    auto &builder = fragment->builder;
+    auto boolT = context->getBoolType();
+    auto uint32_0 = context->getUInt32(0);
+    auto loIsNotZero =
+        builder.createINotEqual(boolT, fragment->getExecLo().value, uint32_0);
+    auto hiIsNotZero =
+        builder.createINotEqual(boolT, fragment->getExecHi().value, uint32_0);
+
+    return builder.createLogicalOr(boolT, loIsNotZero, hiIsNotZero);
+  }
+
+  Fragment *convertBlock(scf::Block *block, Fragment *rootFragment) {
+    Fragment *currentFragment = nullptr;
+
+    for (scf::Node *node = block->getRootNode(); node != nullptr;
+         node = node->getNext()) {
+
+      if (auto bb = dynCast<scf::BasicBlock>(node)) {
+        if (currentFragment == nullptr) {
+          currentFragment = rootFragment;
+        } else {
+          auto newFragment = function->createFragment();
+          currentFragment->appendBranch(*newFragment);
+          currentFragment->builder.createBranch(newFragment->entryBlockId);
+          currentFragment = newFragment;
+        }
+
+        initState(currentFragment, bb->getAddress());
+        for (auto pred : currentFragment->predecessors) {
+          releaseStateOf(pred);
+        }
+
+        if (needInjectExecTest(currentFragment)) {
+          auto bodyFragment = function->createFragment();
+          auto mergeFragment = function->createFragment();
+
+          auto cond = createExecTest(currentFragment);
+
+          currentFragment->appendBranch(*bodyFragment);
+          currentFragment->appendBranch(*mergeFragment);
+          currentFragment->builder.createSelectionMerge(
+              mergeFragment->entryBlockId, {});
+          currentFragment->builder.createBranchConditional(
+              cond, bodyFragment->entryBlockId, mergeFragment->entryBlockId);
+
+          initState(bodyFragment, bb->getAddress());
+          bodyFragment->convert(bb->getSize());
+
+          bodyFragment->appendBranch(*mergeFragment);
+          bodyFragment->builder.createBranch(mergeFragment->entryBlockId);
+
+          initState(mergeFragment);
+          releaseState(currentFragment->registers);
+          releaseState(bodyFragment->registers);
+
+          currentFragment = mergeFragment;
+        } else {
+          currentFragment->convert(bb->getSize());
+        }
+        continue;
+      }
+
+      if (auto ifElse = dynCast<scf::IfElse>(node)) {
+        auto ifTrueFragment = function->createFragment();
+        auto ifFalseFragment = function->createFragment();
+        auto mergeFragment = function->createFragment();
+
+        currentFragment->appendBranch(*ifTrueFragment);
+        currentFragment->appendBranch(*ifFalseFragment);
+
+        currentFragment->builder.createSelectionMerge(
+            mergeFragment->entryBlockId, {});
+        currentFragment->builder.createBranchConditional(
+            currentFragment->branchCondition, ifTrueFragment->entryBlockId,
+            ifFalseFragment->entryBlockId);
+
+        auto ifTrueLastBlock = convertBlock(ifElse->ifTrue, ifTrueFragment);
+        auto ifFalseLastBlock = convertBlock(ifElse->ifFalse, ifFalseFragment);
+
+        if (ifTrueLastBlock != nullptr) {
+          ifTrueLastBlock->builder.createBranch(mergeFragment->entryBlockId);
+          ifTrueLastBlock->appendBranch(*mergeFragment);
+
+          if (ifTrueLastBlock->registers == nullptr) {
+            initState(ifTrueLastBlock);
+          }
+        }
+
+        if (ifFalseLastBlock != nullptr) {
+          ifFalseLastBlock->builder.createBranch(mergeFragment->entryBlockId);
+          ifFalseLastBlock->appendBranch(*mergeFragment);
+
+          if (ifFalseLastBlock->registers == nullptr) {
+            initState(ifFalseLastBlock);
+          }
+        }
+
+        releaseStateOf(currentFragment);
+        initState(mergeFragment);
+
+        if (ifTrueLastBlock != nullptr) {
+          releaseStateOf(ifTrueLastBlock);
+        }
+
+        if (ifFalseLastBlock != nullptr) {
+          releaseStateOf(ifFalseLastBlock);
+        }
+        currentFragment = mergeFragment;
+        continue;
+      }
+
+      if (dynCast<scf::UnknownBlock>(node)) {
+        auto jumpAddress = currentFragment->jumpAddress;
+
+        std::printf("jump to %lx\n", jumpAddress);
+        std::fflush(stdout);
+
+        if (jumpAddress == 0) {
+          util::unreachable("no jump register on unknown block");
+        }
+
+        auto block = buildCf(cfContext, memory, jumpAddress);
+        auto basicBlockPrinter = [this](const scf::PrintOptions &opts,
+                                        unsigned depth, scf::BasicBlock *bb) {
+          printInstructions(opts, depth,
+                            memory.getPointer<std::uint32_t>(bb->getAddress()),
+                            bb->getSize());
+        };
+        auto scfBlock = scf::structurize(*scfContext, block);
+        scfBlock->print({.blockPrinter = basicBlockPrinter}, 0);
+        std::fflush(stdout);
+
+        auto targetFragment = function->createFragment();
+        currentFragment->builder.createBranch(targetFragment->entryBlockId);
+        currentFragment->appendBranch(*targetFragment);
+        auto result = convertBlock(scfBlock, targetFragment);
+
+        if (currentFragment->registers == nullptr) {
+          initState(targetFragment);
+          releaseStateOf(currentFragment);
+        }
+
+        return result;
+      }
+
+      if (dynCast<scf::Return>(node)) {
+        currentFragment->appendBranch(function->exitFragment);
+        currentFragment->builder.createBranch(
+            function->exitFragment.entryBlockId);
+        return nullptr;
+      }
+
+      util::unreachable();
+    }
+
+    return currentFragment != nullptr ? currentFragment : rootFragment;
+  }
+};
+}; // namespace amdgpu::shader
+
+amdgpu::shader::Shader amdgpu::shader::convert(
+    RemoteMemory memory, Stage stage, std::uint64_t entry,
+    std::span<const std::uint32_t> userSpgrs, int bindingOffset,
+    std::uint32_t dimX, std::uint32_t dimY, std::uint32_t dimZ) {
+  ConverterContext ctxt(memory, stage);
+  auto &builder = ctxt.getBuilder();
+  builder.createCapability(spv::Capability::Shader);
+  builder.createCapability(spv::Capability::ImageQuery);
+  builder.createCapability(spv::Capability::ImageBuffer);
+  builder.createCapability(spv::Capability::UniformAndStorageBuffer8BitAccess);
+  builder.createCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
+  builder.createCapability(spv::Capability::Int64);
+  builder.setMemoryModel(spv::AddressingModel::Logical,
+                         spv::MemoryModel::GLSL450);
+
+  scf::Context scfContext;
+  scf::Block *entryBlock = nullptr;
+  {
+    cf::Context cfContext;
+    auto entryBB = buildCf(cfContext, memory, entry);
+    entryBlock = scf::structurize(scfContext, entryBB);
+  }
+
+  std::printf("========== stage: %u, user sgprs: %zu\n", (unsigned)stage,
+              userSpgrs.size());
+  std::printf("structurized CFG:\n");
+
+  auto basicBlockPrinter = [memory](const scf::PrintOptions &opts,
+                                    unsigned depth, scf::BasicBlock *bb) {
+    printInstructions(opts, depth,
+                      memory.getPointer<std::uint32_t>(bb->getAddress()),
+                      bb->getSize());
+  };
+
+  entryBlock->print({.blockPrinter = basicBlockPrinter}, 0);
+  std::printf("==========\n");
+
+  auto mainFunction = ctxt.createFunction(0);
+  mainFunction->userSgprs = userSpgrs;
+  mainFunction->stage = stage;
+
+  Converter converter;
+  converter.convertFunction(memory, &scfContext, entryBlock, mainFunction);
+
+  Shader result;
+
+  std::fflush(stdout);
+  mainFunction->exitFragment.outputs.clear();
+
+  for (auto &uniform : ctxt.getUniforms()) {
+    auto &newUniform = result.uniforms.emplace_back();
+    newUniform.binding = bindingOffset++;
+
+    for (int i = 0; i < 8; ++i) {
+      newUniform.buffer[i] = uniform.buffer[i];
+    }
+
+    std::uint32_t descriptorSet = 0;
+
+    ctxt.getBuilder().createDecorate(
+        uniform.variable, spv::Decoration::DescriptorSet, {{descriptorSet}});
+    ctxt.getBuilder().createDecorate(uniform.variable, spv::Decoration::Binding,
+                                     {{newUniform.binding}});
+
+    switch (uniform.typeId) {
+    case TypeId::Sampler:
+      newUniform.kind = Shader::UniformKind::Sampler;
+      break;
+    case TypeId::Image2D:
+      newUniform.kind = Shader::UniformKind::Image;
+      break;
+    default:
+      newUniform.kind = Shader::UniformKind::Buffer;
+      break;
+    }
+
+    newUniform.accessOp = uniform.accessOp;
+  }
+
+  mainFunction->insertReturn();
+
+  for (auto frag : mainFunction->fragments) {
+    mainFunction->builder.insertBlock(frag->builder);
+  }
+
+  mainFunction->builder.insertBlock(mainFunction->exitFragment.builder);
+
+  builder.insertFunction(mainFunction->builder, mainFunction->getResultType(),
+                         spv::FunctionControlMask::MaskNone,
+                         mainFunction->getFunctionType());
+
+  if (stage == Stage::Vertex) {
+    builder.createEntryPoint(spv::ExecutionModel::Vertex,
+                             mainFunction->builder.id, "main",
+                             ctxt.getInterfaces());
+  } else if (stage == Stage::Fragment) {
+    builder.createEntryPoint(spv::ExecutionModel::Fragment,
+                             mainFunction->builder.id, "main",
+                             ctxt.getInterfaces());
+    builder.createExecutionMode(mainFunction->builder.id,
+                                spv::ExecutionMode::OriginUpperLeft, {});
+  } else if (stage == Stage::Compute) {
+    builder.createEntryPoint(spv::ExecutionModel::GLCompute,
+                             mainFunction->builder.id, "main",
+                             ctxt.getInterfaces());
+    builder.createExecutionMode(mainFunction->builder.id,
+                                spv::ExecutionMode::LocalSize,
+                                {{dimX, dimY, dimZ}});
+  }
+
+  result.spirv = ctxt.getBuilder().build(SPV_VERSION, 0);
+  return result;
+}
--- a/hw/amdgpu/shader/src/ConverterContext.cpp
+++ b/hw/amdgpu/shader/src/ConverterContext.cpp
@ -0,0 +1,567 @@
+#include "ConverterContext.hpp"
+#include "util/unreachable.hpp"
+using namespace amdgpu::shader;
+
+std::optional<TypeId> ConverterContext::getTypeIdOf(spirv::Type type) const {
+  for (int i = 0; i < kGenericTypesCount; ++i) {
+    if (mTypes[i] == type) {
+      return static_cast<TypeId::enum_type>(i);
+    }
+  }
+
+  return std::nullopt;
+}
+
+spirv::StructType
+ConverterContext::findStructType(std::span<const spirv::Type> members) {
+  for (auto &structType : mStructTypes) {
+    if (structType.match(members)) {
+      return structType.id;
+    }
+  }
+
+  return {};
+}
+
+spirv::StructType
+ConverterContext::getStructType(std::span<const spirv::Type> members) {
+  for (auto &structType : mStructTypes) {
+    if (structType.match(members)) {
+      return structType.id;
+    }
+  }
+
+  auto &newType = mStructTypes.emplace_back();
+  newType.id = mBuilder.createTypeStruct(members);
+  newType.members.reserve(members.size());
+  for (auto member : members) {
+    newType.members.push_back(member);
+  }
+  return newType.id;
+}
+
+spirv::PointerType
+ConverterContext::getStructPointerType(spv::StorageClass storageClass,
+                                       spirv::StructType structType) {
+  StructTypeEntry *entry = nullptr;
+  for (auto &structType : mStructTypes) {
+    if (structType.id != structType.id) {
+      continue;
+    }
+
+    entry = &structType;
+  }
+
+  if (entry == nullptr) {
+    util::unreachable("Struct type not found");
+  }
+
+  auto &ptrType = entry->ptrTypes[static_cast<unsigned>(storageClass)];
+
+  if (!ptrType) {
+    ptrType = mBuilder.createTypePointer(storageClass, structType);
+  }
+
+  return ptrType;
+}
+
+spirv::Type ConverterContext::getType(TypeId id) {
+  auto &type = mTypes[static_cast<std::uint32_t>(id)];
+
+  if (type) {
+    return type;
+  }
+
+  switch (id) {
+  case TypeId::Void:
+    return ((type = mBuilder.createTypeVoid()));
+  case TypeId::Bool:
+    return ((type = mBuilder.createTypeBool()));
+  case TypeId::SInt8:
+    return ((type = mBuilder.createTypeSInt(8)));
+  case TypeId::UInt8:
+    return ((type = mBuilder.createTypeUInt(8)));
+  case TypeId::SInt16:
+    return ((type = mBuilder.createTypeSInt(16)));
+  case TypeId::UInt16:
+    return ((type = mBuilder.createTypeUInt(16)));
+  case TypeId::SInt32:
+    return ((type = mBuilder.createTypeSInt(32)));
+  case TypeId::UInt32:
+    return ((type = mBuilder.createTypeUInt(32)));
+  case TypeId::UInt32x2:
+    return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 2)));
+  case TypeId::UInt32x3:
+    return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 3)));
+  case TypeId::UInt32x4:
+    return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 4)));
+  case TypeId::UInt64:
+    return ((type = mBuilder.createTypeUInt(64)));
+  case TypeId::SInt64:
+    return ((type = mBuilder.createTypeSInt(64)));
+  case TypeId::ArrayUInt32x8:
+    type = mBuilder.createTypeArray(getType(TypeId::UInt32x4), getUInt32(2));
+    getBuilder().createDecorate(type, spv::Decoration::ArrayStride,
+                                std::array{static_cast<std::uint32_t>(16)});
+  case TypeId::ArrayUInt32x16:
+    type = mBuilder.createTypeArray(getType(TypeId::UInt32x4), getUInt32(4));
+    getBuilder().createDecorate(type, spv::Decoration::ArrayStride,
+                                std::array{static_cast<std::uint32_t>(16)});
+    return type;
+  case TypeId::Float16:
+    return ((type = mBuilder.createTypeFloat(16)));
+  case TypeId::Float32:
+    return ((type = mBuilder.createTypeFloat(32)));
+  case TypeId::Float32x2:
+    return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 2)));
+  case TypeId::Float32x3:
+    return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 3)));
+  case TypeId::Float32x4:
+    return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 4)));
+  case TypeId::Float64:
+    return ((type = mBuilder.createTypeFloat(64)));
+  case TypeId::ArrayFloat32x8:
+    type = mBuilder.createTypeArray(getType(TypeId::Float32x4), getUInt32(2));
+    getBuilder().createDecorate(type, spv::Decoration::ArrayStride,
+                                std::array{static_cast<std::uint32_t>(16)});
+    return type;
+  case TypeId::ArrayFloat32x16:
+    type = mBuilder.createTypeArray(getType(TypeId::Float32x4), getUInt32(4));
+    getBuilder().createDecorate(type, spv::Decoration::ArrayStride,
+                                std::array{static_cast<std::uint32_t>(16)});
+    return type;
+
+  case TypeId::Image2D:
+    return ((type = getBuilder().createTypeImage(getFloat32Type(),
+                                                 spv::Dim::Dim2D, 0, 0, 0, 1,
+                                                 spv::ImageFormat::Unknown)));
+  case TypeId::SampledImage2D:
+    return ((type = getBuilder().createTypeSampledImage(getImage2DType())));
+
+  case TypeId::Sampler:
+    return ((type = getBuilder().createTypeSampler()));
+  }
+
+  util::unreachable();
+}
+
+spirv::RuntimeArrayType ConverterContext::getRuntimeArrayType(TypeId id) {
+  auto &type = mRuntimeArrayTypes[static_cast<std::uint32_t>(id)];
+
+  if (!type) {
+    type = mBuilder.createTypeRuntimeArray(getType(id));
+    mBuilder.createDecorate(type, spv::Decoration::ArrayStride,
+                            {{(std::uint32_t)id.getSize()}});
+  }
+
+  return type;
+}
+
+spirv::ConstantUInt ConverterContext::getUInt64(std::uint64_t value) {
+  auto &id = mConstantUint64Map[value];
+  if (!id) {
+    id = mBuilder.createConstant64(getUInt64Type(), value);
+  }
+  return id;
+}
+
+spirv::ConstantUInt ConverterContext::getUInt32(std::uint32_t value) {
+  auto &id = mConstantUint32Map[value];
+  if (!id) {
+    id = mBuilder.createConstant32(getUInt32Type(), value);
+  }
+  return id;
+}
+
+spirv::ConstantSInt ConverterContext::getSInt32(std::uint32_t value) {
+  auto &id = mConstantSint32Map[value];
+  if (!id) {
+    id = mBuilder.createConstant32(getSint32Type(), value);
+  }
+  return id;
+}
+
+spirv::ConstantFloat ConverterContext::getFloat32Raw(std::uint32_t value) {
+  auto &id = mConstantFloat32Map[value];
+  if (!id) {
+    id = mBuilder.createConstant32(getFloat32Type(), value);
+  }
+  return id;
+}
+
+UniformInfo *ConverterContext::createStorageBuffer(TypeId type) {
+  std::array<spirv::Type, 1> uniformStructMembers{getRuntimeArrayType(type)};
+  auto uniformStruct = findStructType(uniformStructMembers);
+
+  if (!uniformStruct) {
+    uniformStruct = getStructType(uniformStructMembers);
+
+    getBuilder().createDecorate(uniformStruct, spv::Decoration::Block, {});
+
+    getBuilder().createMemberDecorate(
+        uniformStruct, 0, spv::Decoration::Offset,
+        std::array{static_cast<std::uint32_t>(0)});
+  }
+
+  auto uniformType =
+      getStructPointerType(spv::StorageClass::StorageBuffer, uniformStruct);
+  auto uniformVariable = getBuilder().createVariable(
+      uniformType, spv::StorageClass::StorageBuffer);
+
+  mInterfaces.push_back(uniformVariable);
+
+  auto &newUniform = mUniforms.emplace_back();
+  newUniform.index = mUniforms.size() - 1;
+  newUniform.typeId = type;
+  newUniform.type = uniformType;
+  newUniform.variable = uniformVariable;
+  newUniform.isBuffer = true;
+  std::printf("new storage buffer %u of type %u\n", newUniform.index,
+               newUniform.typeId.raw);
+  return &newUniform;
+}
+
+UniformInfo *ConverterContext::getOrCreateStorageBuffer(std::uint32_t *vbuffer,
+                                                        TypeId type) {
+  for (auto &uniform : mUniforms) {
+    if (std::memcmp(uniform.buffer, vbuffer, sizeof(std::uint32_t) * 4)) {
+      continue;
+    }
+
+    if (uniform.typeId != type) {
+      util::unreachable("getOrCreateStorageBuffer: access to the uniform with "
+                        "different type");
+    }
+
+    if (!uniform.isBuffer) {
+      util::unreachable("getOrCreateStorageBuffer: uniform was constant");
+    }
+
+    // std::printf("reuse storage buffer %u of type %u\n", uniform.index,
+    //             uniform.typeId.raw);
+    return &uniform;
+  }
+
+  auto newUniform = createStorageBuffer(type);
+  std::memcpy(newUniform->buffer, vbuffer, sizeof(std::uint32_t) * 4);
+  return newUniform;
+}
+
+UniformInfo *ConverterContext::getOrCreateUniformConstant(std::uint32_t *buffer,
+                                                          std::size_t size,
+                                                          TypeId type) {
+  for (auto &uniform : mUniforms) {
+    if (std::memcmp(uniform.buffer, buffer, sizeof(std::uint32_t) * size)) {
+      continue;
+    }
+
+    if (uniform.typeId != type) {
+      util::unreachable(
+          "getOrCreateUniformConstant: access to the uniform with "
+          "different type");
+    }
+
+    if (uniform.isBuffer) {
+      util::unreachable("getOrCreateUniformConstant: uniform was buffer");
+    }
+
+    return &uniform;
+  }
+
+  auto uniformType = getPointerType(spv::StorageClass::UniformConstant, type);
+  auto uniformVariable = getBuilder().createVariable(
+      uniformType, spv::StorageClass::UniformConstant);
+  mInterfaces.push_back(uniformVariable);
+
+  auto &newUniform = mUniforms.emplace_back();
+  newUniform.index = mUniforms.size() - 1;
+  newUniform.typeId = type;
+  newUniform.type = uniformType;
+  newUniform.variable = uniformVariable;
+  newUniform.isBuffer = false;
+  std::memcpy(newUniform.buffer, buffer, sizeof(std::uint32_t) * size);
+
+  return &newUniform;
+}
+
+spirv::VariableValue ConverterContext::getThreadId() {
+  if (mThreadId) {
+    return mThreadId;
+  }
+
+  auto inputType = getPointerType(spv::StorageClass::Input, TypeId::UInt32);
+  mThreadId = mBuilder.createVariable(inputType, spv::StorageClass::Input);
+
+  if (mStage == Stage::Vertex) {
+    mBuilder.createDecorate(
+        mThreadId, spv::Decoration::BuiltIn,
+        std::array{static_cast<std::uint32_t>(spv::BuiltIn::VertexIndex)});
+  } else {
+    util::unreachable();
+  }
+
+  mInterfaces.push_back(mThreadId);
+
+  return mThreadId;
+}
+
+spirv::VariableValue ConverterContext::getWorkgroupId() {
+  if (mWorkgroupId) {
+    return mWorkgroupId;
+  }
+
+  if (mStage != Stage::Compute) {
+    util::unreachable();
+  }
+
+  auto workgroupIdType =
+      getPointerType(spv::StorageClass::Input, TypeId::UInt32x3);
+  mWorkgroupId =
+      mBuilder.createVariable(workgroupIdType, spv::StorageClass::Input);
+
+  mBuilder.createDecorate(
+      mWorkgroupId, spv::Decoration::BuiltIn,
+      {{static_cast<std::uint32_t>(spv::BuiltIn::WorkgroupId)}});
+  mInterfaces.push_back(mWorkgroupId);
+
+  return mWorkgroupId;
+}
+
+spirv::VariableValue ConverterContext::getLocalInvocationId() {
+  if (mLocalInvocationId) {
+    return mLocalInvocationId;
+  }
+
+  if (mStage != Stage::Compute) {
+    util::unreachable();
+  }
+
+  auto localInvocationIdType =
+      getPointerType(spv::StorageClass::Input, TypeId::UInt32x3);
+  mLocalInvocationId =
+      mBuilder.createVariable(localInvocationIdType, spv::StorageClass::Input);
+
+  mBuilder.createDecorate(
+      mLocalInvocationId, spv::Decoration::BuiltIn,
+      std::array{static_cast<std::uint32_t>(spv::BuiltIn::LocalInvocationId)});
+
+  mInterfaces.push_back(mLocalInvocationId);
+
+  return mLocalInvocationId;
+}
+
+spirv::VariableValue ConverterContext::getPerVertex() {
+  if (mPerVertex) {
+    return mPerVertex;
+  }
+
+  auto floatT = getFloat32Type();
+  auto float4T = getFloat32x4Type();
+
+  auto uintConst1 = getUInt32(1);
+  auto arr1Float = mBuilder.createTypeArray(floatT, uintConst1);
+
+  auto gl_PerVertexStructT = mBuilder.createTypeStruct(std::array{
+      static_cast<spirv::Type>(float4T),
+      static_cast<spirv::Type>(floatT),
+      static_cast<spirv::Type>(arr1Float),
+      static_cast<spirv::Type>(arr1Float),
+  });
+
+  mBuilder.createDecorate(gl_PerVertexStructT, spv::Decoration::Block, {});
+  mBuilder.createMemberDecorate(
+      gl_PerVertexStructT, 0, spv::Decoration::BuiltIn,
+      std::array{static_cast<std::uint32_t>(spv::BuiltIn::Position)});
+  mBuilder.createMemberDecorate(
+      gl_PerVertexStructT, 1, spv::Decoration::BuiltIn,
+      std::array{static_cast<std::uint32_t>(spv::BuiltIn::PointSize)});
+  mBuilder.createMemberDecorate(
+      gl_PerVertexStructT, 2, spv::Decoration::BuiltIn,
+      std::array{static_cast<std::uint32_t>(spv::BuiltIn::ClipDistance)});
+  mBuilder.createMemberDecorate(
+      gl_PerVertexStructT, 3, spv::Decoration::BuiltIn,
+      std::array{static_cast<std::uint32_t>(spv::BuiltIn::CullDistance)});
+
+  auto gl_PerVertexPtrT = mBuilder.createTypePointer(spv::StorageClass::Output,
+                                                     gl_PerVertexStructT);
+  mPerVertex =
+      mBuilder.createVariable(gl_PerVertexPtrT, spv::StorageClass::Output);
+
+  mInterfaces.push_back(mPerVertex);
+  return mPerVertex;
+}
+
+spirv::VariableValue ConverterContext::getFragCoord() {
+  if (mFragCoord) {
+    return mFragCoord;
+  }
+
+  auto inputType = getPointerType(spv::StorageClass::Input, TypeId::Float32x4);
+  mFragCoord =
+      mBuilder.createVariable(inputType, spv::StorageClass::Input);
+
+  mBuilder.createDecorate(mFragCoord, spv::Decoration::BuiltIn,
+                          {{static_cast<std::uint32_t>(spv::BuiltIn::FragCoord)}});
+
+  mInterfaces.push_back(mFragCoord);
+  return mFragCoord;
+}
+
+spirv::VariableValue ConverterContext::getIn(unsigned location) {
+  auto [it, inserted] = mIns.try_emplace(location);
+  if (!inserted) {
+    return it->second;
+  }
+
+  auto inputType = getPointerType(spv::StorageClass::Input, TypeId::Float32x4);
+  auto inputVariable =
+      mBuilder.createVariable(inputType, spv::StorageClass::Input);
+
+  mBuilder.createDecorate(inputVariable, spv::Decoration::Location,
+                          {{location}});
+
+  mInterfaces.push_back(inputVariable);
+  it->second = inputVariable;
+  return inputVariable;
+}
+
+spirv::VariableValue ConverterContext::getOut(unsigned location) {
+  auto [it, inserted] = mOuts.try_emplace(location);
+  if (!inserted) {
+    return it->second;
+  }
+  auto outputType =
+      getPointerType(spv::StorageClass::Output, TypeId::Float32x4);
+  auto outputVariable =
+      mBuilder.createVariable(outputType, spv::StorageClass::Output);
+
+  mBuilder.createDecorate(outputVariable, spv::Decoration::Location,
+                          {{location}});
+
+  mInterfaces.push_back(outputVariable);
+  it->second = outputVariable;
+  return outputVariable;
+}
+
+spirv::Function ConverterContext::getDiscardFn() {
+  if (mDiscardFn) {
+    return mDiscardFn;
+  }
+
+  if (mStage != Stage::Fragment) {
+    util::unreachable();
+  }
+
+  auto fn = mBuilder.createFunctionBuilder(5);
+  mDiscardFn = fn.id;
+  auto entry = fn.createBlockBuilder(5);
+  entry.createKill();
+
+  fn.insertBlock(entry);
+  mBuilder.insertFunction(fn, getVoidType(), {},
+                          getFunctionType(getVoidType(), {}));
+
+  return mDiscardFn;
+}
+
+std::optional<std::uint32_t>
+ConverterContext::findUint32Value(spirv::Value id) const {
+  for (auto [value, constId] : mConstantUint32Map) {
+    if (constId == id) {
+      return value;
+    }
+  }
+
+  return std::nullopt;
+}
+
+std::optional<std::int32_t>
+ConverterContext::findSint32Value(spirv::Value id) const {
+  for (auto [value, constId] : mConstantSint32Map) {
+    if (constId == id) {
+      return value;
+    }
+  }
+
+  return std::nullopt;
+}
+
+std::optional<float> ConverterContext::findFloat32Value(spirv::Value id) const {
+  for (auto [value, constId] : mConstantFloat32Map) {
+    if (constId == id) {
+      return std::bit_cast<float>(value);
+    }
+  }
+
+  return std::nullopt;
+}
+
+spirv::FunctionType
+ConverterContext::getFunctionType(spirv::Type resultType,
+                                  std::span<const spirv::Type> params) {
+  for (auto fnType : mFunctionTypes) {
+    if (fnType.resultType != resultType) {
+      continue;
+    }
+
+    if (fnType.params.size() != params.size()) {
+      continue;
+    }
+
+    bool match = true;
+    for (std::size_t i = 0, end = params.size(); i < end; ++i) {
+      if (fnType.params[i] != params[i]) {
+        match = false;
+        break;
+      }
+    }
+    if (!match) {
+      continue;
+    }
+
+    return fnType.id;
+  }
+
+  auto id = mBuilder.createTypeFunction(resultType, params);
+
+  std::vector<spirv::Type> paramsVec;
+  paramsVec.reserve(params.size());
+
+  for (auto param : params) {
+    paramsVec.push_back(param);
+  }
+
+  mFunctionTypes.push_back(FunctionType{
+      .resultType = resultType, .params = std::move(paramsVec), .id = id});
+
+  return id;
+}
+
+Function *ConverterContext::createFunction(std::size_t expectedSize) {
+  auto result = &mFunctions.emplace_front();
+
+  result->context = this;
+  result->entryFragment.context = this;
+  result->entryFragment.function = result;
+  result->entryFragment.builder = mBuilder.createBlockBuilder(expectedSize);
+  result->entryFragment.entryBlockId = result->entryFragment.builder.id;
+  result->fragments.push_back(&result->entryFragment);
+
+  result->exitFragment.context = this;
+  result->exitFragment.function = result;
+  result->exitFragment.builder = mBuilder.createBlockBuilder(0);
+  result->exitFragment.entryBlockId = result->exitFragment.builder.id;
+  result->builder = mBuilder.createFunctionBuilder(expectedSize);
+
+  return result;
+}
+
+Fragment *ConverterContext::createFragment(std::size_t expectedSize) {
+  auto result = &mFragments.emplace_front();
+
+  result->context = this;
+  result->builder = mBuilder.createBlockBuilder(expectedSize);
+  result->entryBlockId = result->builder.id;
+
+  return result;
+}
--- a/hw/amdgpu/shader/src/Fragment.cpp
+++ b/hw/amdgpu/shader/src/Fragment.cpp
--- a/hw/amdgpu/shader/src/Function.cpp
+++ b/hw/amdgpu/shader/src/Function.cpp
@ -0,0 +1,274 @@
+#include "Function.hpp"
+#include "ConverterContext.hpp"
+#include "RegisterId.hpp"
+
+using namespace amdgpu::shader;
+
+Value Function::createInput(RegisterId id) {
+  auto [it, inserted] = inputs.try_emplace(id);
+
+  if (!inserted) {
+    assert(it->second);
+    return it->second;
+  }
+
+  auto offset = id.getOffset();
+
+  if (id.isScalar()) {
+    auto uint32T = context->getUInt32Type();
+
+    if (userSgprs.size() > offset) {
+      return ((it->second = {uint32T, context->getUInt32(userSgprs[offset])}));
+    }
+
+    if (stage == Stage::None) {
+      return ((it->second =
+                   Value{uint32T, builder.createFunctionParameter(uint32T)}));
+    }
+
+    switch (id.raw) {
+    case RegisterId::ExecLo:
+      return ((it->second = {uint32T, context->getUInt32(1)}));
+    case RegisterId::ExecHi:
+      return ((it->second = {uint32T, context->getUInt32(0)}));
+
+    case RegisterId::Scc:
+      return ((it->second = {context->getBoolType(), context->getFalse()}));
+
+    default:
+      break;
+    }
+
+    if (stage == Stage::Vertex) {
+      return ((it->second = {uint32T, context->getUInt32(0)}));
+    } else if (stage == Stage::Fragment) {
+      return ((it->second = {uint32T, context->getUInt32(0)}));
+    } else if (stage == Stage::Compute) {
+      std::uint32_t offsetAfterSgprs = offset - userSgprs.size();
+      if (offsetAfterSgprs < 3) {
+        auto workgroupIdVar = context->getWorkgroupId();
+        auto workgroupId = entryFragment.builder.createLoad(
+            context->getUint32x3Type(), workgroupIdVar);
+        for (uint32_t i = 0; i < 3; ++i) {
+          auto input = entryFragment.builder.createCompositeExtract(
+              uint32T, workgroupId, {{i}});
+
+          inputs[RegisterId::Scalar(userSgprs.size() + i)] = {uint32T, input};
+        }
+
+        return inputs[id];
+      }
+
+      return ((it->second = {uint32T, context->getUInt32(0)}));
+    }
+
+    util::unreachable();
+  }
+
+  if (stage == Stage::None) {
+    auto float32T = context->getFloat32Type();
+    return (
+        (it->second = {float32T, builder.createFunctionParameter(float32T)}));
+  }
+
+  if (stage == Stage::Vertex) {
+    if (id.isVector()) {
+      auto uint32T = context->getUInt32Type();
+
+      if (id.getOffset() == 0) {
+        auto input =
+            entryFragment.builder.createLoad(uint32T, context->getThreadId());
+
+        return ((it->second = {uint32T, input}));
+      }
+
+      return ((it->second = {uint32T, context->getUInt32(0)}));
+    }
+
+    util::unreachable("Unexpected vertex input %u. user sgprs count=%zu",
+                      id.raw, userSgprs.size());
+  }
+
+  if (stage == Stage::Fragment) {
+    if (id.isAttr()) {
+      auto float4T = context->getFloat32x4Type();
+      auto input = entryFragment.builder.createLoad(
+          float4T, context->getIn(id.getOffset()));
+      return ((it->second = {float4T, input}));
+    }
+
+    if (id.isVector()) {
+      switch (offset) {
+      case 2:
+      case 3:
+      case 4:
+      case 5: {
+        auto float4T = context->getFloat32x4Type();
+        auto floatT = context->getFloat32Type();
+        auto fragCoord =
+            entryFragment.builder.createLoad(float4T, context->getFragCoord());
+        return (
+            (it->second = {floatT, entryFragment.builder.createCompositeExtract(
+                                       floatT, fragCoord, {{offset - 2}})}));
+      }
+      }
+    }
+
+    return ((it->second = {context->getUInt32Type(), context->getUInt32(0)}));
+  }
+
+  if (stage == Stage::Compute) {
+    if (id.isVector() && offset < 3) {
+      auto uint32T = context->getUInt32Type();
+      auto localInvocationIdVar = context->getLocalInvocationId();
+      auto localInvocationId = entryFragment.builder.createLoad(
+          context->getUint32x3Type(), localInvocationIdVar);
+
+      for (uint32_t i = 0; i < 3; ++i) {
+        auto input = entryFragment.builder.createCompositeExtract(
+            uint32T, localInvocationId, {{i}});
+
+        inputs[RegisterId::Vector(i)] = {uint32T, input};
+      }
+
+      return inputs[id];
+    }
+
+    return ((it->second = {context->getUInt32Type(), context->getUInt32(0)}));
+  }
+
+  util::unreachable();
+}
+
+void Function::createExport(spirv::BlockBuilder &builder, unsigned index,
+                            Value value) {
+  if (stage == Stage::Vertex) {
+    switch (index) {
+    case 12: {
+      auto float4OutPtrT =
+          context->getPointerType(spv::StorageClass::Output, TypeId::Float32x4);
+
+      auto gl_PerVertexPosition = builder.createAccessChain(
+          float4OutPtrT, context->getPerVertex(), {{context->getSInt32(0)}});
+
+      if (value.type != context->getFloat32x4Type()) {
+        util::unreachable();
+      }
+
+      builder.createStore(gl_PerVertexPosition, value.value);
+      return;
+    }
+
+    case 32 ... 64: { // paramN
+      if (value.type != context->getFloat32x4Type()) {
+        util::unreachable();
+      }
+
+      builder.createStore(context->getOut(index - 32), value.value);
+      return;
+    }
+    }
+
+    util::unreachable("Unexpected vartex export target %u", index);
+  }
+
+  if (stage == Stage::Fragment) {
+    switch (index) {
+    case 0 ... 7: {
+      if (value.type != context->getFloat32x4Type()) {
+        util::unreachable();
+      }
+
+      builder.createStore(context->getOut(index), value.value);
+      return;
+    }
+    }
+
+    util::unreachable("Unexpected fragment export target %u", index);
+  }
+
+  util::unreachable();
+}
+
+spirv::Type Function::getResultType() {
+  if (exitFragment.outputs.empty()) {
+    return context->getVoidType();
+  }
+
+  if (exitFragment.outputs.size() == 1) {
+    return exitFragment.registers->getRegister(*exitFragment.outputs.begin())
+        .type;
+  }
+
+  std::vector<spirv::Type> members;
+  members.reserve(exitFragment.outputs.size());
+
+  for (auto id : exitFragment.outputs) {
+    members.push_back(exitFragment.registers->getRegister(id).type);
+  }
+
+  return context->getStructType(members);
+}
+
+spirv::FunctionType Function::getFunctionType() {
+  if (stage != Stage::None) {
+    return context->getFunctionType(getResultType(), {});
+  }
+
+  std::vector<spirv::Type> params;
+  params.reserve(inputs.size());
+
+  for (auto inp : inputs) {
+    params.push_back(inp.second.type);
+  }
+
+  return context->getFunctionType(getResultType(), params);
+}
+
+Fragment *Function::createFragment() {
+  auto result = context->createFragment(0);
+  result->function = this;
+  fragments.push_back(result);
+  return result;
+}
+
+void Function::insertReturn() {
+  if (exitFragment.outputs.empty()) {
+    exitFragment.builder.createReturn();
+    return;
+  }
+
+  if (exitFragment.outputs.size() == 1) {
+    auto value =
+        exitFragment.registers->getRegister(*exitFragment.outputs.begin())
+            .value;
+    exitFragment.builder.createReturnValue(value);
+    return;
+  }
+
+  auto resultType = getResultType();
+
+  auto resultTypePointer = context->getBuilder().createTypePointer(
+      spv::StorageClass::Function, resultType);
+
+  auto resultVariable = entryFragment.builder.createVariable(
+      resultTypePointer, spv::StorageClass::Function);
+
+  std::uint32_t member = 0;
+  for (auto regId : exitFragment.outputs) {
+    auto value = exitFragment.registers->getRegister(regId);
+    auto valueTypeId = context->getTypeIdOf(value.type);
+
+    auto pointerType =
+        context->getPointerType(spv::StorageClass::Function, *valueTypeId);
+    auto valuePointer = exitFragment.builder.createAccessChain(
+        pointerType, resultVariable,
+        {{exitFragment.context->getUInt32(member++)}});
+
+    exitFragment.builder.createStore(valuePointer, value.value);
+  }
+
+  auto resultValue = exitFragment.builder.createLoad(resultType, resultVariable);
+
+  exitFragment.builder.createReturnValue(resultValue);
+}
--- a/hw/amdgpu/shader/src/Instruction.cpp
+++ b/hw/amdgpu/shader/src/Instruction.cpp
--- a/hw/amdgpu/shader/src/RegisterState.cpp
+++ b/hw/amdgpu/shader/src/RegisterState.cpp
@ -0,0 +1,72 @@
+#include "RegisterState.hpp"
+#include "util/unreachable.hpp"
+
+amdgpu::shader::Value
+amdgpu::shader::RegisterState::getRegister(RegisterId regId) {
+  auto offset = regId.getOffset();
+
+  if (regId.isScalar()) {
+    switch (offset) {
+    case 0 ... 103:
+      return sgprs[offset];
+    case 106:
+      return vccLo;
+    case 107:
+      return vccHi;
+    case 124:
+      return m0;
+    case 126:
+      return execLo;
+    case 127:
+      return execHi;
+    case 253:
+      return scc;
+    case 254:
+      return ldsDirect;
+    }
+
+    util::unreachable();
+  }
+
+  if (regId.isVector()) {
+    return vgprs[offset];
+  }
+
+  if (regId.isAttr()) {
+    return attrs[offset];
+  }
+
+  util::unreachable();
+}
+
+void amdgpu::shader::RegisterState::setRegister(RegisterId regId,
+                                                   Value value) {
+  auto offset = regId.getOffset();
+
+  if (regId.isScalar()) {
+    switch (offset) {
+    case 0 ... 103: sgprs[offset] = value; return;
+    case 106: vccLo = value; return;
+    case 107: vccHi = value; return;
+    case 124: m0 = value; return;
+    case 126: execLo = value; return;
+    case 127: execHi = value; return;
+    case 253: scc = value; return;
+    case 254: ldsDirect = value; return;
+    }
+
+    util::unreachable();
+  }
+
+  if (regId.isVector()) {
+    vgprs[offset] = value;
+    return;
+  }
+
+  if (regId.isAttr()) {
+    attrs[offset] = value;
+    return;
+  }
+
+  util::unreachable();
+}
--- a/hw/amdgpu/shader/src/TypeId.cpp
+++ b/hw/amdgpu/shader/src/TypeId.cpp
@ -0,0 +1,132 @@
+#include "TypeId.hpp"
+#include "util/unreachable.hpp"
+#include <cstdint>
+
+amdgpu::shader::TypeId amdgpu::shader::TypeId::getBaseType() const {
+  switch (raw) {
+  case TypeId::Void:
+  case TypeId::Bool:
+  case TypeId::SInt8:
+  case TypeId::UInt8:
+  case TypeId::SInt16:
+  case TypeId::UInt16:
+  case TypeId::SInt32:
+  case TypeId::UInt32:
+  case TypeId::SInt64:
+  case TypeId::UInt64:
+  case TypeId::Float16:
+  case TypeId::Float32:
+  case TypeId::Float64:
+  case TypeId::Sampler:
+  case TypeId::Image2D:
+  case TypeId::SampledImage2D:
+    return raw;
+
+  case TypeId::UInt32x2:
+  case TypeId::UInt32x3:
+  case TypeId::UInt32x4:
+  case TypeId::ArrayUInt32x8:
+  case TypeId::ArrayUInt32x16:
+    return TypeId::UInt32;
+
+  case TypeId::Float32x2:
+  case TypeId::Float32x3:
+  case TypeId::Float32x4:
+  case TypeId::ArrayFloat32x8:
+  case TypeId::ArrayFloat32x16:
+    return TypeId::Float32;
+  }
+
+  util::unreachable();
+}
+
+std::size_t amdgpu::shader::TypeId::getSize() const {
+  switch (raw) {
+  case TypeId::Void:
+  case TypeId::Sampler:
+  case TypeId::Image2D:
+  case TypeId::SampledImage2D:
+    return 0;
+  case TypeId::Bool:
+    return 1;
+  case TypeId::SInt8:
+  case TypeId::UInt8:
+    return 1;
+  case TypeId::SInt16:
+  case TypeId::UInt16:
+    return 2;
+  case TypeId::SInt32:
+  case TypeId::UInt32:
+    return 4;
+  case TypeId::SInt64:
+  case TypeId::UInt64:
+    return 8;
+  case TypeId::Float16:
+    return 2;
+  case TypeId::Float32:
+    return 4;
+  case TypeId::Float64:
+    return 8;
+
+  case TypeId::UInt32x2:
+  case TypeId::UInt32x3:
+  case TypeId::UInt32x4:
+  case TypeId::ArrayUInt32x8:
+  case TypeId::ArrayUInt32x16:
+  case TypeId::Float32x2:
+  case TypeId::Float32x3:
+  case TypeId::Float32x4:
+  case TypeId::ArrayFloat32x8:
+  case TypeId::ArrayFloat32x16:
+    return getElementsCount() * getBaseType().getSize();
+  }
+
+  util::unreachable();
+}
+
+std::size_t amdgpu::shader::TypeId::getElementsCount() const {
+  switch (raw) {
+  case TypeId::Bool:
+  case TypeId::SInt8:
+  case TypeId::UInt8:
+  case TypeId::SInt16:
+  case TypeId::UInt16:
+  case TypeId::SInt32:
+  case TypeId::UInt32:
+  case TypeId::SInt64:
+  case TypeId::UInt64:
+  case TypeId::Float16:
+  case TypeId::Float32:
+  case TypeId::Float64:
+    return 1;
+  
+  case TypeId::UInt32x2:
+    return 2;
+  case TypeId::UInt32x3:
+    return 3;
+  case TypeId::UInt32x4:
+    return 4;
+  case TypeId::ArrayUInt32x8:
+    return 8;
+  case TypeId::ArrayUInt32x16:
+    return 16;
+  case TypeId::Float32x2:
+    return 2;
+  case TypeId::Float32x3:
+    return 3;
+  case TypeId::Float32x4:
+    return 4;
+  case TypeId::ArrayFloat32x8:
+    return 8;
+  case TypeId::ArrayFloat32x16:
+    return 16;
+
+  case TypeId::Void:
+  case TypeId::Sampler:
+  case TypeId::Image2D:
+  case TypeId::SampledImage2D:
+    return 0;
+  }
+
+  util::unreachable();
+}
--- a/hw/amdgpu/shader/src/cf.cpp
+++ b/hw/amdgpu/shader/src/cf.cpp
@ -0,0 +1,117 @@
+#include "cf.hpp"
+#include <cassert>
+#include <fstream>
+#include <unordered_set>
+
+void cf::BasicBlock::split(BasicBlock *target) {
+  assert(target->address > address);
+  target->size = size - (target->address - address);
+  size = target->address - address;
+
+  for (std::size_t i = 0, count = getSuccessorsCount(); i < count; ++i) {
+    auto succ = getSuccessor(i);
+    succ->predecessors.erase(this);
+    succ->predecessors.insert(target);
+    target->successors[i] = successors[i];
+    successors[i] = nullptr;
+  }
+
+  target->terminator = terminator;
+  terminator = TerminatorKind::None;
+
+  createBranch(target);
+}
+
+void cf::BasicBlock::createConditionalBranch(BasicBlock *ifTrue,
+                                             BasicBlock *ifFalse) {
+  assert(terminator == TerminatorKind::None);
+  assert(getSuccessorsCount() == 0);
+  ifTrue->predecessors.insert(this);
+  ifFalse->predecessors.insert(this);
+
+  successors[0] = ifTrue;
+  successors[1] = ifFalse;
+
+  terminator = TerminatorKind::Branch;
+}
+
+void cf::BasicBlock::createBranch(BasicBlock *target) {
+  assert(terminator == TerminatorKind::None);
+  assert(getSuccessorsCount() == 0);
+
+  target->predecessors.insert(this);
+  successors[0] = target;
+
+  terminator = TerminatorKind::Branch;
+}
+
+void cf::BasicBlock::createBranchToUnknown() {
+  assert(terminator == TerminatorKind::None);
+  assert(getSuccessorsCount() == 0);
+
+  terminator = TerminatorKind::BranchToUnknown;
+}
+
+void cf::BasicBlock::createReturn() {
+  assert(terminator == TerminatorKind::None);
+  assert(getSuccessorsCount() == 0);
+
+  terminator = TerminatorKind::Return;
+}
+
+void cf::BasicBlock::replaceSuccessor(BasicBlock *origBB, BasicBlock *newBB) {
+  origBB->predecessors.erase(this);
+  newBB->predecessors.insert(this);
+
+  if (origBB == successors[0]) {
+    successors[0] = newBB;
+    return;
+  }
+
+  if (origBB == successors[1]) {
+    successors[1] = newBB;
+    return;
+  }
+
+  std::abort();
+}
+
+bool cf::BasicBlock::hasDirectPredecessor(const BasicBlock &block) const {
+  for (auto pred : predecessors) {
+    if (pred == &block) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+bool cf::BasicBlock::hasPredecessor(const BasicBlock &block) const {
+  if (&block == this) {
+    return hasDirectPredecessor(block);
+  }
+
+  std::vector<const BasicBlock *> workList;
+  std::unordered_set<const BasicBlock *> visited;
+  workList.push_back(this);
+  visited.insert(this);
+
+  while (!workList.empty()) {
+    auto node = workList.back();
+
+    if (node == &block) {
+      return true;
+    }
+
+    workList.pop_back();
+    workList.reserve(workList.size() + predecessors.size());
+
+    for (auto pred : predecessors) {
+      if (visited.insert(pred).second) {
+        workList.push_back(pred);
+      }
+    }
+  }
+
+  return false;
+}
--- a/hw/amdgpu/shader/src/scf.cpp
+++ b/hw/amdgpu/shader/src/scf.cpp
@ -0,0 +1,252 @@
+#include "scf.hpp"
+#include "cf.hpp"
+#include <cassert>
+#include <fstream>
+#include <unordered_set>
+#include <utility>
+
+void scf::Block::eraseFrom(Node *endBefore) {
+  mEnd = endBefore->getPrev();
+  if (mEnd != nullptr) { 
+    mEnd->mNext = nullptr;
+  } else {
+    mBegin = nullptr;
+  }
+}
+
+void scf::Block::splitInto(Block *target, Node *splitPoint) {
+  auto targetEnd = std::exchange(mEnd, splitPoint->mPrev);
+
+  if (mEnd != nullptr) {
+    mEnd->mNext = nullptr;
+  } else {
+    mBegin = nullptr;
+  }
+
+  for (auto node = splitPoint; node != nullptr; node = node->getNext()) {
+    node->mParent = target;
+  }
+
+  if (target->mEnd != nullptr) {
+    target->mEnd->mNext = splitPoint;
+  }
+
+  splitPoint->mPrev = target->mEnd;
+  target->mEnd = targetEnd;
+
+  if (target->mBegin == nullptr) {
+    target->mBegin = splitPoint;
+  }
+}
+
+scf::Block *scf::Block::split(Context &context, Node *splitPoint) {
+  auto result = context.create<Block>();
+  splitInto(result, splitPoint);
+  return result;
+}
+
+static scf::BasicBlock *findJumpTargetIn(scf::Block *parentBlock,
+                                         scf::Block *testBlock) {
+  auto jumpNode = dynCast<scf::Jump>(testBlock->getLastNode());
+
+  if (jumpNode == nullptr || jumpNode->target->getParent() != parentBlock) {
+    return nullptr;
+  }
+
+  return jumpNode->target;
+}
+
+static bool transformJumpToLoop(scf::Context &ctxt, scf::Block *block) {
+  // bb0
+  // bb1
+  // if true {
+  //   bb2
+  //   jump bb1
+  // } else {
+  //   bb3
+  // }
+  //
+  // -->
+  //
+  // bb0
+  // loop {
+  //   bb1
+  //   if false {
+  //     break
+  //   }
+  //   bb2
+  // }
+  // bb3
+
+  if (block->isEmpty()) {
+    return false;
+  }
+
+  auto ifElse = dynCast<scf::IfElse>(block->getLastNode());
+
+  if (ifElse == nullptr) {
+    return false;
+  }
+
+  auto loopTarget = findJumpTargetIn(block, ifElse->ifTrue);
+  auto loopBlock = ifElse->ifTrue;
+  auto invariantBlock = ifElse->ifFalse;
+
+  if (loopTarget == nullptr) {
+    loopTarget = findJumpTargetIn(block, ifElse->ifFalse);
+    loopBlock = ifElse->ifFalse;
+    invariantBlock = ifElse->ifTrue;
+
+    if (loopTarget == nullptr) {
+      return false;
+    }
+  }
+
+  auto loopBody = block->split(ctxt, loopTarget);
+  auto loop = ctxt.create<scf::Loop>(loopBody);
+  block->append(loop);
+
+  for (auto node = invariantBlock->getRootNode(); node != nullptr;) {
+    auto nextNode = node->getNext();
+    invariantBlock->detachNode(node);
+    block->append(node);
+    node = nextNode;
+  }
+
+  loopBlock->detachNode(loopBlock->getLastNode());
+
+  for (auto node = loopBlock->getRootNode(); node != nullptr;) {
+    auto nextNode = node->getNext();
+    loopBlock->detachNode(node);
+    loopBody->append(node);
+    node = nextNode;
+  }
+
+  invariantBlock->append(ctxt.create<scf::Break>());
+
+  return true;
+}
+
+static bool moveSameLastBlocksTo(scf::IfElse *ifElse, scf::Block *block) {
+  if (ifElse->ifTrue->isEmpty() || ifElse->ifFalse->isEmpty()) {
+    return false;
+  }
+
+  auto ifTrueIt = ifElse->ifTrue->getLastNode();
+  auto ifFalseIt = ifElse->ifFalse->getLastNode();
+
+  while (ifTrueIt != nullptr && ifFalseIt != nullptr) {
+    if (!ifTrueIt->isEqual(*ifFalseIt)) {
+      break;
+    }
+
+    ifTrueIt = ifTrueIt->getPrev();
+    ifFalseIt = ifFalseIt->getPrev();
+  }
+
+  if (ifTrueIt == ifElse->ifTrue->getLastNode()) {
+    return false;
+  }
+
+  if (ifTrueIt == nullptr) {
+    ifTrueIt = ifElse->ifTrue->getRootNode();
+  } else {
+    ifTrueIt = ifTrueIt->getNext();
+  }
+
+  if (ifFalseIt == nullptr) {
+    ifFalseIt = ifElse->ifFalse->getRootNode();
+  } else {
+    ifFalseIt = ifFalseIt->getNext();
+  }
+
+  ifElse->ifTrue->splitInto(block, ifTrueIt);
+  ifElse->ifFalse->eraseFrom(ifFalseIt);
+  return true;
+}
+
+class Structurizer {
+  scf::Context &context;
+
+public:
+  Structurizer(scf::Context &context) : context(context) {}
+
+  scf::Block *structurize(cf::BasicBlock *bb) {
+    return structurizeBlock(bb, {});
+  }
+
+public:
+  scf::IfElse *structurizeIfElse(
+      cf::BasicBlock *ifTrue, cf::BasicBlock *ifFalse,
+      std::unordered_map<cf::BasicBlock *, scf::BasicBlock *> &visited) {
+    auto ifTrueBlock = structurizeBlock(ifTrue, visited);
+    auto ifFalseBlock = structurizeBlock(ifFalse, visited);
+
+    return context.create<scf::IfElse>(ifTrueBlock, ifFalseBlock);
+  }
+
+  scf::Block *structurizeBlock(
+      cf::BasicBlock *bb,
+      std::unordered_map<cf::BasicBlock *, scf::BasicBlock *> visited) {
+    auto result = context.create<scf::Block>();
+    std::vector<cf::BasicBlock *> workList;
+    workList.push_back(bb);
+
+    while (!workList.empty()) {
+      auto block = workList.back();
+      workList.pop_back();
+
+      auto [it, inserted] = visited.try_emplace(block, nullptr);
+      if (!inserted) {
+        result->append(context.create<scf::Jump>(it->second));
+        continue;
+      }
+
+      auto scfBlock = context.create<scf::BasicBlock>(block->getAddress(),
+                                                      block->getSize());
+      it->second = scfBlock;
+      result->append(scfBlock);
+
+      switch (block->getTerminator()) {
+      case cf::TerminatorKind::None:
+        std::abort();
+        break;
+
+      case cf::TerminatorKind::Branch:
+        switch (block->getSuccessorsCount()) {
+        case 1:
+          workList.push_back(block->getSuccessor(0));
+          break;
+
+        case 2: {
+          auto ifElse = structurizeIfElse(block->getSuccessor(0),
+                                          block->getSuccessor(1), visited);
+          result->append(ifElse);
+
+          while (moveSameLastBlocksTo(ifElse, result) ||
+                 transformJumpToLoop(context, result)) {
+            ;
+          }
+
+          break;
+        }
+        }
+        break;
+
+      case cf::TerminatorKind::BranchToUnknown:
+        result->append(context.create<scf::UnknownBlock>());
+        break;
+
+      case cf::TerminatorKind::Return:
+        result->append(context.create<scf::Return>());
+        break;
+      }
+    }
+
+    return result;
+  }
+};
+
+scf::Block *scf::structurize(Context &ctxt, cf::BasicBlock *bb) {
+  return Structurizer{ctxt}.structurize(bb);
+}
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit 05d35b71483880246bc4c1a28f857e9046af7c36
+Subproject commit 6a093985c4a331661fd47ff9f1c06e4b9b102002