Added amdgpu hw project

This commit is contained in:
DH 2023-06-24 15:59:27 +03:00
parent 1fdadaaee9
commit a8af9198bf
49 changed files with 28342 additions and 1 deletions

View file

@ -7,3 +7,4 @@ set(CMAKE_CXX_STANDARD 23)
add_subdirectory(3rdparty/crypto)
add_subdirectory(orbis-kernel)
add_subdirectory(rpcsx-os)
add_subdirectory(hw/amdgpu)

17
hw/amdgpu/CMakeLists.txt Normal file
View file

@ -0,0 +1,17 @@
# Top-level build script of the amdgpu hardware project: configures the C++
# standard, pulls in the sub-projects and exposes the shared include directory.
# NOTE(review): the libamdgpu-device script calls cmake_path(), which CMake
# documents as available since 3.20 - confirm whether 3.10 is still the
# intended minimum.
cmake_minimum_required(VERSION 3.10)
# C++23 without compiler-specific language extensions.
set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_EXTENSIONS off)
# Sub-projects; each one defines its own target(s).
add_subdirectory(bridge)
add_subdirectory(device)
add_subdirectory(shader)
add_subdirectory(lib/libspirv)
project(amdgpu)
# Header-only target that only publishes the include/ directory; other targets
# consume it through the amdgpu::base alias.
add_library(${PROJECT_NAME} INTERFACE)
target_include_directories(${PROJECT_NAME} INTERFACE include)
add_library(amdgpu::base ALIAS ${PROJECT_NAME})

View file

@ -0,0 +1,16 @@
# Static library with the shared-memory command bridge (bridge.hpp/bridge.cpp).
project(libamdgpu-bridge)
# Include sub-path of this project's public headers.
set(PROJECT_PATH amdgpu/bridge)
set(INCLUDE
include/${PROJECT_PATH}/bridge.hpp
)
set(SRC
src/bridge.cpp
)
add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC})
# Consumers include <amdgpu/bridge/bridge.hpp>; the library's own sources may
# additionally include "bridge.hpp" directly.
target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH})
# The project name already starts with "lib", so drop the default prefix.
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "")
add_library(amdgpu::bridge ALIAS ${PROJECT_NAME})
set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON)

View file

@ -0,0 +1,256 @@
#pragma once
#include <cstdint>
#include <cstring>
#include <initializer_list>
namespace amdgpu::bridge {
// Identifier of a command stored in the bridge ring buffer. The value occupies
// the low 32 bits of the 64-bit command header word (see
// BridgePusher::makeCommandHeader), so the numeric values are part of the
// shared-memory format.
enum class CommandId : std::uint32_t {
// Padding entry; the puller skips it without reporting a command.
Nop,
SetUpSharedMemory,
ProtectMemory,
CommandBuffer,
Flip,
DoFlip,
SetBuffer
};
// Payload of CommandId::ProtectMemory: a memory range and the protection value
// to apply to it. Also used as the element type of BridgeHeader::memoryAreas.
struct CmdMemoryProt {
std::uint64_t address;
std::uint64_t size;
// Transported as a full 64-bit ring word and narrowed back on the pull side.
std::uint32_t prot;
};
// Payload of CommandId::CommandBuffer: the target queue and the address/size of
// the submitted command buffer. Also used as the element type of
// BridgeHeader::commandBuffers.
struct CmdCommandBuffer {
std::uint64_t queue;
std::uint64_t address;
std::uint64_t size;
};
// Payload of CommandId::SetBuffer: describes one display buffer. On the wire
// the 32-bit fields are packed pairwise into 64-bit words
// (bufferIndex|tilingMode, width|height, pitch|pixelFormat) by
// BridgePusher::sendSetBuffer and unpacked by BridgePuller.
struct CmdBuffer {
std::uint32_t bufferIndex;
std::uint32_t width;
std::uint32_t height;
std::uint32_t pitch;
std::uint64_t address;
std::uint32_t pixelFormat;
std::uint32_t tilingMode;
};
// Payload of CommandId::Flip: index of the buffer to flip to plus an opaque
// 64-bit argument supplied by the sender.
struct CmdFlip {
std::uint32_t bufferIndex;
std::uint64_t arg;
};
// Header placed at the start of the shared-memory mapping; the command ring
// (`commands`) follows it directly. The layout is shared between the pushing
// and the pulling process, so field order and sizes must not change.
struct BridgeHeader {
// Number of 64-bit slots available in `commands` (set on creation).
std::uint64_t size;
std::uint64_t info;
// NOTE(review): presumably the process ids of the two peers - not written by
// any code visible in this header; confirm against the users.
std::uint32_t pullerPid;
std::uint32_t pusherPid;
// Bit set of BridgeFlags values.
volatile std::uint64_t flags;
// Filled by BridgePusher::setVm.
std::uint64_t vmAddress;
std::uint64_t vmSize;
char vmName[32];
volatile std::uint32_t flipBuffer;
volatile std::uint64_t flipArg;
volatile std::uint64_t flipCount;
// Element counts for the three fixed-size tables below.
std::uint32_t memoryAreaCount;
std::uint32_t commandBufferCount;
std::uint32_t bufferCount;
CmdMemoryProt memoryAreas[128];
CmdCommandBuffer commandBuffers[32];
CmdBuffer buffers[8];
// Ring cursors, expressed as slot indices into `commands`: `pull` is advanced
// by BridgePuller, `push` by BridgePusher. The ring is empty when they match.
volatile std::uint64_t pull;
volatile std::uint64_t push;
// Flexible array member holding the ring slots (compiler extension in C++).
std::uint64_t commands[];
};
// Decoded form of one ring entry as returned by BridgePuller::pullCommands.
// `id` selects which union member (if any) carries the payload.
struct Command {
CommandId id;
union {
CmdMemoryProt memoryProt;       // CommandId::ProtectMemory
CmdCommandBuffer commandBuffer; // CommandId::CommandBuffer
CmdBuffer buffer;               // CommandId::SetBuffer
CmdFlip flip;                   // CommandId::Flip
};
};
// Bit values stored in BridgeHeader::flags.
enum class BridgeFlags {
// Set by BridgePusher::setVm once vmAddress/vmSize/vmName are filled in.
VmConfigured = 1 << 0,
// NOTE(review): the lock bits are not used by the code in this header;
// presumably reserved for pusher/puller locking - confirm.
PushLock = 1 << 1,
PullLock = 1 << 2,
};
/// Producer side of the command ring stored behind a BridgeHeader.
///
/// Every command occupies one 64-bit header word (command id in the low 32
/// bits, argument count in the high 32 bits) followed by its 64-bit arguments.
/// The pusher is the only writer of `BridgeHeader::push`.
class BridgePusher {
  BridgeHeader *buffer = nullptr;

public:
  BridgePusher() = default;
  BridgePusher(BridgeHeader *buffer) : buffer(buffer) {}

  /// Publishes the guest VM range and name and raises the VmConfigured flag.
  /// `name` is truncated to fit `vmName` and is always NUL-terminated.
  void setVm(std::uint64_t address, std::uint64_t size, const char *name) {
    buffer->vmAddress = address;
    buffer->vmSize = size;
    // strncpy does not terminate the destination when the source fills it
    // completely, so reserve the last byte for the terminator.
    std::strncpy(buffer->vmName, name, sizeof(buffer->vmName) - 1);
    buffer->vmName[sizeof(buffer->vmName) - 1] = '\0';
    buffer->flags |= static_cast<std::uint64_t>(BridgeFlags::VmConfigured);
  }

  /// Queues a ProtectMemory command for the range [address, address + size).
  void sendMemoryProtect(std::uint64_t address, std::uint64_t size,
                         std::uint32_t prot) {
    sendCommand(CommandId::ProtectMemory, {address, size, prot});
  }

  /// Queues a CommandBuffer command for the given queue.
  void sendCommandBuffer(std::uint64_t queue, std::uint64_t address,
                         std::uint64_t size) {
    sendCommand(CommandId::CommandBuffer, {queue, address, size});
  }

  /// Queues a SetBuffer command. The 32-bit fields are packed pairwise into
  /// 64-bit words (high half first): bufferIndex|tilingMode, address,
  /// width|height, pitch|pixelFormat.
  void sendSetBuffer(std::uint32_t bufferIndex, std::uint64_t address,
                     std::uint32_t width, std::uint32_t height,
                     std::uint32_t pitch, std::uint32_t pixelFormat,
                     std::uint32_t tilingMode) {
    sendCommand(CommandId::SetBuffer,
                {static_cast<std::uint64_t>(bufferIndex) << 32 | tilingMode,
                 address, static_cast<std::uint64_t>(width) << 32 | height,
                 static_cast<std::uint64_t>(pitch) << 32 | pixelFormat});
  }

  /// Queues a Flip command for `bufferIndex` with an opaque argument.
  void sendFlip(std::uint32_t bufferIndex, std::uint64_t arg) {
    sendCommand(CommandId::Flip, {bufferIndex, arg});
  }

  /// Queues a DoFlip command (no arguments).
  void sendDoFlip() { sendCommand(CommandId::DoFlip, {}); }

  /// Busy-waits until the puller has consumed everything that was pushed.
  void wait() {
    while (buffer->pull != buffer->push)
      ;
  }

private:
  /// Builds a header word: id in the low half, argument count (cmdSize - 1)
  /// in the high half.
  static std::uint64_t makeCommandHeader(CommandId id, std::size_t cmdSize) {
    return static_cast<std::uint64_t>(id) |
           (static_cast<std::uint64_t>(cmdSize - 1) << 32);
  }

  /// Writes one command (header + arguments) and then publishes it by
  /// advancing `push`.
  void sendCommand(CommandId id, std::initializer_list<std::uint64_t> args) {
    std::size_t cmdSize = args.size() + 1;
    std::uint64_t pos = getPushPosition(cmdSize);

    // The header must carry the id of the command actually being sent;
    // previously every command was tagged as CommandId::Flip.
    buffer->commands[pos++] = makeCommandHeader(id, cmdSize);
    for (auto arg : args) {
      buffer->commands[pos++] = arg;
    }
    buffer->push = pos;
  }

  /// Returns the slot index at which a command of `cmdSize` words can be
  /// written. If the command does not fit before the end of the ring, the
  /// remaining slots are covered by a Nop entry and writing restarts at 0.
  std::uint64_t getPushPosition(std::uint64_t cmdSize) {
    std::uint64_t position = buffer->push;

    if (position + cmdSize > buffer->size) {
      if (position < buffer->size) {
        // Nop whose argument count spans the rest of the ring so the puller
        // skips straight to the wrap-around point.
        buffer->commands[position] =
            static_cast<std::uint64_t>(CommandId::Nop) |
            ((buffer->size - position - 1) << 32);
      }

      position = 0;
      waitPuller(cmdSize);
    }

    return position;
  }

  /// Busy-waits until the pull cursor is at or beyond `pullValue`.
  void waitPuller(std::uint64_t pullValue) {
    while (buffer->pull < pullValue) {
      ;
    }
  }
};
class BridgePuller {
BridgeHeader *buffer = nullptr;
public:
BridgePuller() = default;
BridgePuller(BridgeHeader *buffer) : buffer(buffer) {}
std::size_t pullCommands(Command *commands, std::size_t maxCount) {
std::size_t processed = 0;
while (processed < maxCount) {
if (buffer->pull == buffer->push) {
break;
}
auto pos = buffer->pull;
auto cmd = buffer->commands[pos];
CommandId cmdId = static_cast<CommandId>(cmd);
std::uint32_t argsCount = cmd >> 32;
if (cmdId != CommandId::Nop) {
commands[processed++] =
unpackCommand(cmdId, buffer->commands + pos + 1, argsCount);
}
auto newPull = pos + argsCount + 1;
if (newPull >= buffer->size) {
newPull = 0;
}
buffer->pull = newPull;
}
return processed;
}
private:
Command unpackCommand(CommandId command, const std::uint64_t *args,
std::uint32_t argsCount) {
Command result;
result.id = command;
switch (command) {
case CommandId::Nop:
case CommandId::SetUpSharedMemory:
case CommandId::DoFlip:
return result;
case CommandId::ProtectMemory:
result.memoryProt.address = args[0];
result.memoryProt.size = args[1];
result.memoryProt.prot = args[2];
return result;
case CommandId::CommandBuffer:
result.commandBuffer.queue = args[0];
result.commandBuffer.address = args[1];
result.commandBuffer.size = args[2];
return result;
case CommandId::Flip:
result.flip.bufferIndex = args[0];
result.flip.arg = args[1];
return result;
case CommandId::SetBuffer:
result.buffer.bufferIndex = static_cast<std::uint32_t>(args[0] >> 32);
result.buffer.address = args[1];
result.buffer.width = static_cast<std::uint32_t>(args[2] >> 32);
result.buffer.height = static_cast<std::uint32_t>(args[2]);
result.buffer.pitch = static_cast<std::uint32_t>(args[3] >> 32);
result.buffer.pixelFormat = static_cast<std::uint32_t>(args[3]);
result.buffer.tilingMode = static_cast<std::uint32_t>(args[0]);
return result;
}
__builtin_trap();
}
};
// Creates (replacing any existing object of the same name) the named shared
// memory object, maps it and returns its freshly constructed header. Returns
// nullptr on failure or when this process already holds a mapping.
BridgeHeader *createShmCommandBuffer(const char *name);
// Opens and maps an existing named shared memory object. Returns nullptr on
// failure or when this process already holds a mapping.
BridgeHeader *openShmCommandBuffer(const char *name);
// Unmaps `buffer` and closes the descriptor obtained by create/open.
void destroyShmCommandBuffer(BridgeHeader *buffer);
// Removes the named shared memory object.
void unlinkShm(const char *name);
} // namespace amdgpu::bridge

View file

@ -0,0 +1,81 @@
#include "bridge.hpp"
#include <fcntl.h>
#include <new>
#include <sys/mman.h>
#include <unistd.h>
// Descriptor of the shared memory object currently mapped by this process, or
// -1 when there is none. Only one mapping per process is supported.
static int gShmFd = -1;
// Size in bytes of the mapping: the header followed by 1024 * 1024 command
// slots of 64 bits each.
static constexpr std::size_t kShmSize = sizeof(amdgpu::bridge::BridgeHeader) +
(sizeof(std::uint64_t) * (1024 * 1024));
// Creates the named shared memory object from scratch, sizes and maps it, and
// value-initializes the header inside the mapping. Returns nullptr if a
// mapping is already open in this process or any system call fails.
amdgpu::bridge::BridgeHeader *
amdgpu::bridge::createShmCommandBuffer(const char *name) {
  if (gShmFd != -1) {
    return nullptr;
  }

  // Drop a stale object left behind by a previous run.
  unlinkShm(name);

  const int shmFd = ::shm_open(name, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
  if (shmFd == -1) {
    return nullptr;
  }

  void *mapping = MAP_FAILED;
  if (ftruncate(shmFd, kShmSize) >= 0) {
    mapping = ::mmap(nullptr, kShmSize, PROT_READ | PROT_WRITE, MAP_SHARED,
                     shmFd, 0);
  }

  if (mapping == MAP_FAILED) {
    ::close(shmFd);
    return nullptr;
  }

  gShmFd = shmFd;

  auto *header = new (mapping) BridgeHeader();
  // Ring capacity in 64-bit slots: everything after the header.
  constexpr std::size_t kRingBytes = kShmSize - sizeof(BridgeHeader);
  header->size = kRingBytes / sizeof(std::uint64_t);
  return header;
}
// Maps an already existing named shared memory object. Returns nullptr if a
// mapping is already open in this process or any system call fails.
amdgpu::bridge::BridgeHeader *
amdgpu::bridge::openShmCommandBuffer(const char *name) {
  if (gShmFd != -1) {
    return nullptr;
  }

  const int shmFd = ::shm_open(name, O_RDWR, S_IRUSR | S_IWUSR);
  if (shmFd == -1) {
    return nullptr;
  }

  void *mapping = ::mmap(nullptr, kShmSize, PROT_READ | PROT_WRITE, MAP_SHARED,
                         shmFd, 0);
  if (mapping != MAP_FAILED) {
    gShmFd = shmFd;
    // Default-initialization (no parentheses) on purpose: the header contents
    // written by the creating side must not be cleared.
    return new (mapping) BridgeHeader;
  }

  ::close(shmFd);
  return nullptr;
}
// Tears down the mapping created by createShmCommandBuffer or
// openShmCommandBuffer. Traps when no mapping is open.
void amdgpu::bridge::destroyShmCommandBuffer(
    amdgpu::bridge::BridgeHeader *buffer) {
  const int shmFd = gShmFd;
  if (shmFd == -1) {
    __builtin_trap();
  }

  buffer->~BridgeHeader();

  ::close(shmFd);
  gShmFd = -1;
  ::munmap(buffer, kShmSize);
}
// Removes the named shared memory object; the result of shm_unlink is ignored,
// so a missing object is not reported.
void amdgpu::bridge::unlinkShm(const char *name) { ::shm_unlink(name); }

View file

@ -0,0 +1,66 @@
# Static library implementing the emulated amdgpu device.
project(libamdgpu-device)
# Include sub-path of this project's public headers.
set(PROJECT_PATH amdgpu/device)
set(SRC
src/device.cpp
)
# add_precompiled_vulkan_spirv(<target> <shader>...)
#
# Creates the INTERFACE library <target>. Every listed GLSL source (path
# relative to the current source dir) is compiled with glslangValidator into a
# C header below <build dir>/spirv-gen/include/shaders/, and that include root
# is exported through <target>.
function(add_precompiled_vulkan_spirv target)
add_library(${target} INTERFACE)
set(SPIRV_GEN_ROOT_DIR "spirv-gen/include/")
set(SPIRV_GEN_DIR "${SPIRV_GEN_ROOT_DIR}/shaders")
# Resolve both output directories against the build tree and create them.
cmake_path(ABSOLUTE_PATH SPIRV_GEN_ROOT_DIR BASE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} NORMALIZE OUTPUT_VARIABLE outputrootdir)
cmake_path(ABSOLUTE_PATH SPIRV_GEN_DIR BASE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} NORMALIZE OUTPUT_VARIABLE outputdir)
file(MAKE_DIRECTORY ${outputrootdir})
file(MAKE_DIRECTORY ${outputdir})
target_include_directories(${target} INTERFACE ${outputrootdir})
foreach(input IN LISTS ARGN)
# foo.geom.glsl -> header foo.geom.h holding the array spirv_foo_geom.
cmake_path(GET input FILENAME inputname)
cmake_path(REPLACE_EXTENSION inputname LAST_ONLY .h OUTPUT_VARIABLE outputname)
cmake_path(APPEND outputdir ${outputname} OUTPUT_VARIABLE outputpath)
cmake_path(REMOVE_EXTENSION inputname LAST_ONLY OUTPUT_VARIABLE varname)
string(REPLACE "." "_" varname ${varname})
string(PREPEND varname "spirv_")
add_custom_command(
OUTPUT ${outputpath}
COMMAND glslangValidator -V --vn "${varname}" -o "${outputpath}" "${CMAKE_CURRENT_SOURCE_DIR}/${input}"
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${input}"
COMMENT "Generating ${outputname}..."
)
# A custom target per header makes the generation step a build dependency of
# <target>.
set(subtarget ".${target}-subtarget-${outputname}")
add_custom_target(${subtarget} DEPENDS ${outputpath})
add_dependencies(${target} ${subtarget})
endforeach()
endfunction()
# Precompiled shaders embedded into the device library.
add_precompiled_vulkan_spirv(${PROJECT_NAME}-shaders
src/rect_list.geom.glsl
)
find_package(SPIRV-Tools REQUIRED CONFIG)
find_package(SPIRV-Tools-opt REQUIRED CONFIG)
# NOTE(review): INCLUDE is not set anywhere in this script, so ${INCLUDE}
# presumably expands to nothing here - confirm.
add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC})
target_link_libraries(${PROJECT_NAME}
PUBLIC
spirv
amdgpu::base
amdgpu::bridge
amdgpu::shader
util
SPIRV-Tools
SPIRV-Tools-opt
PRIVATE
${PROJECT_NAME}-shaders
)
target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH})
# The project name already starts with "lib", so drop the default prefix.
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "")
add_library(amdgpu::device ALIAS ${PROJECT_NAME})
set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,102 @@
#pragma once
namespace amdgpu {
// Numeric opcodes of PM4 packets, as named by the enum.
// NOTE(review): values presumably follow AMD's PM4 packet opcode encoding -
// verify against the hardware documentation before adding entries.
enum PM4Opcodes {
NOP = 0x10,
SET_BASE = 0x11,
CLEAR_STATE = 0x12,
INDEX_BUFFER_SIZE = 0x13,
DISPATCH_DIRECT = 0x15,
DISPATCH_INDIRECT = 0x16,
INDIRECT_BUFFER_END = 0x17,
MODE_CONTROL = 0x18,
ATOMIC_GDS = 0x1D,
ATOMIC_MEM = 0x1E,
OCCLUSION_QUERY = 0x1F,
SET_PREDICATION = 0x20,
REG_RMW = 0x21,
COND_EXEC = 0x22,
PRED_EXEC = 0x23,
DRAW_INDIRECT = 0x24,
DRAW_INDEX_INDIRECT = 0x25,
INDEX_BASE = 0x26,
DRAW_INDEX_2 = 0x27,
CONTEXT_CONTROL = 0x28,
DRAW_INDEX_OFFSET = 0x29,
INDEX_TYPE = 0x2A,
DRAW_INDEX = 0x2B,
DRAW_INDIRECT_MULTI = 0x2C,
DRAW_INDEX_AUTO = 0x2D,
DRAW_INDEX_IMMD = 0x2E,
NUM_INSTANCES = 0x2F,
DRAW_INDEX_MULTI_AUTO = 0x30,
INDIRECT_BUFFER_32 = 0x32,
INDIRECT_BUFFER_CONST = 0x33,
STRMOUT_BUFFER_UPDATE = 0x34,
DRAW_INDEX_OFFSET_2 = 0x35,
DRAW_PREAMBLE = 0x36,
WRITE_DATA = 0x37,
DRAW_INDEX_INDIRECT_MULTI = 0x38,
MEM_SEMAPHORE = 0x39,
MPEG_INDEX = 0x3A,
COPY_DW = 0x3B,
WAIT_REG_MEM = 0x3C,
MEM_WRITE = 0x3D,
INDIRECT_BUFFER_3F = 0x3F,
COPY_DATA = 0x40,
CP_DMA = 0x41,
PFP_SYNC_ME = 0x42,
SURFACE_SYNC = 0x43,
ME_INITIALIZE = 0x44,
COND_WRITE = 0x45,
EVENT_WRITE = 0x46,
EVENT_WRITE_EOP = 0x47,
EVENT_WRITE_EOS = 0x48,
RELEASE_MEM = 0x49,
PREAMBLE_CNTL = 0x4A,
RB_OFFSET = 0x4B,
ALU_PS_CONST_BUFFER_COPY = 0x4C,
ALU_VS_CONST_BUFFER_COPY = 0x4D,
ALU_PS_CONST_UPDATE = 0x4E,
ALU_VS_CONST_UPDATE = 0x4F,
DMA_DATA = 0x50,
ONE_REG_WRITE = 0x57,
AQUIRE_MEM = 0x58,
REWIND = 0x59,
LOAD_UCONFIG_REG = 0x5E,
LOAD_SH_REG = 0x5F,
LOAD_CONFIG_REG = 0x60,
LOAD_CONTEXT_REG = 0x61,
SET_CONFIG_REG = 0x68,
SET_CONTEXT_REG = 0x69,
SET_ALU_CONST = 0x6A,
SET_BOOL_CONST = 0x6B,
SET_LOOP_CONST = 0x6C,
SET_RESOURCE = 0x6D,
SET_SAMPLER = 0x6E,
SET_CTL_CONST = 0x6F,
SET_RESOURCE_OFFSET = 0x70,
SET_ALU_CONST_VS = 0x71,
SET_ALU_CONST_DI = 0x72,
SET_CONTEXT_REG_INDIRECT = 0x73,
SET_RESOURCE_INDIRECT = 0x74,
SET_APPEND_CNT = 0x75,
SET_SH_REG = 0x76,
SET_SH_REG_OFFSET = 0x77,
SET_QUEUE_REG = 0x78,
SET_UCONFIG_REG = 0x79,
SCRATCH_RAM_WRITE = 0x7D,
SCRATCH_RAM_READ = 0x7E,
LOAD_CONST_RAM = 0x80,
WRITE_CONST_RAM = 0x81,
DUMP_CONST_RAM = 0x83,
INCREMENT_CE_COUNTER = 0x84,
INCREMENT_DE_COUNTER = 0x85,
WAIT_ON_CE_COUNTER = 0x86,
WAIT_ON_DE_COUNTER_DIFF = 0x88,
SWITCH_BUFFER = 0x8B,
};
// Returns a printable name for `opcode` (implementation lives elsewhere).
const char *pm4OpcodeToString(int opcode);
} // namespace amdgpu

View file

@ -0,0 +1,681 @@
#pragma once
#include "device.hpp"
#include <algorithm>
#include <cstdint>
#include <cstdio>
namespace amdgpu::device {
// Enumerations describing surface tiling configurations. The numeric values
// are used directly by the tilers in this header (for example Tiler1d casts
// raw integers to ArrayMode/MicroTileMode), so they must stay as they are.
namespace Gnm {
enum GpuMode { kGpuModeBase = 0, kGpuModeNeo = 1 };
// Tile mode identifiers; note that the value range has gaps.
enum TileMode {
kTileModeDepth_2dThin_64 = 0x00000000,
kTileModeDepth_2dThin_128 = 0x00000001,
kTileModeDepth_2dThin_256 = 0x00000002,
kTileModeDepth_2dThin_512 = 0x00000003,
kTileModeDepth_2dThin_1K = 0x00000004,
kTileModeDepth_2dThinPrt_256 = 0x00000006,
kTileModeDisplay_LinearAligned = 0x00000008,
kTileModeDisplay_2dThin = 0x0000000A,
kTileModeDisplay_ThinPrt = 0x0000000B,
kTileModeDisplay_2dThinPrt = 0x0000000C,
kTileModeThin_1dThin = 0x0000000D,
kTileModeThin_2dThin = 0x0000000E,
kTileModeThin_ThinPrt = 0x00000010,
kTileModeThin_2dThinPrt = 0x00000011,
kTileModeThin_3dThinPrt = 0x00000012,
kTileModeThick_1dThick = 0x00000013,
kTileModeThick_2dThick = 0x00000014,
kTileModeThick_ThickPrt = 0x00000016,
kTileModeThick_2dThickPrt = 0x00000017,
kTileModeThick_3dThickPrt = 0x00000018,
kTileModeThick_2dXThick = 0x00000019,
};
// Selects the element ordering inside a micro tile (see getElementIndex).
enum MicroTileMode {
kMicroTileModeDisplay = 0x00000000,
kMicroTileModeThin = 0x00000001,
kMicroTileModeDepth = 0x00000002,
kMicroTileModeRotated = 0x00000003,
kMicroTileModeThick = 0x00000004,
};
// Selects how tiles are arranged; the Thick/XThick variants additionally
// interleave the z coordinate (see getElementIndex).
enum ArrayMode {
kArrayModeLinearGeneral = 0x00000000,
kArrayModeLinearAligned = 0x00000001,
kArrayMode1dTiledThin = 0x00000002,
kArrayMode1dTiledThick = 0x00000003,
kArrayMode2dTiledThin = 0x00000004,
kArrayModeTiledThinPrt = 0x00000005,
kArrayMode2dTiledThinPrt = 0x00000006,
kArrayMode2dTiledThick = 0x00000007,
kArrayMode2dTiledXThick = 0x00000008,
kArrayModeTiledThickPrt = 0x00000009,
kArrayMode2dTiledThickPrt = 0x0000000a,
kArrayMode3dTiledThinPrt = 0x0000000b,
kArrayMode3dTiledThin = 0x0000000c,
kArrayMode3dTiledThick = 0x0000000d,
kArrayMode3dTiledXThick = 0x0000000e,
kArrayMode3dTiledThickPrt = 0x0000000f,
};
// Pipe configurations understood by getPipeIndex.
enum PipeConfig {
kPipeConfigP8_32x32_8x16 = 0x0000000a,
kPipeConfigP8_32x32_16x16 = 0x0000000c,
kPipeConfigP16 = 0x00000012,
};
} // namespace Gnm
// Aborts execution when a tiling helper is handed a configuration it cannot
// handle. The message arguments are accepted for call-site documentation but
// are currently not printed.
//
// The previous definition continued every line with a backslash and placed the
// trap after `//` comments. Line splicing happens before comments are removed,
// so the whole body - including the trap - became part of the comment and the
// macro expanded to nothing. The trailing semicolon is kept inside the macro
// so existing call sites written with or without their own `;` still compile.
#define GNM_ERROR(msg, ...) __builtin_trap();
// Dimensions of a micro tile in elements; the tilers divide x/y by these to
// obtain tile coordinates.
static constexpr uint32_t kMicroTileWidth = 8;
static constexpr uint32_t kMicroTileHeight = 8;
// Returns the index of element (x, y, z) inside its micro tile.
//
// The index is built by interleaving the low three bits of x and y (and, for
// thick array modes, the low two or three bits of z); the interleave order
// depends on the micro tile mode and, for display and thick modes, on
// bitsPerElement. Unsupported combinations are reported through GNM_ERROR.
// kMicroTileModeRotated is not implemented and yields 0.
static constexpr uint32_t getElementIndex(uint32_t x, uint32_t y, uint32_t z,
uint32_t bitsPerElement,
Gnm::MicroTileMode microTileMode,
Gnm::ArrayMode arrayMode) {
uint32_t elem = 0;
if (microTileMode == Gnm::kMicroTileModeDisplay) {
// Display surfaces: the bit order varies with the element size.
switch (bitsPerElement) {
case 8:
elem |= ((x >> 0) & 0x1) << 0;
elem |= ((x >> 1) & 0x1) << 1;
elem |= ((x >> 2) & 0x1) << 2;
elem |= ((y >> 1) & 0x1) << 3;
elem |= ((y >> 0) & 0x1) << 4;
elem |= ((y >> 2) & 0x1) << 5;
break;
case 16:
elem |= ((x >> 0) & 0x1) << 0;
elem |= ((x >> 1) & 0x1) << 1;
elem |= ((x >> 2) & 0x1) << 2;
elem |= ((y >> 0) & 0x1) << 3;
elem |= ((y >> 1) & 0x1) << 4;
elem |= ((y >> 2) & 0x1) << 5;
break;
case 32:
elem |= ((x >> 0) & 0x1) << 0;
elem |= ((x >> 1) & 0x1) << 1;
elem |= ((y >> 0) & 0x1) << 2;
elem |= ((x >> 2) & 0x1) << 3;
elem |= ((y >> 1) & 0x1) << 4;
elem |= ((y >> 2) & 0x1) << 5;
break;
case 64:
elem |= ((x >> 0) & 0x1) << 0;
elem |= ((y >> 0) & 0x1) << 1;
elem |= ((x >> 1) & 0x1) << 2;
elem |= ((x >> 2) & 0x1) << 3;
elem |= ((y >> 1) & 0x1) << 4;
elem |= ((y >> 2) & 0x1) << 5;
break;
default:
GNM_ERROR("Unsupported bitsPerElement (%u) for displayable surface.",
bitsPerElement);
}
} else if (microTileMode == Gnm::kMicroTileModeThin ||
microTileMode == Gnm::kMicroTileModeDepth) {
// Thin/depth: plain x/y bit interleave, independent of the element size.
elem |= ((x >> 0) & 0x1) << 0;
elem |= ((y >> 0) & 0x1) << 1;
elem |= ((x >> 1) & 0x1) << 2;
elem |= ((y >> 1) & 0x1) << 3;
elem |= ((x >> 2) & 0x1) << 4;
elem |= ((y >> 2) & 0x1) << 5;
// Use Z too, if the array mode is Thick/XThick
switch (arrayMode) {
case Gnm::kArrayMode2dTiledXThick:
case Gnm::kArrayMode3dTiledXThick:
elem |= ((z >> 2) & 0x1) << 8;
// Intentional fall-through
case Gnm::kArrayMode1dTiledThick:
case Gnm::kArrayMode2dTiledThick:
case Gnm::kArrayMode3dTiledThick:
case Gnm::kArrayModeTiledThickPrt:
case Gnm::kArrayMode2dTiledThickPrt:
case Gnm::kArrayMode3dTiledThickPrt:
elem |= ((z >> 0) & 0x1) << 6;
elem |= ((z >> 1) & 0x1) << 7;
// Falls into `default`, which only breaks.
default:
break; // no other thick modes
}
} else if (microTileMode == Gnm::kMicroTileModeThick) // thick/xthick
{
switch (arrayMode) {
case Gnm::kArrayMode2dTiledXThick:
case Gnm::kArrayMode3dTiledXThick:
elem |= ((z >> 2) & 0x1) << 8;
// intentional fall-through
case Gnm::kArrayMode1dTiledThick:
case Gnm::kArrayMode2dTiledThick:
case Gnm::kArrayMode3dTiledThick:
case Gnm::kArrayModeTiledThickPrt:
case Gnm::kArrayMode2dTiledThickPrt:
case Gnm::kArrayMode3dTiledThickPrt:
// The position of the z bits depends on the element size.
if (bitsPerElement == 8 || bitsPerElement == 16) {
elem |= ((x >> 0) & 0x1) << 0;
elem |= ((y >> 0) & 0x1) << 1;
elem |= ((x >> 1) & 0x1) << 2;
elem |= ((y >> 1) & 0x1) << 3;
elem |= ((z >> 0) & 0x1) << 4;
elem |= ((z >> 1) & 0x1) << 5;
elem |= ((x >> 2) & 0x1) << 6;
elem |= ((y >> 2) & 0x1) << 7;
} else if (bitsPerElement == 32) {
elem |= ((x >> 0) & 0x1) << 0;
elem |= ((y >> 0) & 0x1) << 1;
elem |= ((x >> 1) & 0x1) << 2;
elem |= ((z >> 0) & 0x1) << 3;
elem |= ((y >> 1) & 0x1) << 4;
elem |= ((z >> 1) & 0x1) << 5;
elem |= ((x >> 2) & 0x1) << 6;
elem |= ((y >> 2) & 0x1) << 7;
} else if (bitsPerElement == 64 || bitsPerElement == 128) {
elem |= ((x >> 0) & 0x1) << 0;
elem |= ((y >> 0) & 0x1) << 1;
elem |= ((z >> 0) & 0x1) << 2;
elem |= ((x >> 1) & 0x1) << 3;
elem |= ((y >> 1) & 0x1) << 4;
elem |= ((z >> 1) & 0x1) << 5;
elem |= ((x >> 2) & 0x1) << 6;
elem |= ((y >> 2) & 0x1) << 7;
} else {
GNM_ERROR("Invalid bitsPerElement (%u) for "
"microTileMode=kMicroTileModeThick.",
bitsPerElement);
}
break;
default:
GNM_ERROR("Invalid arrayMode (0x%02X) for thick/xthick "
"microTileMode=kMicroTileModeThick.",
arrayMode);
}
}
// TODO: rotated
return elem;
}
static constexpr uint32_t getPipeIndex(uint32_t x, uint32_t y,
Gnm::PipeConfig pipeCfg) {
uint32_t pipe = 0;
switch (pipeCfg) {
case Gnm::kPipeConfigP8_32x32_8x16:
pipe |= (((x >> 4) ^ (y >> 3) ^ (x >> 5)) & 0x1) << 0;
pipe |= (((x >> 3) ^ (y >> 4)) & 0x1) << 1;
pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
break;
case Gnm::kPipeConfigP8_32x32_16x16:
pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0;
pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1;
pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
break;
case Gnm::kPipeConfigP16:
pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0;
pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1;
pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
pipe |= (((x >> 6) ^ (y >> 5)) & 0x1) << 3;
break;
default:
GNM_ERROR("Unsupported pipeCfg (0x%02X).", pipeCfg);
}
return pipe;
}
// Returns floor(log2(i)) for i > 0, and 0 for i == 0.
inline constexpr uint32_t fastIntLog2(uint32_t i) {
  // Forcing bit 0 keeps the operand non-zero (count-leading-zeros of 0 is
  // undefined) without changing the position of the highest set bit for i > 0.
  const uint32_t nonZero = i | 1u;
  const uint32_t leadingZeros = static_cast<uint32_t>(__builtin_clz(nonZero));
  return 31u - leadingZeros;
}
// Computes the bank index of the element at (x, y). The coordinates are first
// scaled down by log2(bank_width * num_pipes) and log2(bank_height); the bank
// bits are then XOR combinations of bits 3..6 of the scaled coordinates.
// num_banks must be 2, 4, 8 or 16; anything else is reported through GNM_ERROR
// and yields 0.
static constexpr uint32_t getBankIndex(uint32_t x, uint32_t y,
                                       uint32_t bank_width,
                                       uint32_t bank_height, uint32_t num_banks,
                                       uint32_t num_pipes) {
  // Example configuration: bank_width=1, bank_height=1, num_banks=16,
  // num_pipes=8.
  const uint32_t xs = x >> fastIntLog2(bank_width * num_pipes);
  const uint32_t ys = y >> fastIntLog2(bank_height);

  const uint32_t xs3 = (xs >> 3) & 0x1, xs4 = (xs >> 4) & 0x1;
  const uint32_t xs5 = (xs >> 5) & 0x1, xs6 = (xs >> 6) & 0x1;
  const uint32_t ys3 = (ys >> 3) & 0x1, ys4 = (ys >> 4) & 0x1;
  const uint32_t ys5 = (ys >> 5) & 0x1, ys6 = (ys >> 6) & 0x1;

  switch (num_banks) {
  case 2:
    return xs3 ^ ys3;

  case 4:
    return (xs3 ^ ys4) | ((xs4 ^ ys3) << 1);

  case 8:
    return (xs3 ^ ys5) | ((xs4 ^ ys4 ^ ys5) << 1) | ((xs5 ^ ys3) << 2);

  case 16:
    return (xs3 ^ ys6) | ((xs4 ^ ys5 ^ ys6) << 1) | ((xs5 ^ ys4) << 2) |
           ((xs6 ^ ys3) << 3);

  default:
    GNM_ERROR("invalid num_banks (%u) -- must be 2, 4, 8, or 16.", num_banks);
    return 0;
  }
}
// Returns how many texels one element of `format` covers: 16 for the
// Bc1..Bc7 range, 8 for any other format at or above kSurfaceFormat1, and 1
// for everything else.
inline std::uint32_t getTexelsPerElement(SurfaceFormat format) {
  const bool isBlockCompressed =
      !(format < kSurfaceFormatBc1 || format > kSurfaceFormatBc7);

  if (isBlockCompressed) {
    return 16;
  }

  return format >= kSurfaceFormat1 ? 8 : 1;
}
// Returns the size in bits of one element of `format`, looked up by the raw
// format value. Formats beyond the end of the table yield 0.
//
// NOTE(review): table entries of -1 mark formats without a size; because the
// return type is unsigned they are returned as 0xFFFFFFFF - callers presumably
// never pass those formats; confirm.
inline std::uint32_t getBitsPerElement(SurfaceFormat format) {
  static constexpr int bitsPerElement[] = {
      0,  8,  16, 16, 32, 32, 32, 32, 32, 32, 32, 64, 64, 96, 128, -1,
      16, 16, 16, 16, 32, 32, 64, -1, -1, -1, -1, -1, -1, -1, -1,  -1,
      16, 16, 32, 4,  8,  8,  4,  8,  8,  8,  -1, -1, 8,  8,  8,   8,
      8,  8,  16, 16, 32, 32, 32, 64, 64, 8,  16, 1,  1};

  // Number of entries, not bytes: comparing against sizeof(bitsPerElement)
  // alone let indices past the last entry through and read out of bounds.
  constexpr auto kFormatCount =
      sizeof(bitsPerElement) / sizeof(bitsPerElement[0]);

  const auto rawFormat = static_cast<unsigned>(format);
  if (rawFormat >= kFormatCount) {
    return 0;
  }

  return bitsPerElement[rawFormat];
}
struct Tiler1d {
Gnm::ArrayMode m_arrayMode;
uint32_t m_bitsPerElement;
Gnm::MicroTileMode m_microTileMode;
uint32_t m_tileThickness;
uint32_t m_tileBytes;
uint32_t m_tilesPerRow;
uint32_t m_tilesPerSlice;
Tiler1d(const GnmTBuffer *texture) {
/*
m_arrayMode = Gnm::ArrayMode::kArrayMode1dTiledThin;
m_bitsPerElement = 128;// getBitsPerElement(texture->dfmt);
m_microTileMode = Gnm::MicroTileMode::kMicroTileModeThin;
m_tileThickness = (m_arrayMode == Gnm::kArrayMode1dTiledThick) ? 4 : 1;
m_tileBytes = (kMicroTileWidth * kMicroTileHeight * m_tileThickness * m_bitsPerElement + 7) / 8;
auto width = texture->width + 1;
auto height = texture->height + 1;
width = (width + 3) / 4;
height = (height + 3) / 4;
m_tilesPerRow = width / kMicroTileWidth;
m_tilesPerSlice = std::max(m_tilesPerRow * (height / kMicroTileHeight), 1U);
*/
m_arrayMode = (Gnm::ArrayMode)2;
m_bitsPerElement = 128;
m_microTileMode = (Gnm::MicroTileMode)1;
m_tileThickness= 1;
m_tileBytes= 1024;
m_tilesPerRow = 16;
m_tilesPerSlice = 256;
}
uint64_t getTiledElementBitOffset(uint32_t x, uint32_t y, uint32_t z) const {
uint64_t element_index = getElementIndex(x, y, z, m_bitsPerElement,
m_microTileMode, m_arrayMode);
uint64_t slice_offset =
(z / m_tileThickness) * m_tilesPerSlice * m_tileBytes;
uint64_t tile_row_index = y / kMicroTileHeight;
uint64_t tile_column_index = x / kMicroTileWidth;
uint64_t tile_offset =
((tile_row_index * m_tilesPerRow) + tile_column_index) * m_tileBytes;
uint64_t element_offset = element_index * m_bitsPerElement;
return (slice_offset + tile_offset) * 8 + element_offset;
}
int32_t getTiledElementByteOffset(uint32_t x, uint32_t y, uint32_t z) const {
return getTiledElementBitOffset(x, y, z) / 8;
}
};
// Address calculator for a 2D (macro) tiled surface. Every parameter is a
// compile-time constant: a 32-bit display surface in kArrayMode2dTiledThin with
// 8 pipes, 16 banks and a padded size of 1280x768.
// NOTE(review): the padded size suggests a 1280x720 target - confirm before
// reusing this tiler for other resolutions.
struct Tiler2d {
static constexpr int m_bitsPerElement = 32;
static constexpr Gnm::MicroTileMode m_microTileMode =
Gnm::kMicroTileModeDisplay;
static constexpr Gnm::ArrayMode m_arrayMode = Gnm::kArrayMode2dTiledThin;
// Macro tile dimensions in elements.
static constexpr uint32_t m_macroTileWidth = 128;
static constexpr uint32_t m_macroTileHeight = 64;
static constexpr Gnm::PipeConfig m_pipeConfig =
Gnm::kPipeConfigP8_32x32_16x16;
static constexpr uint32_t m_bankWidth = 1;
static constexpr uint32_t m_bankHeight = 1;
static constexpr uint32_t m_numBanks = 16;
static constexpr uint32_t m_numPipes = 8;
static constexpr uint32_t m_tileThickness = 1;
static constexpr uint32_t m_numFragmentsPerPixel = 1;
static constexpr uint32_t m_tileSplitBytes = 512;
static constexpr uint32_t m_pipeInterleaveBytes = 256;
static constexpr uint32_t m_macroTileAspect = 2;
static constexpr uint32_t m_paddedWidth = 1280;
static constexpr uint32_t m_paddedHeight = 768;
static constexpr uint32_t m_arraySlice = 0;
static constexpr uint64_t m_bankSwizzleMask = 0;
static constexpr uint64_t m_pipeSwizzleMask = 0;
// Layout of the final byte address: [offset | bank | pipe | interleave], with
// 8 interleave bits (mask 255), 3 pipe bits and 4 bank bits.
static constexpr uint64_t m_pipeInterleaveMask = 255;
static constexpr uint64_t m_pipeInterleaveBits = 8;
static constexpr uint64_t m_pipeBits = 3;
static constexpr uint64_t m_bankBits = 4;
static constexpr uint32_t kDramRowSize = 0x400;
static constexpr uint32_t kNumLogicalBanks = 16;
static constexpr uint32_t kPipeInterleaveBytes = 256;
static constexpr uint32_t kBankInterleave = 1;
static constexpr uint32_t kMicroTileWidth = 8;
static constexpr uint32_t kMicroTileHeight = 8;
static constexpr uint32_t kNumMicroTilePixels =
kMicroTileWidth * kMicroTileHeight;
static constexpr uint32_t kCmaskCacheBits = 0x400;
static constexpr uint32_t kHtileCacheBits = 0x4000;
// Writes the bit offset of element (x, y, z, fragmentIndex) to
// *outTiledBitOffset; defined after the struct.
int32_t getTiledElementBitOffset(uint64_t *outTiledBitOffset, uint32_t x,
uint32_t y, uint32_t z,
uint32_t fragmentIndex, bool log = false);
// Byte variant: divides the bit offset by 8 and forwards the status code.
int32_t getTiledElementByteOffset(uint64_t *outTiledByteOffset, uint32_t x,
uint32_t y, uint32_t z,
uint32_t fragmentIndex, bool log = false) {
uint64_t bitOffset = 0;
int32_t status =
getTiledElementBitOffset(&bitOffset, x, y, z, fragmentIndex, log);
*outTiledByteOffset = bitOffset / 8;
return status;
}
};
// Computes the bit offset of element (x, y, z, fragmentIndex) inside the
// macro-tiled surface and stores it in *outTiledBitOffset. Always returns 0.
//
// Steps: element index inside the micro tile -> pipe and bank selection ->
// linear offset (slice + macro tile + tile + element) -> pipe/bank swizzle ->
// final address assembled as [offset | bank | pipe | pipe interleave].
// `log` is accepted but not used.
inline int32_t Tiler2d::getTiledElementBitOffset(uint64_t *outTiledBitOffset,
uint32_t x, uint32_t y,
uint32_t z,
uint32_t fragmentIndex,
bool log) {
uint64_t element_index =
getElementIndex(x, y, z, m_bitsPerElement, m_microTileMode, m_arrayMode);
// PRT modes compute pipe/bank from coordinates relative to the macro tile.
uint32_t xh = x, yh = y;
if (m_arrayMode == Gnm::kArrayModeTiledThinPrt ||
m_arrayMode == Gnm::kArrayModeTiledThickPrt) {
xh %= m_macroTileWidth;
yh %= m_macroTileHeight;
}
uint64_t pipe = getPipeIndex(xh, yh, m_pipeConfig);
uint64_t bank =
getBankIndex(xh, yh, m_bankWidth, m_bankHeight, m_numBanks, m_numPipes);
// Size of one micro tile in bytes, rounded up.
constexpr uint32_t tile_bytes =
(kMicroTileWidth * kMicroTileHeight * m_tileThickness * m_bitsPerElement *
m_numFragmentsPerPixel +
7) /
8;
// Offset of the element (in bits) inside its micro tile; depth surfaces keep
// the fragments of a pixel together, other surfaces store them per plane.
uint64_t element_offset = 0;
if (m_microTileMode == Gnm::kMicroTileModeDepth) {
uint64_t pixel_offset =
element_index * m_bitsPerElement * m_numFragmentsPerPixel;
element_offset = pixel_offset + (fragmentIndex * m_bitsPerElement);
} else {
uint64_t fragment_offset =
fragmentIndex * (tile_bytes / m_numFragmentsPerPixel) * 8;
element_offset = fragment_offset + (element_index * m_bitsPerElement);
}
uint64_t slices_per_tile = 1;
uint64_t tile_split_slice = 0;
// Bytes of one macro tile as seen by a single pipe/bank pair.
uint64_t macro_tile_bytes = (m_macroTileWidth / kMicroTileWidth) *
(m_macroTileHeight / kMicroTileHeight) *
tile_bytes / (m_numPipes * m_numBanks);
uint64_t macro_tiles_per_row = m_paddedWidth / m_macroTileWidth;
uint64_t macro_tile_row_index = y / m_macroTileHeight;
uint64_t macro_tile_column_index = x / m_macroTileWidth;
uint64_t macro_tile_index =
(macro_tile_row_index * macro_tiles_per_row) + macro_tile_column_index;
uint64_t macro_tile_offset = macro_tile_index * macro_tile_bytes;
uint64_t macro_tiles_per_slice =
macro_tiles_per_row * (m_paddedHeight / m_macroTileHeight);
uint64_t slice_bytes = macro_tiles_per_slice * macro_tile_bytes;
uint32_t slice = z;
uint64_t slice_offset =
(tile_split_slice + slices_per_tile * slice / m_tileThickness) *
slice_bytes;
// A non-zero array slice overrides z for the rotation terms below (the slice
// offset above has already been computed from z).
if (m_arraySlice != 0) {
slice = m_arraySlice;
}
uint64_t tile_row_index = (y / kMicroTileHeight) % m_bankHeight;
uint64_t tile_column_index =
((x / kMicroTileWidth) / m_numPipes) % m_bankWidth;
uint64_t tile_index = (tile_row_index * m_bankWidth) + tile_column_index;
uint64_t tile_offset = tile_index * tile_bytes;
// Bank and pipe rotation/swizzling.
uint64_t bank_swizzle = m_bankSwizzleMask;
uint64_t pipe_swizzle = m_pipeSwizzleMask;
uint64_t pipe_slice_rotation = 0;
switch (m_arrayMode) {
case Gnm::kArrayMode3dTiledThin:
case Gnm::kArrayMode3dTiledThick:
case Gnm::kArrayMode3dTiledXThick:
pipe_slice_rotation =
std::max(1UL, (m_numPipes / 2UL) - 1UL) * (slice / m_tileThickness);
break;
default:
break;
}
pipe_swizzle += pipe_slice_rotation;
pipe_swizzle &= (m_numPipes - 1);
pipe = pipe ^ pipe_swizzle;
uint32_t slice_rotation = 0;
switch (m_arrayMode) {
case Gnm::kArrayMode2dTiledThin:
case Gnm::kArrayMode2dTiledThick:
case Gnm::kArrayMode2dTiledXThick:
slice_rotation = ((m_numBanks / 2) - 1) * (slice / m_tileThickness);
break;
case Gnm::kArrayMode3dTiledThin:
case Gnm::kArrayMode3dTiledThick:
case Gnm::kArrayMode3dTiledXThick:
slice_rotation = std::max(1UL, (m_numPipes / 2UL) - 1UL) *
(slice / m_tileThickness) / m_numPipes;
break;
default:
break;
}
uint64_t tile_split_slice_rotation = 0;
switch (m_arrayMode) {
case Gnm::kArrayMode2dTiledThin:
case Gnm::kArrayMode3dTiledThin:
case Gnm::kArrayMode2dTiledThinPrt:
case Gnm::kArrayMode3dTiledThinPrt:
tile_split_slice_rotation = ((m_numBanks / 2) + 1) * tile_split_slice;
break;
default:
break;
}
bank ^= bank_swizzle + slice_rotation;
bank ^= tile_split_slice_rotation;
bank &= (m_numBanks - 1);
// Linear offset in bits; the sub-byte part is carried over unchanged.
uint64_t total_offset =
(slice_offset + macro_tile_offset + tile_offset) * 8 + element_offset;
uint64_t bitOffset = total_offset & 0x7;
total_offset /= 8;
// Insert the pipe and bank bits between the pipe-interleave bits and the rest
// of the linear byte offset.
uint64_t pipe_interleave_offset = total_offset & m_pipeInterleaveMask;
uint64_t offset = total_offset >> m_pipeInterleaveBits;
uint64_t finalByteOffset =
pipe_interleave_offset | (pipe << (m_pipeInterleaveBits)) |
(bank << (m_pipeInterleaveBits + m_pipeBits)) |
(offset << (m_pipeInterleaveBits + m_pipeBits + m_bankBits));
*outTiledBitOffset = (finalByteOffset << 3) | bitOffset;
return 0;
}
// Specialized tiler for 32-bit display surfaces (8 pipes, 16 banks, 128x64
// macro tiles) with a small cache of precalculated offset tables.
namespace surfaceTiler {
/// Index of element (x, y) inside its 8x8 micro tile (32-bit display order:
/// x0, x1, y0, x2, y1, y2).
constexpr std::uint32_t getElementIndex(std::uint32_t x, std::uint32_t y) {
  std::uint32_t elem = 0;
  elem |= ((x >> 0) & 0x1) << 0;
  elem |= ((x >> 1) & 0x1) << 1;
  elem |= ((y >> 0) & 0x1) << 2;
  elem |= ((x >> 2) & 0x1) << 3;
  elem |= ((y >> 1) & 0x1) << 4;
  elem |= ((y >> 2) & 0x1) << 5;
  return elem;
}

/// Three-bit pipe index of the element at (x, y).
constexpr std::uint32_t getPipeIndex(std::uint32_t x, std::uint32_t y) {
  std::uint32_t pipe = 0;
  pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0;
  pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1;
  pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
  return pipe;
}

/// Four-bit bank index of the element at (x, y).
constexpr std::uint32_t getBankIndex(std::uint32_t x, std::uint32_t y) {
  std::uint32_t bank = 0;
  bank |= (((x >> 6) ^ (y >> 6)) & 0x1) << 0;
  bank |= (((x >> 7) ^ (y >> 5) ^ (y >> 6)) & 0x1) << 1;
  bank |= (((x >> 8) ^ (y >> 4)) & 0x1) << 2;
  bank |= (((x >> 9) ^ (y >> 3)) & 0x1) << 3;
  return bank;
}

/// Computes the tiled byte offset of element (x, y) for a surface that is
/// `width` elements wide, without using the precalculated tables.
inline std::uint64_t getTiledElementByteOffsetImpl(std::uint32_t x,
                                                   std::uint32_t y,
                                                   std::uint32_t width) {
  std::uint32_t elementIndex = getElementIndex(x, y);
  std::uint32_t pipe = getPipeIndex(x, y);
  std::uint32_t bank = getBankIndex(x, y);

  // 128x64 macro tiles, 256 bytes per macro tile and pipe/bank pair.
  std::uint64_t macroTileIndex =
      (static_cast<std::uint64_t>(y / 64) * (width / 128)) + x / 128;
  std::uint64_t macroTileOffset = macroTileIndex * 256;
  std::uint64_t totalOffset = macroTileOffset + elementIndex * 4;

  // Final address: [offset | 4 bank bits | 3 pipe bits | 8 interleave bits].
  std::uint64_t pipeInterleaveOffset = totalOffset & 255;
  std::uint64_t offset = totalOffset >> 8;
  return pipeInterleaveOffset | (pipe << 8) | (bank << 11) | (offset << 15);
}

static constexpr std::uint32_t kMaxPrecalculatedCount = 8;
static constexpr std::uint32_t kMaxPrecalculatedWidth = 2048;
static constexpr std::uint32_t kMaxPrecalculatedHeight = 2048;

// One offset table per cache entry, addressed by PrecalculatedTiler::index.
// NOTE(review): `static` in a header gives every translation unit that
// includes it a private copy of this (large) table.
static std::uint64_t gPrecalculatedTiledOffsets[kMaxPrecalculatedCount]
                                               [kMaxPrecalculatedWidth *
                                                kMaxPrecalculatedHeight];

/// Cache entry describing one precalculated surface size.
struct PrecalculatedTiler {
  std::uint32_t width;
  std::uint32_t height;
  std::uint32_t stride; // row pitch (in entries) inside the offset table
  int index;            // which gPrecalculatedTiledOffsets table holds the data
};

// Cache slots; slots may be reordered, the table a slot refers to is given by
// its `index` member.
static PrecalculatedTiler gPrecalculatedTilers[kMaxPrecalculatedCount];
static int gPrecalculatedCount;

/// Returns the cache slot holding a tiler for width x height, or -1.
static int findPrecalculatedTile(std::uint32_t width, std::uint32_t height) {
  for (int i = 0; i < gPrecalculatedCount; ++i) {
    if (gPrecalculatedTilers[i].width == width &&
        gPrecalculatedTilers[i].height == height) {
      return i;
    }
  }

  return -1;
}

/// Makes sure a precalculated offset table exists for width x height and
/// returns the cache slot to pass to getTiledElementByteOffset.
///
/// A hit in the second half of a more-than-half-full cache is promoted to
/// slot 0. When the cache is full, the entry in the last slot is replaced and
/// its table is reused. Slots returned by earlier calls may refer to a
/// different size after a promotion or replacement.
inline int precalculateTiles(std::uint32_t width, std::uint32_t height) {
  constexpr int kMaxCount = static_cast<int>(kMaxPrecalculatedCount);

  int index = findPrecalculatedTile(width, height);
  if (index >= 0) {
    if (index >= kMaxCount / 2 && gPrecalculatedCount > kMaxCount / 2) {
      // Move the entry to the front; its `index` member keeps pointing at the
      // table that holds its data.
      auto tmp = gPrecalculatedTilers[index];

      for (int i = index; i > 0; --i) {
        gPrecalculatedTilers[i] = gPrecalculatedTilers[i - 1];
      }

      gPrecalculatedTilers[0] = tmp;
      return 0;
    }

    return index;
  }

  PrecalculatedTiler tiler;
  tiler.width = width;
  tiler.height = height;
  tiler.stride = std::min(width, kMaxPrecalculatedWidth);

  if (gPrecalculatedCount >= kMaxCount) {
    // TODO: insert in the middle?
    // Replace the last slot and take over its table.
    tiler.index = gPrecalculatedTilers[kMaxCount - 1].index;
    index = kMaxCount - 1;
  } else {
    tiler.index = gPrecalculatedCount++;
    index = tiler.index;
  }

  // Store into the slot that is returned to the caller (the previous code
  // wrote to `index - 1`, i.e. out of bounds for the first entry).
  gPrecalculatedTilers[index] = tiler;

  // Never fill beyond the table dimensions; coordinates outside the table are
  // served by the direct computation in getTiledElementByteOffset.
  const std::uint32_t rows = std::min(height, kMaxPrecalculatedHeight);
  const std::uint32_t columns = tiler.stride;
  std::uint64_t *table = gPrecalculatedTiledOffsets[tiler.index];

  for (std::uint32_t y = 0; y < rows; ++y) {
    for (std::uint32_t x = 0; x < columns; ++x) {
      table[y * tiler.stride + x] =
          getTiledElementByteOffsetImpl(x, y, tiler.width);
    }
  }

  return index;
}

/// Returns the tiled byte offset of element (x, y) for the cache slot `index`
/// obtained from precalculateTiles. Coordinates not covered by the
/// precalculated table are computed directly.
inline std::uint64_t getTiledElementByteOffset(int index, std::uint32_t x,
                                               std::uint32_t y) {
  const PrecalculatedTiler &tiler = gPrecalculatedTilers[index];

  if (x < tiler.stride && y < tiler.height &&
      y < kMaxPrecalculatedHeight) [[likely]] {
    return gPrecalculatedTiledOffsets[tiler.index][x + y * tiler.stride];
  }

  return getTiledElementByteOffsetImpl(x, y, tiler.width);
}
} // namespace surfaceTiler
} // namespace amdgpu::device

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,40 @@
#version 450
layout (triangles) in;
layout (triangle_strip, max_vertices = 4) out;
// Expands the incoming triangle into a four-vertex triangle strip.
// Vertex 0 supplies the top-left corner, vertex 1 the right edge (x only)
// and vertex 2 the bottom-left corner; z and w are taken from vertex 0.
void main(void)
{
    vec4 cornerTL = gl_in[0].gl_Position;
    vec4 cornerR  = gl_in[1].gl_Position;
    vec4 cornerBL = gl_in[2].gl_Position;

    // The two right-hand corners are synthesised from the inputs.
    vec4 cornerTR = vec4(cornerR.x, cornerTL.yzw);
    vec4 cornerBR = vec4(cornerR.x, cornerBL.y, cornerTL.zw);

    gl_Position = cornerTL;
    EmitVertex();

    gl_Position = cornerBL;
    EmitVertex();

    gl_Position = cornerTR;
    EmitVertex();

    gl_Position = cornerBR;
    EmitVertex();

    EndPrimitive();
}

View file

@ -0,0 +1,12 @@
#pragma once
#include <cstdint>
namespace amdgpu {
/// View of a memory region addressed relative to `shmPointer`.
struct RemoteMemory {
  /// Base pointer that addresses are added to.
  char *shmPointer;

  /// Translates `address` into a pointer of type T.
  ///
  /// @return nullptr for address 0, otherwise `shmPointer + address`.
  template <typename T = void> T *getPointer(std::uint64_t address) const {
    if (address == 0) {
      return nullptr;
    }
    return reinterpret_cast<T *>(shmPointer + address);
  }
};
} // namespace amdgpu

View file

@ -0,0 +1,31 @@
#pragma once
namespace util {
/// File, function, line and column describing a place in the source code.
///
/// The constructor defaults are compiler builtins, so a default-constructed
/// value is filled in by the compiler. The column is 0 when the compiler
/// does not provide __builtin_COLUMN.
class SourceLocation {
public:
  const char *mFileName = {};
  const char *mFunctionName = {};
  unsigned mLine = 0;
  unsigned mColumn = 0;

public:
  constexpr SourceLocation(const char *fileName = __builtin_FILE(),
                           const char *functionName = __builtin_FUNCTION(),
                           unsigned line = __builtin_LINE(),
                           unsigned column =
#if __has_builtin(__builtin_COLUMN)
                               __builtin_COLUMN()
#else
                               0
#endif
                               ) noexcept
      : mFileName{fileName}, mFunctionName{functionName}, mLine{line},
        mColumn{column} {
  }

  /// Line number.
  constexpr unsigned line() const noexcept { return mLine; }

  /// Column number (0 when unavailable).
  constexpr unsigned column() const noexcept { return mColumn; }

  /// File name.
  constexpr const char *file_name() const noexcept { return mFileName; }

  /// Function name.
  constexpr const char *function_name() const noexcept {
    return mFunctionName;
  }
};
} // namespace util

View file

@ -0,0 +1,26 @@
#pragma once
#include "SourceLocation.hpp"
#include "unreachable.hpp"
/// Streaming check helper: `Verify() << condition` aborts through
/// util::unreachable() with the stored source location when the condition is
/// false.
class Verify {
  util::SourceLocation mLocation;

public:
  /// Stores the location that is reported on failure.
  Verify(util::SourceLocation location = util::SourceLocation())
      : mLocation(location) {}

  /// Returns the stored location.
  util::SourceLocation location() const { return mLocation; }

  /// Passes through when `result` is true; otherwise reports the failure.
  Verify &operator<<(bool result) {
    if (result) {
      return *this;
    }

    util::unreachable("Verification failed at %s: %s:%u:%u",
                      mLocation.function_name(), mLocation.file_name(),
                      mLocation.line(), mLocation.column());
    return *this;
  }
};

View file

@ -0,0 +1,29 @@
#pragma once
#include "SourceLocation.hpp"
#include <cstdio>
#include <cstdarg>
namespace util {
/// Flushes stdout and then stops the program with __builtin_trap().
[[noreturn]] inline void unreachable_impl() {
  std::fflush(stdout);
  __builtin_trap();
}
/// Reports the given source location on stderr and terminates.
///
/// A newline is written to stdout and stdout is flushed first, so pending
/// output appears before the message.
[[noreturn]] inline void unreachable(SourceLocation location = {}) {
  std::printf("\n");
  std::fflush(stdout);

  const char *const file = location.file_name();
  const unsigned line = location.line();
  const unsigned column = location.column();
  const char *const function = location.function_name();
  std::fprintf(stderr, "Unreachable at %s:%u:%u %s\n", file, line, column,
               function);

  unreachable_impl();
}
/// Prints a printf-style message followed by a newline on stderr and
/// terminates.
///
/// A newline is written to stdout and stdout is flushed first, so pending
/// output appears before the message.
///
/// @param fmt  printf-style format string for the remaining arguments
[[noreturn]] inline void unreachable(const char *fmt, ...) {
  std::printf("\n");
  std::fflush(stdout);

  va_list args;
  va_start(args, fmt);
  std::vfprintf(stderr, fmt, args);
  va_end(args);

  std::fprintf(stderr, "\n");
  unreachable_impl();
}
} // namespace util

View file

@ -0,0 +1,4 @@
# Header-only (INTERFACE) target named after the project; consumers that link
# it get this directory's include/ folder on their include path.
project(spirv)
add_library(${PROJECT_NAME} INTERFACE)
target_include_directories(${PROJECT_NAME} INTERFACE include)

View file

@ -0,0 +1,131 @@
/*
** Copyright (c) 2014-2016 The Khronos Group Inc.
**
** Permission is hereby granted, free of charge, to any person obtaining a copy
** of this software and/or associated documentation files (the "Materials"),
** to deal in the Materials without restriction, including without limitation
** the rights to use, copy, modify, merge, publish, distribute, sublicense,
** and/or sell copies of the Materials, and to permit persons to whom the
** Materials are furnished to do so, subject to the following conditions:
**
** The above copyright notice and this permission notice shall be included in
** all copies or substantial portions of the Materials.
**
** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
**
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
** IN THE MATERIALS.
*/
#ifndef GLSLstd450_H
#define GLSLstd450_H
// Version and revision numbers of this GLSL.std.450 header.
static const int GLSLstd450Version = 100;
static const int GLSLstd450Revision = 3;
// Instruction numbers of the GLSL.std.450 extended instruction set.
// The values are fixed by the Khronos specification; do not renumber.
// GLSLstd450Count is one past the last defined instruction.
enum GLSLstd450 {
GLSLstd450Bad = 0, // Don't use
GLSLstd450Round = 1,
GLSLstd450RoundEven = 2,
GLSLstd450Trunc = 3,
GLSLstd450FAbs = 4,
GLSLstd450SAbs = 5,
GLSLstd450FSign = 6,
GLSLstd450SSign = 7,
GLSLstd450Floor = 8,
GLSLstd450Ceil = 9,
GLSLstd450Fract = 10,
GLSLstd450Radians = 11,
GLSLstd450Degrees = 12,
GLSLstd450Sin = 13,
GLSLstd450Cos = 14,
GLSLstd450Tan = 15,
GLSLstd450Asin = 16,
GLSLstd450Acos = 17,
GLSLstd450Atan = 18,
GLSLstd450Sinh = 19,
GLSLstd450Cosh = 20,
GLSLstd450Tanh = 21,
GLSLstd450Asinh = 22,
GLSLstd450Acosh = 23,
GLSLstd450Atanh = 24,
GLSLstd450Atan2 = 25,
GLSLstd450Pow = 26,
GLSLstd450Exp = 27,
GLSLstd450Log = 28,
GLSLstd450Exp2 = 29,
GLSLstd450Log2 = 30,
GLSLstd450Sqrt = 31,
GLSLstd450InverseSqrt = 32,
GLSLstd450Determinant = 33,
GLSLstd450MatrixInverse = 34,
GLSLstd450Modf = 35, // second operand needs an OpVariable to write to
GLSLstd450ModfStruct = 36, // no OpVariable operand
GLSLstd450FMin = 37,
GLSLstd450UMin = 38,
GLSLstd450SMin = 39,
GLSLstd450FMax = 40,
GLSLstd450UMax = 41,
GLSLstd450SMax = 42,
GLSLstd450FClamp = 43,
GLSLstd450UClamp = 44,
GLSLstd450SClamp = 45,
GLSLstd450FMix = 46,
GLSLstd450IMix = 47, // Reserved
GLSLstd450Step = 48,
GLSLstd450SmoothStep = 49,
GLSLstd450Fma = 50,
GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to
GLSLstd450FrexpStruct = 52, // no OpVariable operand
GLSLstd450Ldexp = 53,
GLSLstd450PackSnorm4x8 = 54,
GLSLstd450PackUnorm4x8 = 55,
GLSLstd450PackSnorm2x16 = 56,
GLSLstd450PackUnorm2x16 = 57,
GLSLstd450PackHalf2x16 = 58,
GLSLstd450PackDouble2x32 = 59,
GLSLstd450UnpackSnorm2x16 = 60,
GLSLstd450UnpackUnorm2x16 = 61,
GLSLstd450UnpackHalf2x16 = 62,
GLSLstd450UnpackSnorm4x8 = 63,
GLSLstd450UnpackUnorm4x8 = 64,
GLSLstd450UnpackDouble2x32 = 65,
GLSLstd450Length = 66,
GLSLstd450Distance = 67,
GLSLstd450Cross = 68,
GLSLstd450Normalize = 69,
GLSLstd450FaceForward = 70,
GLSLstd450Reflect = 71,
GLSLstd450Refract = 72,
GLSLstd450FindILsb = 73,
GLSLstd450FindSMsb = 74,
GLSLstd450FindUMsb = 75,
GLSLstd450InterpolateAtCentroid = 76,
GLSLstd450InterpolateAtSample = 77,
GLSLstd450InterpolateAtOffset = 78,
GLSLstd450NMin = 79,
GLSLstd450NMax = 80,
GLSLstd450NClamp = 81,
GLSLstd450Count
};
#endif // #ifndef GLSLstd450_H

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,22 @@
# Static library with the shader sources listed in SRC.
project(libamdgpu-shader)
# Sub-path of this component's headers below include/.
set(PROJECT_PATH amdgpu/shader)
set(SRC
src/cf.cpp
src/scf.cpp
src/CfBuilder.cpp
src/Converter.cpp
src/ConverterContext.cpp
src/Fragment.cpp
src/Function.cpp
src/Instruction.cpp
src/RegisterState.cpp
src/TypeId.cpp
)
# NOTE(review): INCLUDE is not set in this file; unless a parent scope defines
# it, ${INCLUDE} expands to nothing here — confirm whether a header list was
# meant to be added.
add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC})
target_link_libraries(${PROJECT_NAME} PUBLIC spirv amdgpu::base)
# include/ is visible to consumers; include/${PROJECT_PATH} only to this target.
target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH})
# Empty prefix: the archive file name is the project name without an extra "lib".
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "")
add_library(amdgpu::shader ALIAS ${PROJECT_NAME})
set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON)

View file

@ -0,0 +1,18 @@
#pragma once
namespace amdgpu::shader {
/// Bit flags describing how something is accessed; Load and Store can be
/// combined with the operators below.
enum class AccessOp {
  None = 0,
  Load = 1 << 0,
  Store = 1 << 1,
};

/// Union of two flag sets.
constexpr AccessOp operator|(AccessOp lhs, AccessOp rhs) {
  const int bits = static_cast<int>(lhs) | static_cast<int>(rhs);
  return static_cast<AccessOp>(bits);
}

/// Intersection of two flag sets.
constexpr AccessOp operator&(AccessOp lhs, AccessOp rhs) {
  const int bits = static_cast<int>(lhs) & static_cast<int>(rhs);
  return static_cast<AccessOp>(bits);
}

/// Adds the flags of `rhs` to `lhs` and returns `lhs`.
constexpr AccessOp &operator|=(AccessOp &lhs, AccessOp rhs) {
  lhs = lhs | rhs;
  return lhs;
}

/// Keeps only the flags of `lhs` that are also in `rhs` and returns `lhs`.
constexpr AccessOp &operator&=(AccessOp &lhs, AccessOp rhs) {
  lhs = lhs & rhs;
  return lhs;
}
} // namespace amdgpu::shader

View file

@ -0,0 +1,5 @@
#pragma once
namespace amdgpu::shader {
/// Kind of buffer: VBuffer or TBuffer.
enum class BufferKind {
  VBuffer,
  TBuffer,
};
} // namespace amdgpu::shader

View file

@ -0,0 +1,8 @@
#pragma once
#include "cf.hpp"
#include <amdgpu/RemoteMemory.hpp>
namespace amdgpu::shader {
// Declaration only; the definition is not in this header.
// Presumably builds the control-flow graph of the code found at `entryPoint`
// inside `memory`, registers its blocks in `ctxt` and returns the entry
// block — confirm against the implementation.
cf::BasicBlock *buildCf(cf::Context &ctxt, RemoteMemory memory,
std::uint64_t entryPoint);
} // namespace amdgpu::shader

View file

@ -0,0 +1,35 @@
#pragma once
#include "Stage.hpp"
#include "AccessOp.hpp"
#include <amdgpu/RemoteMemory.hpp>
#include <cstdint>
#include <span>
#include <vector>
namespace amdgpu::shader {
// Result of convert(): a list of uniform descriptions and a vector of 32-bit
// SPIR-V words.
struct Shader {
// Category of a uniform.
enum class UniformKind {
Buffer,
Sampler,
Image
};
// Description of one uniform.
struct UniformInfo {
// Binding number of the uniform.
std::uint32_t binding;
// Eight raw 32-bit words describing the resource — presumably the
// hardware descriptor; confirm against the converter.
std::uint32_t buffer[8];
UniformKind kind;
// Load/Store flags for this uniform.
AccessOp accessOp;
};
std::vector<UniformInfo> uniforms;
std::vector<std::uint32_t> spirv;
};
// Declaration only; the definition is not in this header.
// Presumably translates the shader at `entry` inside `memory` for the given
// `stage` into SPIR-V; dimX/dimY/dimZ default to 1 — confirm their meaning
// against the implementation.
// NOTE(review): `userSpgrs` looks like a typo for `userSgprs` (compare
// Function::userSgprs) — confirm before renaming.
Shader convert(RemoteMemory memory, Stage stage, std::uint64_t entry,
std::span<const std::uint32_t> userSpgrs, int bindingOffset,
std::uint32_t dimX = 1, std::uint32_t dimY = 1,
std::uint32_t dimZ = 1);
} // namespace amdgpu::shader

View file

@ -0,0 +1,257 @@
#pragma once
#include "Fragment.hpp"
#include "Function.hpp"
#include "RegisterId.hpp"
#include "Stage.hpp"
#include "TypeId.hpp"
#include "Uniform.hpp"
#include "Value.hpp"
#include "scf.hpp"
#include <forward_list>
#include <amdgpu/RemoteMemory.hpp>
#include <spirv/spirv-builder.hpp>
#include <unordered_map>
#include <util/unreachable.hpp>
#include <bit>
#include <span>
#include <cassert>
#include <cstdint>
#include <vector>
#include <map>
namespace amdgpu::shader {
/*
struct MaterializedFunction {
spirv::Function function;
spirv::FunctionType type;
spirv::Type returnType;
std::vector<std::pair<RegisterId, TypeId>> args;
std::vector<std::pair<RegisterId, TypeId>> results;
};
*/
class ConverterContext {
Stage mStage;
RemoteMemory mMemory;
spirv::IdGenerator mGenerator;
spirv::SpirvBuilder mBuilder{mGenerator, 1024};
static constexpr auto kGenericTypesCount =
static_cast<std::size_t>(TypeId::Void) + 1;
spirv::Type mTypes[kGenericTypesCount];
spirv::PointerType mPtrTypes[13][kGenericTypesCount];
spirv::RuntimeArrayType mRuntimeArrayTypes[kGenericTypesCount];
spirv::VariableValue mThreadId;
spirv::VariableValue mWorkgroupId;
spirv::VariableValue mLocalInvocationId;
spirv::VariableValue mPerVertex;
spirv::VariableValue mFragCoord;
std::vector<spirv::VariableValue> mInterfaces;
std::map<unsigned, spirv::VariableValue> mIns;
std::map<unsigned, spirv::VariableValue> mOuts;
std::map<std::uint32_t, spirv::ConstantFloat> mConstantFloat32Map;
std::map<std::uint32_t, spirv::ConstantUInt> mConstantUint32Map;
std::map<std::uint32_t, spirv::ConstantSInt> mConstantSint32Map;
std::map<std::uint64_t, spirv::ConstantUInt> mConstantUint64Map;
struct FunctionType {
spirv::Type resultType;
std::vector<spirv::Type> params;
spirv::FunctionType id;
};
std::vector<FunctionType> mFunctionTypes;
struct StructTypeEntry {
spirv::StructType id;
std::vector<spirv::Type> members;
spirv::PointerType ptrTypes[13];
bool match(std::span<const spirv::Type> other) {
if (members.size() != other.size()) {
return false;
}
for (std::size_t i = 0; i < other.size(); ++i) {
if (members[i] != other[i]) {
return false;
}
}
return true;
}
};
std::vector<StructTypeEntry> mStructTypes;
std::forward_list<Fragment> mFragments;
std::forward_list<Function> mFunctions;
spirv::ConstantBool mTrue;
spirv::ConstantBool mFalse;
std::vector<UniformInfo> mUniforms;
spirv::ExtInstSet mGlslStd450;
spirv::Function mDiscardFn;
public:
ConverterContext(RemoteMemory memory, Stage stage) : mMemory(memory), mStage(stage) {
mGlslStd450 = mBuilder.createExtInstImport("GLSL.std.450");
}
const decltype(mInterfaces) &getInterfaces() const {
return mInterfaces;
}
spirv::SpirvBuilder &getBuilder() { return mBuilder; }
RemoteMemory getMemory() const { return mMemory; }
spirv::ExtInstSet getGlslStd450() const { return mGlslStd450; }
std::optional<TypeId> getTypeIdOf(spirv::Type type) const;
spirv::StructType findStructType(std::span<const spirv::Type> members);
spirv::StructType getStructType(std::span<const spirv::Type> members);
spirv::PointerType getStructPointerType(spv::StorageClass storageClass,
spirv::StructType structType);
spirv::Type getType(TypeId id);
spirv::PointerType getPointerType(spv::StorageClass storageClass, TypeId id) {
assert(static_cast<unsigned>(storageClass) < 13);
auto &type = mPtrTypes[static_cast<unsigned>(storageClass)]
[static_cast<std::uint32_t>(id)];
if (!type) {
type = mBuilder.createTypePointer(storageClass, getType(id));
}
return type;
}
spirv::RuntimeArrayType getRuntimeArrayType(TypeId id);
spirv::UIntType getUInt32Type() {
return spirv::cast<spirv::UIntType>(getType(TypeId::UInt32));
}
spirv::UIntType getUInt64Type() {
return spirv::cast<spirv::UIntType>(getType(TypeId::UInt64));
}
spirv::VectorOfType<spirv::UIntType> getUint32x2Type() {
return spirv::cast<spirv::VectorOfType<spirv::UIntType>>(
getType(TypeId::UInt32x2));
}
spirv::VectorOfType<spirv::UIntType> getUint32x3Type() {
return spirv::cast<spirv::VectorOfType<spirv::UIntType>>(
getType(TypeId::UInt32x3));
}
spirv::VectorOfType<spirv::UIntType> getUint32x4Type() {
return spirv::cast<spirv::VectorOfType<spirv::UIntType>>(
getType(TypeId::UInt32x4));
}
spirv::ArrayOfType<spirv::UIntType> getArrayUint32x8Type() {
return spirv::cast<spirv::ArrayOfType<spirv::UIntType>>(getType(TypeId::ArrayUInt32x8));
}
spirv::ArrayOfType<spirv::UIntType> getArrayUint32x16Type() {
return spirv::cast<spirv::ArrayOfType<spirv::UIntType>>(getType(TypeId::ArrayUInt32x16));
}
spirv::SIntType getSint32Type() {
return spirv::cast<spirv::SIntType>(getType(TypeId::SInt32));
}
spirv::SIntType getSint64Type() {
return spirv::cast<spirv::SIntType>(getType(TypeId::SInt64));
}
spirv::FloatType getFloat32Type() {
return spirv::cast<spirv::FloatType>(getType(TypeId::Float32));
}
spirv::VectorOfType<spirv::FloatType> getFloat32x4Type() {
return spirv::cast<spirv::VectorOfType<spirv::FloatType>>(
getType(TypeId::Float32x4));
}
spirv::VectorOfType<spirv::FloatType> getFloat32x3Type() {
return spirv::cast<spirv::VectorOfType<spirv::FloatType>>(
getType(TypeId::Float32x3));
}
spirv::VectorOfType<spirv::FloatType> getFloat32x2Type() {
return spirv::cast<spirv::VectorOfType<spirv::FloatType>>(
getType(TypeId::Float32x2));
}
spirv::BoolType getBoolType() {
return spirv::cast<spirv::BoolType>(getType(TypeId::Bool));
}
spirv::VoidType getVoidType() {
return spirv::cast<spirv::VoidType>(getType(TypeId::Void));
}
spirv::ConstantBool getTrue() {
if (!mTrue) {
mTrue = mBuilder.createConstantTrue(getBoolType());
}
return mTrue;
}
spirv::ConstantBool getFalse() {
if (!mFalse) {
mFalse = mBuilder.createConstantFalse(getBoolType());
}
return mFalse;
}
spirv::ConstantUInt getUInt64(std::uint64_t value);
spirv::ConstantUInt getUInt32(std::uint32_t value);
spirv::ConstantSInt getSInt32(std::uint32_t value);
spirv::ConstantFloat getFloat32Raw(std::uint32_t value);
spirv::ConstantFloat getFloat32(float id) {
return getFloat32Raw(std::bit_cast<std::uint32_t>(id));
}
spirv::SamplerType getSamplerType() {
return spirv::cast<spirv::SamplerType>(getType(TypeId::Sampler));
}
spirv::ImageType getImage2DType() {
return spirv::cast<spirv::ImageType>(getType(TypeId::Image2D));
}
spirv::SampledImageType getSampledImage2DType() {
return spirv::cast<spirv::SampledImageType>(getType(TypeId::SampledImage2D));
}
UniformInfo *createStorageBuffer(TypeId type);
UniformInfo *getOrCreateStorageBuffer(std::uint32_t *vbuffer, TypeId type);
UniformInfo *getOrCreateUniformConstant(std::uint32_t *buffer, std::size_t size, TypeId type);
spirv::VariableValue getThreadId();
spirv::VariableValue getWorkgroupId();
spirv::VariableValue getLocalInvocationId();
spirv::VariableValue getPerVertex();
spirv::VariableValue getFragCoord();
spirv::VariableValue getIn(unsigned location);
spirv::VariableValue getOut(unsigned location);
spirv::Function getDiscardFn();
std::optional<std::uint32_t> findUint32Value(spirv::Value id) const;
std::optional<std::int32_t> findSint32Value(spirv::Value id) const;
std::optional<float> findFloat32Value(spirv::Value id) const;
spirv::FunctionType getFunctionType(spirv::Type resultType,
std::span<const spirv::Type> params);
Function *createFunction(std::size_t expectedSize);
Fragment *createFragment(std::size_t expectedSize);
std::vector<UniformInfo> &getUniforms() {
return mUniforms;
}
};
} // namespace amdgpu::shader

View file

@ -0,0 +1,95 @@
#pragma once
#include "FragmentTerminator.hpp"
#include "Instruction.hpp"
#include "RegisterId.hpp"
#include "RegisterState.hpp"
#include "Stage.hpp"
#include "TypeId.hpp"
#include "Uniform.hpp"
#include "scf.hpp"
#include <map>
#include <optional>
#include <spirv/spirv-builder.hpp>
namespace amdgpu::shader {
/// Options accepted by the Fragment operand getters.
enum class OperandGetFlags {
  None = 0,
  PreserveType = 1,
};
struct Function;
class ConverterContext;
struct Fragment {
ConverterContext *context = nullptr;
Function *function = nullptr;
spirv::Block entryBlockId;
spirv::BlockBuilder builder;
RegisterState *registers = nullptr;
std::set<RegisterId> values;
std::set<RegisterId> outputs;
std::vector<Fragment *> predecessors;
std::uint64_t jumpAddress = 0;
spirv::BoolValue branchCondition;
void appendBranch(Fragment &other) {
other.predecessors.push_back(this);
}
void injectValuesFromPreds();
// std::optional<RegisterId> findInput(spirv::Value value);
// Value addInput(RegisterId id, spirv::Type type);
spirv::SamplerValue createSampler(RegisterId base);
spirv::ImageValue createImage(RegisterId base, bool r128); // TODO: params
Value createCompositeExtract(Value composite, std::uint32_t member);
Value getOperand(RegisterId id, TypeId type, OperandGetFlags flags = OperandGetFlags::None);
void setOperand(RegisterId id, Value value);
void setVcc(Value value);
void setScc(Value value);
spirv::BoolValue getScc();
spirv::Value createBitcast(spirv::Type to, spirv::Type from, spirv::Value value);
Value getScalarOperand(int id, TypeId type, OperandGetFlags flags = OperandGetFlags::None) {
return getOperand(RegisterId::Scalar(id), type, flags);
}
Value getVectorOperand(int id, TypeId type, OperandGetFlags flags = OperandGetFlags::None) {
return getOperand(RegisterId::Vector(id), type, flags);
}
Value getAttrOperand(int id, TypeId type, OperandGetFlags flags = OperandGetFlags::None) {
return getOperand(RegisterId::Attr(id), type, flags);
}
Value getVccLo() {
return getOperand(RegisterId::VccLo, TypeId::UInt32);
}
Value getVccHi() {
return getOperand(RegisterId::VccHi, TypeId::UInt32);
}
Value getExecLo() {
return getOperand(RegisterId::ExecLo, TypeId::UInt32);
}
Value getExecHi() {
return getOperand(RegisterId::ExecHi, TypeId::UInt32);
}
void setScalarOperand(int id, Value value) {
setOperand(RegisterId::Scalar(id), value);
}
void setVectorOperand(int id, Value value) {
setOperand(RegisterId::Vector(id), value);
}
void setExportTarget(int id, Value value) {
setOperand(RegisterId::Export(id), value);
}
// void createCallTo(MaterializedFunction *other);
void convert(std::uint64_t size);
private:
Value getRegister(RegisterId id);
Value getRegister(RegisterId id, spirv::Type type);
void setRegister(RegisterId id, Value value);
};
} // namespace amdgpu::shader

View file

@ -0,0 +1,11 @@
#pragma once
namespace amdgpu::shader {
/// Ways a fragment can end.
enum class FragmentTerminator {
  None = 0,
  EndProgram = 1,
  CallToReg = 2,
  BranchToReg = 3,
  Branch = 4,
};
} // namespace amdgpu::shader

View file

@ -0,0 +1,31 @@
#pragma once
#include "Fragment.hpp"
#include "RegisterId.hpp"
#include "spirv/spirv-builder.hpp"
#include <span>
namespace amdgpu::shader {
class ConverterContext;
// A function being converted: its entry and exit fragments, the register
// inputs created so far, its SPIR-V function builder and the fragments it
// owns pointers to.
struct Function {
ConverterContext *context = nullptr;
Stage stage = Stage::None;
// User-supplied scalar / vector register values (32-bit words).
std::span<const std::uint32_t> userSgprs;
std::span<const std::uint32_t> userVgprs;
Fragment entryFragment;
Fragment exitFragment;
// Input values keyed by the register they belong to.
std::map<RegisterId, Value> inputs;
spirv::FunctionBuilder builder;
// Non-owning pointers — presumably to fragments owned by the converter
// context; confirm against createFragment().
std::vector<Fragment *> fragments;
Value getInput(RegisterId id);
Value createInput(RegisterId id);
void createExport(spirv::BlockBuilder &builder, unsigned index, Value value);
spirv::Type getResultType();
spirv::FunctionType getFunctionType();
Fragment *createFragment();
void insertReturn();
};
} // namespace amdgpu::shader

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,102 @@
#pragma once
#include <cstdint>
namespace amdgpu::shader {
/// Identifier of a register in one flat numbering that places the scalar,
/// vector, export and attribute register files one after another.
class RegisterId {
  // Layout of the flat numbering: each file starts where the previous ends.
  static constexpr std::uint32_t kScalarOperandsOffset = 0;
  static constexpr std::uint32_t kScalarOperandsCount = 256;
  static constexpr std::uint32_t kVectorOperandsOffset =
      kScalarOperandsOffset + kScalarOperandsCount;
  static constexpr std::uint32_t kVectorOperandsCount = 512;
  static constexpr std::uint32_t kExportOperandsOffset =
      kVectorOperandsOffset + kVectorOperandsCount;
  static constexpr std::uint32_t kExportOperandsCount = 64;
  static constexpr std::uint32_t kAttrOperandsOffset =
      kExportOperandsOffset + kExportOperandsCount;
  static constexpr std::uint32_t kAttrOperandsCount = 32;
  static constexpr std::uint32_t kOperandsCount =
      kAttrOperandsOffset + kAttrOperandsCount;

  // Special registers, all located inside the scalar file.
  static constexpr std::uint32_t kRegisterVccLoId = kScalarOperandsOffset + 106;
  static constexpr std::uint32_t kRegisterVccHiId = kScalarOperandsOffset + 107;
  static constexpr std::uint32_t kRegisterM0Id = kScalarOperandsOffset + 124;
  static constexpr std::uint32_t kRegisterExecLoId =
      kScalarOperandsOffset + 126;
  static constexpr std::uint32_t kRegisterExecHiId =
      kScalarOperandsOffset + 127;
  static constexpr std::uint32_t kRegisterSccId = kScalarOperandsOffset + 253;
  static constexpr std::uint32_t kRegisterLdsDirect =
      kScalarOperandsOffset + 254;

public:
  enum enum_type : std::uint32_t {
    Invalid = ~static_cast<std::uint32_t>(0),

    VccLo = kRegisterVccLoId,
    VccHi = kRegisterVccHiId,
    M0 = kRegisterM0Id,
    ExecLo = kRegisterExecLoId,
    ExecHi = kRegisterExecHiId,
    Scc = kRegisterSccId,
    LdsDirect = kRegisterLdsDirect,
  } raw = Invalid;

  RegisterId(enum_type value) : raw(value) {}

  operator enum_type() const { return raw; }

  /// Wraps a flat register number without any adjustment.
  static RegisterId Raw(std::uint32_t index) {
    return static_cast<enum_type>(index);
  }

  /// Register `index` of the scalar file.
  static RegisterId Scalar(std::uint32_t index) {
    return static_cast<enum_type>(index + kScalarOperandsOffset);
  }

  /// Register `index` of the vector file.
  static RegisterId Vector(std::uint32_t index) {
    return static_cast<enum_type>(index + kVectorOperandsOffset);
  }

  /// Register `index` of the export file.
  static RegisterId Export(std::uint32_t index) {
    return static_cast<enum_type>(index + kExportOperandsOffset);
  }

  /// Register `index` of the attribute file.
  static RegisterId Attr(std::uint32_t index) {
    return static_cast<enum_type>(index + kAttrOperandsOffset);
  }

  bool isScalar() const {
    return isWithin(kScalarOperandsOffset, kScalarOperandsCount);
  }

  bool isVector() const {
    return isWithin(kVectorOperandsOffset, kVectorOperandsCount);
  }

  bool isExport() const {
    return isWithin(kExportOperandsOffset, kExportOperandsCount);
  }

  bool isAttr() const {
    return isWithin(kAttrOperandsOffset, kAttrOperandsCount);
  }

  /// Index of the register inside its own file; the flat number itself when
  /// it belongs to none of the files.
  unsigned getOffset() const {
    const std::uint32_t flat = raw;
    if (isScalar()) {
      return flat - kScalarOperandsOffset;
    }
    if (isVector()) {
      return flat - kVectorOperandsOffset;
    }
    if (isExport()) {
      return flat - kExportOperandsOffset;
    }
    if (isAttr()) {
      return flat - kAttrOperandsOffset;
    }
    return flat;
  }

private:
  /// True when the flat number lies in [first, first + count).
  bool isWithin(std::uint32_t first, std::uint32_t count) const {
    const std::uint32_t flat = raw;
    return flat >= first && flat < first + count;
  }
};
} // namespace amdgpu::shader

View file

@ -0,0 +1,27 @@
#pragma once
#include "RegisterId.hpp"
#include "Value.hpp"
#include <cstdint>
namespace amdgpu::shader {
// Current value of every register at one point of the program.
struct RegisterState {
std::uint64_t pc;
// General-purpose scalar registers.
Value sgprs[104];
// Special registers, stored individually.
Value vccLo;
Value vccHi;
Value m0;
Value execLo;
Value execHi;
Value scc;
Value ldsDirect;
// Vector registers.
Value vgprs[512];
// Attribute registers.
Value attrs[32];
// Accessors that map a RegisterId to one of the fields above (defined
// elsewhere).
Value getRegister(RegisterId regId);
void setRegister(RegisterId regId, Value value);
private:
Value getRegisterImpl(RegisterId regId);
};
} // namespace amdgpu::shader

View file

@ -0,0 +1,5 @@
#pragma once
namespace amdgpu::shader {
/// Shader stage; None means no stage has been set.
enum class Stage {
  None,
  Vertex,
  Fragment,
  Geometry,
  Compute,
};
} // namespace amdgpu::shader

View file

@ -0,0 +1,57 @@
#pragma once
#include <cstddef>
namespace amdgpu::shader {
/// Identifier of one of the generic value types used by the converter.
/// Default-constructed values are Void.
struct TypeId {
  enum {
    Bool,
    SInt8,
    UInt8,
    SInt16,
    UInt16,
    SInt32,
    UInt32,
    UInt32x2,
    UInt32x3,
    UInt32x4,
    UInt64,
    SInt64,
    ArrayUInt32x8,
    ArrayUInt32x16,
    Float16,
    Float32,
    Float32x2,
    Float32x3,
    Float32x4,
    Float64,
    ArrayFloat32x8,
    ArrayFloat32x16,
    Sampler,
    Image2D,
    SampledImage2D,

    Void // should be last
  } raw = Void;

  using enum_type = decltype(raw);

  TypeId() = default;
  TypeId(enum_type value) : raw(value) {}

  operator enum_type() const { return raw; }

  TypeId getBaseType() const;
  std::size_t getSize() const;
  std::size_t getElementsCount() const;

  /// True for the signed integer scalar types.
  bool isSignedInt() const {
    switch (raw) {
    case SInt8:
    case SInt16:
    case SInt32:
    case SInt64:
      return true;
    default:
      return false;
    }
  }

  /// True for the floating-point scalar types.
  bool isFloatPoint() const {
    switch (raw) {
    case Float16:
    case Float32:
    case Float64:
      return true;
    default:
      return false;
    }
  }
};
} // namespace amdgpu::shader

View file

@ -0,0 +1,20 @@
#pragma once
#include "AccessOp.hpp"
#include "TypeId.hpp"
#include "spirv/spirv-builder.hpp"
#include <cstdint>
#include <set>
namespace amdgpu::shader {
// Bookkeeping for one uniform created during conversion.
// Only accessOp has a default; the other plain fields are uninitialised
// until assigned.
struct UniformInfo {
// Eight raw 32-bit words identifying the resource — presumably the
// hardware descriptor; confirm against the converter.
std::uint32_t buffer[8];
int index;
TypeId typeId;
spirv::PointerType type;
spirv::VariableValue variable;
// Accumulated Load/Store flags.
AccessOp accessOp = AccessOp::None;
bool isBuffer;
};
} // namespace amdgpu::shader

View file

@ -0,0 +1,15 @@
#pragma once
#include <spirv/spirv-builder.hpp>
namespace amdgpu::shader {
// A SPIR-V value handle paired with its type.
struct Value {
spirv::Type type;
spirv::Value value;
Value() = default;
Value(spirv::Type type, spirv::Value value) : type(type), value(value) {}
// Truthiness and equality look at the value handle only; the type is
// ignored.
explicit operator bool() const { return static_cast<bool>(value); }
bool operator==(Value other) const { return value == other.value; }
};
} // namespace amdgpu::shader

View file

@ -0,0 +1,146 @@
#pragma once
#include <cstdint>
#include <map>
#include <set>
#include <vector>
namespace cf {
/// How a basic block ends.
enum class TerminatorKind {
  None = 0,
  Branch = 1,
  BranchToUnknown = 2,
  Return = 3,
};
/// Node of the control-flow graph: an address range with at most two
/// successors and a set of predecessors. Not copyable.
class BasicBlock {
  std::uint64_t address;
  std::uint64_t size = 0;

  std::set<BasicBlock *> predecessors;
  BasicBlock *successors[2]{};
  TerminatorKind terminator = TerminatorKind::None;

public:
  explicit BasicBlock(std::uint64_t address, std::uint64_t size = 0)
      : address(address), size(size) {}

  BasicBlock(const BasicBlock &) = delete;

  void setSize(std::uint64_t newSize) { size = newSize; }
  std::uint64_t getSize() const { return size; }
  std::uint64_t getAddress() const { return address; }
  TerminatorKind getTerminator() const { return terminator; }

  void createConditionalBranch(BasicBlock *ifTrue, BasicBlock *ifFalse);
  void createBranch(BasicBlock *target);
  void createBranchToUnknown();
  void createReturn();

  void replaceSuccessor(BasicBlock *origBB, BasicBlock *newBB);

  /// In `origBB`, replaces the successor link to this block by `newBB`.
  void replacePredecessor(BasicBlock *origBB, BasicBlock *newBB) {
    origBB->replaceSuccessor(this, newBB);
  }

  /// Calls `cb` once for this block and for every block reachable through
  /// successor links. A block's successors are queued before `cb` runs on it.
  template <std::invocable<BasicBlock &> T> void walk(T &&cb) {
    std::vector<BasicBlock *> pending;
    std::set<BasicBlock *> seen;

    pending.push_back(this);
    seen.insert(this);

    while (!pending.empty()) {
      BasicBlock *const current = pending.back();
      pending.pop_back();

      current->walkSuccessors([&](BasicBlock *next) {
        const bool firstVisit = seen.insert(next).second;
        if (firstVisit) {
          pending.push_back(next);
        }
      });

      cb(*current);
    }
  }

  /// Calls `cb` for each successor; the second one is only considered when
  /// the first one is set.
  template <std::invocable<BasicBlock *> T> void walkSuccessors(T &&cb) const {
    if (!successors[0]) {
      return;
    }

    cb(successors[0]);

    if (successors[1]) {
      cb(successors[1]);
    }
  }

  /// Calls `cb` for each predecessor.
  template <std::invocable<BasicBlock *> T>
  void walkPredecessors(T &&cb) const {
    for (BasicBlock *predecessor : predecessors) {
      cb(predecessor);
    }
  }

  std::size_t getPredecessorsCount() const { return predecessors.size(); }
  bool hasDirectPredecessor(const BasicBlock &block) const;
  bool hasPredecessor(const BasicBlock &block) const;

  /// Number of successors: 0, 1 or 2.
  std::size_t getSuccessorsCount() const {
    if (!successors[0]) {
      return 0;
    }
    if (!successors[1]) {
      return 1;
    }
    return 2;
  }

  /// Successor at `index`; the index is not range-checked.
  BasicBlock *getSuccessor(std::size_t index) const {
    return successors[index];
  }

  void split(BasicBlock *target);
};
/// Owner of all basic blocks, keyed by start address. The map is ordered by
/// descending address, so lower_bound(a) yields the block with the greatest
/// start address that is not above `a`.
class Context {
  std::map<std::uint64_t, BasicBlock, std::greater<>> basicBlocks;

public:
  /// Block that starts exactly at `address`, or nullptr.
  BasicBlock *getBasicBlockAt(std::uint64_t address) {
    const auto found = basicBlocks.find(address);
    return found != basicBlocks.end() ? &found->second : nullptr;
  }

  /// Block whose [address, address + size) range contains `address`, or
  /// nullptr.
  BasicBlock *getBasicBlock(std::uint64_t address) {
    const auto candidate = basicBlocks.lower_bound(address);
    if (candidate == basicBlocks.end()) {
      return nullptr;
    }

    BasicBlock &block = candidate->second;
    const bool covers = block.getAddress() <= address &&
                        block.getAddress() + block.getSize() > address;
    return covers ? &block : nullptr;
  }

  /// Returns the block for `address`, creating one when none contains it.
  /// When an existing block contains `address` in its interior and `split`
  /// is true, a new block starting at `address` is created and the existing
  /// block is split into it.
  BasicBlock *getOrCreateBasicBlock(std::uint64_t address, bool split = true) {
    const auto candidate = basicBlocks.lower_bound(address);

    if (candidate != basicBlocks.end()) {
      BasicBlock *const existing = &candidate->second;
      const bool covers = existing->getAddress() <= address &&
                          existing->getAddress() + existing->getSize() > address;

      if (covers) {
        if (!split || existing->getAddress() == address) {
          return existing;
        }

        BasicBlock *const tail =
            &basicBlocks.emplace_hint(candidate, address, address)->second;
        existing->split(tail);
        return tail;
      }
    }

    return &basicBlocks.emplace_hint(candidate, address, address)->second;
  }
};
} // namespace cf

View file

@ -0,0 +1,371 @@
#pragma once
#include <cassert>
#include <cstdint>
#include <forward_list>
#include <functional>
#include <memory>
namespace cf {
class BasicBlock;
}
namespace scf {
class BasicBlock;
/// Settings used when printing the node tree.
struct PrintOptions {
  /// Number of `identChar` characters per nesting level.
  unsigned char identCount = 2;
  /// Character used for indentation.
  char identChar = ' ';
  /// Optional callback for printing a basic block.
  std::function<void(const PrintOptions &, unsigned depth, BasicBlock *)>
      blockPrinter;

  /// Indentation string for nesting level `depth`.
  std::string makeIdent(unsigned depth) const {
    const auto length = depth * identCount;
    return std::string(length, identChar);
  }
};
class Node {
Node *mParent = nullptr;
Node *mNext = nullptr;
Node *mPrev = nullptr;
public:
virtual ~Node() = default;
virtual void print(const PrintOptions &options, unsigned depth) = 0;
virtual bool isEqual(const Node &other) const {
return this == &other;
}
void dump() {
print({}, 0);
}
void setParent(Node *parent) {
mParent = parent;
}
Node *getParent() const {
return mParent;
}
template<typename T> requires(std::is_base_of_v<Node, T>)
auto getParent() const -> decltype(dynCast<T>(mParent)) {
return dynCast<T>(mParent);
}
Node *getNext() const {
return mNext;
}
Node *getPrev() const {
return mPrev;
}
friend class Block;
};
// Checked downcast between Node types: returns `s` as T*, or nullptr when
// the object is not a T. Only participates when both types derive from Node
// and the dynamic_cast is well-formed.
template <typename T, typename ST>
requires(std::is_base_of_v<Node, T> && std::is_base_of_v<Node, ST>) &&
requires(ST *s) { dynamic_cast<T *>(s); }
T *dynCast(ST *s) {
return dynamic_cast<T *>(s);
}
// Const overload of the checked downcast: returns `s` as const T*, or
// nullptr when the object is not a T.
template <typename T, typename ST>
requires(std::is_base_of_v<Node, T> && std::is_base_of_v<Node, ST>) &&
requires(const ST *s) { dynamic_cast<const T *>(s); }
const T *dynCast(const ST *s) {
return dynamic_cast<const T *>(s);
}
/// Null-tolerant node comparison: two null pointers (or the same pointer) are
/// equal, a null and a non-null pointer are not, otherwise Node::isEqual
/// decides.
inline bool isNodeEqual(const Node *lhs, const Node *rhs) {
  if (lhs == rhs) {
    return true;
  }
  if (lhs == nullptr || rhs == nullptr) {
    return false;
  }
  return lhs->isEqual(*rhs);
}
struct UnknownBlock final : Node {
void print(const PrintOptions &options, unsigned depth) override {
std::printf("%sunknown\n", options.makeIdent(depth).c_str());
}
bool isEqual(const Node &other) const override {
return this == &other || dynCast<UnknownBlock>(&other) != nullptr;
}
};
struct Return final : Node {
void print(const PrintOptions &options, unsigned depth) override {
std::printf("%sreturn\n", options.makeIdent(depth).c_str());
}
bool isEqual(const Node &other) const override {
return this == &other || dynCast<Return>(&other) != nullptr;
}
};
class Context;
class Block final : public Node {
Node *mBegin = nullptr;
Node *mEnd = nullptr;
void *mUserData = nullptr;
public:
void print(const PrintOptions &options, unsigned depth) override {
std::printf("%s{\n", options.makeIdent(depth).c_str());
for (auto node = mBegin; node != nullptr; node = node->getNext()) {
node->print(options, depth + 1);
}
std::printf("%s}\n", options.makeIdent(depth).c_str());
}
bool isEmpty() const {
return mBegin == nullptr;
}
Node *getRootNode() const {
return mBegin;
}
Node *getLastNode() const {
return mEnd;
}
void setUserData(void *data) {
mUserData = data;
}
void* getUserData() const {
return mUserData;
}
template<typename T>
T* getUserData() const {
return static_cast<T *>(mUserData);
}
void eraseFrom(Node *endBefore);
void splitInto(Block *target, Node *splitPoint);
Block *split(Context &context, Node *splitPoint);
void append(Node *node) {
assert(node->mParent == nullptr);
assert(node->mPrev == nullptr);
assert(node->mNext == nullptr);
node->mParent = this;
node->mPrev = mEnd;
if (mEnd != nullptr) {
mEnd->mNext = node;
}
if (mBegin == nullptr) {
mBegin = node;
}
mEnd = node;
}
void detachNode(Node *node) {
if (node->mPrev != nullptr) {
node->mPrev->mNext = node->mNext;
}
if (node->mNext != nullptr) {
node->mNext->mPrev = node->mPrev;
}
if (mBegin == node) {
mBegin = node->mNext;
}
if (mEnd == node) {
mEnd = node->mPrev;
}
node->mNext = nullptr;
node->mPrev = nullptr;
node->mParent = nullptr;
}
bool isEqual(const Node &other) const override {
if (this == &other) {
return true;
}
auto otherBlock = dynCast<Block>(&other);
if (otherBlock == nullptr) {
return false;
}
auto thisIt = mBegin;
auto otherIt = otherBlock->mBegin;
while (thisIt != nullptr && otherIt != nullptr) {
if (!thisIt->isEqual(*otherIt)) {
return false;
}
thisIt = thisIt->mNext;
otherIt = otherIt->mNext;
}
return thisIt == otherIt;
}
};
class BasicBlock final : public Node {
std::uint64_t address;
std::uint64_t size = 0;
public:
explicit BasicBlock(std::uint64_t address, std::uint64_t size = 0)
: address(address), size(size) {}
std::uint64_t getSize() const { return size; }
std::uint64_t getAddress() const { return address; }
void print(const PrintOptions &options, unsigned depth) override {
std::printf(
"%sbb%lx\n",
std::string(depth * options.identCount, options.identChar).c_str(),
getAddress());
if (depth != 0 && options.blockPrinter) {
options.blockPrinter(options, depth + 1, this);
}
}
Block *getBlock() const {
return dynCast<Block>(getParent());
}
bool isEqual(const Node &other) const override {
if (this == &other) {
return true;
}
if (auto otherBlock = dynCast<BasicBlock>(&other)) {
return address == otherBlock->address;
}
return false;
}
};
struct IfElse final : Node {
Block *ifTrue;
Block *ifFalse;
IfElse(Block *ifTrue, Block *ifFalse) : ifTrue(ifTrue), ifFalse(ifFalse) {
ifTrue->setParent(this);
ifFalse->setParent(this);
}
void print(const PrintOptions &options, unsigned depth) override {
if (ifTrue->isEmpty()) {
std::printf("%sif false\n", options.makeIdent(depth).c_str());
ifFalse->print(options, depth);
return;
}
std::printf("%sif true\n", options.makeIdent(depth).c_str());
ifTrue->print(options, depth);
if (!ifFalse->isEmpty()) {
std::printf("%selse\n", options.makeIdent(depth).c_str());
ifFalse->print(options, depth);
}
}
bool isEqual(const Node &other) const override {
if (this == &other) {
return true;
}
if (auto otherBlock = dynCast<IfElse>(&other)) {
return ifTrue->isEqual(*otherBlock->ifTrue) &&
ifFalse->isEqual(*otherBlock->ifFalse);
}
return false;
}
};
struct Jump final : Node {
BasicBlock *target;
Jump(BasicBlock *target) : target(target) {}
bool isEqual(const Node &other) const override {
if (this == &other) {
return true;
}
if (auto otherJump = dynCast<Jump>(&other)) {
return target == otherJump->target;
}
return false;
}
void print(const PrintOptions &options, unsigned depth) override {
std::printf("%sjump ", options.makeIdent(depth).c_str());
target->print(options, 0);
}
};
struct Loop final : Node {
Block *body;
Loop(Block *body) : body(body) {
body->setParent(this);
}
bool isEqual(const Node &other) const override {
if (this == &other) {
return true;
}
if (auto otherLoop = dynCast<Loop>(&other)) {
return body->isEqual(*otherLoop->body);
}
return false;
}
void print(const PrintOptions &options, unsigned depth) override {
std::printf("%sloop {\n", options.makeIdent(depth).c_str());
body->print(options, depth + 1);
std::printf("%s}\n", options.makeIdent(depth).c_str());
}
};
struct Break final : Node {
bool isEqual(const Node &other) const override {
return this == &other || dynCast<Break>(&other) != nullptr;
}
void print(const PrintOptions &options, unsigned depth) override {
std::printf("%sbreak\n", options.makeIdent(depth).c_str());
}
};
/// Owner of every node created through create(): nodes live until the
/// context is destroyed.
class Context {
  std::forward_list<std::unique_ptr<Node>> mNodes;

public:
  /// Constructs a `T` from `args`, stores it in the context and returns a
  /// non-owning pointer to it.
  template <typename T, typename... ArgsT>
    requires(std::is_constructible_v<T, ArgsT...>)
  T *create(ArgsT &&...args) {
    // Hold the node in a unique_ptr before touching the list, so it is not
    // leaked if the list insertion throws.
    auto owned = std::make_unique<T>(std::forward<ArgsT>(args)...);
    T *result = owned.get();
    mNodes.push_front(std::move(owned));
    return result;
  }
};
// Declared here, defined elsewhere. Takes the scf context and a cf basic
// block and returns an scf block.
// NOTE(review): presumably builds the structured tree for the control-flow
// graph rooted at `bb`, allocating nodes from `ctxt` - confirm against the
// definition.
scf::Block *structurize(Context &ctxt, cf::BasicBlock *bb);
// Declared here, defined elsewhere.
// NOTE(review): the name suggests it rewrites `block` so that no basic block
// node is shared between places in the tree - confirm against the definition.
void makeUniqueBasicBlocks(Context &ctxt, Block *block);
} // namespace scf

View file

@ -0,0 +1,187 @@
#include "CfBuilder.hpp"
#include "Instruction.hpp"
#include <cassert>
#include <amdgpu/RemoteMemory.hpp>
#include <unordered_set>
using namespace amdgpu;
using namespace amdgpu::shader;
/// Builds a control-flow graph of cf::BasicBlock objects by decoding shader
/// instructions read through `memory`, starting from an entry address.
struct CfgBuilder {
  cf::Context *context;
  RemoteMemory memory;

  /// Decodes instructions starting at the address of `bb` until the block
  /// has to end, and reports where control continues.
  ///
  /// A block ends at:
  ///  - S_SETPC_B64 / S_SWAPPC_B64: `bb` gets a branch-to-unknown terminator;
  ///  - S_ENDPGM: `bb` gets a return terminator;
  ///  - S_BRANCH: one successor (the branch target);
  ///  - S_CBRANCH_*: two successors (branch target, then fall-through);
  ///  - an instruction of a class listed below ("requires EXEC test"): such
  ///    an instruction always forms a block of its own.
  ///
  /// @param successors      out: up to two successor addresses.
  /// @param successorsCount out: written only when successors are produced;
  ///                        the caller must pre-initialise it to 0.
  /// @param pushWork        never called by this function.
  /// @return size of the block in bytes.
  std::size_t analyzeBb(cf::BasicBlock *bb, std::uint64_t *successors,
                        std::size_t *successorsCount,
                        [[maybe_unused]] auto pushWork) {
    auto address = bb->getAddress();
    auto instBegin = memory.getPointer<std::uint32_t>(address);
    auto instHex = instBegin;

    while (true) {
      auto instruction = Instruction(instHex);
      auto size = instruction.size();
      // Address of the instruction being examined (4 bytes per word).
      auto pc = address + ((instHex - instBegin) << 2);
      instHex += size;

      if (instruction.instClass == InstructionClass::Sop1) {
        Sop1 sop1{instHex - size};

        if (sop1.op == Sop1::Op::S_SETPC_B64 ||
            sop1.op == Sop1::Op::S_SWAPPC_B64) {
          bb->createBranchToUnknown();
          break;
        }

        continue;
      }

      if (instruction.instClass == InstructionClass::Sopp) {
        Sopp sopp{instHex - size};

        if (sopp.op == Sopp::Op::S_ENDPGM) {
          bb->createReturn();
          break;
        }

        bool isEnd = false;
        switch (sopp.op) {
        case Sopp::Op::S_BRANCH:
          successors[0] = pc + ((size + sopp.simm) << 2);
          *successorsCount = 1;
          isEnd = true;
          break;

        case Sopp::Op::S_CBRANCH_SCC0:
        case Sopp::Op::S_CBRANCH_SCC1:
        case Sopp::Op::S_CBRANCH_VCCZ:
        case Sopp::Op::S_CBRANCH_VCCNZ:
        case Sopp::Op::S_CBRANCH_EXECZ:
        case Sopp::Op::S_CBRANCH_EXECNZ:
          successors[0] = pc + ((size + sopp.simm) << 2);
          successors[1] = pc + (size << 2);
          *successorsCount = 2;
          isEnd = true;
          break;

        default:
          break;
        }

        if (isEnd) {
          break;
        }

        continue;
      }

      // move instruction that requires EXEC test to separate bb
      if (instruction.instClass == InstructionClass::Vop2 ||
          instruction.instClass == InstructionClass::Vop3 ||
          instruction.instClass == InstructionClass::Mubuf ||
          instruction.instClass == InstructionClass::Mtbuf ||
          instruction.instClass == InstructionClass::Mimg ||
          instruction.instClass == InstructionClass::Ds ||
          instruction.instClass == InstructionClass::Vintrp ||
          instruction.instClass == InstructionClass::Exp ||
          instruction.instClass == InstructionClass::Vop1 ||
          instruction.instClass == InstructionClass::Vopc ||
          instruction.instClass == InstructionClass::Smrd) {
        *successorsCount = 1;

        if (instBegin != instHex - size) {
          // if it is not first instruction in block, move end to prev
          // instruction, successor is current instruction
          instHex -= size;
          successors[0] = pc;
          break;
        }

        // First instruction of the block: the block is this single
        // instruction and control falls through to the next one.
        successors[0] = pc + (size << 2);
        break;
      }
    }

    return (instHex - instBegin) << 2;
  }

  /// Discovers every block reachable from `entryPoint` with a work list,
  /// then wires the recorded branches between the final blocks.
  ///
  /// @return the basic block that starts at `entryPoint`.
  cf::BasicBlock *buildCfg(std::uint64_t entryPoint) {
    std::vector<std::uint64_t> workList;
    workList.push_back(entryPoint);

    std::unordered_set<std::uint64_t> processed;
    processed.insert(entryPoint);

    struct BranchInfo {
      std::uint64_t source; // address of the last 4 bytes of the source block
      std::size_t count;
      std::uint64_t targets[2];
    };

    std::vector<BranchInfo> branches;

    // Queues an address for analysis the first time it is seen.
    auto enqueue = [&](std::uint64_t target) {
      if (processed.insert(target).second) {
        workList.push_back(target);
      }
    };

    while (!workList.empty()) {
      auto address = workList.back();
      workList.pop_back();

      auto bb = context->getOrCreateBasicBlock(address);
      if (bb->getSize() != 0) {
        // A non-zero size marks a block that was already analysed.
        continue;
      }

      std::uint64_t successors[2];
      std::size_t successorsCount = 0;
      std::size_t size = analyzeBb(bb, successors, &successorsCount, enqueue);
      bb->setSize(size);

      if (successorsCount == 2) {
        branches.push_back(
            {address + size - 4, 2, {successors[0], successors[1]}});
        enqueue(successors[0]);
        enqueue(successors[1]);
      } else if (successorsCount == 1) {
        branches.push_back({address + size - 4, 1, {successors[0]}});
        enqueue(successors[0]);
      }
    }

    // Branches are attached only now: the source is looked up by an address
    // inside the block rather than by the block pointer seen during analysis.
    for (const auto &branch : branches) {
      auto bb = context->getBasicBlock(branch.source);
      assert(bb);

      if (branch.count == 2) {
        bb->createConditionalBranch(
            context->getBasicBlockAt(branch.targets[0]),
            context->getBasicBlockAt(branch.targets[1]));
      } else {
        bb->createBranch(context->getBasicBlockAt(branch.targets[0]));
      }
    }

    return context->getBasicBlockAt(entryPoint);
  }
};
/// Builds the control-flow graph of the shader code at `entryPoint`, creating
/// its blocks in `ctxt`, and returns the entry basic block.
cf::BasicBlock *amdgpu::shader::buildCf(cf::Context &ctxt,
                                        RemoteMemory memory,
                                        std::uint64_t entryPoint) {
  CfgBuilder cfgBuilder{.context = &ctxt, .memory = memory};
  return cfgBuilder.buildCfg(entryPoint);
}

View file

@ -0,0 +1,389 @@
#include "Converter.hpp"
#include "CfBuilder.hpp"
#include "ConverterContext.hpp"
#include "Fragment.hpp"
#include "FragmentTerminator.hpp"
#include "Instruction.hpp"
#include "RegisterId.hpp"
#include "RegisterState.hpp"
#include "cf.hpp"
#include "amdgpu/RemoteMemory.hpp"
#include "scf.hpp"
#include "util/unreachable.hpp"
#include <compare>
#include <cstddef>
#include <forward_list>
#include <memory>
#include <spirv/spirv.hpp>
#include <unordered_map>
#include <utility>
#include <vector>
/// Dumps the instructions stored in [instBegin, instBegin + size bytes), one
/// per line, each prefixed with the indentation for `depth`.
static void printInstructions(const scf::PrintOptions &options, unsigned depth,
                              std::uint32_t *instBegin, std::size_t size) {
  const std::size_t wordCount = size / sizeof(std::uint32_t);
  std::uint32_t *const end = instBegin + wordCount;

  for (std::uint32_t *cursor = instBegin; cursor < end;) {
    auto instruction = amdgpu::shader::Instruction(cursor);

    std::printf("%s", options.makeIdent(depth).c_str());
    instruction.dump();
    std::printf("\n");

    // Instructions have variable length; size() is counted in 32-bit words.
    cursor += instruction.size();
  }
}
namespace amdgpu::shader {
class Converter {
scf::Context *scfContext;
cf::Context cfContext;
RemoteMemory memory;
Function *function = nullptr;
std::forward_list<RegisterState> states;
std::vector<RegisterState *> freeStates;
public:
void convertFunction(RemoteMemory mem, scf::Context *scfCtxt,
scf::Block *block, Function *fn) {
scfContext = scfCtxt;
function = fn;
memory = mem;
auto lastFragment = convertBlock(block, &function->entryFragment);
if (lastFragment != nullptr) {
lastFragment->builder.createBranch(fn->exitFragment.entryBlockId);
lastFragment->appendBranch(fn->exitFragment);
}
initState(&fn->exitFragment);
}
private:
RegisterState *allocateState() {
if (freeStates.empty()) {
return &states.emplace_front();
}
auto result = freeStates.back();
freeStates.pop_back();
*result = {};
return result;
}
void releaseState(RegisterState *state) {
assert(state != nullptr);
freeStates.push_back(state);
}
void initState(Fragment *fragment, std::uint64_t address = 0) {
if (fragment->registers == nullptr) {
fragment->registers = allocateState();
}
if (address != 0) {
fragment->registers->pc = address;
}
fragment->injectValuesFromPreds();
fragment->predecessors.clear();
}
void releaseStateOf(Fragment *frag) {
releaseState(frag->registers);
frag->registers = nullptr;
frag->values = {};
frag->outputs = {};
}
bool needInjectExecTest(Fragment *fragment) {
auto inst = memory.getPointer<std::uint32_t>(fragment->registers->pc);
auto instClass = getInstructionClass(*inst);
return instClass == InstructionClass::Vop2 ||
instClass == InstructionClass::Vop3 ||
instClass == InstructionClass::Mubuf ||
instClass == InstructionClass::Mtbuf ||
instClass == InstructionClass::Mimg ||
instClass == InstructionClass::Ds ||
instClass == InstructionClass::Vintrp ||
instClass == InstructionClass::Exp ||
instClass == InstructionClass::Vop1 ||
instClass == InstructionClass::Vopc/* ||
instClass == InstructionClass::Smrd*/;
}
spirv::BoolValue createExecTest(Fragment *fragment) {
auto context = fragment->context;
auto &builder = fragment->builder;
auto boolT = context->getBoolType();
auto uint32_0 = context->getUInt32(0);
auto loIsNotZero =
builder.createINotEqual(boolT, fragment->getExecLo().value, uint32_0);
auto hiIsNotZero =
builder.createINotEqual(boolT, fragment->getExecHi().value, uint32_0);
return builder.createLogicalOr(boolT, loIsNotZero, hiIsNotZero);
}
Fragment *convertBlock(scf::Block *block, Fragment *rootFragment) {
Fragment *currentFragment = nullptr;
for (scf::Node *node = block->getRootNode(); node != nullptr;
node = node->getNext()) {
if (auto bb = dynCast<scf::BasicBlock>(node)) {
if (currentFragment == nullptr) {
currentFragment = rootFragment;
} else {
auto newFragment = function->createFragment();
currentFragment->appendBranch(*newFragment);
currentFragment->builder.createBranch(newFragment->entryBlockId);
currentFragment = newFragment;
}
initState(currentFragment, bb->getAddress());
for (auto pred : currentFragment->predecessors) {
releaseStateOf(pred);
}
if (needInjectExecTest(currentFragment)) {
auto bodyFragment = function->createFragment();
auto mergeFragment = function->createFragment();
auto cond = createExecTest(currentFragment);
currentFragment->appendBranch(*bodyFragment);
currentFragment->appendBranch(*mergeFragment);
currentFragment->builder.createSelectionMerge(
mergeFragment->entryBlockId, {});
currentFragment->builder.createBranchConditional(
cond, bodyFragment->entryBlockId, mergeFragment->entryBlockId);
initState(bodyFragment, bb->getAddress());
bodyFragment->convert(bb->getSize());
bodyFragment->appendBranch(*mergeFragment);
bodyFragment->builder.createBranch(mergeFragment->entryBlockId);
initState(mergeFragment);
releaseState(currentFragment->registers);
releaseState(bodyFragment->registers);
currentFragment = mergeFragment;
} else {
currentFragment->convert(bb->getSize());
}
continue;
}
if (auto ifElse = dynCast<scf::IfElse>(node)) {
auto ifTrueFragment = function->createFragment();
auto ifFalseFragment = function->createFragment();
auto mergeFragment = function->createFragment();
currentFragment->appendBranch(*ifTrueFragment);
currentFragment->appendBranch(*ifFalseFragment);
currentFragment->builder.createSelectionMerge(
mergeFragment->entryBlockId, {});
currentFragment->builder.createBranchConditional(
currentFragment->branchCondition, ifTrueFragment->entryBlockId,
ifFalseFragment->entryBlockId);
auto ifTrueLastBlock = convertBlock(ifElse->ifTrue, ifTrueFragment);
auto ifFalseLastBlock = convertBlock(ifElse->ifFalse, ifFalseFragment);
if (ifTrueLastBlock != nullptr) {
ifTrueLastBlock->builder.createBranch(mergeFragment->entryBlockId);
ifTrueLastBlock->appendBranch(*mergeFragment);
if (ifTrueLastBlock->registers == nullptr) {
initState(ifTrueLastBlock);
}
}
if (ifFalseLastBlock != nullptr) {
ifFalseLastBlock->builder.createBranch(mergeFragment->entryBlockId);
ifFalseLastBlock->appendBranch(*mergeFragment);
if (ifFalseLastBlock->registers == nullptr) {
initState(ifFalseLastBlock);
}
}
releaseStateOf(currentFragment);
initState(mergeFragment);
if (ifTrueLastBlock != nullptr) {
releaseStateOf(ifTrueLastBlock);
}
if (ifFalseLastBlock != nullptr) {
releaseStateOf(ifFalseLastBlock);
}
currentFragment = mergeFragment;
continue;
}
if (dynCast<scf::UnknownBlock>(node)) {
auto jumpAddress = currentFragment->jumpAddress;
std::printf("jump to %lx\n", jumpAddress);
std::fflush(stdout);
if (jumpAddress == 0) {
util::unreachable("no jump register on unknown block");
}
auto block = buildCf(cfContext, memory, jumpAddress);
auto basicBlockPrinter = [this](const scf::PrintOptions &opts,
unsigned depth, scf::BasicBlock *bb) {
printInstructions(opts, depth,
memory.getPointer<std::uint32_t>(bb->getAddress()),
bb->getSize());
};
auto scfBlock = scf::structurize(*scfContext, block);
scfBlock->print({.blockPrinter = basicBlockPrinter}, 0);
std::fflush(stdout);
auto targetFragment = function->createFragment();
currentFragment->builder.createBranch(targetFragment->entryBlockId);
currentFragment->appendBranch(*targetFragment);
auto result = convertBlock(scfBlock, targetFragment);
if (currentFragment->registers == nullptr) {
initState(targetFragment);
releaseStateOf(currentFragment);
}
return result;
}
if (dynCast<scf::Return>(node)) {
currentFragment->appendBranch(function->exitFragment);
currentFragment->builder.createBranch(
function->exitFragment.entryBlockId);
return nullptr;
}
util::unreachable();
}
return currentFragment != nullptr ? currentFragment : rootFragment;
}
};
}; // namespace amdgpu::shader
/// Translates the shader whose code starts at `entry` into a SPIR-V module.
///
/// Steps, in order: declare capabilities and memory model, build and
/// structurize the control-flow graph, convert it into the main function,
/// assign descriptor set 0 and consecutive bindings (starting at
/// `bindingOffset`) to every uniform, then emit the function, its entry point
/// and execution mode for `stage`.
///
/// @param userSpgrs        user SGPR values stored on the main function.
/// @param dimX,dimY,dimZ   local size; only used for the compute stage.
/// @return the SPIR-V binary together with the description of its uniforms.
amdgpu::shader::Shader amdgpu::shader::convert(
    RemoteMemory memory, Stage stage, std::uint64_t entry,
    std::span<const std::uint32_t> userSpgrs, int bindingOffset,
    std::uint32_t dimX, std::uint32_t dimY, std::uint32_t dimZ) {
  ConverterContext ctxt(memory, stage);
  auto &moduleBuilder = ctxt.getBuilder();

  moduleBuilder.createCapability(spv::Capability::Shader);
  moduleBuilder.createCapability(spv::Capability::ImageQuery);
  moduleBuilder.createCapability(spv::Capability::ImageBuffer);
  moduleBuilder.createCapability(
      spv::Capability::UniformAndStorageBuffer8BitAccess);
  moduleBuilder.createCapability(
      spv::Capability::UniformAndStorageBuffer16BitAccess);
  moduleBuilder.createCapability(spv::Capability::Int64);
  moduleBuilder.setMemoryModel(spv::AddressingModel::Logical,
                               spv::MemoryModel::GLSL450);

  scf::Context structuredContext;
  scf::Block *structuredEntry = nullptr;
  {
    // The cf graph is only needed as input for the structurizer.
    cf::Context flowContext;
    structuredEntry = scf::structurize(structuredContext,
                                       buildCf(flowContext, memory, entry));
  }

  std::printf("========== stage: %u, user sgprs: %zu\n",
              static_cast<unsigned>(stage), userSpgrs.size());
  std::printf("structurized CFG:\n");

  auto dumpBlock = [memory](const scf::PrintOptions &opts, unsigned depth,
                            scf::BasicBlock *bb) {
    printInstructions(opts, depth,
                      memory.getPointer<std::uint32_t>(bb->getAddress()),
                      bb->getSize());
  };
  structuredEntry->print({.blockPrinter = dumpBlock}, 0);
  std::printf("==========\n");

  auto mainFunction = ctxt.createFunction(0);
  mainFunction->userSgprs = userSpgrs;
  mainFunction->stage = stage;

  // Kept as a named local: it must stay alive until the end of this function.
  Converter converter;
  converter.convertFunction(memory, &structuredContext, structuredEntry,
                            mainFunction);

  Shader result;

  std::fflush(stdout);
  mainFunction->exitFragment.outputs.clear();

  for (auto &uniform : ctxt.getUniforms()) {
    auto &exported = result.uniforms.emplace_back();
    exported.binding = bindingOffset++;

    for (int word = 0; word < 8; ++word) {
      exported.buffer[word] = uniform.buffer[word];
    }

    std::uint32_t descriptorSet = 0;

    ctxt.getBuilder().createDecorate(
        uniform.variable, spv::Decoration::DescriptorSet, {{descriptorSet}});
    ctxt.getBuilder().createDecorate(uniform.variable, spv::Decoration::Binding,
                                     {{exported.binding}});

    // Everything that is neither a sampler nor a 2D image is a buffer.
    switch (uniform.typeId) {
    case TypeId::Sampler:
      exported.kind = Shader::UniformKind::Sampler;
      break;
    case TypeId::Image2D:
      exported.kind = Shader::UniformKind::Image;
      break;
    default:
      exported.kind = Shader::UniformKind::Buffer;
      break;
    }

    exported.accessOp = uniform.accessOp;
  }

  mainFunction->insertReturn();

  for (auto fragment : mainFunction->fragments) {
    mainFunction->builder.insertBlock(fragment->builder);
  }

  mainFunction->builder.insertBlock(mainFunction->exitFragment.builder);

  moduleBuilder.insertFunction(
      mainFunction->builder, mainFunction->getResultType(),
      spv::FunctionControlMask::MaskNone, mainFunction->getFunctionType());

  // Any other stage value emits no entry point at all.
  if (stage == Stage::Vertex) {
    moduleBuilder.createEntryPoint(spv::ExecutionModel::Vertex,
                                   mainFunction->builder.id, "main",
                                   ctxt.getInterfaces());
  } else if (stage == Stage::Fragment) {
    moduleBuilder.createEntryPoint(spv::ExecutionModel::Fragment,
                                   mainFunction->builder.id, "main",
                                   ctxt.getInterfaces());
    moduleBuilder.createExecutionMode(mainFunction->builder.id,
                                      spv::ExecutionMode::OriginUpperLeft, {});
  } else if (stage == Stage::Compute) {
    moduleBuilder.createEntryPoint(spv::ExecutionModel::GLCompute,
                                   mainFunction->builder.id, "main",
                                   ctxt.getInterfaces());
    moduleBuilder.createExecutionMode(mainFunction->builder.id,
                                      spv::ExecutionMode::LocalSize,
                                      {{dimX, dimY, dimZ}});
  }

  result.spirv = ctxt.getBuilder().build(SPV_VERSION, 0);
  return result;
}

View file

@ -0,0 +1,567 @@
#include "ConverterContext.hpp"
#include "util/unreachable.hpp"
using namespace amdgpu::shader;
/// Reverse lookup in the generic type table: returns the TypeId whose cached
/// SPIR-V type equals `type`, or std::nullopt when none matches.
std::optional<TypeId> ConverterContext::getTypeIdOf(spirv::Type type) const {
  int index = 0;
  while (index < kGenericTypesCount) {
    if (mTypes[index] == type) {
      return static_cast<TypeId::enum_type>(index);
    }
    ++index;
  }

  return std::nullopt;
}
/// Looks up an already registered struct type with exactly these members.
/// Returns a default-constructed StructType when there is none.
spirv::StructType
ConverterContext::findStructType(std::span<const spirv::Type> members) {
  for (auto &candidate : mStructTypes) {
    if (!candidate.match(members)) {
      continue;
    }
    return candidate.id;
  }

  return {};
}
/// Returns the struct type with the given members, registering a new one
/// (and remembering its member list) when no matching struct exists yet.
spirv::StructType
ConverterContext::getStructType(std::span<const spirv::Type> members) {
  auto known = findStructType(members);
  if (!known) {
    auto &entry = mStructTypes.emplace_back();
    entry.id = mBuilder.createTypeStruct(members);
    entry.members.assign(members.begin(), members.end());
    known = entry.id;
  }

  return known;
}
/// Returns the pointer type to `structType` in `storageClass`, creating and
/// caching it on the struct's registry entry on first use.
///
/// `structType` must have been registered through getStructType(); otherwise
/// util::unreachable is invoked.
spirv::PointerType
ConverterContext::getStructPointerType(spv::StorageClass storageClass,
                                       spirv::StructType structType) {
  StructTypeEntry *entry = nullptr;

  // The loop variable must not reuse the parameter's name: a shadowing
  // variable makes the id comparison compare an entry with itself, so the
  // last registered struct would always be selected.
  for (auto &candidate : mStructTypes) {
    if (candidate.id != structType) {
      continue;
    }

    entry = &candidate;
    break;
  }

  if (entry == nullptr) {
    util::unreachable("Struct type not found");
  }

  auto &ptrType = entry->ptrTypes[static_cast<unsigned>(storageClass)];

  if (!ptrType) {
    ptrType = mBuilder.createTypePointer(storageClass, structType);
  }

  return ptrType;
}
/// Returns the SPIR-V type for `id`, creating and caching it on first use.
///
/// Array types are arrays of 4-component vectors decorated with an
/// ArrayStride of 16: the x8 variants hold 2 vectors, the x16 variants 4.
spirv::Type ConverterContext::getType(TypeId id) {
  auto &type = mTypes[static_cast<std::uint32_t>(id)];

  if (type) {
    return type;
  }

  // Shared by all four array cases so each one returns its own type; a case
  // without a return would fall through and be overwritten by the next one.
  const auto makeVec4Array = [&](TypeId elementType,
                                 std::uint32_t vectorCount) -> spirv::Type {
    type = mBuilder.createTypeArray(getType(elementType),
                                    getUInt32(vectorCount));
    getBuilder().createDecorate(type, spv::Decoration::ArrayStride,
                                std::array{static_cast<std::uint32_t>(16)});
    return type;
  };

  switch (id) {
  case TypeId::Void:
    return ((type = mBuilder.createTypeVoid()));
  case TypeId::Bool:
    return ((type = mBuilder.createTypeBool()));
  case TypeId::SInt8:
    return ((type = mBuilder.createTypeSInt(8)));
  case TypeId::UInt8:
    return ((type = mBuilder.createTypeUInt(8)));
  case TypeId::SInt16:
    return ((type = mBuilder.createTypeSInt(16)));
  case TypeId::UInt16:
    return ((type = mBuilder.createTypeUInt(16)));
  case TypeId::SInt32:
    return ((type = mBuilder.createTypeSInt(32)));
  case TypeId::UInt32:
    return ((type = mBuilder.createTypeUInt(32)));
  case TypeId::UInt32x2:
    return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 2)));
  case TypeId::UInt32x3:
    return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 3)));
  case TypeId::UInt32x4:
    return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 4)));
  case TypeId::UInt64:
    return ((type = mBuilder.createTypeUInt(64)));
  case TypeId::SInt64:
    return ((type = mBuilder.createTypeSInt(64)));
  case TypeId::ArrayUInt32x8:
    return makeVec4Array(TypeId::UInt32x4, 2);
  case TypeId::ArrayUInt32x16:
    return makeVec4Array(TypeId::UInt32x4, 4);
  case TypeId::Float16:
    return ((type = mBuilder.createTypeFloat(16)));
  case TypeId::Float32:
    return ((type = mBuilder.createTypeFloat(32)));
  case TypeId::Float32x2:
    return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 2)));
  case TypeId::Float32x3:
    return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 3)));
  case TypeId::Float32x4:
    return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 4)));
  case TypeId::Float64:
    return ((type = mBuilder.createTypeFloat(64)));
  case TypeId::ArrayFloat32x8:
    return makeVec4Array(TypeId::Float32x4, 2);
  case TypeId::ArrayFloat32x16:
    return makeVec4Array(TypeId::Float32x4, 4);

  case TypeId::Image2D:
    return ((type = getBuilder().createTypeImage(getFloat32Type(),
                                                 spv::Dim::Dim2D, 0, 0, 0, 1,
                                                 spv::ImageFormat::Unknown)));
  case TypeId::SampledImage2D:
    return ((type = getBuilder().createTypeSampledImage(getImage2DType())));

  case TypeId::Sampler:
    return ((type = getBuilder().createTypeSampler()));
  }

  util::unreachable();
}
/// Returns the runtime-array type with elements of type `id`, creating it on
/// first use and decorating it with an ArrayStride of `id.getSize()`.
spirv::RuntimeArrayType ConverterContext::getRuntimeArrayType(TypeId id) {
  auto &cached = mRuntimeArrayTypes[static_cast<std::uint32_t>(id)];
  if (cached) {
    return cached;
  }

  cached = mBuilder.createTypeRuntimeArray(getType(id));
  mBuilder.createDecorate(cached, spv::Decoration::ArrayStride,
                          {{static_cast<std::uint32_t>(id.getSize())}});
  return cached;
}
/// Returns the 64-bit unsigned constant `value`, emitting it once and reusing
/// the cached id afterwards.
spirv::ConstantUInt ConverterContext::getUInt64(std::uint64_t value) {
  auto &cached = mConstantUint64Map[value];
  if (cached) {
    return cached;
  }

  cached = mBuilder.createConstant64(getUInt64Type(), value);
  return cached;
}
/// Returns the 32-bit unsigned constant `value`, emitting it once and reusing
/// the cached id afterwards.
spirv::ConstantUInt ConverterContext::getUInt32(std::uint32_t value) {
  auto &cached = mConstantUint32Map[value];
  if (cached) {
    return cached;
  }

  cached = mBuilder.createConstant32(getUInt32Type(), value);
  return cached;
}
/// Returns the 32-bit signed constant whose bit pattern is `value`, emitting
/// it once and reusing the cached id afterwards.
spirv::ConstantSInt ConverterContext::getSInt32(std::uint32_t value) {
  auto &cached = mConstantSint32Map[value];
  if (cached) {
    return cached;
  }

  cached = mBuilder.createConstant32(getSint32Type(), value);
  return cached;
}
/// Returns the 32-bit float constant given by its raw 32-bit word `value`,
/// emitting it once and reusing the cached id afterwards.
spirv::ConstantFloat ConverterContext::getFloat32Raw(std::uint32_t value) {
  auto &cached = mConstantFloat32Map[value];
  if (cached) {
    return cached;
  }

  cached = mBuilder.createConstant32(getFloat32Type(), value);
  return cached;
}
/// Declares a new storage-buffer variable: a Block-decorated struct with a
/// single runtime array of `type` at offset 0. The variable is added to the
/// interface list and recorded as a new uniform.
///
/// The struct decorations are emitted only when the struct is created here,
/// not when a matching struct already exists.
///
/// NOTE(review): the returned pointer refers to an element of `mUniforms`;
/// whether it survives later insertions depends on that container's type,
/// which is declared outside this view.
UniformInfo *ConverterContext::createStorageBuffer(TypeId type) {
  std::array<spirv::Type, 1> blockMembers{getRuntimeArrayType(type)};

  auto blockStruct = findStructType(blockMembers);
  if (!blockStruct) {
    blockStruct = getStructType(blockMembers);

    getBuilder().createDecorate(blockStruct, spv::Decoration::Block, {});
    getBuilder().createMemberDecorate(
        blockStruct, 0, spv::Decoration::Offset,
        std::array{static_cast<std::uint32_t>(0)});
  }

  auto pointerType =
      getStructPointerType(spv::StorageClass::StorageBuffer, blockStruct);
  auto variable = getBuilder().createVariable(
      pointerType, spv::StorageClass::StorageBuffer);

  mInterfaces.push_back(variable);

  auto &info = mUniforms.emplace_back();
  info.index = mUniforms.size() - 1;
  info.typeId = type;
  info.type = pointerType;
  info.variable = variable;
  info.isBuffer = true;

  std::printf("new storage buffer %u of type %u\n", info.index,
              info.typeId.raw);
  return &info;
}
/// Returns the storage buffer bound to the 4-word descriptor at `vbuffer`,
/// creating it on first use.
///
/// An existing uniform matches when its first four descriptor words are
/// identical. A match that has a different element type, or that is not a
/// buffer, is reported through util::unreachable.
UniformInfo *ConverterContext::getOrCreateStorageBuffer(std::uint32_t *vbuffer,
                                                        TypeId type) {
  constexpr std::size_t kDescriptorBytes = sizeof(std::uint32_t) * 4;

  for (auto &uniform : mUniforms) {
    const bool sameDescriptor =
        std::memcmp(uniform.buffer, vbuffer, kDescriptorBytes) == 0;
    if (!sameDescriptor) {
      continue;
    }

    if (uniform.typeId != type) {
      util::unreachable("getOrCreateStorageBuffer: access to the uniform with "
                        "different type");
    }

    if (!uniform.isBuffer) {
      util::unreachable("getOrCreateStorageBuffer: uniform was constant");
    }

    return &uniform;
  }

  auto created = createStorageBuffer(type);
  std::memcpy(created->buffer, vbuffer, kDescriptorBytes);
  return created;
}
/// Returns the UniformConstant variable bound to the `size`-word descriptor
/// at `buffer`, creating it on first use.
///
/// An existing uniform matches when its first `size` descriptor words are
/// identical. A match that has a different type, or that is a buffer, is
/// reported through util::unreachable.
UniformInfo *ConverterContext::getOrCreateUniformConstant(std::uint32_t *buffer,
                                                          std::size_t size,
                                                          TypeId type) {
  const std::size_t descriptorBytes = sizeof(std::uint32_t) * size;

  for (auto &uniform : mUniforms) {
    const bool sameDescriptor =
        std::memcmp(uniform.buffer, buffer, descriptorBytes) == 0;
    if (!sameDescriptor) {
      continue;
    }

    if (uniform.typeId != type) {
      util::unreachable(
          "getOrCreateUniformConstant: access to the uniform with "
          "different type");
    }

    if (uniform.isBuffer) {
      util::unreachable("getOrCreateUniformConstant: uniform was buffer");
    }

    return &uniform;
  }

  auto pointerType = getPointerType(spv::StorageClass::UniformConstant, type);
  auto variable = getBuilder().createVariable(
      pointerType, spv::StorageClass::UniformConstant);

  mInterfaces.push_back(variable);

  auto &info = mUniforms.emplace_back();
  info.index = mUniforms.size() - 1;
  info.typeId = type;
  info.type = pointerType;
  info.variable = variable;
  info.isBuffer = false;
  std::memcpy(info.buffer, buffer, descriptorBytes);

  return &info;
}
/// Returns the thread-id input variable, creating it on first use.
/// Only the vertex stage is supported (decorated as BuiltIn VertexIndex);
/// any other stage ends in util::unreachable.
spirv::VariableValue ConverterContext::getThreadId() {
  if (mThreadId) {
    return mThreadId;
  }

  auto pointerType = getPointerType(spv::StorageClass::Input, TypeId::UInt32);
  mThreadId = mBuilder.createVariable(pointerType, spv::StorageClass::Input);

  if (mStage != Stage::Vertex) {
    util::unreachable();
  } else {
    mBuilder.createDecorate(
        mThreadId, spv::Decoration::BuiltIn,
        std::array{static_cast<std::uint32_t>(spv::BuiltIn::VertexIndex)});
  }

  mInterfaces.push_back(mThreadId);
  return mThreadId;
}
/// Returns the WorkgroupId built-in input (a 3-component uint vector),
/// creating it on first use. Compute stage only; other stages end in
/// util::unreachable.
spirv::VariableValue ConverterContext::getWorkgroupId() {
  if (mWorkgroupId) {
    return mWorkgroupId;
  }

  if (mStage != Stage::Compute) {
    util::unreachable();
  }

  auto pointerType =
      getPointerType(spv::StorageClass::Input, TypeId::UInt32x3);
  mWorkgroupId = mBuilder.createVariable(pointerType, spv::StorageClass::Input);

  mBuilder.createDecorate(
      mWorkgroupId, spv::Decoration::BuiltIn,
      {{static_cast<std::uint32_t>(spv::BuiltIn::WorkgroupId)}});
  mInterfaces.push_back(mWorkgroupId);

  return mWorkgroupId;
}
/// Returns the LocalInvocationId built-in input (a 3-component uint vector),
/// creating it on first use. Compute stage only; other stages end in
/// util::unreachable.
spirv::VariableValue ConverterContext::getLocalInvocationId() {
  if (mLocalInvocationId) {
    return mLocalInvocationId;
  }

  if (mStage != Stage::Compute) {
    util::unreachable();
  }

  auto pointerType =
      getPointerType(spv::StorageClass::Input, TypeId::UInt32x3);
  mLocalInvocationId =
      mBuilder.createVariable(pointerType, spv::StorageClass::Input);

  mBuilder.createDecorate(
      mLocalInvocationId, spv::Decoration::BuiltIn,
      std::array{static_cast<std::uint32_t>(spv::BuiltIn::LocalInvocationId)});
  mInterfaces.push_back(mLocalInvocationId);

  return mLocalInvocationId;
}
/// Returns the per-vertex output block variable, creating it on first use.
///
/// The block is a struct of { float4, float, float[1], float[1] } whose
/// members are decorated, in order, as the Position, PointSize, ClipDistance
/// and CullDistance built-ins.
spirv::VariableValue ConverterContext::getPerVertex() {
  if (mPerVertex) {
    return mPerVertex;
  }

  auto scalarT = getFloat32Type();
  auto vec4T = getFloat32x4Type();

  auto one = getUInt32(1);
  auto scalarArray1T = mBuilder.createTypeArray(scalarT, one);

  auto blockT = mBuilder.createTypeStruct(std::array{
      static_cast<spirv::Type>(vec4T),
      static_cast<spirv::Type>(scalarT),
      static_cast<spirv::Type>(scalarArray1T),
      static_cast<spirv::Type>(scalarArray1T),
  });
  mBuilder.createDecorate(blockT, spv::Decoration::Block, {});

  // Member index -> built-in it represents.
  const spv::BuiltIn memberBuiltIns[] = {
      spv::BuiltIn::Position,
      spv::BuiltIn::PointSize,
      spv::BuiltIn::ClipDistance,
      spv::BuiltIn::CullDistance,
  };
  for (int member = 0; member < 4; ++member) {
    mBuilder.createMemberDecorate(
        blockT, member, spv::Decoration::BuiltIn,
        std::array{static_cast<std::uint32_t>(memberBuiltIns[member])});
  }

  auto blockPtrT =
      mBuilder.createTypePointer(spv::StorageClass::Output, blockT);
  mPerVertex = mBuilder.createVariable(blockPtrT, spv::StorageClass::Output);

  mInterfaces.push_back(mPerVertex);
  return mPerVertex;
}
/// Returns the `FragCoord` built-in input variable (a float4), creating,
/// decorating and registering it in the interface list on first use.
spirv::VariableValue ConverterContext::getFragCoord() {
  if (!mFragCoord) {
    auto pointerType =
        getPointerType(spv::StorageClass::Input, TypeId::Float32x4);
    mFragCoord = mBuilder.createVariable(pointerType, spv::StorageClass::Input);

    mBuilder.createDecorate(
        mFragCoord, spv::Decoration::BuiltIn,
        {{static_cast<std::uint32_t>(spv::BuiltIn::FragCoord)}});

    mInterfaces.push_back(mFragCoord);
  }

  return mFragCoord;
}
/// Returns the float4 input variable bound to `location`.
///
/// Variables are cached per location in `mIns`; a new one is created in the
/// Input storage class, decorated with the location and appended to the
/// interface list the first time a location is requested.
spirv::VariableValue ConverterContext::getIn(unsigned location) {
  auto [slot, isNew] = mIns.try_emplace(location);

  if (isNew) {
    auto pointerType =
        getPointerType(spv::StorageClass::Input, TypeId::Float32x4);
    auto variable =
        mBuilder.createVariable(pointerType, spv::StorageClass::Input);
    mBuilder.createDecorate(variable, spv::Decoration::Location, {{location}});
    mInterfaces.push_back(variable);
    slot->second = variable;
  }

  return slot->second;
}
/// Returns the float4 output variable bound to `location`.
///
/// Variables are cached per location in `mOuts`; a new one is created in the
/// Output storage class, decorated with the location and appended to the
/// interface list the first time a location is requested.
spirv::VariableValue ConverterContext::getOut(unsigned location) {
  auto [slot, isNew] = mOuts.try_emplace(location);

  if (isNew) {
    auto pointerType =
        getPointerType(spv::StorageClass::Output, TypeId::Float32x4);
    auto variable =
        mBuilder.createVariable(pointerType, spv::StorageClass::Output);
    mBuilder.createDecorate(variable, spv::Decoration::Location, {{location}});
    mInterfaces.push_back(variable);
    slot->second = variable;
  }

  return slot->second;
}
/// Returns the helper function whose only block executes a kill, creating and
/// inserting it into the module on first use.
///
/// Requesting it while the stage is not `Stage::Fragment` is reported through
/// `util::unreachable()`.
spirv::Function ConverterContext::getDiscardFn() {
  if (!mDiscardFn) {
    if (mStage != Stage::Fragment) {
      util::unreachable();
    }

    auto discardFn = mBuilder.createFunctionBuilder(5);
    mDiscardFn = discardFn.id;

    // Body: a single block that only kills the invocation.
    auto body = discardFn.createBlockBuilder(5);
    body.createKill();
    discardFn.insertBlock(body);

    mBuilder.insertFunction(discardFn, getVoidType(), {},
                            getFunctionType(getVoidType(), {}));
  }

  return mDiscardFn;
}
/// Reverse lookup in the uint32 constant cache: returns the literal whose
/// cached constant id equals `id`, or `std::nullopt` if there is none.
std::optional<std::uint32_t>
ConverterContext::findUint32Value(spirv::Value id) const {
  std::optional<std::uint32_t> literal;

  for (const auto &[cachedLiteral, cachedId] : mConstantUint32Map) {
    if (cachedId == id) {
      literal = cachedLiteral;
      break;
    }
  }

  return literal;
}
/// Reverse lookup in the sint32 constant cache: returns the literal whose
/// cached constant id equals `id`, or `std::nullopt` if there is none.
std::optional<std::int32_t>
ConverterContext::findSint32Value(spirv::Value id) const {
  std::optional<std::int32_t> literal;

  for (const auto &[cachedLiteral, cachedId] : mConstantSint32Map) {
    if (cachedId == id) {
      literal = cachedLiteral;
      break;
    }
  }

  return literal;
}
/// Reverse lookup in the float32 constant cache: returns the float whose cached
/// constant id equals `id`, or `std::nullopt` if there is none.
///
/// The cache key is bit-cast to `float` before being returned.
std::optional<float> ConverterContext::findFloat32Value(spirv::Value id) const {
  std::optional<float> literal;

  for (const auto &[cachedBits, cachedId] : mConstantFloat32Map) {
    if (cachedId == id) {
      literal = std::bit_cast<float>(cachedBits);
      break;
    }
  }

  return literal;
}
/// Returns the function type with the given result and parameter types.
///
/// Previously created function types are cached in `mFunctionTypes`; an entry
/// is reused when its result type and every parameter type match. Otherwise a
/// new type is created through the builder, recorded in the cache and returned.
///
/// @param resultType result type of the function.
/// @param params     parameter types, in declaration order (may be empty).
/// @return id of the matching (possibly freshly created) function type.
spirv::FunctionType
ConverterContext::getFunctionType(spirv::Type resultType,
                                  std::span<const spirv::Type> params) {
  // Iterate by reference: each cache entry owns a vector of parameter types,
  // so copying entries would allocate on every lookup.
  for (const auto &cached : mFunctionTypes) {
    if (cached.resultType != resultType ||
        cached.params.size() != params.size()) {
      continue;
    }

    bool sameParams = true;
    for (std::size_t i = 0, end = params.size(); i < end; ++i) {
      if (cached.params[i] != params[i]) {
        sameParams = false;
        break;
      }
    }

    if (sameParams) {
      return cached.id;
    }
  }

  auto id = mBuilder.createTypeFunction(resultType, params);
  mFunctionTypes.push_back(FunctionType{
      .resultType = resultType,
      .params = std::vector<spirv::Type>(params.begin(), params.end()),
      .id = id});
  return id;
}
/// Creates a new `Function` owned by this context and wires up its entry and
/// exit fragments.
///
/// The entry fragment gets a block builder sized by `expectedSize` and is
/// registered in the function's fragment list; the exit fragment gets an empty
/// block builder and is not added to that list. The function builder is
/// created last, also sized by `expectedSize`.
Function *ConverterContext::createFunction(std::size_t expectedSize) {
  auto &function = mFunctions.emplace_front();
  function.context = this;

  auto &entry = function.entryFragment;
  entry.context = this;
  entry.function = &function;
  entry.builder = mBuilder.createBlockBuilder(expectedSize);
  entry.entryBlockId = entry.builder.id;
  function.fragments.push_back(&entry);

  auto &exitFrag = function.exitFragment;
  exitFrag.context = this;
  exitFrag.function = &function;
  exitFrag.builder = mBuilder.createBlockBuilder(0);
  exitFrag.entryBlockId = exitFrag.builder.id;

  function.builder = mBuilder.createFunctionBuilder(expectedSize);
  return &function;
}
/// Creates a new `Fragment` owned by this context with a fresh block builder
/// sized by `expectedSize`; the fragment's entry block id is the builder's id.
Fragment *ConverterContext::createFragment(std::size_t expectedSize) {
  auto &fragment = mFragments.emplace_front();
  fragment.context = this;
  fragment.builder = mBuilder.createBlockBuilder(expectedSize);
  fragment.entryBlockId = fragment.builder.id;
  return &fragment;
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,274 @@
#include "Function.hpp"
#include "ConverterContext.hpp"
#include "RegisterId.hpp"
using namespace amdgpu::shader;
// Returns the value bound to input register `id`, creating it on first use.
//
// Results are cached in `inputs`, so repeated requests for the same register
// return the same value. How a new input is materialised depends on the
// register kind (scalar / vector / attribute) and on `stage`:
//  - Stage::None: the register becomes a function parameter.
//  - other stages: the register is a constant, or a load from a built-in /
//    interface variable emitted into the entry fragment.
Value Function::createInput(RegisterId id) {
auto [it, inserted] = inputs.try_emplace(id);
if (!inserted) {
// Already requested before: the cached value must have been filled in.
assert(it->second);
return it->second;
}
auto offset = id.getOffset();
if (id.isScalar()) {
auto uint32T = context->getUInt32Type();
// Scalars covered by `userSgprs` are known up front and become constants.
if (userSgprs.size() > offset) {
return ((it->second = {uint32T, context->getUInt32(userSgprs[offset])}));
}
// Without a stage the scalar is passed in as a uint32 function parameter.
if (stage == Stage::None) {
return ((it->second =
Value{uint32T, builder.createFunctionParameter(uint32T)}));
}
// Initial values for the exec mask halves and the scc flag.
switch (id.raw) {
case RegisterId::ExecLo:
return ((it->second = {uint32T, context->getUInt32(1)}));
case RegisterId::ExecHi:
return ((it->second = {uint32T, context->getUInt32(0)}));
case RegisterId::Scc:
return ((it->second = {context->getBoolType(), context->getFalse()}));
default:
break;
}
// Remaining vertex and fragment scalars read as constant zero.
if (stage == Stage::Vertex) {
return ((it->second = {uint32T, context->getUInt32(0)}));
} else if (stage == Stage::Fragment) {
return ((it->second = {uint32T, context->getUInt32(0)}));
} else if (stage == Stage::Compute) {
// Cannot underflow: the `userSgprs.size() > offset` case returned above.
std::uint32_t offsetAfterSgprs = offset - userSgprs.size();
if (offsetAfterSgprs < 3) {
// The three scalars following the user SGPRs hold the workgroup id
// components. Load the vector once and cache all three registers.
auto workgroupIdVar = context->getWorkgroupId();
auto workgroupId = entryFragment.builder.createLoad(
context->getUint32x3Type(), workgroupIdVar);
for (uint32_t i = 0; i < 3; ++i) {
auto input = entryFragment.builder.createCompositeExtract(
uint32T, workgroupId, {{i}});
inputs[RegisterId::Scalar(userSgprs.size() + i)] = {uint32T, input};
}
return inputs[id];
}
return ((it->second = {uint32T, context->getUInt32(0)}));
}
util::unreachable();
}
// Non-scalar registers from here on.
if (stage == Stage::None) {
// Without a stage the register is passed in as a float32 function parameter.
auto float32T = context->getFloat32Type();
return (
(it->second = {float32T, builder.createFunctionParameter(float32T)}));
}
if (stage == Stage::Vertex) {
if (id.isVector()) {
auto uint32T = context->getUInt32Type();
// Vector register 0 is loaded from the variable returned by getThreadId();
// every other vector register reads as constant zero.
if (id.getOffset() == 0) {
auto input =
entryFragment.builder.createLoad(uint32T, context->getThreadId());
return ((it->second = {uint32T, input}));
}
return ((it->second = {uint32T, context->getUInt32(0)}));
}
util::unreachable("Unexpected vertex input %u. user sgprs count=%zu",
id.raw, userSgprs.size());
}
if (stage == Stage::Fragment) {
if (id.isAttr()) {
// Attribute registers load the float4 input variable at location `offset`.
auto float4T = context->getFloat32x4Type();
auto input = entryFragment.builder.createLoad(
float4T, context->getIn(id.getOffset()));
return ((it->second = {float4T, input}));
}
if (id.isVector()) {
switch (offset) {
// Vector registers 2..5 map to FragCoord components 0..3.
case 2:
case 3:
case 4:
case 5: {
auto float4T = context->getFloat32x4Type();
auto floatT = context->getFloat32Type();
auto fragCoord =
entryFragment.builder.createLoad(float4T, context->getFragCoord());
return (
(it->second = {floatT, entryFragment.builder.createCompositeExtract(
floatT, fragCoord, {{offset - 2}})}));
}
}
}
// Any other fragment input reads as constant zero.
return ((it->second = {context->getUInt32Type(), context->getUInt32(0)}));
}
if (stage == Stage::Compute) {
if (id.isVector() && offset < 3) {
// Vector registers 0..2 hold the local invocation id components. Load the
// vector once and cache all three registers.
auto uint32T = context->getUInt32Type();
auto localInvocationIdVar = context->getLocalInvocationId();
auto localInvocationId = entryFragment.builder.createLoad(
context->getUint32x3Type(), localInvocationIdVar);
for (uint32_t i = 0; i < 3; ++i) {
auto input = entryFragment.builder.createCompositeExtract(
uint32T, localInvocationId, {{i}});
inputs[RegisterId::Vector(i)] = {uint32T, input};
}
return inputs[id];
}
return ((it->second = {context->getUInt32Type(), context->getUInt32(0)}));
}
util::unreachable();
}
/// Emits the store for an export of `value` to export target `index`.
///
/// Supported targets:
///  - vertex stage: 12 (position, written to member 0 of the per-vertex block)
///    and 32..63 (parameter N, written to output location `index - 32`);
///  - fragment stage: 0..7 (written to output location `index`).
///
/// `value` must be a float32x4. Any other target, value type or stage is
/// reported through `util::unreachable()`.
///
/// @param builder block builder that receives the emitted instructions.
/// @param index   hardware export target.
/// @param value   value to export.
void Function::createExport(spirv::BlockBuilder &builder, unsigned index,
                            Value value) {
  // Hardware export target numbering used below.
  constexpr unsigned kPositionTarget = 12;
  constexpr unsigned kFirstParamTarget = 32;
  constexpr unsigned kLastParamTarget = 63; // 32 parameter slots: 32..63
  constexpr unsigned kLastColorTarget = 7;

  if (stage == Stage::Vertex) {
    if (index == kPositionTarget) {
      auto float4OutPtrT =
          context->getPointerType(spv::StorageClass::Output, TypeId::Float32x4);
      auto gl_PerVertexPosition = builder.createAccessChain(
          float4OutPtrT, context->getPerVertex(), {{context->getSInt32(0)}});

      if (value.type != context->getFloat32x4Type()) {
        util::unreachable();
      }

      builder.createStore(gl_PerVertexPosition, value.value);
      return;
    }

    if (index >= kFirstParamTarget && index <= kLastParamTarget) {
      if (value.type != context->getFloat32x4Type()) {
        util::unreachable();
      }

      builder.createStore(context->getOut(index - kFirstParamTarget),
                          value.value);
      return;
    }

    util::unreachable("Unexpected vertex export target %u", index);
  }

  if (stage == Stage::Fragment) {
    if (index <= kLastColorTarget) {
      if (value.type != context->getFloat32x4Type()) {
        util::unreachable();
      }

      builder.createStore(context->getOut(index), value.value);
      return;
    }

    util::unreachable("Unexpected fragment export target %u", index);
  }

  util::unreachable();
}
/// Computes the return type of the function from the exit fragment's outputs:
/// void for no outputs, the register's own type for a single output, and a
/// struct of all output register types otherwise.
spirv::Type Function::getResultType() {
  const auto &outputs = exitFragment.outputs;

  switch (outputs.size()) {
  case 0:
    return context->getVoidType();

  case 1:
    return exitFragment.registers->getRegister(*outputs.begin()).type;

  default:
    break;
  }

  std::vector<spirv::Type> memberTypes;
  memberTypes.reserve(outputs.size());
  for (auto regId : outputs) {
    memberTypes.push_back(exitFragment.registers->getRegister(regId).type);
  }
  return context->getStructType(memberTypes);
}
/// Returns the function type for this function.
///
/// Functions with a stage take no parameters; stage-less functions take one
/// parameter per entry of `inputs`, typed after the input value.
spirv::FunctionType Function::getFunctionType() {
  if (stage == Stage::None) {
    std::vector<spirv::Type> paramTypes;
    paramTypes.reserve(inputs.size());
    for (const auto &input : inputs) {
      paramTypes.push_back(input.second.type);
    }
    return context->getFunctionType(getResultType(), paramTypes);
  }

  return context->getFunctionType(getResultType(), {});
}
/// Creates a new fragment through the context, attaches it to this function
/// and appends it to the function's fragment list.
Fragment *Function::createFragment() {
  Fragment *fragment = context->createFragment(0);
  fragment->function = this;
  fragments.push_back(fragment);
  return fragment;
}
void Function::insertReturn() {
if (exitFragment.outputs.empty()) {
exitFragment.builder.createReturn();
return;
}
if (exitFragment.outputs.size() == 1) {
auto value =
exitFragment.registers->getRegister(*exitFragment.outputs.begin())
.value;
exitFragment.builder.createReturnValue(value);
return;
}
auto resultType = getResultType();
auto resultTypePointer = context->getBuilder().createTypePointer(
spv::StorageClass::Function, resultType);
auto resultVariable = entryFragment.builder.createVariable(
resultTypePointer, spv::StorageClass::Function);
std::uint32_t member = 0;
for (auto regId : exitFragment.outputs) {
auto value = exitFragment.registers->getRegister(regId);
auto valueTypeId = context->getTypeIdOf(value.type);
auto pointerType =
context->getPointerType(spv::StorageClass::Function, *valueTypeId);
auto valuePointer = exitFragment.builder.createAccessChain(
pointerType, resultVariable,
{{exitFragment.context->getUInt32(member++)}});
exitFragment.builder.createStore(valuePointer, value.value);
}
auto resultValue = exitFragment.builder.createLoad(resultType, resultVariable);
exitFragment.builder.createReturnValue(resultValue);
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,72 @@
#include "RegisterState.hpp"
#include "util/unreachable.hpp"
/// Returns the value currently stored for register `regId`.
///
/// Scalar ids 0..103 index `sgprs`; a fixed set of scalar ids selects the
/// dedicated members (vcc lo/hi, m0, exec lo/hi, scc, lds direct). Vector and
/// attribute ids index `vgprs` and `attrs`. Anything else is reported through
/// `util::unreachable()`.
amdgpu::shader::Value
amdgpu::shader::RegisterState::getRegister(RegisterId regId) {
  auto index = regId.getOffset();

  if (regId.isScalar()) {
    switch (index) {
    case 0 ... 103:
      return sgprs[index];

    case 106:
      return vccLo;
    case 107:
      return vccHi;

    case 124:
      return m0;

    case 126:
      return execLo;
    case 127:
      return execHi;

    case 253:
      return scc;
    case 254:
      return ldsDirect;

    default:
      break;
    }

    util::unreachable();
  }

  if (regId.isVector()) {
    return vgprs[index];
  }

  if (regId.isAttr()) {
    return attrs[index];
  }

  util::unreachable();
}
/// Stores `value` as the current value of register `regId`.
///
/// Uses the same register-id mapping as `getRegister`: scalar ids 0..103 index
/// `sgprs`, selected scalar ids address dedicated members, and vector and
/// attribute ids index `vgprs` and `attrs`. Anything else is reported through
/// `util::unreachable()`.
void amdgpu::shader::RegisterState::setRegister(RegisterId regId,
                                                Value value) {
  auto index = regId.getOffset();

  if (regId.isScalar()) {
    switch (index) {
    case 0 ... 103:
      sgprs[index] = value;
      return;

    case 106:
      vccLo = value;
      return;
    case 107:
      vccHi = value;
      return;

    case 124:
      m0 = value;
      return;

    case 126:
      execLo = value;
      return;
    case 127:
      execHi = value;
      return;

    case 253:
      scc = value;
      return;
    case 254:
      ldsDirect = value;
      return;

    default:
      break;
    }

    util::unreachable();
  }

  if (regId.isVector()) {
    vgprs[index] = value;
    return;
  }

  if (regId.isAttr()) {
    attrs[index] = value;
    return;
  }

  util::unreachable();
}

View file

@ -0,0 +1,132 @@
#include "TypeId.hpp"
#include "util/unreachable.hpp"
#include <cstdint>
/// Returns the element type of this type: `UInt32` / `Float32` for the vector
/// and array types built from them, and the type itself for everything else.
amdgpu::shader::TypeId amdgpu::shader::TypeId::getBaseType() const {
  switch (raw) {
  // Vectors and arrays of 32-bit unsigned integers.
  case TypeId::UInt32x2:
  case TypeId::UInt32x3:
  case TypeId::UInt32x4:
  case TypeId::ArrayUInt32x8:
  case TypeId::ArrayUInt32x16:
    return TypeId::UInt32;

  // Vectors and arrays of 32-bit floats.
  case TypeId::Float32x2:
  case TypeId::Float32x3:
  case TypeId::Float32x4:
  case TypeId::ArrayFloat32x8:
  case TypeId::ArrayFloat32x16:
    return TypeId::Float32;

  // Every remaining type is its own base type.
  case TypeId::Void:
  case TypeId::Bool:
  case TypeId::SInt8:
  case TypeId::UInt8:
  case TypeId::SInt16:
  case TypeId::UInt16:
  case TypeId::SInt32:
  case TypeId::UInt32:
  case TypeId::SInt64:
  case TypeId::UInt64:
  case TypeId::Float16:
  case TypeId::Float32:
  case TypeId::Float64:
  case TypeId::Sampler:
  case TypeId::Image2D:
  case TypeId::SampledImage2D:
    return raw;
  }

  util::unreachable();
}
/// Returns the size reported for this type: a fixed per-scalar size (0 for
/// void, sampler and image types), or element count times element size for the
/// vector and array types.
std::size_t amdgpu::shader::TypeId::getSize() const {
  switch (raw) {
  case TypeId::Void:
  case TypeId::Sampler:
  case TypeId::Image2D:
  case TypeId::SampledImage2D:
    return 0;

  case TypeId::Bool:
  case TypeId::SInt8:
  case TypeId::UInt8:
    return 1;

  case TypeId::SInt16:
  case TypeId::UInt16:
  case TypeId::Float16:
    return 2;

  case TypeId::SInt32:
  case TypeId::UInt32:
  case TypeId::Float32:
    return 4;

  case TypeId::SInt64:
  case TypeId::UInt64:
  case TypeId::Float64:
    return 8;

  // Composite types: derived from their element type and element count.
  case TypeId::UInt32x2:
  case TypeId::UInt32x3:
  case TypeId::UInt32x4:
  case TypeId::ArrayUInt32x8:
  case TypeId::ArrayUInt32x16:
  case TypeId::Float32x2:
  case TypeId::Float32x3:
  case TypeId::Float32x4:
  case TypeId::ArrayFloat32x8:
  case TypeId::ArrayFloat32x16:
    return getElementsCount() * getBaseType().getSize();
  }

  util::unreachable();
}
/// Returns the number of elements of this type: 0 for void, sampler and image
/// types, 1 for scalars, and the component / array length for vector and array
/// types.
std::size_t amdgpu::shader::TypeId::getElementsCount() const {
  switch (raw) {
  case TypeId::Void:
  case TypeId::Sampler:
  case TypeId::Image2D:
  case TypeId::SampledImage2D:
    return 0;

  case TypeId::Bool:
  case TypeId::SInt8:
  case TypeId::UInt8:
  case TypeId::SInt16:
  case TypeId::UInt16:
  case TypeId::SInt32:
  case TypeId::UInt32:
  case TypeId::SInt64:
  case TypeId::UInt64:
  case TypeId::Float16:
  case TypeId::Float32:
  case TypeId::Float64:
    return 1;

  case TypeId::UInt32x2:
  case TypeId::Float32x2:
    return 2;

  case TypeId::UInt32x3:
  case TypeId::Float32x3:
    return 3;

  case TypeId::UInt32x4:
  case TypeId::Float32x4:
    return 4;

  case TypeId::ArrayUInt32x8:
  case TypeId::ArrayFloat32x8:
    return 8;

  case TypeId::ArrayUInt32x16:
  case TypeId::ArrayFloat32x16:
    return 16;
  }

  util::unreachable();
}

117
hw/amdgpu/shader/src/cf.cpp Normal file
View file

@ -0,0 +1,117 @@
#include "cf.hpp"
#include <cassert>
#include <fstream>
#include <unordered_set>
/// Splits this block at `target->address`.
///
/// This block keeps the leading part; `target` receives the remaining size,
/// this block's successors (whose predecessor sets are updated to point at
/// `target`) and its terminator. This block then branches to `target`.
/// `target` must start after this block's address.
void cf::BasicBlock::split(BasicBlock *target) {
  assert(target->address > address);

  const auto headSize = target->address - address;
  target->size = size - headSize;
  size = headSize;

  // Hand every outgoing edge over to the tail block.
  const std::size_t successorCount = getSuccessorsCount();
  for (std::size_t index = 0; index < successorCount; ++index) {
    auto successor = getSuccessor(index);
    successor->predecessors.erase(this);
    successor->predecessors.insert(target);

    target->successors[index] = successors[index];
    successors[index] = nullptr;
  }

  target->terminator = terminator;
  terminator = TerminatorKind::None;

  createBranch(target);
}
/// Terminates this block with a two-way branch: successor 0 is `ifTrue`,
/// successor 1 is `ifFalse`. Both targets record this block as a predecessor.
/// The block must not already have a terminator or successors.
void cf::BasicBlock::createConditionalBranch(BasicBlock *ifTrue,
                                             BasicBlock *ifFalse) {
  assert(terminator == TerminatorKind::None);
  assert(getSuccessorsCount() == 0);

  successors[0] = ifTrue;
  ifTrue->predecessors.insert(this);

  successors[1] = ifFalse;
  ifFalse->predecessors.insert(this);

  terminator = TerminatorKind::Branch;
}
/// Terminates this block with a branch to `target`, which becomes successor 0
/// and records this block as a predecessor. The block must not already have a
/// terminator or successors.
void cf::BasicBlock::createBranch(BasicBlock *target) {
  assert(terminator == TerminatorKind::None);
  assert(getSuccessorsCount() == 0);

  successors[0] = target;
  target->predecessors.insert(this);
  terminator = TerminatorKind::Branch;
}
/// Marks this block as ending in a branch to an unknown target. No successor
/// is recorded. The block must not already have a terminator or successors.
void cf::BasicBlock::createBranchToUnknown() {
  assert(getSuccessorsCount() == 0);
  assert(terminator == TerminatorKind::None);

  terminator = TerminatorKind::BranchToUnknown;
}
/// Marks this block as ending in a return. The block must not already have a
/// terminator or successors.
void cf::BasicBlock::createReturn() {
  assert(getSuccessorsCount() == 0);
  assert(terminator == TerminatorKind::None);

  terminator = TerminatorKind::Return;
}
/// Redirects the edge from this block to `origBB` so that it leads to `newBB`.
///
/// Predecessor sets of both blocks are updated first, then the first successor
/// slot (0, then 1) holding `origBB` is overwritten. Aborts if `origBB` is in
/// neither slot.
void cf::BasicBlock::replaceSuccessor(BasicBlock *origBB, BasicBlock *newBB) {
  origBB->predecessors.erase(this);
  newBB->predecessors.insert(this);

  for (std::size_t slot = 0; slot < 2; ++slot) {
    if (successors[slot] == origBB) {
      successors[slot] = newBB;
      return;
    }
  }

  std::abort();
}
bool cf::BasicBlock::hasDirectPredecessor(const BasicBlock &block) const {
for (auto pred : predecessors) {
if (pred == &block) {
return true;
}
}
return false;
}
/// Returns true if `block` is a direct or transitive predecessor of this
/// block, i.e. this block can be reached from `block` by following edges.
///
/// When `block` is this block itself, only a direct self-edge counts.
///
/// @param block candidate predecessor.
/// @return whether `block` precedes this block in the control-flow graph.
bool cf::BasicBlock::hasPredecessor(const BasicBlock &block) const {
  if (&block == this) {
    return hasDirectPredecessor(block);
  }

  // Depth-first walk backwards over predecessor edges; `visited` keeps the
  // traversal finite in the presence of cycles.
  std::vector<const BasicBlock *> workList;
  std::unordered_set<const BasicBlock *> visited;
  workList.push_back(this);
  visited.insert(this);

  while (!workList.empty()) {
    auto node = workList.back();
    workList.pop_back();

    if (node == &block) {
      return true;
    }

    // Expand the predecessors of the node just popped (not of `this`), so the
    // search actually moves up the graph.
    for (auto pred : node->predecessors) {
      if (visited.insert(pred).second) {
        workList.push_back(pred);
      }
    }
  }

  return false;
}

View file

@ -0,0 +1,252 @@
#include "scf.hpp"
#include "cf.hpp"
#include <cassert>
#include <fstream>
#include <unordered_set>
#include <utility>
/// Truncates this block so that it ends just before `endBefore`.
///
/// The node preceding `endBefore` becomes the new last node; if there is none
/// the block becomes empty. `endBefore` itself is not modified.
void scf::Block::eraseFrom(Node *endBefore) {
  auto newLast = endBefore->getPrev();
  mEnd = newLast;

  if (newLast == nullptr) {
    mBegin = nullptr;
    return;
  }

  newLast->mNext = nullptr;
}
// Moves the nodes from `splitPoint` through the current end of this block onto
// the end of `target`.
//
// This block is cut just before `splitPoint`; the moved nodes are re-parented
// to `target` and linked after `target`'s existing last node, if any.
void scf::Block::splitInto(Block *target, Node *splitPoint) {
// Cut this block before splitPoint, remembering the old last node: it becomes
// the last node of `target`.
auto targetEnd = std::exchange(mEnd, splitPoint->mPrev);
if (mEnd != nullptr) {
mEnd->mNext = nullptr;
} else {
// splitPoint was the first node, so this block is now empty.
mBegin = nullptr;
}
// Re-parent every moved node.
for (auto node = splitPoint; node != nullptr; node = node->getNext()) {
node->mParent = target;
}
// Link the moved run after the nodes `target` already holds.
if (target->mEnd != nullptr) {
target->mEnd->mNext = splitPoint;
}
splitPoint->mPrev = target->mEnd;
target->mEnd = targetEnd;
// `target` had no nodes before: the moved run also provides its first node.
if (target->mBegin == nullptr) {
target->mBegin = splitPoint;
}
}
/// Creates a new block through `context` and moves the nodes from `splitPoint`
/// to the end of this block into it. Returns the new block.
scf::Block *scf::Block::split(Context &context, Node *splitPoint) {
  auto tail = context.create<Block>();
  splitInto(tail, splitPoint);
  return tail;
}
/// If the last node of `testBlock` is a jump whose target is a direct child of
/// `parentBlock`, returns that target; otherwise returns nullptr.
// NOTE(review): for an empty testBlock this relies on dynCast accepting
// whatever getLastNode() returns in that case — confirm.
static scf::BasicBlock *findJumpTargetIn(scf::Block *parentBlock,
                                         scf::Block *testBlock) {
  auto jump = dynCast<scf::Jump>(testBlock->getLastNode());

  if (jump != nullptr && jump->target->getParent() == parentBlock) {
    return jump->target;
  }

  return nullptr;
}
// Rewrites a trailing if/else whose one branch jumps back to an earlier node
// of `block` into an explicit loop.
//
// Returns true if the rewrite was applied, false if `block` is empty, does not
// end in an IfElse, or neither branch ends with a jump to a direct child of
// `block`.
static bool transformJumpToLoop(scf::Context &ctxt, scf::Block *block) {
// bb0
// bb1
// if true {
// bb2
// jump bb1
// } else {
// bb3
// }
//
// -->
//
// bb0
// loop {
// bb1
// if false {
// break
// }
// bb2
// }
// bb3
if (block->isEmpty()) {
return false;
}
auto ifElse = dynCast<scf::IfElse>(block->getLastNode());
if (ifElse == nullptr) {
return false;
}
// Find the branch that jumps back into `block` (the loop branch); the other
// branch is the one that leaves the loop. Try ifTrue first, then ifFalse.
auto loopTarget = findJumpTargetIn(block, ifElse->ifTrue);
auto loopBlock = ifElse->ifTrue;
auto invariantBlock = ifElse->ifFalse;
if (loopTarget == nullptr) {
loopTarget = findJumpTargetIn(block, ifElse->ifFalse);
loopBlock = ifElse->ifFalse;
invariantBlock = ifElse->ifTrue;
if (loopTarget == nullptr) {
return false;
}
}
// Everything from the jump target to the end of `block` (including the
// if/else itself) becomes the loop body.
auto loopBody = block->split(ctxt, loopTarget);
auto loop = ctxt.create<scf::Loop>(loopBody);
block->append(loop);
// The exiting branch's nodes run after the loop, so move them behind it.
for (auto node = invariantBlock->getRootNode(); node != nullptr;) {
auto nextNode = node->getNext();
invariantBlock->detachNode(node);
block->append(node);
node = nextNode;
}
// Drop the back-edge jump, then move the rest of the loop branch to the end of
// the loop body.
loopBlock->detachNode(loopBlock->getLastNode());
for (auto node = loopBlock->getRootNode(); node != nullptr;) {
auto nextNode = node->getNext();
loopBlock->detachNode(node);
loopBody->append(node);
node = nextNode;
}
// The now-empty exiting branch leaves the loop.
invariantBlock->append(ctxt.create<scf::Break>());
return true;
}
/// Hoists the common tail of both branches of `ifElse` into `block`.
///
/// Walks both branches backwards while their nodes compare equal. If at least
/// one trailing node matches, the matching run of the true branch is moved to
/// the end of `block` and the matching run of the false branch is cut off.
/// Returns whether anything was moved.
static bool moveSameLastBlocksTo(scf::IfElse *ifElse, scf::Block *block) {
  auto trueBranch = ifElse->ifTrue;
  auto falseBranch = ifElse->ifFalse;

  if (trueBranch->isEmpty() || falseBranch->isEmpty()) {
    return false;
  }

  // Step backwards over the identical trailing nodes.
  auto trueCursor = trueBranch->getLastNode();
  auto falseCursor = falseBranch->getLastNode();
  while (trueCursor != nullptr && falseCursor != nullptr &&
         trueCursor->isEqual(*falseCursor)) {
    trueCursor = trueCursor->getPrev();
    falseCursor = falseCursor->getPrev();
  }

  // The cursor never moved: the last nodes already differ.
  if (trueCursor == trueBranch->getLastNode()) {
    return false;
  }

  // Step forward again to the first node of each common tail.
  if (trueCursor != nullptr) {
    trueCursor = trueCursor->getNext();
  } else {
    trueCursor = trueBranch->getRootNode();
  }

  if (falseCursor != nullptr) {
    falseCursor = falseCursor->getNext();
  } else {
    falseCursor = falseBranch->getRootNode();
  }

  trueBranch->splitInto(block, trueCursor);
  falseBranch->eraseFrom(falseCursor);
  return true;
}
// Converts a control-flow graph of cf::BasicBlock nodes into a tree of
// structured scf nodes (blocks, if/else, loops, jumps), allocating every node
// through the given scf::Context.
class Structurizer {
scf::Context &context;
public:
Structurizer(scf::Context &context) : context(context) {}
// Structurizes the graph reachable from `bb`, starting with an empty visited
// map.
scf::Block *structurize(cf::BasicBlock *bb) {
return structurizeBlock(bb, {});
}
public:
// Structurizes both targets of a two-way branch and wraps them in an IfElse.
// Each branch is structurized with its own copy of `visited` (structurizeBlock
// takes the map by value), so blocks discovered in one branch are not marked
// as visited for the other.
scf::IfElse *structurizeIfElse(
cf::BasicBlock *ifTrue, cf::BasicBlock *ifFalse,
std::unordered_map<cf::BasicBlock *, scf::BasicBlock *> &visited) {
auto ifTrueBlock = structurizeBlock(ifTrue, visited);
auto ifFalseBlock = structurizeBlock(ifFalse, visited);
return context.create<scf::IfElse>(ifTrueBlock, ifFalseBlock);
}
// Builds one structured block starting at `bb`.
//
// `visited` maps the cf blocks already emitted on the way to `bb` to their scf
// counterparts. It is taken by value on purpose: additions made here are local
// to this call and the calls nested below it.
scf::Block *structurizeBlock(
cf::BasicBlock *bb,
std::unordered_map<cf::BasicBlock *, scf::BasicBlock *> visited) {
auto result = context.create<scf::Block>();
std::vector<cf::BasicBlock *> workList;
workList.push_back(bb);
while (!workList.empty()) {
auto block = workList.back();
workList.pop_back();
auto [it, inserted] = visited.try_emplace(block, nullptr);
if (!inserted) {
// Already emitted: reference the existing node with a jump instead of
// emitting the block again.
result->append(context.create<scf::Jump>(it->second));
continue;
}
auto scfBlock = context.create<scf::BasicBlock>(block->getAddress(),
block->getSize());
it->second = scfBlock;
result->append(scfBlock);
switch (block->getTerminator()) {
case cf::TerminatorKind::None:
// Every block is expected to have a terminator at this point.
std::abort();
break;
case cf::TerminatorKind::Branch:
// A successor count other than 1 or 2 appends nothing further.
switch (block->getSuccessorsCount()) {
case 1:
// Unconditional branch: continue linearly with the target.
workList.push_back(block->getSuccessor(0));
break;
case 2: {
auto ifElse = structurizeIfElse(block->getSuccessor(0),
block->getSuccessor(1), visited);
result->append(ifElse);
// Simplify until neither rewrite applies: hoist the branches' common
// tail behind the if/else, and turn backward jumps into loops.
while (moveSameLastBlocksTo(ifElse, result) ||
transformJumpToLoop(context, result)) {
;
}
break;
}
}
break;
case cf::TerminatorKind::BranchToUnknown:
result->append(context.create<scf::UnknownBlock>());
break;
case cf::TerminatorKind::Return:
result->append(context.create<scf::Return>());
break;
}
}
return result;
}
};
/// Entry point: builds the structured control-flow tree for the graph rooted
/// at `bb`, allocating nodes through `ctxt`.
scf::Block *scf::structurize(Context &ctxt, cf::BasicBlock *bb) {
  Structurizer structurizer{ctxt};
  return structurizer.structurize(bb);
}

@ -1 +1 @@
Subproject commit 05d35b71483880246bc4c1a28f857e9046af7c36
Subproject commit 6a093985c4a331661fd47ff9f1c06e4b9b102002