mirror of https://github.com/RPCSX/rpcsx.git
synced 2026-02-08 08:44:39 +01:00

Added amdgpu hw project

This commit is contained in:
parent 1fdadaaee9
commit a8af9198bf
CMakeLists.txt
@@ -7,3 +7,4 @@ set(CMAKE_CXX_STANDARD 23)
 add_subdirectory(3rdparty/crypto)
 add_subdirectory(orbis-kernel)
 add_subdirectory(rpcsx-os)
+add_subdirectory(hw/amdgpu)
hw/amdgpu/CMakeLists.txt (new file, 17 lines)
@@ -0,0 +1,17 @@
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_EXTENSIONS off)

add_subdirectory(bridge)
add_subdirectory(device)
add_subdirectory(shader)
add_subdirectory(lib/libspirv)

project(amdgpu)

add_library(${PROJECT_NAME} INTERFACE)
target_include_directories(${PROJECT_NAME} INTERFACE include)

add_library(amdgpu::base ALIAS ${PROJECT_NAME})
hw/amdgpu/bridge/CMakeLists.txt (new file, 16 lines)
@@ -0,0 +1,16 @@
project(libamdgpu-bridge)
set(PROJECT_PATH amdgpu/bridge)

set(INCLUDE
  include/${PROJECT_PATH}/bridge.hpp
)

set(SRC
  src/bridge.cpp
)

add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC})
target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH})
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "")
add_library(amdgpu::bridge ALIAS ${PROJECT_NAME})
set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON)
hw/amdgpu/bridge/include/amdgpu/bridge/bridge.hpp (new file, 256 lines)
@@ -0,0 +1,256 @@
#pragma once

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <initializer_list>

namespace amdgpu::bridge {
enum class CommandId : std::uint32_t {
  Nop,
  SetUpSharedMemory,
  ProtectMemory,
  CommandBuffer,
  Flip,
  DoFlip,
  SetBuffer
};

struct CmdMemoryProt {
  std::uint64_t address;
  std::uint64_t size;
  std::uint32_t prot;
};

struct CmdCommandBuffer {
  std::uint64_t queue;
  std::uint64_t address;
  std::uint64_t size;
};

struct CmdBuffer {
  std::uint32_t bufferIndex;
  std::uint32_t width;
  std::uint32_t height;
  std::uint32_t pitch;
  std::uint64_t address;
  std::uint32_t pixelFormat;
  std::uint32_t tilingMode;
};

struct CmdFlip {
  std::uint32_t bufferIndex;
  std::uint64_t arg;
};

struct BridgeHeader {
  std::uint64_t size;
  std::uint64_t info;
  std::uint32_t pullerPid;
  std::uint32_t pusherPid;
  volatile std::uint64_t flags;
  std::uint64_t vmAddress;
  std::uint64_t vmSize;
  char vmName[32];
  volatile std::uint32_t flipBuffer;
  volatile std::uint64_t flipArg;
  volatile std::uint64_t flipCount;
  std::uint32_t memoryAreaCount;
  std::uint32_t commandBufferCount;
  std::uint32_t bufferCount;
  CmdMemoryProt memoryAreas[128];
  CmdCommandBuffer commandBuffers[32];
  CmdBuffer buffers[8];

  volatile std::uint64_t pull;
  volatile std::uint64_t push;
  std::uint64_t commands[];
};

struct Command {
  CommandId id;

  union {
    CmdMemoryProt memoryProt;
    CmdCommandBuffer commandBuffer;
    CmdBuffer buffer;
    CmdFlip flip;
  };
};

enum class BridgeFlags {
  VmConfigured = 1 << 0,
  PushLock = 1 << 1,
  PullLock = 1 << 2,
};

class BridgePusher {
  BridgeHeader *buffer = nullptr;

public:
  BridgePusher() = default;
  BridgePusher(BridgeHeader *buffer) : buffer(buffer) {}

  void setVm(std::uint64_t address, std::uint64_t size, const char *name) {
    buffer->vmAddress = address;
    buffer->vmSize = size;
    std::strncpy(buffer->vmName, name, sizeof(buffer->vmName));
    buffer->flags |= static_cast<std::uint64_t>(BridgeFlags::VmConfigured);
  }

  void sendMemoryProtect(std::uint64_t address, std::uint64_t size,
                         std::uint32_t prot) {
    sendCommand(CommandId::ProtectMemory, {address, size, prot});
  }

  void sendCommandBuffer(std::uint64_t queue, std::uint64_t address,
                         std::uint64_t size) {
    sendCommand(CommandId::CommandBuffer, {queue, address, size});
  }

  void sendSetBuffer(std::uint32_t bufferIndex, std::uint64_t address,
                     std::uint32_t width, std::uint32_t height,
                     std::uint32_t pitch, std::uint32_t pixelFormat,
                     std::uint32_t tilingMode) {
    sendCommand(CommandId::SetBuffer,
                {static_cast<std::uint64_t>(bufferIndex) << 32 | tilingMode,
                 address, static_cast<std::uint64_t>(width) << 32 | height,
                 static_cast<std::uint64_t>(pitch) << 32 | pixelFormat});
  }

  void sendFlip(std::uint32_t bufferIndex, std::uint64_t arg) {
    sendCommand(CommandId::Flip, {bufferIndex, arg});
  }

  void sendDoFlip() { sendCommand(CommandId::DoFlip, {}); }

  void wait() {
    while (buffer->pull != buffer->push)
      ;
  }

private:
  static std::uint64_t makeCommandHeader(CommandId id, std::size_t cmdSize) {
    return static_cast<std::uint64_t>(id) |
           (static_cast<std::uint64_t>(cmdSize - 1) << 32);
  }

  void sendCommand(CommandId id, std::initializer_list<std::uint64_t> args) {
    std::size_t cmdSize = args.size() + 1;
    std::uint64_t pos = getPushPosition(cmdSize);

    // Header word: the requested command id in the low 32 bits, argument
    // count in the high bits.
    buffer->commands[pos++] = makeCommandHeader(id, cmdSize);
    for (auto arg : args) {
      buffer->commands[pos++] = arg;
    }
    buffer->push = pos;
  }

  std::uint64_t getPushPosition(std::uint64_t cmdSize) {
    std::uint64_t position = buffer->push;

    if (position + cmdSize > buffer->size) {
      if (position < buffer->size) {
        buffer->commands[position] =
            static_cast<std::uint64_t>(CommandId::Nop) |
            ((buffer->size - position - 1) << 32);
      }

      position = 0;
      waitPuller(cmdSize);
    }

    return position;
  }
  void waitPuller(std::uint64_t pullValue) {
    while (buffer->pull < pullValue) {
      ;
    }
  }
};

class BridgePuller {
  BridgeHeader *buffer = nullptr;

public:
  BridgePuller() = default;
  BridgePuller(BridgeHeader *buffer) : buffer(buffer) {}

  std::size_t pullCommands(Command *commands, std::size_t maxCount) {
    std::size_t processed = 0;

    while (processed < maxCount) {
      if (buffer->pull == buffer->push) {
        break;
      }

      auto pos = buffer->pull;
      auto cmd = buffer->commands[pos];
      CommandId cmdId = static_cast<CommandId>(cmd);
      std::uint32_t argsCount = cmd >> 32;

      if (cmdId != CommandId::Nop) {
        commands[processed++] =
            unpackCommand(cmdId, buffer->commands + pos + 1, argsCount);
      }

      auto newPull = pos + argsCount + 1;

      if (newPull >= buffer->size) {
        newPull = 0;
      }

      buffer->pull = newPull;
    }

    return processed;
  }

private:
  Command unpackCommand(CommandId command, const std::uint64_t *args,
                        std::uint32_t argsCount) {
    Command result;
    result.id = command;

    switch (command) {
    case CommandId::Nop:
    case CommandId::SetUpSharedMemory:
    case CommandId::DoFlip:
      return result;

    case CommandId::ProtectMemory:
      result.memoryProt.address = args[0];
      result.memoryProt.size = args[1];
      result.memoryProt.prot = args[2];
      return result;

    case CommandId::CommandBuffer:
      result.commandBuffer.queue = args[0];
      result.commandBuffer.address = args[1];
      result.commandBuffer.size = args[2];
      return result;

    case CommandId::Flip:
      result.flip.bufferIndex = args[0];
      result.flip.arg = args[1];
      return result;

    case CommandId::SetBuffer:
      result.buffer.bufferIndex = static_cast<std::uint32_t>(args[0] >> 32);
      result.buffer.address = args[1];
      result.buffer.width = static_cast<std::uint32_t>(args[2] >> 32);
      result.buffer.height = static_cast<std::uint32_t>(args[2]);
      result.buffer.pitch = static_cast<std::uint32_t>(args[3] >> 32);
      result.buffer.pixelFormat = static_cast<std::uint32_t>(args[3]);
      result.buffer.tilingMode = static_cast<std::uint32_t>(args[0]);
      return result;
    }

    __builtin_trap();
  }
};

BridgeHeader *createShmCommandBuffer(const char *name);
BridgeHeader *openShmCommandBuffer(const char *name);
void destroyShmCommandBuffer(BridgeHeader *buffer);
void unlinkShm(const char *name);
} // namespace amdgpu::bridge
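Each queue entry in the header above is one 64-bit word followed by its arguments: makeCommandHeader packs the CommandId into the low 32 bits and cmdSize - 1 (the argument count) into the high 32 bits, and pullCommands decodes it the same way. A minimal, self-contained sketch of that round trip (not part of the commit; the values mirror CommandId::ProtectMemory = 2 with three arguments):

#include <cassert>
#include <cstdint>

int main() {
  // Pack: ProtectMemory (id = 2) with three arguments -> cmdSize = 4.
  std::uint64_t header = static_cast<std::uint64_t>(2) |
                         (static_cast<std::uint64_t>(4 - 1) << 32);

  // Unpack exactly like BridgePuller::pullCommands does.
  auto id = static_cast<std::uint32_t>(header);               // low 32 bits: CommandId
  auto argsCount = static_cast<std::uint32_t>(header >> 32);  // high 32 bits

  assert(id == 2);        // CommandId::ProtectMemory
  assert(argsCount == 3); // three argument words follow the header
}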
hw/amdgpu/bridge/src/bridge.cpp (new file, 81 lines)
@@ -0,0 +1,81 @@
#include "bridge.hpp"

#include <fcntl.h>
#include <new>
#include <sys/mman.h>
#include <unistd.h>

static int gShmFd = -1;
static constexpr std::size_t kShmSize = sizeof(amdgpu::bridge::BridgeHeader) +
                                        (sizeof(std::uint64_t) * (1024 * 1024));
amdgpu::bridge::BridgeHeader *
amdgpu::bridge::createShmCommandBuffer(const char *name) {
  if (gShmFd != -1) {
    return nullptr;
  }

  unlinkShm(name);

  int fd = ::shm_open(name, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);

  if (fd == -1) {
    return nullptr;
  }

  if (ftruncate(fd, kShmSize) < 0) {
    ::close(fd);
    return nullptr;
  }

  void *memory =
      ::mmap(nullptr, kShmSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

  if (memory == MAP_FAILED) {
    ::close(fd);
    return nullptr;
  }

  gShmFd = fd;
  auto result = new (memory) amdgpu::bridge::BridgeHeader();
  result->size = (kShmSize - sizeof(amdgpu::bridge::BridgeHeader)) /
                 sizeof(std::uint64_t);
  return result;
}

amdgpu::bridge::BridgeHeader *
amdgpu::bridge::openShmCommandBuffer(const char *name) {
  if (gShmFd != -1) {
    return nullptr;
  }

  int fd = ::shm_open(name, O_RDWR, S_IRUSR | S_IWUSR);

  if (fd == -1) {
    return nullptr;
  }

  void *memory =
      ::mmap(nullptr, kShmSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

  if (memory == MAP_FAILED) {
    ::close(fd);
    return nullptr;
  }

  gShmFd = fd;
  return new (memory) amdgpu::bridge::BridgeHeader;
}

void amdgpu::bridge::destroyShmCommandBuffer(
    amdgpu::bridge::BridgeHeader *buffer) {
  if (gShmFd == -1) {
    __builtin_trap();
  }

  buffer->~BridgeHeader();
  ::close(gShmFd);
  gShmFd = -1;
  ::munmap(buffer, kShmSize);
}

void amdgpu::bridge::unlinkShm(const char *name) { ::shm_unlink(name); }
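The two ends of the bridge live in different processes: one side creates the shared-memory ring and pushes commands, the other opens it and drains them. A hedged sketch of that wiring using only the API declared above; the shared-memory name, addresses and protection value are illustrative:

#include <amdgpu/bridge/bridge.hpp>
#include <cstddef>
#include <cstdio>

// Producer side (e.g. the OS emulator process): creates the ring, pushes work.
void producerExample() {
  auto *header = amdgpu::bridge::createShmCommandBuffer("/rpcsx-bridge");
  if (!header)
    return;

  amdgpu::bridge::BridgePusher pusher(header);
  pusher.sendMemoryProtect(0x10000, 0x4000, /*prot=*/3);
  pusher.sendFlip(/*bufferIndex=*/0, /*arg=*/0);
  pusher.wait(); // spins until the puller in the other process drains the queue
}

// Consumer side (the GPU device process): opens the same ring and drains it.
void consumerExample() {
  auto *header = amdgpu::bridge::openShmCommandBuffer("/rpcsx-bridge");
  if (!header)
    return;

  amdgpu::bridge::Command commands[32];
  std::size_t count =
      amdgpu::bridge::BridgePuller(header).pullCommands(commands, 32);
  for (std::size_t i = 0; i < count; ++i)
    std::printf("command id: %u\n", static_cast<unsigned>(commands[i].id));
}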
hw/amdgpu/device/CMakeLists.txt (new file, 66 lines)
@@ -0,0 +1,66 @@
project(libamdgpu-device)
set(PROJECT_PATH amdgpu/device)

set(SRC
  src/device.cpp
)

function(add_precompiled_vulkan_spirv target)
  add_library(${target} INTERFACE)
  set(SPIRV_GEN_ROOT_DIR "spirv-gen/include/")
  set(SPIRV_GEN_DIR "${SPIRV_GEN_ROOT_DIR}/shaders")

  cmake_path(ABSOLUTE_PATH SPIRV_GEN_ROOT_DIR BASE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} NORMALIZE OUTPUT_VARIABLE outputrootdir)
  cmake_path(ABSOLUTE_PATH SPIRV_GEN_DIR BASE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} NORMALIZE OUTPUT_VARIABLE outputdir)
  file(MAKE_DIRECTORY ${outputrootdir})
  file(MAKE_DIRECTORY ${outputdir})
  target_include_directories(${target} INTERFACE ${outputrootdir})

  foreach(input IN LISTS ARGN)
    cmake_path(GET input FILENAME inputname)
    cmake_path(REPLACE_EXTENSION inputname LAST_ONLY .h OUTPUT_VARIABLE outputname)
    cmake_path(APPEND outputdir ${outputname} OUTPUT_VARIABLE outputpath)
    cmake_path(REMOVE_EXTENSION inputname LAST_ONLY OUTPUT_VARIABLE varname)

    string(REPLACE "." "_" varname ${varname})
    string(PREPEND varname "spirv_")

    add_custom_command(
      OUTPUT ${outputpath}
      COMMAND glslangValidator -V --vn "${varname}" -o "${outputpath}" "${CMAKE_CURRENT_SOURCE_DIR}/${input}"
      DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${input}"
      COMMENT "Generating ${outputname}..."
    )

    set(subtarget ".${target}-subtarget-${outputname}")
    add_custom_target(${subtarget} DEPENDS ${outputpath})
    add_dependencies(${target} ${subtarget})
  endforeach()
endfunction()

add_precompiled_vulkan_spirv(${PROJECT_NAME}-shaders
  src/rect_list.geom.glsl
)

find_package(SPIRV-Tools REQUIRED CONFIG)
find_package(SPIRV-Tools-opt REQUIRED CONFIG)

add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC})
target_link_libraries(${PROJECT_NAME}
  PUBLIC
    spirv
    amdgpu::base
    amdgpu::bridge
    amdgpu::shader
    util
    SPIRV-Tools
    SPIRV-Tools-opt

  PRIVATE
    ${PROJECT_NAME}-shaders
)

target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH})
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "")
add_library(amdgpu::device ALIAS ${PROJECT_NAME})
set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON)
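For each input, add_precompiled_vulkan_spirv runs glslangValidator with --vn, so src/rect_list.geom.glsl becomes a generated header shaders/rect_list.geom.h exposing an array of SPIR-V words named spirv_rect_list_geom (the name follows from the string() edits above). A sketch of how a consumer could turn that array into a Vulkan shader module; the Vulkan calls are standard API usage assumed for illustration, since device.cpp itself is suppressed below:

// Illustrative only; not code from this commit.
#include "shaders/rect_list.geom.h" // generated into spirv-gen/include/shaders/
#include <vulkan/vulkan.h>

VkShaderModule createRectListGeomModule(VkDevice device) {
  VkShaderModuleCreateInfo info{};
  info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
  info.codeSize = sizeof(spirv_rect_list_geom); // size in bytes
  info.pCode = spirv_rect_list_geom;            // array of 32-bit SPIR-V words

  VkShaderModule module = VK_NULL_HANDLE;
  vkCreateShaderModule(device, &info, nullptr, &module);
  return module;
}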
hw/amdgpu/device/include/amdgpu/device/device.hpp (new file, 1542 lines)
File diff suppressed because it is too large
hw/amdgpu/device/include/amdgpu/device/pm4.hpp (new file, 102 lines)
@@ -0,0 +1,102 @@
#pragma once

namespace amdgpu {
enum PM4Opcodes {
  NOP = 0x10,
  SET_BASE = 0x11,
  CLEAR_STATE = 0x12,
  INDEX_BUFFER_SIZE = 0x13,
  DISPATCH_DIRECT = 0x15,
  DISPATCH_INDIRECT = 0x16,
  INDIRECT_BUFFER_END = 0x17,
  MODE_CONTROL = 0x18,
  ATOMIC_GDS = 0x1D,
  ATOMIC_MEM = 0x1E,
  OCCLUSION_QUERY = 0x1F,
  SET_PREDICATION = 0x20,
  REG_RMW = 0x21,
  COND_EXEC = 0x22,
  PRED_EXEC = 0x23,
  DRAW_INDIRECT = 0x24,
  DRAW_INDEX_INDIRECT = 0x25,
  INDEX_BASE = 0x26,
  DRAW_INDEX_2 = 0x27,
  CONTEXT_CONTROL = 0x28,
  DRAW_INDEX_OFFSET = 0x29,
  INDEX_TYPE = 0x2A,
  DRAW_INDEX = 0x2B,
  DRAW_INDIRECT_MULTI = 0x2C,
  DRAW_INDEX_AUTO = 0x2D,
  DRAW_INDEX_IMMD = 0x2E,
  NUM_INSTANCES = 0x2F,
  DRAW_INDEX_MULTI_AUTO = 0x30,
  INDIRECT_BUFFER_32 = 0x32,
  INDIRECT_BUFFER_CONST = 0x33,
  STRMOUT_BUFFER_UPDATE = 0x34,
  DRAW_INDEX_OFFSET_2 = 0x35,
  DRAW_PREAMBLE = 0x36,
  WRITE_DATA = 0x37,
  DRAW_INDEX_INDIRECT_MULTI = 0x38,
  MEM_SEMAPHORE = 0x39,
  MPEG_INDEX = 0x3A,
  COPY_DW = 0x3B,
  WAIT_REG_MEM = 0x3C,
  MEM_WRITE = 0x3D,
  INDIRECT_BUFFER_3F = 0x3F,
  COPY_DATA = 0x40,
  CP_DMA = 0x41,
  PFP_SYNC_ME = 0x42,
  SURFACE_SYNC = 0x43,
  ME_INITIALIZE = 0x44,
  COND_WRITE = 0x45,
  EVENT_WRITE = 0x46,
  EVENT_WRITE_EOP = 0x47,
  EVENT_WRITE_EOS = 0x48,
  RELEASE_MEM = 0x49,
  PREAMBLE_CNTL = 0x4A,
  RB_OFFSET = 0x4B,
  ALU_PS_CONST_BUFFER_COPY = 0x4C,
  ALU_VS_CONST_BUFFER_COPY = 0x4D,
  ALU_PS_CONST_UPDATE = 0x4E,
  ALU_VS_CONST_UPDATE = 0x4F,
  DMA_DATA = 0x50,
  ONE_REG_WRITE = 0x57,
  AQUIRE_MEM = 0x58,
  REWIND = 0x59,
  LOAD_UCONFIG_REG = 0x5E,
  LOAD_SH_REG = 0x5F,
  LOAD_CONFIG_REG = 0x60,
  LOAD_CONTEXT_REG = 0x61,
  SET_CONFIG_REG = 0x68,
  SET_CONTEXT_REG = 0x69,
  SET_ALU_CONST = 0x6A,
  SET_BOOL_CONST = 0x6B,
  SET_LOOP_CONST = 0x6C,
  SET_RESOURCE = 0x6D,
  SET_SAMPLER = 0x6E,
  SET_CTL_CONST = 0x6F,
  SET_RESOURCE_OFFSET = 0x70,
  SET_ALU_CONST_VS = 0x71,
  SET_ALU_CONST_DI = 0x72,
  SET_CONTEXT_REG_INDIRECT = 0x73,
  SET_RESOURCE_INDIRECT = 0x74,
  SET_APPEND_CNT = 0x75,
  SET_SH_REG = 0x76,
  SET_SH_REG_OFFSET = 0x77,
  SET_QUEUE_REG = 0x78,
  SET_UCONFIG_REG = 0x79,
  SCRATCH_RAM_WRITE = 0x7D,
  SCRATCH_RAM_READ = 0x7E,
  LOAD_CONST_RAM = 0x80,
  WRITE_CONST_RAM = 0x81,
  DUMP_CONST_RAM = 0x83,
  INCREMENT_CE_COUNTER = 0x84,
  INCREMENT_DE_COUNTER = 0x85,
  WAIT_ON_CE_COUNTER = 0x86,
  WAIT_ON_DE_COUNTER_DIFF = 0x88,
  SWITCH_BUFFER = 0x8B,
};

const char *pm4OpcodeToString(int opcode);
} // namespace amdgpu
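These are the type-3 packet opcodes the command processor understands. A sketch of walking a PM4 stream and naming each packet with pm4OpcodeToString; the header layout used here (type in bits 30-31, payload dword count minus one in bits 16-29, opcode in bits 8-15) is the commonly documented GCN format and is assumed rather than taken from this commit:

#include <amdgpu/device/pm4.hpp>
#include <cstddef>
#include <cstdint>
#include <cstdio>

void dumpPm4(const std::uint32_t *cmds, std::size_t count) {
  for (std::size_t pos = 0; pos < count;) {
    std::uint32_t header = cmds[pos];
    std::uint32_t type = header >> 30;
    std::uint32_t len = ((header >> 16) & 0x3fff) + 1; // payload dwords
    std::uint32_t op = (header >> 8) & 0xff;

    if (type == 3) {
      std::printf("%s (%u payload dwords)\n", amdgpu::pm4OpcodeToString(op), len);
      pos += len + 1; // header + payload
    } else {
      ++pos; // not a type-3 packet; this sketch just skips the dword
    }
  }
}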
hw/amdgpu/device/include/amdgpu/device/tiler.hpp (new file, 681 lines)
@@ -0,0 +1,681 @@
#pragma once
|
||||
#include "device.hpp"
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
|
||||
namespace amdgpu::device {
|
||||
namespace Gnm {
|
||||
enum GpuMode { kGpuModeBase = 0, kGpuModeNeo = 1 };
|
||||
enum TileMode {
|
||||
kTileModeDepth_2dThin_64 = 0x00000000,
|
||||
kTileModeDepth_2dThin_128 = 0x00000001,
|
||||
kTileModeDepth_2dThin_256 = 0x00000002,
|
||||
kTileModeDepth_2dThin_512 = 0x00000003,
|
||||
kTileModeDepth_2dThin_1K = 0x00000004,
|
||||
kTileModeDepth_2dThinPrt_256 = 0x00000006,
|
||||
|
||||
kTileModeDisplay_LinearAligned = 0x00000008,
|
||||
kTileModeDisplay_2dThin = 0x0000000A,
|
||||
kTileModeDisplay_ThinPrt = 0x0000000B,
|
||||
kTileModeDisplay_2dThinPrt = 0x0000000C,
|
||||
|
||||
kTileModeThin_1dThin = 0x0000000D,
|
||||
kTileModeThin_2dThin = 0x0000000E,
|
||||
kTileModeThin_ThinPrt = 0x00000010,
|
||||
kTileModeThin_2dThinPrt = 0x00000011,
|
||||
kTileModeThin_3dThinPrt = 0x00000012,
|
||||
|
||||
kTileModeThick_1dThick = 0x00000013,
|
||||
kTileModeThick_2dThick = 0x00000014,
|
||||
kTileModeThick_ThickPrt = 0x00000016,
|
||||
kTileModeThick_2dThickPrt = 0x00000017,
|
||||
kTileModeThick_3dThickPrt = 0x00000018,
|
||||
kTileModeThick_2dXThick = 0x00000019,
|
||||
};
|
||||
|
||||
enum MicroTileMode {
|
||||
kMicroTileModeDisplay = 0x00000000,
|
||||
kMicroTileModeThin = 0x00000001,
|
||||
kMicroTileModeDepth = 0x00000002,
|
||||
kMicroTileModeRotated = 0x00000003,
|
||||
kMicroTileModeThick = 0x00000004,
|
||||
};
|
||||
|
||||
enum ArrayMode {
|
||||
kArrayModeLinearGeneral = 0x00000000,
|
||||
kArrayModeLinearAligned = 0x00000001,
|
||||
kArrayMode1dTiledThin = 0x00000002,
|
||||
kArrayMode1dTiledThick = 0x00000003,
|
||||
kArrayMode2dTiledThin = 0x00000004,
|
||||
kArrayModeTiledThinPrt = 0x00000005,
|
||||
kArrayMode2dTiledThinPrt = 0x00000006,
|
||||
kArrayMode2dTiledThick = 0x00000007,
|
||||
kArrayMode2dTiledXThick = 0x00000008,
|
||||
kArrayModeTiledThickPrt = 0x00000009,
|
||||
kArrayMode2dTiledThickPrt = 0x0000000a,
|
||||
kArrayMode3dTiledThinPrt = 0x0000000b,
|
||||
kArrayMode3dTiledThin = 0x0000000c,
|
||||
kArrayMode3dTiledThick = 0x0000000d,
|
||||
kArrayMode3dTiledXThick = 0x0000000e,
|
||||
kArrayMode3dTiledThickPrt = 0x0000000f,
|
||||
};
|
||||
|
||||
enum PipeConfig {
|
||||
kPipeConfigP8_32x32_8x16 = 0x0000000a,
|
||||
kPipeConfigP8_32x32_16x16 = 0x0000000c,
|
||||
kPipeConfigP16 = 0x00000012,
|
||||
};
|
||||
} // namespace Gnm
|
||||
|
||||
#define GNM_ERROR(msg, ...) \
|
||||
//std::fprintf(stderr, msg, __VA_ARGS__); \
|
||||
//std::abort() \
|
||||
__builtin_trap();
|
||||
|
||||
static constexpr uint32_t kMicroTileWidth = 8;
|
||||
static constexpr uint32_t kMicroTileHeight = 8;
|
||||
|
||||
static constexpr uint32_t getElementIndex(uint32_t x, uint32_t y, uint32_t z,
|
||||
uint32_t bitsPerElement,
|
||||
Gnm::MicroTileMode microTileMode,
|
||||
Gnm::ArrayMode arrayMode) {
|
||||
uint32_t elem = 0;
|
||||
|
||||
if (microTileMode == Gnm::kMicroTileModeDisplay) {
|
||||
switch (bitsPerElement) {
|
||||
case 8:
|
||||
elem |= ((x >> 0) & 0x1) << 0;
|
||||
elem |= ((x >> 1) & 0x1) << 1;
|
||||
elem |= ((x >> 2) & 0x1) << 2;
|
||||
elem |= ((y >> 1) & 0x1) << 3;
|
||||
elem |= ((y >> 0) & 0x1) << 4;
|
||||
elem |= ((y >> 2) & 0x1) << 5;
|
||||
break;
|
||||
case 16:
|
||||
elem |= ((x >> 0) & 0x1) << 0;
|
||||
elem |= ((x >> 1) & 0x1) << 1;
|
||||
elem |= ((x >> 2) & 0x1) << 2;
|
||||
elem |= ((y >> 0) & 0x1) << 3;
|
||||
elem |= ((y >> 1) & 0x1) << 4;
|
||||
elem |= ((y >> 2) & 0x1) << 5;
|
||||
break;
|
||||
case 32:
|
||||
elem |= ((x >> 0) & 0x1) << 0;
|
||||
elem |= ((x >> 1) & 0x1) << 1;
|
||||
elem |= ((y >> 0) & 0x1) << 2;
|
||||
elem |= ((x >> 2) & 0x1) << 3;
|
||||
elem |= ((y >> 1) & 0x1) << 4;
|
||||
elem |= ((y >> 2) & 0x1) << 5;
|
||||
break;
|
||||
case 64:
|
||||
elem |= ((x >> 0) & 0x1) << 0;
|
||||
elem |= ((y >> 0) & 0x1) << 1;
|
||||
elem |= ((x >> 1) & 0x1) << 2;
|
||||
elem |= ((x >> 2) & 0x1) << 3;
|
||||
elem |= ((y >> 1) & 0x1) << 4;
|
||||
elem |= ((y >> 2) & 0x1) << 5;
|
||||
break;
|
||||
default:
|
||||
GNM_ERROR("Unsupported bitsPerElement (%u) for displayable surface.",
|
||||
bitsPerElement);
|
||||
}
|
||||
} else if (microTileMode == Gnm::kMicroTileModeThin ||
|
||||
microTileMode == Gnm::kMicroTileModeDepth) {
|
||||
elem |= ((x >> 0) & 0x1) << 0;
|
||||
elem |= ((y >> 0) & 0x1) << 1;
|
||||
elem |= ((x >> 1) & 0x1) << 2;
|
||||
elem |= ((y >> 1) & 0x1) << 3;
|
||||
elem |= ((x >> 2) & 0x1) << 4;
|
||||
elem |= ((y >> 2) & 0x1) << 5;
|
||||
// Use Z too, if the array mode is Thick/XThick
|
||||
switch (arrayMode) {
|
||||
case Gnm::kArrayMode2dTiledXThick:
|
||||
case Gnm::kArrayMode3dTiledXThick:
|
||||
elem |= ((z >> 2) & 0x1) << 8;
|
||||
// Intentional fall-through
|
||||
case Gnm::kArrayMode1dTiledThick:
|
||||
case Gnm::kArrayMode2dTiledThick:
|
||||
case Gnm::kArrayMode3dTiledThick:
|
||||
case Gnm::kArrayModeTiledThickPrt:
|
||||
case Gnm::kArrayMode2dTiledThickPrt:
|
||||
case Gnm::kArrayMode3dTiledThickPrt:
|
||||
elem |= ((z >> 0) & 0x1) << 6;
|
||||
elem |= ((z >> 1) & 0x1) << 7;
|
||||
default:
|
||||
break; // no other thick modes
|
||||
}
|
||||
} else if (microTileMode == Gnm::kMicroTileModeThick) // thick/xthick
|
||||
{
|
||||
switch (arrayMode) {
|
||||
case Gnm::kArrayMode2dTiledXThick:
|
||||
case Gnm::kArrayMode3dTiledXThick:
|
||||
elem |= ((z >> 2) & 0x1) << 8;
|
||||
// intentional fall-through
|
||||
case Gnm::kArrayMode1dTiledThick:
|
||||
case Gnm::kArrayMode2dTiledThick:
|
||||
case Gnm::kArrayMode3dTiledThick:
|
||||
case Gnm::kArrayModeTiledThickPrt:
|
||||
case Gnm::kArrayMode2dTiledThickPrt:
|
||||
case Gnm::kArrayMode3dTiledThickPrt:
|
||||
if (bitsPerElement == 8 || bitsPerElement == 16) {
|
||||
elem |= ((x >> 0) & 0x1) << 0;
|
||||
elem |= ((y >> 0) & 0x1) << 1;
|
||||
elem |= ((x >> 1) & 0x1) << 2;
|
||||
elem |= ((y >> 1) & 0x1) << 3;
|
||||
elem |= ((z >> 0) & 0x1) << 4;
|
||||
elem |= ((z >> 1) & 0x1) << 5;
|
||||
elem |= ((x >> 2) & 0x1) << 6;
|
||||
elem |= ((y >> 2) & 0x1) << 7;
|
||||
} else if (bitsPerElement == 32) {
|
||||
elem |= ((x >> 0) & 0x1) << 0;
|
||||
elem |= ((y >> 0) & 0x1) << 1;
|
||||
elem |= ((x >> 1) & 0x1) << 2;
|
||||
elem |= ((z >> 0) & 0x1) << 3;
|
||||
elem |= ((y >> 1) & 0x1) << 4;
|
||||
elem |= ((z >> 1) & 0x1) << 5;
|
||||
elem |= ((x >> 2) & 0x1) << 6;
|
||||
elem |= ((y >> 2) & 0x1) << 7;
|
||||
} else if (bitsPerElement == 64 || bitsPerElement == 128) {
|
||||
elem |= ((x >> 0) & 0x1) << 0;
|
||||
elem |= ((y >> 0) & 0x1) << 1;
|
||||
elem |= ((z >> 0) & 0x1) << 2;
|
||||
elem |= ((x >> 1) & 0x1) << 3;
|
||||
elem |= ((y >> 1) & 0x1) << 4;
|
||||
elem |= ((z >> 1) & 0x1) << 5;
|
||||
elem |= ((x >> 2) & 0x1) << 6;
|
||||
elem |= ((y >> 2) & 0x1) << 7;
|
||||
} else {
|
||||
GNM_ERROR("Invalid bitsPerElement (%u) for "
|
||||
"microTileMode=kMicroTileModeThick.",
|
||||
bitsPerElement);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
GNM_ERROR("Invalid arrayMode (0x%02X) for thick/xthick "
|
||||
"microTileMode=kMicroTileModeThick.",
|
||||
arrayMode);
|
||||
}
|
||||
}
|
||||
// TODO: rotated
|
||||
|
||||
return elem;
|
||||
}
|
||||
static constexpr uint32_t getPipeIndex(uint32_t x, uint32_t y,
|
||||
Gnm::PipeConfig pipeCfg) {
|
||||
uint32_t pipe = 0;
|
||||
switch (pipeCfg) {
|
||||
case Gnm::kPipeConfigP8_32x32_8x16:
|
||||
pipe |= (((x >> 4) ^ (y >> 3) ^ (x >> 5)) & 0x1) << 0;
|
||||
pipe |= (((x >> 3) ^ (y >> 4)) & 0x1) << 1;
|
||||
pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
|
||||
break;
|
||||
case Gnm::kPipeConfigP8_32x32_16x16:
|
||||
pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0;
|
||||
pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1;
|
||||
pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
|
||||
break;
|
||||
case Gnm::kPipeConfigP16:
|
||||
pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0;
|
||||
pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1;
|
||||
pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
|
||||
pipe |= (((x >> 6) ^ (y >> 5)) & 0x1) << 3;
|
||||
break;
|
||||
default:
|
||||
GNM_ERROR("Unsupported pipeCfg (0x%02X).", pipeCfg);
|
||||
}
|
||||
return pipe;
|
||||
}
|
||||
|
||||
inline constexpr uint32_t fastIntLog2(uint32_t i) {
|
||||
return 31 - __builtin_clz(i | 1);
|
||||
}
|
||||
|
||||
static constexpr uint32_t getBankIndex(uint32_t x, uint32_t y,
|
||||
uint32_t bank_width,
|
||||
uint32_t bank_height, uint32_t num_banks,
|
||||
uint32_t num_pipes) {
|
||||
|
||||
// bank_width=1, bank_height=1, num_banks = 16, num_pipes=8
|
||||
const uint32_t x_shift_offset = fastIntLog2(bank_width * num_pipes);
|
||||
const uint32_t y_shift_offset = fastIntLog2(bank_height);
|
||||
const uint32_t xs = x >> x_shift_offset;
|
||||
const uint32_t ys = y >> y_shift_offset;
|
||||
|
||||
uint32_t bank = 0;
|
||||
switch (num_banks) {
|
||||
case 2:
|
||||
bank |= (((xs >> 3) ^ (ys >> 3)) & 0x1) << 0;
|
||||
break;
|
||||
case 4:
|
||||
bank |= (((xs >> 3) ^ (ys >> 4)) & 0x1) << 0;
|
||||
bank |= (((xs >> 4) ^ (ys >> 3)) & 0x1) << 1;
|
||||
break;
|
||||
case 8:
|
||||
bank |= (((xs >> 3) ^ (ys >> 5)) & 0x1) << 0;
|
||||
bank |= (((xs >> 4) ^ (ys >> 4) ^ (ys >> 5)) & 0x1) << 1;
|
||||
bank |= (((xs >> 5) ^ (ys >> 3)) & 0x1) << 2;
|
||||
break;
|
||||
case 16:
|
||||
bank |= (((xs >> 3) ^ (ys >> 6)) & 0x1) << 0;
|
||||
bank |= (((xs >> 4) ^ (ys >> 5) ^ (ys >> 6)) & 0x1) << 1;
|
||||
bank |= (((xs >> 5) ^ (ys >> 4)) & 0x1) << 2;
|
||||
bank |= (((xs >> 6) ^ (ys >> 3)) & 0x1) << 3;
|
||||
break;
|
||||
default:
|
||||
GNM_ERROR("invalid num_banks (%u) -- must be 2, 4, 8, or 16.", num_banks);
|
||||
}
|
||||
|
||||
return bank;
|
||||
}
|
||||
|
||||
inline std::uint32_t getTexelsPerElement(SurfaceFormat format) {
|
||||
if (format >= kSurfaceFormatBc1 && format <= kSurfaceFormatBc7) {
|
||||
return 16;
|
||||
}
|
||||
|
||||
if (format >= kSurfaceFormat1) {
|
||||
return 8;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
inline std::uint32_t getBitsPerElement(SurfaceFormat format) {
|
||||
static constexpr int bitsPerElement[] = {
|
||||
0, 8, 16, 16, 32, 32, 32, 32, 32, 32, 32, 64, 64, 96, 128, -1,
|
||||
16, 16, 16, 16, 32, 32, 64, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
16, 16, 32, 4, 8, 8, 4, 8, 8, 8, -1, -1, 8, 8, 8, 8,
|
||||
8, 8, 16, 16, 32, 32, 32, 64, 64, 8, 16, 1, 1};
|
||||
|
||||
auto rawFormat = static_cast<unsigned>(format);
|
||||
|
||||
if (rawFormat >= sizeof(bitsPerElement)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return bitsPerElement[rawFormat];
|
||||
}
|
||||
|
||||
struct Tiler1d {
|
||||
Gnm::ArrayMode m_arrayMode;
|
||||
uint32_t m_bitsPerElement;
|
||||
|
||||
Gnm::MicroTileMode m_microTileMode;
|
||||
uint32_t m_tileThickness;
|
||||
uint32_t m_tileBytes;
|
||||
uint32_t m_tilesPerRow;
|
||||
uint32_t m_tilesPerSlice;
|
||||
|
||||
Tiler1d(const GnmTBuffer *texture) {
|
||||
/*
|
||||
m_arrayMode = Gnm::ArrayMode::kArrayMode1dTiledThin;
|
||||
m_bitsPerElement = 128;// getBitsPerElement(texture->dfmt);
|
||||
m_microTileMode = Gnm::MicroTileMode::kMicroTileModeThin;
|
||||
m_tileThickness = (m_arrayMode == Gnm::kArrayMode1dTiledThick) ? 4 : 1;
|
||||
m_tileBytes = (kMicroTileWidth * kMicroTileHeight * m_tileThickness * m_bitsPerElement + 7) / 8;
|
||||
|
||||
auto width = texture->width + 1;
|
||||
auto height = texture->height + 1;
|
||||
width = (width + 3) / 4;
|
||||
height = (height + 3) / 4;
|
||||
m_tilesPerRow = width / kMicroTileWidth;
|
||||
m_tilesPerSlice = std::max(m_tilesPerRow * (height / kMicroTileHeight), 1U);
|
||||
*/
|
||||
|
||||
m_arrayMode = (Gnm::ArrayMode)2;
|
||||
m_bitsPerElement = 128;
|
||||
m_microTileMode = (Gnm::MicroTileMode)1;
|
||||
m_tileThickness= 1;
|
||||
m_tileBytes= 1024;
|
||||
m_tilesPerRow = 16;
|
||||
m_tilesPerSlice = 256;
|
||||
}
|
||||
|
||||
uint64_t getTiledElementBitOffset(uint32_t x, uint32_t y, uint32_t z) const {
|
||||
uint64_t element_index = getElementIndex(x, y, z, m_bitsPerElement,
|
||||
m_microTileMode, m_arrayMode);
|
||||
|
||||
uint64_t slice_offset =
|
||||
(z / m_tileThickness) * m_tilesPerSlice * m_tileBytes;
|
||||
|
||||
uint64_t tile_row_index = y / kMicroTileHeight;
|
||||
uint64_t tile_column_index = x / kMicroTileWidth;
|
||||
uint64_t tile_offset =
|
||||
((tile_row_index * m_tilesPerRow) + tile_column_index) * m_tileBytes;
|
||||
|
||||
uint64_t element_offset = element_index * m_bitsPerElement;
|
||||
|
||||
return (slice_offset + tile_offset) * 8 + element_offset;
|
||||
}
|
||||
|
||||
int32_t getTiledElementByteOffset(uint32_t x, uint32_t y, uint32_t z) const {
|
||||
return getTiledElementBitOffset(x, y, z) / 8;
|
||||
}
|
||||
};
|
||||
|
||||
struct Tiler2d {
|
||||
static constexpr int m_bitsPerElement = 32;
|
||||
static constexpr Gnm::MicroTileMode m_microTileMode =
|
||||
Gnm::kMicroTileModeDisplay;
|
||||
static constexpr Gnm::ArrayMode m_arrayMode = Gnm::kArrayMode2dTiledThin;
|
||||
static constexpr uint32_t m_macroTileWidth = 128;
|
||||
static constexpr uint32_t m_macroTileHeight = 64;
|
||||
static constexpr Gnm::PipeConfig m_pipeConfig =
|
||||
Gnm::kPipeConfigP8_32x32_16x16;
|
||||
static constexpr uint32_t m_bankWidth = 1;
|
||||
static constexpr uint32_t m_bankHeight = 1;
|
||||
static constexpr uint32_t m_numBanks = 16;
|
||||
static constexpr uint32_t m_numPipes = 8;
|
||||
static constexpr uint32_t m_tileThickness = 1;
|
||||
static constexpr uint32_t m_numFragmentsPerPixel = 1;
|
||||
static constexpr uint32_t m_tileSplitBytes = 512;
|
||||
static constexpr uint32_t m_pipeInterleaveBytes = 256;
|
||||
static constexpr uint32_t m_macroTileAspect = 2;
|
||||
static constexpr uint32_t m_paddedWidth = 1280;
|
||||
static constexpr uint32_t m_paddedHeight = 768;
|
||||
static constexpr uint32_t m_arraySlice = 0;
|
||||
static constexpr uint64_t m_bankSwizzleMask = 0;
|
||||
static constexpr uint64_t m_pipeSwizzleMask = 0;
|
||||
static constexpr uint64_t m_pipeInterleaveMask = 255;
|
||||
static constexpr uint64_t m_pipeInterleaveBits = 8;
|
||||
static constexpr uint64_t m_pipeBits = 3;
|
||||
static constexpr uint64_t m_bankBits = 4;
|
||||
|
||||
static constexpr uint32_t kDramRowSize = 0x400;
|
||||
static constexpr uint32_t kNumLogicalBanks = 16;
|
||||
static constexpr uint32_t kPipeInterleaveBytes = 256;
|
||||
static constexpr uint32_t kBankInterleave = 1;
|
||||
static constexpr uint32_t kMicroTileWidth = 8;
|
||||
static constexpr uint32_t kMicroTileHeight = 8;
|
||||
static constexpr uint32_t kNumMicroTilePixels =
|
||||
kMicroTileWidth * kMicroTileHeight;
|
||||
static constexpr uint32_t kCmaskCacheBits = 0x400;
|
||||
static constexpr uint32_t kHtileCacheBits = 0x4000;
|
||||
|
||||
int32_t getTiledElementBitOffset(uint64_t *outTiledBitOffset, uint32_t x,
|
||||
uint32_t y, uint32_t z,
|
||||
uint32_t fragmentIndex, bool log = false);
|
||||
|
||||
int32_t getTiledElementByteOffset(uint64_t *outTiledByteOffset, uint32_t x,
|
||||
uint32_t y, uint32_t z,
|
||||
uint32_t fragmentIndex, bool log = false) {
|
||||
uint64_t bitOffset = 0;
|
||||
int32_t status =
|
||||
getTiledElementBitOffset(&bitOffset, x, y, z, fragmentIndex, log);
|
||||
*outTiledByteOffset = bitOffset / 8;
|
||||
return status;
|
||||
}
|
||||
};
|
||||
|
||||
inline int32_t Tiler2d::getTiledElementBitOffset(uint64_t *outTiledBitOffset,
|
||||
uint32_t x, uint32_t y,
|
||||
uint32_t z,
|
||||
uint32_t fragmentIndex,
|
||||
bool log) {
|
||||
uint64_t element_index =
|
||||
getElementIndex(x, y, z, m_bitsPerElement, m_microTileMode, m_arrayMode);
|
||||
|
||||
uint32_t xh = x, yh = y;
|
||||
if (m_arrayMode == Gnm::kArrayModeTiledThinPrt ||
|
||||
m_arrayMode == Gnm::kArrayModeTiledThickPrt) {
|
||||
xh %= m_macroTileWidth;
|
||||
yh %= m_macroTileHeight;
|
||||
}
|
||||
uint64_t pipe = getPipeIndex(xh, yh, m_pipeConfig);
|
||||
uint64_t bank =
|
||||
getBankIndex(xh, yh, m_bankWidth, m_bankHeight, m_numBanks, m_numPipes);
|
||||
|
||||
constexpr uint32_t tile_bytes =
|
||||
(kMicroTileWidth * kMicroTileHeight * m_tileThickness * m_bitsPerElement *
|
||||
m_numFragmentsPerPixel +
|
||||
7) /
|
||||
8;
|
||||
|
||||
uint64_t element_offset = 0;
|
||||
if (m_microTileMode == Gnm::kMicroTileModeDepth) {
|
||||
uint64_t pixel_offset =
|
||||
element_index * m_bitsPerElement * m_numFragmentsPerPixel;
|
||||
element_offset = pixel_offset + (fragmentIndex * m_bitsPerElement);
|
||||
} else {
|
||||
uint64_t fragment_offset =
|
||||
fragmentIndex * (tile_bytes / m_numFragmentsPerPixel) * 8;
|
||||
element_offset = fragment_offset + (element_index * m_bitsPerElement);
|
||||
}
|
||||
|
||||
uint64_t slices_per_tile = 1;
|
||||
uint64_t tile_split_slice = 0;
|
||||
|
||||
uint64_t macro_tile_bytes = (m_macroTileWidth / kMicroTileWidth) *
|
||||
(m_macroTileHeight / kMicroTileHeight) *
|
||||
tile_bytes / (m_numPipes * m_numBanks);
|
||||
uint64_t macro_tiles_per_row = m_paddedWidth / m_macroTileWidth;
|
||||
uint64_t macro_tile_row_index = y / m_macroTileHeight;
|
||||
uint64_t macro_tile_column_index = x / m_macroTileWidth;
|
||||
uint64_t macro_tile_index =
|
||||
(macro_tile_row_index * macro_tiles_per_row) + macro_tile_column_index;
|
||||
uint64_t macro_tile_offset = macro_tile_index * macro_tile_bytes;
|
||||
uint64_t macro_tiles_per_slice =
|
||||
macro_tiles_per_row * (m_paddedHeight / m_macroTileHeight);
|
||||
uint64_t slice_bytes = macro_tiles_per_slice * macro_tile_bytes;
|
||||
|
||||
uint32_t slice = z;
|
||||
|
||||
uint64_t slice_offset =
|
||||
(tile_split_slice + slices_per_tile * slice / m_tileThickness) *
|
||||
slice_bytes;
|
||||
if (m_arraySlice != 0) {
|
||||
slice = m_arraySlice;
|
||||
}
|
||||
|
||||
uint64_t tile_row_index = (y / kMicroTileHeight) % m_bankHeight;
|
||||
uint64_t tile_column_index =
|
||||
((x / kMicroTileWidth) / m_numPipes) % m_bankWidth;
|
||||
uint64_t tile_index = (tile_row_index * m_bankWidth) + tile_column_index;
|
||||
uint64_t tile_offset = tile_index * tile_bytes;
|
||||
|
||||
// Bank and pipe rotation/swizzling.
|
||||
uint64_t bank_swizzle = m_bankSwizzleMask;
|
||||
uint64_t pipe_swizzle = m_pipeSwizzleMask;
|
||||
|
||||
uint64_t pipe_slice_rotation = 0;
|
||||
switch (m_arrayMode) {
|
||||
case Gnm::kArrayMode3dTiledThin:
|
||||
case Gnm::kArrayMode3dTiledThick:
|
||||
case Gnm::kArrayMode3dTiledXThick:
|
||||
pipe_slice_rotation =
|
||||
std::max(1UL, (m_numPipes / 2UL) - 1UL) * (slice / m_tileThickness);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
pipe_swizzle += pipe_slice_rotation;
|
||||
pipe_swizzle &= (m_numPipes - 1);
|
||||
pipe = pipe ^ pipe_swizzle;
|
||||
|
||||
uint32_t slice_rotation = 0;
|
||||
switch (m_arrayMode) {
|
||||
case Gnm::kArrayMode2dTiledThin:
|
||||
case Gnm::kArrayMode2dTiledThick:
|
||||
case Gnm::kArrayMode2dTiledXThick:
|
||||
slice_rotation = ((m_numBanks / 2) - 1) * (slice / m_tileThickness);
|
||||
break;
|
||||
case Gnm::kArrayMode3dTiledThin:
|
||||
case Gnm::kArrayMode3dTiledThick:
|
||||
case Gnm::kArrayMode3dTiledXThick:
|
||||
slice_rotation = std::max(1UL, (m_numPipes / 2UL) - 1UL) *
|
||||
(slice / m_tileThickness) / m_numPipes;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
uint64_t tile_split_slice_rotation = 0;
|
||||
switch (m_arrayMode) {
|
||||
case Gnm::kArrayMode2dTiledThin:
|
||||
case Gnm::kArrayMode3dTiledThin:
|
||||
case Gnm::kArrayMode2dTiledThinPrt:
|
||||
case Gnm::kArrayMode3dTiledThinPrt:
|
||||
tile_split_slice_rotation = ((m_numBanks / 2) + 1) * tile_split_slice;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
bank ^= bank_swizzle + slice_rotation;
|
||||
bank ^= tile_split_slice_rotation;
|
||||
bank &= (m_numBanks - 1);
|
||||
|
||||
uint64_t total_offset =
|
||||
(slice_offset + macro_tile_offset + tile_offset) * 8 + element_offset;
|
||||
uint64_t bitOffset = total_offset & 0x7;
|
||||
total_offset /= 8;
|
||||
|
||||
uint64_t pipe_interleave_offset = total_offset & m_pipeInterleaveMask;
|
||||
uint64_t offset = total_offset >> m_pipeInterleaveBits;
|
||||
|
||||
uint64_t finalByteOffset =
|
||||
pipe_interleave_offset | (pipe << (m_pipeInterleaveBits)) |
|
||||
(bank << (m_pipeInterleaveBits + m_pipeBits)) |
|
||||
(offset << (m_pipeInterleaveBits + m_pipeBits + m_bankBits));
|
||||
*outTiledBitOffset = (finalByteOffset << 3) | bitOffset;
|
||||
return 0;
|
||||
}
|
||||
|
||||
namespace surfaceTiler {
|
||||
constexpr std::uint32_t getElementIndex(std::uint32_t x, std::uint32_t y) {
|
||||
std::uint32_t elem = 0;
|
||||
|
||||
elem |= ((x >> 0) & 0x1) << 0;
|
||||
elem |= ((x >> 1) & 0x1) << 1;
|
||||
elem |= ((y >> 0) & 0x1) << 2;
|
||||
elem |= ((x >> 2) & 0x1) << 3;
|
||||
elem |= ((y >> 1) & 0x1) << 4;
|
||||
elem |= ((y >> 2) & 0x1) << 5;
|
||||
|
||||
return elem;
|
||||
}
|
||||
|
||||
constexpr std::uint32_t getPipeIndex(std::uint32_t x, std::uint32_t y) {
|
||||
std::uint32_t pipe = 0;
|
||||
|
||||
pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0;
|
||||
pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1;
|
||||
pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
|
||||
|
||||
return pipe;
|
||||
}
|
||||
|
||||
constexpr std::uint32_t getBankIndex(std::uint32_t x, std::uint32_t y) {
|
||||
std::uint32_t bank = 0;
|
||||
|
||||
bank |= (((x >> 6) ^ (y >> 6)) & 0x1) << 0;
|
||||
bank |= (((x >> 7) ^ (y >> 5) ^ (y >> 6)) & 0x1) << 1;
|
||||
bank |= (((x >> 8) ^ (y >> 4)) & 0x1) << 2;
|
||||
bank |= (((x >> 9) ^ (y >> 3)) & 0x1) << 3;
|
||||
|
||||
return bank;
|
||||
}
|
||||
|
||||
inline std::uint64_t getTiledElementByteOffsetImpl(std::uint32_t x,
|
||||
std::uint32_t y,
|
||||
std::uint32_t width) {
|
||||
std::uint32_t elementIndex = getElementIndex(x, y);
|
||||
std::uint32_t pipe = getPipeIndex(x, y);
|
||||
std::uint32_t bank = getBankIndex(x, y);
|
||||
|
||||
uint64_t macroTileIndex =
|
||||
(static_cast<std::uint64_t>(y / 64) * (width / 128)) + x / 128;
|
||||
uint64_t macroTileOffset = macroTileIndex * 256;
|
||||
|
||||
std::uint64_t totalOffset = macroTileOffset + elementIndex * 4;
|
||||
|
||||
std::uint64_t pipeInterleaveOffset = totalOffset & 255;
|
||||
std::uint64_t offset = totalOffset >> 8;
|
||||
|
||||
return pipeInterleaveOffset | (pipe << 8) | (bank << 11) | (offset << 15);
|
||||
}
|
||||
|
||||
static constexpr std::uint32_t kMaxPrecalculatedCount = 8;
|
||||
static constexpr std::uint32_t kMaxPrecalculatedWidth = 2048;
|
||||
static constexpr std::uint32_t kMaxPrecalculatedHeight = 2048;
|
||||
|
||||
static std::uint64_t gPrecalculatedTiledOffsets[kMaxPrecalculatedCount]
|
||||
[kMaxPrecalculatedWidth *
|
||||
kMaxPrecalculatedHeight];
|
||||
|
||||
struct PrecalculatedTiler {
|
||||
std::uint32_t width;
|
||||
std::uint32_t height;
|
||||
std::uint32_t stride;
|
||||
int index;
|
||||
};
|
||||
|
||||
static PrecalculatedTiler gPrecalculatedTilers[kMaxPrecalculatedCount];
|
||||
static int gPrecalculatedCount;
|
||||
|
||||
static int findPrecalculatedTile(std::uint32_t width, std::uint32_t height) {
|
||||
for (int i = 0; i < gPrecalculatedCount; ++i) {
|
||||
if (gPrecalculatedTilers[i].width == width &&
|
||||
gPrecalculatedTilers[i].height == height) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
inline int precalculateTiles(std::uint32_t width, std::uint32_t height) {
|
||||
int index = findPrecalculatedTile(width, height);
|
||||
if (index >= 0) {
|
||||
if (index >= kMaxPrecalculatedCount / 2 &&
|
||||
gPrecalculatedCount > kMaxPrecalculatedCount / 2) {
|
||||
auto tmp = gPrecalculatedTilers[index];
|
||||
|
||||
for (int i = index; i > 0; --i) {
|
||||
gPrecalculatedTilers[i] = gPrecalculatedTilers[i - 1];
|
||||
}
|
||||
|
||||
gPrecalculatedTilers[0] = tmp;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
PrecalculatedTiler tiler;
|
||||
tiler.width = width;
|
||||
tiler.height = height;
|
||||
tiler.stride = std::min(width, kMaxPrecalculatedWidth);
|
||||
|
||||
if (gPrecalculatedCount >= kMaxPrecalculatedCount) {
|
||||
// TODO: insert in the middle?
|
||||
tiler.index = gPrecalculatedTilers[kMaxPrecalculatedCount - 1].index;
|
||||
index = kMaxPrecalculatedCount - 1;
|
||||
} else {
|
||||
tiler.index = gPrecalculatedCount++;
|
||||
index = tiler.index;
|
||||
}
|
||||
|
||||
gPrecalculatedTilers[index - 1] = tiler;
|
||||
|
||||
for (std::uint32_t y = 0; y < height; ++y) {
|
||||
for (std::uint32_t x = 0; x < width; ++x) {
|
||||
gPrecalculatedTiledOffsets[index][y * tiler.stride + x] =
|
||||
getTiledElementByteOffsetImpl(x, y, tiler.width);
|
||||
}
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
inline std::uint64_t getTiledElementByteOffset(int index, std::uint32_t x,
|
||||
std::uint32_t y) {
|
||||
auto tiler = gPrecalculatedTilers[index];
|
||||
if (x < kMaxPrecalculatedWidth && y < kMaxPrecalculatedHeight) [[likely]] {
|
||||
return gPrecalculatedTiledOffsets[index][x + y * tiler.stride];
|
||||
}
|
||||
|
||||
return getTiledElementByteOffsetImpl(x, y, tiler.width);
|
||||
}
|
||||
} // namespace surfaceTiler
|
||||
} // namespace amdgpu::device
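A sketch (not part of the commit) of linearizing a tiled 32-bit display surface with the surfaceTiler helpers above; the buffer arguments are illustrative, width stands for the padded surface width the tiler expects, and getTiledElementByteOffsetImpl hard-codes 4-byte elements and the macro-tile layout defined in this header:

#include <amdgpu/device/tiler.hpp>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<std::uint32_t> detileSurface(const std::uint8_t *tiled,
                                         std::uint32_t width,
                                         std::uint32_t height) {
  using amdgpu::device::surfaceTiler::getTiledElementByteOffsetImpl;

  std::vector<std::uint32_t> linear(std::size_t{width} * height);
  for (std::uint32_t y = 0; y < height; ++y) {
    for (std::uint32_t x = 0; x < width; ++x) {
      // Map the linear (x, y) pixel to its byte offset in the tiled image.
      std::uint64_t offset = getTiledElementByteOffsetImpl(x, y, width);
      linear[std::size_t{y} * width + x] =
          *reinterpret_cast<const std::uint32_t *>(tiled + offset);
    }
  }
  return linear;
}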
hw/amdgpu/device/src/device.cpp (new file, 4283 lines)
File diff suppressed because it is too large
hw/amdgpu/device/src/rect_list.geom.glsl (new file, 40 lines)
@@ -0,0 +1,40 @@
#version 450

layout (triangles) in;
layout (triangle_strip, max_vertices = 4) out;

void main(void)
{
    vec4 topLeft = gl_in[0].gl_Position;
    vec4 right = gl_in[1].gl_Position;
    vec4 bottomLeft = gl_in[2].gl_Position;

    vec4 topRight = vec4(
        right.x,
        topLeft.y,
        topLeft.z,
        topLeft.w
    );

    vec4 bottomRight = vec4(
        right.x,
        bottomLeft.y,
        topLeft.z,
        topLeft.w
    );


    gl_Position = topLeft;
    EmitVertex();

    gl_Position = bottomLeft;
    EmitVertex();

    gl_Position = topRight;
    EmitVertex();

    gl_Position = bottomRight;
    EmitVertex();

    EndPrimitive();
}
hw/amdgpu/include/amdgpu/RemoteMemory.hpp (new file, 12 lines)
@@ -0,0 +1,12 @@
#pragma once
#include <cstdint>

namespace amdgpu {
struct RemoteMemory {
  char *shmPointer;

  template <typename T = void> T *getPointer(std::uint64_t address) const {
    return address ? reinterpret_cast<T *>(shmPointer + address) : nullptr;
  }
};
} // namespace amdgpu
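RemoteMemory is the GPU process's view of the emulated process's memory: getPointer simply offsets the shared-memory base by the guest address and returns nullptr for address 0. A short usage sketch (the base pointer and address are illustrative):

#include <amdgpu/RemoteMemory.hpp>
#include <cstdint>

std::uint32_t readGuestDword(char *shmBase, std::uint64_t guestAddress) {
  amdgpu::RemoteMemory memory{shmBase};

  // getPointer() adds the guest address to the shared-memory base.
  if (auto *ptr = memory.getPointer<std::uint32_t>(guestAddress))
    return *ptr;
  return 0;
}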
hw/amdgpu/include/util/SourceLocation.hpp (new file, 31 lines)
@@ -0,0 +1,31 @@
#pragma once

namespace util {
class SourceLocation {
public:
  const char *mFileName = {};
  const char *mFunctionName = {};
  unsigned mLine = 0;
  unsigned mColumn = 0;

public:
  constexpr SourceLocation(const char *fileName = __builtin_FILE(),
                           const char *functionName = __builtin_FUNCTION(),
                           unsigned line = __builtin_LINE(),
                           unsigned column =
#if __has_builtin(__builtin_COLUMN)
                               __builtin_COLUMN()
#else
                               0
#endif
                           ) noexcept
      : mFileName(fileName), mFunctionName(functionName), mLine(line),
        mColumn(column) {
  }

  constexpr unsigned line() const noexcept { return mLine; }
  constexpr unsigned column() const noexcept { return mColumn; }
  constexpr const char *file_name() const noexcept { return mFileName; }
  constexpr const char *function_name() const noexcept { return mFunctionName; }
};
} // namespace util
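Because every constructor argument is defaulted to a __builtin_* call, a SourceLocation parameter records the location of the caller that omitted it; that is how Verify and unreachable report their call sites. A small sketch:

#include <util/SourceLocation.hpp>
#include <cstdio>

// The location is captured where the caller omits the argument,
// not inside logAt() itself.
void logAt(const char *message,
           util::SourceLocation where = util::SourceLocation()) {
  std::printf("%s:%u (%s): %s\n", where.file_name(), where.line(),
              where.function_name(), message);
}

void example() {
  logAt("device initialized"); // reports example()'s file and line
}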
hw/amdgpu/include/util/Verify.hpp (new file, 26 lines)
@@ -0,0 +1,26 @@
#pragma once

#include "SourceLocation.hpp"
#include "unreachable.hpp"

class Verify {
  util::SourceLocation mLocation;

public:
  util::SourceLocation location() const {
    return mLocation;
  }

  Verify(util::SourceLocation location = util::SourceLocation())
      : mLocation(location) {}

  Verify &operator<<(bool result) {
    if (!result) {
      util::unreachable("Verification failed at %s: %s:%u:%u",
                        mLocation.function_name(), mLocation.file_name(),
                        mLocation.line(), mLocation.column());
    }

    return *this;
  }
};
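Verify streams booleans; the first false value reports the location captured when the Verify temporary was constructed and then traps through util::unreachable. A usage sketch (the checked conditions are illustrative):

#include <util/Verify.hpp>
#include <cstddef>

void submit(void *commandBuffer, std::size_t size) {
  // operator<< returns Verify&, so checks can be chained.
  Verify() << (commandBuffer != nullptr) << (size > 0);
}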
hw/amdgpu/include/util/unreachable.hpp (new file, 29 lines)
@@ -0,0 +1,29 @@
#pragma once

#include "SourceLocation.hpp"
#include <cstdio>
#include <cstdarg>

namespace util {
[[noreturn]] inline void unreachable_impl() { std::fflush(stdout); __builtin_trap(); }

[[noreturn]] inline void unreachable(SourceLocation location = {}) {
  std::printf("\n");
  std::fflush(stdout);
  std::fprintf(stderr, "Unreachable at %s:%u:%u %s\n", location.file_name(),
               location.line(), location.column(), location.function_name());
  unreachable_impl();
}

[[noreturn]] inline void unreachable(const char *fmt, ...) {
  std::printf("\n");
  std::fflush(stdout);
  va_list list;
  va_start(list, fmt);
  std::vfprintf(stderr, fmt, list);
  va_end(list);
  std::fprintf(stderr, "\n");

  unreachable_impl();
}
} // namespace util
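The printf-style overload is convenient as the unreachable arm of a switch, since both overloads are [[noreturn]]. A sketch (the stage values are illustrative):

#include <util/unreachable.hpp>

const char *stageName(int stage) {
  switch (stage) {
  case 0:
    return "vertex";
  case 1:
    return "fragment";
  default:
    // Prints the message, flushes stdout, then traps; no return needed.
    util::unreachable("unexpected stage %d", stage);
  }
}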
hw/amdgpu/lib/libspirv/CMakeLists.txt (new file, 4 lines)
@@ -0,0 +1,4 @@
project(spirv)

add_library(${PROJECT_NAME} INTERFACE)
target_include_directories(${PROJECT_NAME} INTERFACE include)
hw/amdgpu/lib/libspirv/include/spirv/GLSL.std.450.h (new file, 131 lines)
@@ -0,0 +1,131 @@
/*
|
||||
** Copyright (c) 2014-2016 The Khronos Group Inc.
|
||||
**
|
||||
** Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
** of this software and/or associated documentation files (the "Materials"),
|
||||
** to deal in the Materials without restriction, including without limitation
|
||||
** the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
** and/or sell copies of the Materials, and to permit persons to whom the
|
||||
** Materials are furnished to do so, subject to the following conditions:
|
||||
**
|
||||
** The above copyright notice and this permission notice shall be included in
|
||||
** all copies or substantial portions of the Materials.
|
||||
**
|
||||
** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
|
||||
** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
|
||||
** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
|
||||
**
|
||||
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
|
||||
** IN THE MATERIALS.
|
||||
*/
|
||||
|
||||
#ifndef GLSLstd450_H
|
||||
#define GLSLstd450_H
|
||||
|
||||
static const int GLSLstd450Version = 100;
|
||||
static const int GLSLstd450Revision = 3;
|
||||
|
||||
enum GLSLstd450 {
|
||||
GLSLstd450Bad = 0, // Don't use
|
||||
|
||||
GLSLstd450Round = 1,
|
||||
GLSLstd450RoundEven = 2,
|
||||
GLSLstd450Trunc = 3,
|
||||
GLSLstd450FAbs = 4,
|
||||
GLSLstd450SAbs = 5,
|
||||
GLSLstd450FSign = 6,
|
||||
GLSLstd450SSign = 7,
|
||||
GLSLstd450Floor = 8,
|
||||
GLSLstd450Ceil = 9,
|
||||
GLSLstd450Fract = 10,
|
||||
|
||||
GLSLstd450Radians = 11,
|
||||
GLSLstd450Degrees = 12,
|
||||
GLSLstd450Sin = 13,
|
||||
GLSLstd450Cos = 14,
|
||||
GLSLstd450Tan = 15,
|
||||
GLSLstd450Asin = 16,
|
||||
GLSLstd450Acos = 17,
|
||||
GLSLstd450Atan = 18,
|
||||
GLSLstd450Sinh = 19,
|
||||
GLSLstd450Cosh = 20,
|
||||
GLSLstd450Tanh = 21,
|
||||
GLSLstd450Asinh = 22,
|
||||
GLSLstd450Acosh = 23,
|
||||
GLSLstd450Atanh = 24,
|
||||
GLSLstd450Atan2 = 25,
|
||||
|
||||
GLSLstd450Pow = 26,
|
||||
GLSLstd450Exp = 27,
|
||||
GLSLstd450Log = 28,
|
||||
GLSLstd450Exp2 = 29,
|
||||
GLSLstd450Log2 = 30,
|
||||
GLSLstd450Sqrt = 31,
|
||||
GLSLstd450InverseSqrt = 32,
|
||||
|
||||
GLSLstd450Determinant = 33,
|
||||
GLSLstd450MatrixInverse = 34,
|
||||
|
||||
GLSLstd450Modf = 35, // second operand needs an OpVariable to write to
|
||||
GLSLstd450ModfStruct = 36, // no OpVariable operand
|
||||
GLSLstd450FMin = 37,
|
||||
GLSLstd450UMin = 38,
|
||||
GLSLstd450SMin = 39,
|
||||
GLSLstd450FMax = 40,
|
||||
GLSLstd450UMax = 41,
|
||||
GLSLstd450SMax = 42,
|
||||
GLSLstd450FClamp = 43,
|
||||
GLSLstd450UClamp = 44,
|
||||
GLSLstd450SClamp = 45,
|
||||
GLSLstd450FMix = 46,
|
||||
GLSLstd450IMix = 47, // Reserved
|
||||
GLSLstd450Step = 48,
|
||||
GLSLstd450SmoothStep = 49,
|
||||
|
||||
GLSLstd450Fma = 50,
|
||||
GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to
|
||||
GLSLstd450FrexpStruct = 52, // no OpVariable operand
|
||||
GLSLstd450Ldexp = 53,
|
||||
|
||||
GLSLstd450PackSnorm4x8 = 54,
|
||||
GLSLstd450PackUnorm4x8 = 55,
|
||||
GLSLstd450PackSnorm2x16 = 56,
|
||||
GLSLstd450PackUnorm2x16 = 57,
|
||||
GLSLstd450PackHalf2x16 = 58,
|
||||
GLSLstd450PackDouble2x32 = 59,
|
||||
GLSLstd450UnpackSnorm2x16 = 60,
|
||||
GLSLstd450UnpackUnorm2x16 = 61,
|
||||
GLSLstd450UnpackHalf2x16 = 62,
|
||||
GLSLstd450UnpackSnorm4x8 = 63,
|
||||
GLSLstd450UnpackUnorm4x8 = 64,
|
||||
GLSLstd450UnpackDouble2x32 = 65,
|
||||
|
||||
GLSLstd450Length = 66,
|
||||
GLSLstd450Distance = 67,
|
||||
GLSLstd450Cross = 68,
|
||||
GLSLstd450Normalize = 69,
|
||||
GLSLstd450FaceForward = 70,
|
||||
GLSLstd450Reflect = 71,
|
||||
GLSLstd450Refract = 72,
|
||||
|
||||
GLSLstd450FindILsb = 73,
|
||||
GLSLstd450FindSMsb = 74,
|
||||
GLSLstd450FindUMsb = 75,
|
||||
|
||||
GLSLstd450InterpolateAtCentroid = 76,
|
||||
GLSLstd450InterpolateAtSample = 77,
|
||||
GLSLstd450InterpolateAtOffset = 78,
|
||||
|
||||
GLSLstd450NMin = 79,
|
||||
GLSLstd450NMax = 80,
|
||||
GLSLstd450NClamp = 81,
|
||||
|
||||
GLSLstd450Count
|
||||
};
|
||||
|
||||
#endif // #ifndef GLSLstd450_H
hw/amdgpu/lib/libspirv/include/spirv/spirv-builder.hpp (new file, 2120 lines)
File diff suppressed because it is too large

hw/amdgpu/lib/libspirv/include/spirv/spirv-instruction.hpp (new file, 2422 lines)
File diff suppressed because it is too large

hw/amdgpu/lib/libspirv/include/spirv/spirv.hpp (new file, 2753 lines)
File diff suppressed because it is too large
hw/amdgpu/shader/CMakeLists.txt (new file, 22 lines)
@@ -0,0 +1,22 @@
project(libamdgpu-shader)
set(PROJECT_PATH amdgpu/shader)

set(SRC
  src/cf.cpp
  src/scf.cpp
  src/CfBuilder.cpp
  src/Converter.cpp
  src/ConverterContext.cpp
  src/Fragment.cpp
  src/Function.cpp
  src/Instruction.cpp
  src/RegisterState.cpp
  src/TypeId.cpp
)

add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC})
target_link_libraries(${PROJECT_NAME} PUBLIC spirv amdgpu::base)
target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH})
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "")
add_library(amdgpu::shader ALIAS ${PROJECT_NAME})
set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON)
hw/amdgpu/shader/include/amdgpu/shader/AccessOp.hpp (new file, 18 lines)
@@ -0,0 +1,18 @@
#pragma once

namespace amdgpu::shader {
enum class AccessOp { None = 0, Load = 1 << 0, Store = 1 << 1 };

constexpr AccessOp operator|(AccessOp lhs, AccessOp rhs) {
  return static_cast<AccessOp>(static_cast<int>(lhs) | static_cast<int>(rhs));
}
constexpr AccessOp operator&(AccessOp lhs, AccessOp rhs) {
  return static_cast<AccessOp>(static_cast<int>(lhs) & static_cast<int>(rhs));
}
constexpr AccessOp &operator|=(AccessOp &lhs, AccessOp rhs) {
  return ((lhs = lhs | rhs));
}
constexpr AccessOp &operator&=(AccessOp &lhs, AccessOp rhs) {
  return ((lhs = lhs & rhs));
}
} // namespace amdgpu::shader
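AccessOp is a bit-flag enum; the constexpr operators above let converted shaders accumulate whether a resource was read, written or both, and test individual bits later. A short sketch:

#include <amdgpu/shader/AccessOp.hpp>

using amdgpu::shader::AccessOp;

// Accumulate the operations seen for a resource.
AccessOp trackAccess(bool wasRead, bool wasWritten) {
  AccessOp ops = AccessOp::None;
  if (wasRead)
    ops |= AccessOp::Load;
  if (wasWritten)
    ops |= AccessOp::Store;
  return ops;
}

// Test a single bit of the accumulated flags.
bool needsStoreAccess(AccessOp ops) {
  return (ops & AccessOp::Store) == AccessOp::Store;
}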
hw/amdgpu/shader/include/amdgpu/shader/BufferKind.hpp (new file, 5 lines)
@@ -0,0 +1,5 @@
#pragma once

namespace amdgpu::shader {
enum class BufferKind { VBuffer, TBuffer };
}
hw/amdgpu/shader/include/amdgpu/shader/CfBuilder.hpp (new file, 8 lines)
@@ -0,0 +1,8 @@
#pragma once
#include "cf.hpp"
#include <amdgpu/RemoteMemory.hpp>

namespace amdgpu::shader {
cf::BasicBlock *buildCf(cf::Context &ctxt, RemoteMemory memory,
                        std::uint64_t entryPoint);
} // namespace amdgpu::shader
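buildCf lifts the GCN code reachable from entryPoint into basic blocks. A sketch of calling it; cf::Context comes from cf.hpp, which is not shown in this commit, so its default construction here is an assumption:

#include <amdgpu/shader/CfBuilder.hpp>
#include <cstdint>

namespace amdgpu::shader {
// Assumes cf::Context (defined in cf.hpp, not shown) is default-constructible.
cf::BasicBlock *analyzeEntryPoint(RemoteMemory memory, std::uint64_t entryPoint) {
  cf::Context context;
  return buildCf(context, memory, entryPoint);
}
} // namespace amdgpu::shader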
hw/amdgpu/shader/include/amdgpu/shader/Converter.hpp (new file, 35 lines)
@@ -0,0 +1,35 @@
#pragma once

#include "Stage.hpp"
#include "AccessOp.hpp"

#include <amdgpu/RemoteMemory.hpp>

#include <cstdint>
#include <span>
#include <vector>

namespace amdgpu::shader {
struct Shader {
  enum class UniformKind {
    Buffer,
    Sampler,
    Image
  };

  struct UniformInfo {
    std::uint32_t binding;
    std::uint32_t buffer[8];
    UniformKind kind;
    AccessOp accessOp;
  };

  std::vector<UniformInfo> uniforms;
  std::vector<std::uint32_t> spirv;
};

Shader convert(RemoteMemory memory, Stage stage, std::uint64_t entry,
               std::span<const std::uint32_t> userSpgrs, int bindingOffset,
               std::uint32_t dimX = 1, std::uint32_t dimY = 1,
               std::uint32_t dimZ = 1);
} // namespace amdgpu::shader
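convert() recompiles one GCN shader stage at entry into SPIR-V plus a uniform table. A hedged sketch of a call; Stage.hpp is not shown in this excerpt, so the Stage::Fragment enumerator and its qualification are assumptions, as are the entry address and user SGPR values:

#include <amdgpu/shader/Converter.hpp>
#include <cstdint>

// Illustrative wrapper; the Stage value is assumed, not taken from this diff.
amdgpu::shader::Shader recompilePixelShader(amdgpu::RemoteMemory memory,
                                            std::uint64_t entryAddress) {
  std::uint32_t userSgprs[] = {0, 0, 0, 0};
  auto shader = amdgpu::shader::convert(memory, amdgpu::shader::Stage::Fragment,
                                        entryAddress, userSgprs,
                                        /*bindingOffset=*/0);

  // shader.spirv holds the generated SPIR-V words; shader.uniforms describes
  // the buffers, samplers and images the module expects at its bindings.
  return shader;
}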
257
hw/amdgpu/shader/include/amdgpu/shader/ConverterContext.hpp
Normal file
@@ -0,0 +1,257 @@
#pragma once

#include "Fragment.hpp"
#include "Function.hpp"
#include "RegisterId.hpp"
#include "Stage.hpp"
#include "TypeId.hpp"
#include "Uniform.hpp"
#include "Value.hpp"
#include "scf.hpp"

#include <forward_list>
#include <amdgpu/RemoteMemory.hpp>
#include <spirv/spirv-builder.hpp>
#include <unordered_map>
#include <util/unreachable.hpp>

#include <bit>
#include <span>
#include <cassert>
#include <cstdint>
#include <vector>
#include <map>

namespace amdgpu::shader {
/*
struct MaterializedFunction {
  spirv::Function function;
  spirv::FunctionType type;
  spirv::Type returnType;

  std::vector<std::pair<RegisterId, TypeId>> args;
  std::vector<std::pair<RegisterId, TypeId>> results;
};
*/

class ConverterContext {
  Stage mStage;
  RemoteMemory mMemory;
  spirv::IdGenerator mGenerator;
  spirv::SpirvBuilder mBuilder{mGenerator, 1024};
  static constexpr auto kGenericTypesCount =
      static_cast<std::size_t>(TypeId::Void) + 1;
  spirv::Type mTypes[kGenericTypesCount];
  spirv::PointerType mPtrTypes[13][kGenericTypesCount];
  spirv::RuntimeArrayType mRuntimeArrayTypes[kGenericTypesCount];
  spirv::VariableValue mThreadId;
  spirv::VariableValue mWorkgroupId;
  spirv::VariableValue mLocalInvocationId;
  spirv::VariableValue mPerVertex;
  spirv::VariableValue mFragCoord;
  std::vector<spirv::VariableValue> mInterfaces;
  std::map<unsigned, spirv::VariableValue> mIns;
  std::map<unsigned, spirv::VariableValue> mOuts;

  std::map<std::uint32_t, spirv::ConstantFloat> mConstantFloat32Map;
  std::map<std::uint32_t, spirv::ConstantUInt> mConstantUint32Map;
  std::map<std::uint32_t, spirv::ConstantSInt> mConstantSint32Map;
  std::map<std::uint64_t, spirv::ConstantUInt> mConstantUint64Map;

  struct FunctionType {
    spirv::Type resultType;
    std::vector<spirv::Type> params;
    spirv::FunctionType id;
  };

  std::vector<FunctionType> mFunctionTypes;

  struct StructTypeEntry {
    spirv::StructType id;
    std::vector<spirv::Type> members;
    spirv::PointerType ptrTypes[13];

    bool match(std::span<const spirv::Type> other) {
      if (members.size() != other.size()) {
        return false;
      }

      for (std::size_t i = 0; i < other.size(); ++i) {
        if (members[i] != other[i]) {
          return false;
        }
      }

      return true;
    }
  };

  std::vector<StructTypeEntry> mStructTypes;

  std::forward_list<Fragment> mFragments;
  std::forward_list<Function> mFunctions;

  spirv::ConstantBool mTrue;
  spirv::ConstantBool mFalse;

  std::vector<UniformInfo> mUniforms;
  spirv::ExtInstSet mGlslStd450;
  spirv::Function mDiscardFn;

public:
  ConverterContext(RemoteMemory memory, Stage stage)
      : mStage(stage), mMemory(memory) {
    mGlslStd450 = mBuilder.createExtInstImport("GLSL.std.450");
  }

  const decltype(mInterfaces) &getInterfaces() const {
    return mInterfaces;
  }

  spirv::SpirvBuilder &getBuilder() { return mBuilder; }
  RemoteMemory getMemory() const { return mMemory; }
  spirv::ExtInstSet getGlslStd450() const { return mGlslStd450; }
  std::optional<TypeId> getTypeIdOf(spirv::Type type) const;

  spirv::StructType findStructType(std::span<const spirv::Type> members);
  spirv::StructType getStructType(std::span<const spirv::Type> members);
  spirv::PointerType getStructPointerType(spv::StorageClass storageClass,
                                          spirv::StructType structType);
  spirv::Type getType(TypeId id);

  spirv::PointerType getPointerType(spv::StorageClass storageClass, TypeId id) {
    assert(static_cast<unsigned>(storageClass) < 13);
    auto &type = mPtrTypes[static_cast<unsigned>(storageClass)]
                          [static_cast<std::uint32_t>(id)];

    if (!type) {
      type = mBuilder.createTypePointer(storageClass, getType(id));
    }

    return type;
  }

  spirv::RuntimeArrayType getRuntimeArrayType(TypeId id);

  spirv::UIntType getUInt32Type() {
    return spirv::cast<spirv::UIntType>(getType(TypeId::UInt32));
  }
  spirv::UIntType getUInt64Type() {
    return spirv::cast<spirv::UIntType>(getType(TypeId::UInt64));
  }

  spirv::VectorOfType<spirv::UIntType> getUint32x2Type() {
    return spirv::cast<spirv::VectorOfType<spirv::UIntType>>(
        getType(TypeId::UInt32x2));
  }

  spirv::VectorOfType<spirv::UIntType> getUint32x3Type() {
    return spirv::cast<spirv::VectorOfType<spirv::UIntType>>(
        getType(TypeId::UInt32x3));
  }

  spirv::VectorOfType<spirv::UIntType> getUint32x4Type() {
    return spirv::cast<spirv::VectorOfType<spirv::UIntType>>(
        getType(TypeId::UInt32x4));
  }

  spirv::ArrayOfType<spirv::UIntType> getArrayUint32x8Type() {
    return spirv::cast<spirv::ArrayOfType<spirv::UIntType>>(
        getType(TypeId::ArrayUInt32x8));
  }

  spirv::ArrayOfType<spirv::UIntType> getArrayUint32x16Type() {
    return spirv::cast<spirv::ArrayOfType<spirv::UIntType>>(
        getType(TypeId::ArrayUInt32x16));
  }

  spirv::SIntType getSint32Type() {
    return spirv::cast<spirv::SIntType>(getType(TypeId::SInt32));
  }
  spirv::SIntType getSint64Type() {
    return spirv::cast<spirv::SIntType>(getType(TypeId::SInt64));
  }

  spirv::FloatType getFloat32Type() {
    return spirv::cast<spirv::FloatType>(getType(TypeId::Float32));
  }

  spirv::VectorOfType<spirv::FloatType> getFloat32x4Type() {
    return spirv::cast<spirv::VectorOfType<spirv::FloatType>>(
        getType(TypeId::Float32x4));
  }

  spirv::VectorOfType<spirv::FloatType> getFloat32x3Type() {
    return spirv::cast<spirv::VectorOfType<spirv::FloatType>>(
        getType(TypeId::Float32x3));
  }

  spirv::VectorOfType<spirv::FloatType> getFloat32x2Type() {
    return spirv::cast<spirv::VectorOfType<spirv::FloatType>>(
        getType(TypeId::Float32x2));
  }

  spirv::BoolType getBoolType() {
    return spirv::cast<spirv::BoolType>(getType(TypeId::Bool));
  }

  spirv::VoidType getVoidType() {
    return spirv::cast<spirv::VoidType>(getType(TypeId::Void));
  }

  spirv::ConstantBool getTrue() {
    if (!mTrue) {
      mTrue = mBuilder.createConstantTrue(getBoolType());
    }
    return mTrue;
  }
  spirv::ConstantBool getFalse() {
    if (!mFalse) {
      mFalse = mBuilder.createConstantFalse(getBoolType());
    }
    return mFalse;
  }

  spirv::ConstantUInt getUInt64(std::uint64_t value);
  spirv::ConstantUInt getUInt32(std::uint32_t value);
  spirv::ConstantSInt getSInt32(std::uint32_t value);
  spirv::ConstantFloat getFloat32Raw(std::uint32_t value);

  spirv::ConstantFloat getFloat32(float id) {
    return getFloat32Raw(std::bit_cast<std::uint32_t>(id));
  }

  spirv::SamplerType getSamplerType() {
    return spirv::cast<spirv::SamplerType>(getType(TypeId::Sampler));
  }
  spirv::ImageType getImage2DType() {
    return spirv::cast<spirv::ImageType>(getType(TypeId::Image2D));
  }
  spirv::SampledImageType getSampledImage2DType() {
    return spirv::cast<spirv::SampledImageType>(getType(TypeId::SampledImage2D));
  }

  UniformInfo *createStorageBuffer(TypeId type);
  UniformInfo *getOrCreateStorageBuffer(std::uint32_t *vbuffer, TypeId type);
  UniformInfo *getOrCreateUniformConstant(std::uint32_t *buffer, std::size_t size, TypeId type);
  spirv::VariableValue getThreadId();
  spirv::VariableValue getWorkgroupId();
  spirv::VariableValue getLocalInvocationId();
  spirv::VariableValue getPerVertex();
  spirv::VariableValue getFragCoord();
  spirv::VariableValue getIn(unsigned location);
  spirv::VariableValue getOut(unsigned location);

  spirv::Function getDiscardFn();

  std::optional<std::uint32_t> findUint32Value(spirv::Value id) const;
  std::optional<std::int32_t> findSint32Value(spirv::Value id) const;
  std::optional<float> findFloat32Value(spirv::Value id) const;
  spirv::FunctionType getFunctionType(spirv::Type resultType,
                                      std::span<const spirv::Type> params);

  Function *createFunction(std::size_t expectedSize);
  Fragment *createFragment(std::size_t expectedSize);

  std::vector<UniformInfo> &getUniforms() {
    return mUniforms;
  }
};
} // namespace amdgpu::shader
95
hw/amdgpu/shader/include/amdgpu/shader/Fragment.hpp
Normal file
@@ -0,0 +1,95 @@
#pragma once

#include "FragmentTerminator.hpp"
#include "Instruction.hpp"
#include "RegisterId.hpp"
#include "RegisterState.hpp"
#include "Stage.hpp"
#include "TypeId.hpp"
#include "Uniform.hpp"
#include "scf.hpp"

#include <map>
#include <optional>
#include <spirv/spirv-builder.hpp>

namespace amdgpu::shader {
enum class OperandGetFlags {
  None,
  PreserveType = 1 << 0
};

struct Function;
class ConverterContext;

struct Fragment {
  ConverterContext *context = nullptr;
  Function *function = nullptr;
  spirv::Block entryBlockId;
  spirv::BlockBuilder builder;
  RegisterState *registers = nullptr;

  std::set<RegisterId> values;
  std::set<RegisterId> outputs;

  std::vector<Fragment *> predecessors;
  std::uint64_t jumpAddress = 0;
  spirv::BoolValue branchCondition;

  void appendBranch(Fragment &other) {
    other.predecessors.push_back(this);
  }

  void injectValuesFromPreds();

  // std::optional<RegisterId> findInput(spirv::Value value);
  // Value addInput(RegisterId id, spirv::Type type);
  spirv::SamplerValue createSampler(RegisterId base);
  spirv::ImageValue createImage(RegisterId base, bool r128); // TODO: params
  Value createCompositeExtract(Value composite, std::uint32_t member);
  Value getOperand(RegisterId id, TypeId type, OperandGetFlags flags = OperandGetFlags::None);
  void setOperand(RegisterId id, Value value);
  void setVcc(Value value);
  void setScc(Value value);
  spirv::BoolValue getScc();
  spirv::Value createBitcast(spirv::Type to, spirv::Type from, spirv::Value value);

  Value getScalarOperand(int id, TypeId type, OperandGetFlags flags = OperandGetFlags::None) {
    return getOperand(RegisterId::Scalar(id), type, flags);
  }
  Value getVectorOperand(int id, TypeId type, OperandGetFlags flags = OperandGetFlags::None) {
    return getOperand(RegisterId::Vector(id), type, flags);
  }
  Value getAttrOperand(int id, TypeId type, OperandGetFlags flags = OperandGetFlags::None) {
    return getOperand(RegisterId::Attr(id), type, flags);
  }
  Value getVccLo() {
    return getOperand(RegisterId::VccLo, TypeId::UInt32);
  }
  Value getVccHi() {
    return getOperand(RegisterId::VccHi, TypeId::UInt32);
  }
  Value getExecLo() {
    return getOperand(RegisterId::ExecLo, TypeId::UInt32);
  }
  Value getExecHi() {
    return getOperand(RegisterId::ExecHi, TypeId::UInt32);
  }
  void setScalarOperand(int id, Value value) {
    setOperand(RegisterId::Scalar(id), value);
  }
  void setVectorOperand(int id, Value value) {
    setOperand(RegisterId::Vector(id), value);
  }
  void setExportTarget(int id, Value value) {
    setOperand(RegisterId::Export(id), value);
  }
  // void createCallTo(MaterializedFunction *other);
  void convert(std::uint64_t size);

private:
  Value getRegister(RegisterId id);
  Value getRegister(RegisterId id, spirv::Type type);
  void setRegister(RegisterId id, Value value);
};
} // namespace amdgpu::shader
11
hw/amdgpu/shader/include/amdgpu/shader/FragmentTerminator.hpp
Normal file
@@ -0,0 +1,11 @@
#pragma once

namespace amdgpu::shader {
enum class FragmentTerminator {
  None,
  EndProgram,
  CallToReg,
  BranchToReg,
  Branch,
};
}
31
hw/amdgpu/shader/include/amdgpu/shader/Function.hpp
Normal file
@@ -0,0 +1,31 @@
#pragma once
#include "Fragment.hpp"
#include "RegisterId.hpp"
#include "spirv/spirv-builder.hpp"
#include <span>

namespace amdgpu::shader {
class ConverterContext;

struct Function {
  ConverterContext *context = nullptr;
  Stage stage = Stage::None;
  std::span<const std::uint32_t> userSgprs;
  std::span<const std::uint32_t> userVgprs;
  Fragment entryFragment;
  Fragment exitFragment;
  std::map<RegisterId, Value> inputs;
  spirv::FunctionBuilder builder;
  std::vector<Fragment *> fragments;

  Value getInput(RegisterId id);
  Value createInput(RegisterId id);
  void createExport(spirv::BlockBuilder &builder, unsigned index, Value value);
  spirv::Type getResultType();
  spirv::FunctionType getFunctionType();

  Fragment *createFragment();

  void insertReturn();
};
} // namespace amdgpu::shader
1972
hw/amdgpu/shader/include/amdgpu/shader/Instruction.hpp
Normal file
File diff suppressed because it is too large
102
hw/amdgpu/shader/include/amdgpu/shader/RegisterId.hpp
Normal file
@@ -0,0 +1,102 @@
#pragma once

#include <cstdint>

namespace amdgpu::shader {
class RegisterId {
  static constexpr std::uint32_t kScalarOperandsOffset = 0;
  static constexpr std::uint32_t kScalarOperandsCount = 256;
  static constexpr std::uint32_t kVectorOperandsOffset =
      kScalarOperandsOffset + kScalarOperandsCount;
  static constexpr std::uint32_t kVectorOperandsCount = 512;
  static constexpr std::uint32_t kExportOperandsOffset =
      kVectorOperandsOffset + kVectorOperandsCount;
  static constexpr std::uint32_t kExportOperandsCount = 64;
  static constexpr std::uint32_t kAttrOperandsOffset =
      kExportOperandsOffset + kExportOperandsCount;
  static constexpr std::uint32_t kAttrOperandsCount = 32;
  static constexpr std::uint32_t kOperandsCount =
      kAttrOperandsOffset + kAttrOperandsCount;

  static constexpr std::uint32_t kRegisterVccLoId = kScalarOperandsOffset + 106;
  static constexpr std::uint32_t kRegisterVccHiId = kScalarOperandsOffset + 107;
  static constexpr std::uint32_t kRegisterM0Id = kScalarOperandsOffset + 124;
  static constexpr std::uint32_t kRegisterExecLoId =
      kScalarOperandsOffset + 126;
  static constexpr std::uint32_t kRegisterExecHiId =
      kScalarOperandsOffset + 127;
  static constexpr std::uint32_t kRegisterSccId = kScalarOperandsOffset + 253;
  static constexpr std::uint32_t kRegisterLdsDirect =
      kScalarOperandsOffset + 254;

public:
  enum enum_type : std::uint32_t {
    Invalid = ~static_cast<std::uint32_t>(0),

    VccLo = kRegisterVccLoId,
    VccHi = kRegisterVccHiId,
    M0 = kRegisterM0Id,
    ExecLo = kRegisterExecLoId,
    ExecHi = kRegisterExecHiId,
    Scc = kRegisterSccId,
    LdsDirect = kRegisterLdsDirect,
  } raw = Invalid;

  RegisterId(enum_type value) : raw(value) {}

  operator enum_type() const { return raw; }

  static RegisterId Raw(std::uint32_t index) {
    return static_cast<enum_type>(index);
  }
  static RegisterId Scalar(std::uint32_t index) {
    return static_cast<enum_type>(index + kScalarOperandsOffset);
  }
  static RegisterId Vector(std::uint32_t index) {
    return static_cast<enum_type>(index + kVectorOperandsOffset);
  }
  static RegisterId Export(std::uint32_t index) {
    return static_cast<enum_type>(index + kExportOperandsOffset);
  }
  static RegisterId Attr(std::uint32_t index) {
    return static_cast<enum_type>(index + kAttrOperandsOffset);
  }

  bool isScalar() const {
    return raw >= kScalarOperandsOffset &&
           raw < kScalarOperandsOffset + kScalarOperandsCount;
  }
  bool isVector() const {
    return raw >= kVectorOperandsOffset &&
           raw < kVectorOperandsOffset + kVectorOperandsCount;
  }
  bool isExport() const {
    return raw >= kExportOperandsOffset &&
           raw < kExportOperandsOffset + kExportOperandsCount;
  }
  bool isAttr() const {
    return raw >= kAttrOperandsOffset &&
           raw < kAttrOperandsOffset + kAttrOperandsCount;
  }

  unsigned getOffset() const {
    if (isScalar()) {
      return raw - kScalarOperandsOffset;
    }

    if (isVector()) {
      return raw - kVectorOperandsOffset;
    }

    if (isExport()) {
      return raw - kExportOperandsOffset;
    }

    if (isAttr()) {
      return raw - kAttrOperandsOffset;
    }

    return raw;
  }
};
} // namespace amdgpu::shader
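A small sketch of how the RegisterId encoding above flattens GCN operand indices into one space (standalone example; the expectations follow directly from the constants in the class):

#include <amdgpu/shader/RegisterId.hpp>
#include <cassert>

void registerIdExample() {
  using amdgpu::shader::RegisterId;

  // Scalar operand 106 is the VCC low half, as encoded by the GCN ISA.
  assert(RegisterId::Scalar(106) == RegisterId::VccLo);

  // Vector registers live after the 256 scalar slots, so getOffset()
  // recovers the original VGPR index.
  auto v5 = RegisterId::Vector(5);
  assert(v5.isVector() && v5.getOffset() == 5);

  // Export targets and interpolated attributes get their own ranges too.
  assert(RegisterId::Export(0).isExport());
  assert(RegisterId::Attr(3).getOffset() == 3);
}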
27
hw/amdgpu/shader/include/amdgpu/shader/RegisterState.hpp
Normal file
@@ -0,0 +1,27 @@
#pragma once
#include "RegisterId.hpp"
#include "Value.hpp"
#include <cstdint>

namespace amdgpu::shader {
struct RegisterState {
  std::uint64_t pc;

  Value sgprs[104];
  Value vccLo;
  Value vccHi;
  Value m0;
  Value execLo;
  Value execHi;
  Value scc;
  Value ldsDirect;
  Value vgprs[512];
  Value attrs[32];

  Value getRegister(RegisterId regId);
  void setRegister(RegisterId regId, Value value);

private:
  Value getRegisterImpl(RegisterId regId);
};
} // namespace amdgpu::shader
5
hw/amdgpu/shader/include/amdgpu/shader/Stage.hpp
Normal file
@@ -0,0 +1,5 @@
#pragma once

namespace amdgpu::shader {
enum class Stage { None, Vertex, Fragment, Geometry, Compute };
}
57
hw/amdgpu/shader/include/amdgpu/shader/TypeId.hpp
Normal file
@@ -0,0 +1,57 @@
#pragma once

#include <cstddef>

namespace amdgpu::shader {
struct TypeId {
  enum {
    Bool,
    SInt8,
    UInt8,
    SInt16,
    UInt16,
    SInt32,
    UInt32,
    UInt32x2,
    UInt32x3,
    UInt32x4,
    UInt64,
    SInt64,
    ArrayUInt32x8,
    ArrayUInt32x16,
    Float16,
    Float32,
    Float32x2,
    Float32x3,
    Float32x4,
    Float64,
    ArrayFloat32x8,
    ArrayFloat32x16,
    Sampler,
    Image2D,
    SampledImage2D,

    Void // should be last
  } raw = Void;

  using enum_type = decltype(raw);

  TypeId() = default;
  TypeId(enum_type value) : raw(value) {}
  operator enum_type() const { return raw; }

  TypeId getBaseType() const;
  std::size_t getSize() const;
  std::size_t getElementsCount() const;

  bool isSignedInt() const {
    return raw == TypeId::SInt8 || raw == TypeId::SInt16 ||
           raw == TypeId::SInt32 || raw == TypeId::SInt64;
  }

  bool isFloatPoint() const {
    return raw == TypeId::Float16 || raw == TypeId::Float32 ||
           raw == TypeId::Float64;
  }
};
} // namespace amdgpu::shader
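A brief illustrative use of the TypeId helpers declared above (getBaseType()/getSize() are implemented in TypeId.cpp, whose diff is not shown here; this caller is hypothetical):

#include <amdgpu/shader/TypeId.hpp>

void typeIdExample() {
  using amdgpu::shader::TypeId;

  // isFloatPoint() matches only the scalar float ids, so a vector id
  // such as Float32x4 reports false here.
  TypeId f32x4 = TypeId::Float32x4;
  bool scalarFloat = f32x4.isFloatPoint();                 // false
  bool signedInt = TypeId(TypeId::SInt32).isSignedInt();   // true
  (void)scalarFloat;
  (void)signedInt;
}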
20
hw/amdgpu/shader/include/amdgpu/shader/Uniform.hpp
Normal file
@@ -0,0 +1,20 @@
#pragma once

#include "AccessOp.hpp"
#include "TypeId.hpp"
#include "spirv/spirv-builder.hpp"

#include <cstdint>
#include <set>

namespace amdgpu::shader {
struct UniformInfo {
  std::uint32_t buffer[8];
  int index;
  TypeId typeId;
  spirv::PointerType type;
  spirv::VariableValue variable;
  AccessOp accessOp = AccessOp::None;
  bool isBuffer;
};
} // namespace amdgpu::shader
15
hw/amdgpu/shader/include/amdgpu/shader/Value.hpp
Normal file
@@ -0,0 +1,15 @@
#pragma once
#include <spirv/spirv-builder.hpp>

namespace amdgpu::shader {
struct Value {
  spirv::Type type;
  spirv::Value value;

  Value() = default;
  Value(spirv::Type type, spirv::Value value) : type(type), value(value) {}

  explicit operator bool() const { return static_cast<bool>(value); }
  bool operator==(Value other) const { return value == other.value; }
};
} // namespace amdgpu::shader
146
hw/amdgpu/shader/include/amdgpu/shader/cf.hpp
Normal file
@@ -0,0 +1,146 @@
#pragma once

#include <cstdint>
#include <map>
#include <set>
#include <vector>

namespace cf {
enum class TerminatorKind {
  None,
  Branch,
  BranchToUnknown,
  Return,
};

class BasicBlock {
  std::uint64_t address;
  std::uint64_t size = 0;

  std::set<BasicBlock *> predecessors;
  BasicBlock *successors[2]{};
  TerminatorKind terminator = TerminatorKind::None;

public:
  explicit BasicBlock(std::uint64_t address, std::uint64_t size = 0)
      : address(address), size(size) {}

  BasicBlock(const BasicBlock &) = delete;

  void setSize(std::uint64_t newSize) { size = newSize; }
  std::uint64_t getSize() const { return size; }
  std::uint64_t getAddress() const { return address; }
  TerminatorKind getTerminator() const { return terminator; }

  void createConditionalBranch(BasicBlock *ifTrue, BasicBlock *ifFalse);
  void createBranch(BasicBlock *target);
  void createBranchToUnknown();
  void createReturn();

  void replaceSuccessor(BasicBlock *origBB, BasicBlock *newBB);
  void replacePredecessor(BasicBlock *origBB, BasicBlock *newBB) {
    origBB->replaceSuccessor(this, newBB);
  }

  template <std::invocable<BasicBlock &> T> void walk(T &&cb) {
    std::vector<BasicBlock *> workStack;
    std::set<BasicBlock *> processed;

    workStack.push_back(this);
    processed.insert(this);

    while (!workStack.empty()) {
      auto block = workStack.back();
      workStack.pop_back();

      block->walkSuccessors([&](BasicBlock *successor) {
        if (processed.insert(successor).second) {
          workStack.push_back(successor);
        }
      });

      cb(*block);
    }
  }

  template <std::invocable<BasicBlock *> T> void walkSuccessors(T &&cb) const {
    if (successors[0]) {
      cb(successors[0]);

      if (successors[1]) {
        cb(successors[1]);
      }
    }
  }

  template <std::invocable<BasicBlock *> T> void walkPredecessors(T &&cb) const {
    for (auto pred : predecessors) {
      cb(pred);
    }
  }

  std::size_t getPredecessorsCount() const { return predecessors.size(); }

  bool hasDirectPredecessor(const BasicBlock &block) const;
  bool hasPredecessor(const BasicBlock &block) const;

  std::size_t getSuccessorsCount() const {
    if (successors[0] == nullptr) {
      return 0;
    }

    return successors[1] != nullptr ? 2 : 1;
  }

  BasicBlock *getSuccessor(std::size_t index) const { return successors[index]; }

  void split(BasicBlock *target);
};

class Context {
  std::map<std::uint64_t, BasicBlock, std::greater<>> basicBlocks;

public:
  BasicBlock *getBasicBlockAt(std::uint64_t address) {
    if (auto it = basicBlocks.find(address); it != basicBlocks.end()) {
      return &it->second;
    }

    return nullptr;
  }

  BasicBlock *getBasicBlock(std::uint64_t address) {
    if (auto it = basicBlocks.lower_bound(address); it != basicBlocks.end()) {
      auto bb = &it->second;

      if (bb->getAddress() <= address &&
          bb->getAddress() + bb->getSize() > address) {
        return bb;
      }
    }

    return nullptr;
  }

  BasicBlock *getOrCreateBasicBlock(std::uint64_t address, bool split = true) {
    auto it = basicBlocks.lower_bound(address);

    if (it != basicBlocks.end()) {
      auto bb = &it->second;

      if (bb->getAddress() <= address &&
          bb->getAddress() + bb->getSize() > address) {
        if (split && bb->getAddress() != address) {
          auto result = &basicBlocks.emplace_hint(it, address, address)->second;
          bb->split(result);
          return result;
        }

        return bb;
      }
    }

    return &basicBlocks.emplace_hint(it, address, address)->second;
  }
};
} // namespace cf
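A minimal sketch of building a toy control-flow graph with the cf API above and walking it (the addresses and the surrounding function are made up for illustration; the branch/return builders are declared here and defined elsewhere in the project):

#include <amdgpu/shader/cf.hpp>
#include <cstdio>

void cfExample() {
  cf::Context context;

  // Entry block at 0x1000 that conditionally branches to 0x1040 / 0x1080.
  auto entry = context.getOrCreateBasicBlock(0x1000);
  entry->setSize(0x40);
  auto ifTrue = context.getOrCreateBasicBlock(0x1040);
  auto ifFalse = context.getOrCreateBasicBlock(0x1080);
  entry->createConditionalBranch(ifTrue, ifFalse);
  ifTrue->createReturn();
  ifFalse->createReturn();

  // walk() visits every block reachable from the entry exactly once.
  entry->walk([](cf::BasicBlock &bb) {
    std::printf("bb at %lx, %zu successor(s)\n", bb.getAddress(),
                bb.getSuccessorsCount());
  });
}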
371
hw/amdgpu/shader/include/amdgpu/shader/scf.hpp
Normal file
@@ -0,0 +1,371 @@
#pragma once

#include <cassert>
#include <cstdint>
#include <forward_list>
#include <functional>
#include <memory>

namespace cf {
class BasicBlock;
}

namespace scf {
class BasicBlock;
struct PrintOptions {
  unsigned char identCount = 2;
  char identChar = ' ';
  std::function<void(const PrintOptions &, unsigned depth, BasicBlock *)>
      blockPrinter;

  std::string makeIdent(unsigned depth) const {
    return std::string(depth * identCount, identChar);
  }
};

class Node {
  Node *mParent = nullptr;
  Node *mNext = nullptr;
  Node *mPrev = nullptr;

public:
  virtual ~Node() = default;
  virtual void print(const PrintOptions &options, unsigned depth) = 0;
  virtual bool isEqual(const Node &other) const {
    return this == &other;
  }

  void dump() {
    print({}, 0);
  }

  void setParent(Node *parent) {
    mParent = parent;
  }

  Node *getParent() const {
    return mParent;
  }

  template <typename T> requires(std::is_base_of_v<Node, T>)
  auto getParent() const -> decltype(dynCast<T>(mParent)) {
    return dynCast<T>(mParent);
  }

  Node *getNext() const {
    return mNext;
  }

  Node *getPrev() const {
    return mPrev;
  }

  friend class Block;
};

template <typename T, typename ST>
  requires(std::is_base_of_v<Node, T> && std::is_base_of_v<Node, ST>) &&
          requires(ST *s) { dynamic_cast<T *>(s); }
T *dynCast(ST *s) {
  return dynamic_cast<T *>(s);
}

template <typename T, typename ST>
  requires(std::is_base_of_v<Node, T> && std::is_base_of_v<Node, ST>) &&
          requires(const ST *s) { dynamic_cast<const T *>(s); }
const T *dynCast(const ST *s) {
  return dynamic_cast<const T *>(s);
}

inline bool isNodeEqual(const Node *lhs, const Node *rhs) {
  if (lhs == rhs) {
    return true;
  }

  return lhs != nullptr && rhs != nullptr && lhs->isEqual(*rhs);
}

struct UnknownBlock final : Node {
  void print(const PrintOptions &options, unsigned depth) override {
    std::printf("%sunknown\n", options.makeIdent(depth).c_str());
  }

  bool isEqual(const Node &other) const override {
    return this == &other || dynCast<UnknownBlock>(&other) != nullptr;
  }
};

struct Return final : Node {
  void print(const PrintOptions &options, unsigned depth) override {
    std::printf("%sreturn\n", options.makeIdent(depth).c_str());
  }

  bool isEqual(const Node &other) const override {
    return this == &other || dynCast<Return>(&other) != nullptr;
  }
};


class Context;

class Block final : public Node {
  Node *mBegin = nullptr;
  Node *mEnd = nullptr;

  void *mUserData = nullptr;

public:
  void print(const PrintOptions &options, unsigned depth) override {
    std::printf("%s{\n", options.makeIdent(depth).c_str());

    for (auto node = mBegin; node != nullptr; node = node->getNext()) {
      node->print(options, depth + 1);
    }
    std::printf("%s}\n", options.makeIdent(depth).c_str());
  }

  bool isEmpty() const {
    return mBegin == nullptr;
  }

  Node *getRootNode() const {
    return mBegin;
  }
  Node *getLastNode() const {
    return mEnd;
  }

  void setUserData(void *data) {
    mUserData = data;
  }
  void *getUserData() const {
    return mUserData;
  }
  template <typename T>
  T *getUserData() const {
    return static_cast<T *>(mUserData);
  }

  void eraseFrom(Node *endBefore);
  void splitInto(Block *target, Node *splitPoint);
  Block *split(Context &context, Node *splitPoint);

  void append(Node *node) {
    assert(node->mParent == nullptr);
    assert(node->mPrev == nullptr);
    assert(node->mNext == nullptr);

    node->mParent = this;
    node->mPrev = mEnd;

    if (mEnd != nullptr) {
      mEnd->mNext = node;
    }

    if (mBegin == nullptr) {
      mBegin = node;
    }

    mEnd = node;
  }

  void detachNode(Node *node) {
    if (node->mPrev != nullptr) {
      node->mPrev->mNext = node->mNext;
    }

    if (node->mNext != nullptr) {
      node->mNext->mPrev = node->mPrev;
    }

    if (mBegin == node) {
      mBegin = node->mNext;
    }

    if (mEnd == node) {
      mEnd = node->mPrev;
    }

    node->mNext = nullptr;
    node->mPrev = nullptr;
    node->mParent = nullptr;
  }

  bool isEqual(const Node &other) const override {
    if (this == &other) {
      return true;
    }

    auto otherBlock = dynCast<Block>(&other);

    if (otherBlock == nullptr) {
      return false;
    }

    auto thisIt = mBegin;
    auto otherIt = otherBlock->mBegin;

    while (thisIt != nullptr && otherIt != nullptr) {
      if (!thisIt->isEqual(*otherIt)) {
        return false;
      }

      thisIt = thisIt->mNext;
      otherIt = otherIt->mNext;
    }

    return thisIt == otherIt;
  }
};

class BasicBlock final : public Node {
  std::uint64_t address;
  std::uint64_t size = 0;

public:
  explicit BasicBlock(std::uint64_t address, std::uint64_t size = 0)
      : address(address), size(size) {}

  std::uint64_t getSize() const { return size; }
  std::uint64_t getAddress() const { return address; }

  void print(const PrintOptions &options, unsigned depth) override {
    std::printf(
        "%sbb%lx\n",
        std::string(depth * options.identCount, options.identChar).c_str(),
        getAddress());
    if (depth != 0 && options.blockPrinter) {
      options.blockPrinter(options, depth + 1, this);
    }
  }

  Block *getBlock() const {
    return dynCast<Block>(getParent());
  }

  bool isEqual(const Node &other) const override {
    if (this == &other) {
      return true;
    }

    if (auto otherBlock = dynCast<BasicBlock>(&other)) {
      return address == otherBlock->address;
    }

    return false;
  }
};

struct IfElse final : Node {
  Block *ifTrue;
  Block *ifFalse;

  IfElse(Block *ifTrue, Block *ifFalse) : ifTrue(ifTrue), ifFalse(ifFalse) {
    ifTrue->setParent(this);
    ifFalse->setParent(this);
  }

  void print(const PrintOptions &options, unsigned depth) override {
    if (ifTrue->isEmpty()) {
      std::printf("%sif false\n", options.makeIdent(depth).c_str());
      ifFalse->print(options, depth);
      return;
    }

    std::printf("%sif true\n", options.makeIdent(depth).c_str());
    ifTrue->print(options, depth);
    if (!ifFalse->isEmpty()) {
      std::printf("%selse\n", options.makeIdent(depth).c_str());
      ifFalse->print(options, depth);
    }
  }

  bool isEqual(const Node &other) const override {
    if (this == &other) {
      return true;
    }

    if (auto otherBlock = dynCast<IfElse>(&other)) {
      return ifTrue->isEqual(*otherBlock->ifTrue) &&
             ifFalse->isEqual(*otherBlock->ifFalse);
    }

    return false;
  }
};

struct Jump final : Node {
  BasicBlock *target;

  Jump(BasicBlock *target) : target(target) {}

  bool isEqual(const Node &other) const override {
    if (this == &other) {
      return true;
    }

    if (auto otherJump = dynCast<Jump>(&other)) {
      return target == otherJump->target;
    }

    return false;
  }

  void print(const PrintOptions &options, unsigned depth) override {
    std::printf("%sjump ", options.makeIdent(depth).c_str());
    target->print(options, 0);
  }
};

struct Loop final : Node {
  Block *body;

  Loop(Block *body) : body(body) {
    body->setParent(this);
  }

  bool isEqual(const Node &other) const override {
    if (this == &other) {
      return true;
    }

    if (auto otherLoop = dynCast<Loop>(&other)) {
      return body->isEqual(*otherLoop->body);
    }

    return false;
  }

  void print(const PrintOptions &options, unsigned depth) override {
    std::printf("%sloop {\n", options.makeIdent(depth).c_str());
    body->print(options, depth + 1);
    std::printf("%s}\n", options.makeIdent(depth).c_str());
  }
};

struct Break final : Node {
  bool isEqual(const Node &other) const override {
    return this == &other || dynCast<Break>(&other) != nullptr;
  }

  void print(const PrintOptions &options, unsigned depth) override {
    std::printf("%sbreak\n", options.makeIdent(depth).c_str());
  }
};

class Context {
  std::forward_list<std::unique_ptr<Node>> mNodes;

public:
  template <typename T, typename... ArgsT>
    requires(std::is_constructible_v<T, ArgsT...>)
  T *create(ArgsT &&...args) {
    auto result = new T(std::forward<ArgsT>(args)...);
    mNodes.push_front(std::unique_ptr<Node>{result});
    return result;
  }
};

scf::Block *structurize(Context &ctxt, cf::BasicBlock *bb);
void makeUniqueBasicBlocks(Context &ctxt, Block *block);
} // namespace scf
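A sketch of the intended flow from the unstructured cf graph to the structured scf form, based on the declarations above and mirroring how Converter.cpp below uses them (the wrapper function and its name are hypothetical):

#include <amdgpu/shader/CfBuilder.hpp>
#include <amdgpu/shader/scf.hpp>

// Recover structured control flow for the shader code at `entryPoint`.
scf::Block *structurizeShader(amdgpu::RemoteMemory memory,
                              std::uint64_t entryPoint,
                              scf::Context &scfContext) {
  cf::Context cfContext;
  auto entryBB = amdgpu::shader::buildCf(cfContext, memory, entryPoint);

  // structurize() rebuilds the basic blocks as nested Block/IfElse/Loop
  // nodes owned by scfContext.
  auto structured = scf::structurize(scfContext, entryBB);
  structured->dump(); // prints the structured tree via Node::print
  return structured;
}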
187
hw/amdgpu/shader/src/CfBuilder.cpp
Normal file
@@ -0,0 +1,187 @@
#include "CfBuilder.hpp"
#include "Instruction.hpp"
#include <cassert>
#include <amdgpu/RemoteMemory.hpp>
#include <unordered_set>

using namespace amdgpu;
using namespace amdgpu::shader;

struct CfgBuilder {
  cf::Context *context;
  RemoteMemory memory;

  std::size_t analyzeBb(cf::BasicBlock *bb, std::uint64_t *successors,
                        std::size_t *successorsCount, auto pushWork) {
    auto address = bb->getAddress();
    auto instBegin = memory.getPointer<std::uint32_t>(address);
    auto instHex = instBegin;

    while (true) {
      auto instruction = Instruction(instHex);
      auto size = instruction.size();
      auto pc = address + ((instHex - instBegin) << 2);
      instHex += size;

      if (instruction.instClass == InstructionClass::Sop1) {
        Sop1 sop1{instHex - size};

        if (sop1.op == Sop1::Op::S_SETPC_B64 ||
            sop1.op == Sop1::Op::S_SWAPPC_B64) {
          bb->createBranchToUnknown();
          break;
        }

        continue;
      }

      if (instruction.instClass == InstructionClass::Sopp) {
        Sopp sopp{instHex - size};

        if (sopp.op == Sopp::Op::S_ENDPGM) {
          bb->createReturn();
          break;
        }

        bool isEnd = false;
        switch (sopp.op) {
        case Sopp::Op::S_BRANCH:
          successors[0] = pc + ((size + sopp.simm) << 2);
          *successorsCount = 1;

          isEnd = true;
          break;

        case Sopp::Op::S_CBRANCH_SCC0:
        case Sopp::Op::S_CBRANCH_SCC1:
        case Sopp::Op::S_CBRANCH_VCCZ:
        case Sopp::Op::S_CBRANCH_VCCNZ:
        case Sopp::Op::S_CBRANCH_EXECZ:
        case Sopp::Op::S_CBRANCH_EXECNZ:
          successors[0] = pc + ((size + sopp.simm) << 2);
          successors[1] = pc + (size << 2);
          *successorsCount = 2;
          isEnd = true;
          break;

        default:
          break;
        }

        if (isEnd) {
          break;
        }
        continue;
      }

      // move instruction that requires EXEC test to separate bb
      if (instruction.instClass == InstructionClass::Vop2 ||
          instruction.instClass == InstructionClass::Vop3 ||
          instruction.instClass == InstructionClass::Mubuf ||
          instruction.instClass == InstructionClass::Mtbuf ||
          instruction.instClass == InstructionClass::Mimg ||
          instruction.instClass == InstructionClass::Ds ||
          instruction.instClass == InstructionClass::Vintrp ||
          instruction.instClass == InstructionClass::Exp ||
          instruction.instClass == InstructionClass::Vop1 ||
          instruction.instClass == InstructionClass::Vopc ||
          instruction.instClass == InstructionClass::Smrd) {
        *successorsCount = 1;

        if (instBegin != instHex - size) {
          // if it is not first instruction in block, move end to prev
          // instruction, successor is current instruction
          instHex -= size;
          successors[0] = pc;
          break;
        }

        successors[0] = pc + (size << 2);
        break;
      }
    }

    return (instHex - instBegin) << 2;
  }

  cf::BasicBlock *buildCfg(std::uint64_t entryPoint) {
    std::vector<std::uint64_t> workList;
    workList.push_back(entryPoint);
    std::unordered_set<std::uint64_t> processed;
    processed.insert(entryPoint);

    struct BranchInfo {
      std::uint64_t source;
      std::size_t count;
      std::uint64_t targets[2];
    };

    std::vector<BranchInfo> branches;

    while (!workList.empty()) {
      auto address = workList.back();
      workList.pop_back();

      auto bb = context->getOrCreateBasicBlock(address);

      if (bb->getSize() != 0) {
        continue;
      }

      std::uint64_t successors[2];
      std::size_t successorsCount = 0;
      std::size_t size = analyzeBb(bb, successors, &successorsCount,
                                   [&](std::uint64_t address) {
                                     if (processed.insert(address).second) {
                                       workList.push_back(address);
                                     }
                                   });
      bb->setSize(size);

      if (successorsCount == 2) {
        auto succ0Address = successors[0];
        auto succ1Address = successors[1];

        branches.push_back(
            {address + size - 4, 2, {successors[0], successors[1]}});

        if (processed.insert(successors[0]).second) {
          workList.push_back(successors[0]);
        }
        if (processed.insert(successors[1]).second) {
          workList.push_back(successors[1]);
        }
      } else if (successorsCount == 1) {
        branches.push_back({address + size - 4, 1, {successors[0]}});

        if (processed.insert(successors[0]).second) {
          workList.push_back(successors[0]);
        }
      }
    }

    for (auto branch : branches) {
      auto bb = context->getBasicBlock(branch.source);
      assert(bb);
      if (branch.count == 2) {
        bb->createConditionalBranch(
            context->getBasicBlockAt(branch.targets[0]),
            context->getBasicBlockAt(branch.targets[1]));
      } else {
        bb->createBranch(context->getBasicBlockAt(branch.targets[0]));
      }
    }

    return context->getBasicBlockAt(entryPoint);
  }
};

cf::BasicBlock *amdgpu::shader::buildCf(cf::Context &ctxt,
                                        RemoteMemory memory,
                                        std::uint64_t entryPoint) {
  CfgBuilder builder;
  builder.context = &ctxt;
  builder.memory = memory;

  return builder.buildCfg(entryPoint);
}
389
hw/amdgpu/shader/src/Converter.cpp
Normal file
@@ -0,0 +1,389 @@
#include "Converter.hpp"
#include "CfBuilder.hpp"
#include "ConverterContext.hpp"
#include "Fragment.hpp"
#include "FragmentTerminator.hpp"
#include "Instruction.hpp"
#include "RegisterId.hpp"
#include "RegisterState.hpp"
#include "cf.hpp"
#include "amdgpu/RemoteMemory.hpp"
#include "scf.hpp"
#include "util/unreachable.hpp"
#include <compare>
#include <cstddef>
#include <forward_list>
#include <memory>
#include <spirv/spirv.hpp>
#include <unordered_map>
#include <utility>
#include <vector>

static void printInstructions(const scf::PrintOptions &options, unsigned depth,
                              std::uint32_t *instBegin, std::size_t size) {
  auto instHex = instBegin;
  auto instEnd = instBegin + size / sizeof(std::uint32_t);

  while (instHex < instEnd) {
    auto instruction = amdgpu::shader::Instruction(instHex);
    std::printf("%s", options.makeIdent(depth).c_str());
    instruction.dump();
    std::printf("\n");
    instHex += instruction.size();
  }
}

namespace amdgpu::shader {
class Converter {
  scf::Context *scfContext;
  cf::Context cfContext;
  RemoteMemory memory;
  Function *function = nullptr;
  std::forward_list<RegisterState> states;
  std::vector<RegisterState *> freeStates;

public:
  void convertFunction(RemoteMemory mem, scf::Context *scfCtxt,
                       scf::Block *block, Function *fn) {
    scfContext = scfCtxt;
    function = fn;
    memory = mem;

    auto lastFragment = convertBlock(block, &function->entryFragment);

    if (lastFragment != nullptr) {
      lastFragment->builder.createBranch(fn->exitFragment.entryBlockId);
      lastFragment->appendBranch(fn->exitFragment);
    }

    initState(&fn->exitFragment);
  }

private:
  RegisterState *allocateState() {
    if (freeStates.empty()) {
      return &states.emplace_front();
    }

    auto result = freeStates.back();
    freeStates.pop_back();
    *result = {};
    return result;
  }

  void releaseState(RegisterState *state) {
    assert(state != nullptr);
    freeStates.push_back(state);
  }

  void initState(Fragment *fragment, std::uint64_t address = 0) {
    if (fragment->registers == nullptr) {
      fragment->registers = allocateState();
    }

    if (address != 0) {
      fragment->registers->pc = address;
    }

    fragment->injectValuesFromPreds();
    fragment->predecessors.clear();
  }

  void releaseStateOf(Fragment *frag) {
    releaseState(frag->registers);
    frag->registers = nullptr;
    frag->values = {};
    frag->outputs = {};
  }

  bool needInjectExecTest(Fragment *fragment) {
    auto inst = memory.getPointer<std::uint32_t>(fragment->registers->pc);
    auto instClass = getInstructionClass(*inst);
    return instClass == InstructionClass::Vop2 ||
           instClass == InstructionClass::Vop3 ||
           instClass == InstructionClass::Mubuf ||
           instClass == InstructionClass::Mtbuf ||
           instClass == InstructionClass::Mimg ||
           instClass == InstructionClass::Ds ||
           instClass == InstructionClass::Vintrp ||
           instClass == InstructionClass::Exp ||
           instClass == InstructionClass::Vop1 ||
           instClass == InstructionClass::Vopc/* ||
           instClass == InstructionClass::Smrd*/;
  }

  spirv::BoolValue createExecTest(Fragment *fragment) {
    auto context = fragment->context;
    auto &builder = fragment->builder;
    auto boolT = context->getBoolType();
    auto uint32_0 = context->getUInt32(0);
    auto loIsNotZero =
        builder.createINotEqual(boolT, fragment->getExecLo().value, uint32_0);
    auto hiIsNotZero =
        builder.createINotEqual(boolT, fragment->getExecHi().value, uint32_0);

    return builder.createLogicalOr(boolT, loIsNotZero, hiIsNotZero);
  }

  Fragment *convertBlock(scf::Block *block, Fragment *rootFragment) {
    Fragment *currentFragment = nullptr;

    for (scf::Node *node = block->getRootNode(); node != nullptr;
         node = node->getNext()) {

      if (auto bb = dynCast<scf::BasicBlock>(node)) {
        if (currentFragment == nullptr) {
          currentFragment = rootFragment;
        } else {
          auto newFragment = function->createFragment();
          currentFragment->appendBranch(*newFragment);
          currentFragment->builder.createBranch(newFragment->entryBlockId);
          currentFragment = newFragment;
        }

        initState(currentFragment, bb->getAddress());
        for (auto pred : currentFragment->predecessors) {
          releaseStateOf(pred);
        }

        if (needInjectExecTest(currentFragment)) {
          auto bodyFragment = function->createFragment();
          auto mergeFragment = function->createFragment();

          auto cond = createExecTest(currentFragment);

          currentFragment->appendBranch(*bodyFragment);
          currentFragment->appendBranch(*mergeFragment);
          currentFragment->builder.createSelectionMerge(
              mergeFragment->entryBlockId, {});
          currentFragment->builder.createBranchConditional(
              cond, bodyFragment->entryBlockId, mergeFragment->entryBlockId);

          initState(bodyFragment, bb->getAddress());
          bodyFragment->convert(bb->getSize());

          bodyFragment->appendBranch(*mergeFragment);
          bodyFragment->builder.createBranch(mergeFragment->entryBlockId);

          initState(mergeFragment);
          releaseState(currentFragment->registers);
          releaseState(bodyFragment->registers);

          currentFragment = mergeFragment;
        } else {
          currentFragment->convert(bb->getSize());
        }
        continue;
      }

      if (auto ifElse = dynCast<scf::IfElse>(node)) {
        auto ifTrueFragment = function->createFragment();
        auto ifFalseFragment = function->createFragment();
        auto mergeFragment = function->createFragment();

        currentFragment->appendBranch(*ifTrueFragment);
        currentFragment->appendBranch(*ifFalseFragment);

        currentFragment->builder.createSelectionMerge(
            mergeFragment->entryBlockId, {});
        currentFragment->builder.createBranchConditional(
            currentFragment->branchCondition, ifTrueFragment->entryBlockId,
            ifFalseFragment->entryBlockId);

        auto ifTrueLastBlock = convertBlock(ifElse->ifTrue, ifTrueFragment);
        auto ifFalseLastBlock = convertBlock(ifElse->ifFalse, ifFalseFragment);

        if (ifTrueLastBlock != nullptr) {
          ifTrueLastBlock->builder.createBranch(mergeFragment->entryBlockId);
          ifTrueLastBlock->appendBranch(*mergeFragment);

          if (ifTrueLastBlock->registers == nullptr) {
            initState(ifTrueLastBlock);
          }
        }

        if (ifFalseLastBlock != nullptr) {
          ifFalseLastBlock->builder.createBranch(mergeFragment->entryBlockId);
          ifFalseLastBlock->appendBranch(*mergeFragment);

          if (ifFalseLastBlock->registers == nullptr) {
            initState(ifFalseLastBlock);
          }
        }

        releaseStateOf(currentFragment);
        initState(mergeFragment);

        if (ifTrueLastBlock != nullptr) {
          releaseStateOf(ifTrueLastBlock);
        }

        if (ifFalseLastBlock != nullptr) {
          releaseStateOf(ifFalseLastBlock);
        }
        currentFragment = mergeFragment;
        continue;
      }

      if (dynCast<scf::UnknownBlock>(node)) {
        auto jumpAddress = currentFragment->jumpAddress;

        std::printf("jump to %lx\n", jumpAddress);
        std::fflush(stdout);

        if (jumpAddress == 0) {
          util::unreachable("no jump register on unknown block");
        }

        auto block = buildCf(cfContext, memory, jumpAddress);
        auto basicBlockPrinter = [this](const scf::PrintOptions &opts,
                                        unsigned depth, scf::BasicBlock *bb) {
          printInstructions(opts, depth,
                            memory.getPointer<std::uint32_t>(bb->getAddress()),
                            bb->getSize());
        };
        auto scfBlock = scf::structurize(*scfContext, block);
        scfBlock->print({.blockPrinter = basicBlockPrinter}, 0);
        std::fflush(stdout);

        auto targetFragment = function->createFragment();
        currentFragment->builder.createBranch(targetFragment->entryBlockId);
        currentFragment->appendBranch(*targetFragment);
        auto result = convertBlock(scfBlock, targetFragment);

        if (currentFragment->registers == nullptr) {
          initState(targetFragment);
          releaseStateOf(currentFragment);
        }

        return result;
      }

      if (dynCast<scf::Return>(node)) {
        currentFragment->appendBranch(function->exitFragment);
        currentFragment->builder.createBranch(
            function->exitFragment.entryBlockId);
        return nullptr;
      }

      util::unreachable();
    }

    return currentFragment != nullptr ? currentFragment : rootFragment;
  }
};
} // namespace amdgpu::shader

amdgpu::shader::Shader amdgpu::shader::convert(
    RemoteMemory memory, Stage stage, std::uint64_t entry,
    std::span<const std::uint32_t> userSpgrs, int bindingOffset,
    std::uint32_t dimX, std::uint32_t dimY, std::uint32_t dimZ) {
  ConverterContext ctxt(memory, stage);
  auto &builder = ctxt.getBuilder();
  builder.createCapability(spv::Capability::Shader);
  builder.createCapability(spv::Capability::ImageQuery);
  builder.createCapability(spv::Capability::ImageBuffer);
  builder.createCapability(spv::Capability::UniformAndStorageBuffer8BitAccess);
  builder.createCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
  builder.createCapability(spv::Capability::Int64);
  builder.setMemoryModel(spv::AddressingModel::Logical,
                         spv::MemoryModel::GLSL450);

  scf::Context scfContext;
  scf::Block *entryBlock = nullptr;
  {
    cf::Context cfContext;
    auto entryBB = buildCf(cfContext, memory, entry);
    entryBlock = scf::structurize(scfContext, entryBB);
  }

  std::printf("========== stage: %u, user sgprs: %zu\n", (unsigned)stage,
              userSpgrs.size());
  std::printf("structurized CFG:\n");

  auto basicBlockPrinter = [memory](const scf::PrintOptions &opts,
                                    unsigned depth, scf::BasicBlock *bb) {
    printInstructions(opts, depth,
                      memory.getPointer<std::uint32_t>(bb->getAddress()),
                      bb->getSize());
  };

  entryBlock->print({.blockPrinter = basicBlockPrinter}, 0);
  std::printf("==========\n");

  auto mainFunction = ctxt.createFunction(0);
  mainFunction->userSgprs = userSpgrs;
  mainFunction->stage = stage;

  Converter converter;
  converter.convertFunction(memory, &scfContext, entryBlock, mainFunction);

  Shader result;

  std::fflush(stdout);
  mainFunction->exitFragment.outputs.clear();

  for (auto &uniform : ctxt.getUniforms()) {
    auto &newUniform = result.uniforms.emplace_back();
    newUniform.binding = bindingOffset++;

    for (int i = 0; i < 8; ++i) {
      newUniform.buffer[i] = uniform.buffer[i];
    }

    std::uint32_t descriptorSet = 0;

    ctxt.getBuilder().createDecorate(
        uniform.variable, spv::Decoration::DescriptorSet, {{descriptorSet}});
    ctxt.getBuilder().createDecorate(uniform.variable, spv::Decoration::Binding,
                                     {{newUniform.binding}});

    switch (uniform.typeId) {
    case TypeId::Sampler:
      newUniform.kind = Shader::UniformKind::Sampler;
      break;
    case TypeId::Image2D:
      newUniform.kind = Shader::UniformKind::Image;
      break;
    default:
      newUniform.kind = Shader::UniformKind::Buffer;
      break;
    }

    newUniform.accessOp = uniform.accessOp;
  }

  mainFunction->insertReturn();

  for (auto frag : mainFunction->fragments) {
    mainFunction->builder.insertBlock(frag->builder);
  }

  mainFunction->builder.insertBlock(mainFunction->exitFragment.builder);

  builder.insertFunction(mainFunction->builder, mainFunction->getResultType(),
                         spv::FunctionControlMask::MaskNone,
                         mainFunction->getFunctionType());

  if (stage == Stage::Vertex) {
    builder.createEntryPoint(spv::ExecutionModel::Vertex,
                             mainFunction->builder.id, "main",
                             ctxt.getInterfaces());
  } else if (stage == Stage::Fragment) {
    builder.createEntryPoint(spv::ExecutionModel::Fragment,
                             mainFunction->builder.id, "main",
                             ctxt.getInterfaces());
    builder.createExecutionMode(mainFunction->builder.id,
                                spv::ExecutionMode::OriginUpperLeft, {});
  } else if (stage == Stage::Compute) {
    builder.createEntryPoint(spv::ExecutionModel::GLCompute,
                             mainFunction->builder.id, "main",
                             ctxt.getInterfaces());
    builder.createExecutionMode(mainFunction->builder.id,
                                spv::ExecutionMode::LocalSize,
                                {{dimX, dimY, dimZ}});
  }

  result.spirv = ctxt.getBuilder().build(SPV_VERSION, 0);
  return result;
}
567
hw/amdgpu/shader/src/ConverterContext.cpp
Normal file
567
hw/amdgpu/shader/src/ConverterContext.cpp
Normal file
|
|
@ -0,0 +1,567 @@
|
|||
#include "ConverterContext.hpp"
|
||||
#include "util/unreachable.hpp"
|
||||
using namespace amdgpu::shader;
|
||||
|
||||
std::optional<TypeId> ConverterContext::getTypeIdOf(spirv::Type type) const {
|
||||
for (int i = 0; i < kGenericTypesCount; ++i) {
|
||||
if (mTypes[i] == type) {
|
||||
return static_cast<TypeId::enum_type>(i);
|
||||
}
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
spirv::StructType
|
||||
ConverterContext::findStructType(std::span<const spirv::Type> members) {
|
||||
for (auto &structType : mStructTypes) {
|
||||
if (structType.match(members)) {
|
||||
return structType.id;
|
||||
}
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
spirv::StructType
|
||||
ConverterContext::getStructType(std::span<const spirv::Type> members) {
|
||||
for (auto &structType : mStructTypes) {
|
||||
if (structType.match(members)) {
|
||||
return structType.id;
|
||||
}
|
||||
}
|
||||
|
||||
auto &newType = mStructTypes.emplace_back();
|
||||
newType.id = mBuilder.createTypeStruct(members);
|
||||
newType.members.reserve(members.size());
|
||||
for (auto member : members) {
|
||||
newType.members.push_back(member);
|
||||
}
|
||||
return newType.id;
|
||||
}
|
||||
|
||||
spirv::PointerType
|
||||
ConverterContext::getStructPointerType(spv::StorageClass storageClass,
|
||||
spirv::StructType structType) {
|
||||
StructTypeEntry *entry = nullptr;
|
||||
// The loop variable used to shadow the structType parameter, making the comparison always compare an entry with itself.
for (auto &entryCandidate : mStructTypes) {
  if (entryCandidate.id != structType.id) {
    continue;
  }

  entry = &entryCandidate;
  break;
}
|
||||
|
||||
if (entry == nullptr) {
|
||||
util::unreachable("Struct type not found");
|
||||
}
|
||||
|
||||
auto &ptrType = entry->ptrTypes[static_cast<unsigned>(storageClass)];
|
||||
|
||||
if (!ptrType) {
|
||||
ptrType = mBuilder.createTypePointer(storageClass, structType);
|
||||
}
|
||||
|
||||
return ptrType;
|
||||
}
|
||||
|
||||
spirv::Type ConverterContext::getType(TypeId id) {
|
||||
auto &type = mTypes[static_cast<std::uint32_t>(id)];
|
||||
|
||||
if (type) {
|
||||
return type;
|
||||
}
|
||||
|
||||
switch (id) {
|
||||
case TypeId::Void:
|
||||
return ((type = mBuilder.createTypeVoid()));
|
||||
case TypeId::Bool:
|
||||
return ((type = mBuilder.createTypeBool()));
|
||||
case TypeId::SInt8:
|
||||
return ((type = mBuilder.createTypeSInt(8)));
|
||||
case TypeId::UInt8:
|
||||
return ((type = mBuilder.createTypeUInt(8)));
|
||||
case TypeId::SInt16:
|
||||
return ((type = mBuilder.createTypeSInt(16)));
|
||||
case TypeId::UInt16:
|
||||
return ((type = mBuilder.createTypeUInt(16)));
|
||||
case TypeId::SInt32:
|
||||
return ((type = mBuilder.createTypeSInt(32)));
|
||||
case TypeId::UInt32:
|
||||
return ((type = mBuilder.createTypeUInt(32)));
|
||||
case TypeId::UInt32x2:
|
||||
return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 2)));
|
||||
case TypeId::UInt32x3:
|
||||
return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 3)));
|
||||
case TypeId::UInt32x4:
|
||||
return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 4)));
|
||||
case TypeId::UInt64:
|
||||
return ((type = mBuilder.createTypeUInt(64)));
|
||||
case TypeId::SInt64:
|
||||
return ((type = mBuilder.createTypeSInt(64)));
|
||||
case TypeId::ArrayUInt32x8:
|
||||
type = mBuilder.createTypeArray(getType(TypeId::UInt32x4), getUInt32(2));
|
||||
getBuilder().createDecorate(type, spv::Decoration::ArrayStride,
|
||||
std::array{static_cast<std::uint32_t>(16)});
return type;
|
||||
case TypeId::ArrayUInt32x16:
|
||||
type = mBuilder.createTypeArray(getType(TypeId::UInt32x4), getUInt32(4));
|
||||
getBuilder().createDecorate(type, spv::Decoration::ArrayStride,
|
||||
std::array{static_cast<std::uint32_t>(16)});
|
||||
return type;
|
||||
case TypeId::Float16:
|
||||
return ((type = mBuilder.createTypeFloat(16)));
|
||||
case TypeId::Float32:
|
||||
return ((type = mBuilder.createTypeFloat(32)));
|
||||
case TypeId::Float32x2:
|
||||
return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 2)));
|
||||
case TypeId::Float32x3:
|
||||
return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 3)));
|
||||
case TypeId::Float32x4:
|
||||
return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 4)));
|
||||
case TypeId::Float64:
|
||||
return ((type = mBuilder.createTypeFloat(64)));
|
||||
case TypeId::ArrayFloat32x8:
|
||||
type = mBuilder.createTypeArray(getType(TypeId::Float32x4), getUInt32(2));
|
||||
getBuilder().createDecorate(type, spv::Decoration::ArrayStride,
|
||||
std::array{static_cast<std::uint32_t>(16)});
|
||||
return type;
|
||||
case TypeId::ArrayFloat32x16:
|
||||
type = mBuilder.createTypeArray(getType(TypeId::Float32x4), getUInt32(4));
|
||||
getBuilder().createDecorate(type, spv::Decoration::ArrayStride,
|
||||
std::array{static_cast<std::uint32_t>(16)});
|
||||
return type;
|
||||
|
||||
case TypeId::Image2D:
|
||||
return ((type = getBuilder().createTypeImage(getFloat32Type(),
|
||||
spv::Dim::Dim2D, 0, 0, 0, 1,
|
||||
spv::ImageFormat::Unknown)));
|
||||
case TypeId::SampledImage2D:
|
||||
return ((type = getBuilder().createTypeSampledImage(getImage2DType())));
|
||||
|
||||
case TypeId::Sampler:
|
||||
return ((type = getBuilder().createTypeSampler()));
|
||||
}
|
||||
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
spirv::RuntimeArrayType ConverterContext::getRuntimeArrayType(TypeId id) {
|
||||
auto &type = mRuntimeArrayTypes[static_cast<std::uint32_t>(id)];
|
||||
|
||||
if (!type) {
|
||||
type = mBuilder.createTypeRuntimeArray(getType(id));
|
||||
mBuilder.createDecorate(type, spv::Decoration::ArrayStride,
|
||||
{{(std::uint32_t)id.getSize()}});
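// ArrayStride matches TypeId::getSize(), so runtime arrays are tightly packed.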
|
||||
}
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
spirv::ConstantUInt ConverterContext::getUInt64(std::uint64_t value) {
|
||||
auto &id = mConstantUint64Map[value];
|
||||
if (!id) {
|
||||
id = mBuilder.createConstant64(getUInt64Type(), value);
|
||||
}
|
||||
return id;
|
||||
}
|
||||
|
||||
spirv::ConstantUInt ConverterContext::getUInt32(std::uint32_t value) {
|
||||
auto &id = mConstantUint32Map[value];
|
||||
if (!id) {
|
||||
id = mBuilder.createConstant32(getUInt32Type(), value);
|
||||
}
|
||||
return id;
|
||||
}
|
||||
|
||||
spirv::ConstantSInt ConverterContext::getSInt32(std::uint32_t value) {
|
||||
auto &id = mConstantSint32Map[value];
|
||||
if (!id) {
|
||||
id = mBuilder.createConstant32(getSint32Type(), value);
|
||||
}
|
||||
return id;
|
||||
}
|
||||
|
||||
spirv::ConstantFloat ConverterContext::getFloat32Raw(std::uint32_t value) {
|
||||
auto &id = mConstantFloat32Map[value];
|
||||
if (!id) {
|
||||
id = mBuilder.createConstant32(getFloat32Type(), value);
|
||||
}
|
||||
return id;
|
||||
}
|
||||
|
||||
UniformInfo *ConverterContext::createStorageBuffer(TypeId type) {
|
||||
std::array<spirv::Type, 1> uniformStructMembers{getRuntimeArrayType(type)};
|
||||
auto uniformStruct = findStructType(uniformStructMembers);
|
||||
|
||||
if (!uniformStruct) {
|
||||
uniformStruct = getStructType(uniformStructMembers);
|
||||
|
||||
getBuilder().createDecorate(uniformStruct, spv::Decoration::Block, {});
|
||||
|
||||
getBuilder().createMemberDecorate(
|
||||
uniformStruct, 0, spv::Decoration::Offset,
|
||||
std::array{static_cast<std::uint32_t>(0)});
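// Storage buffers are modeled as a Block-decorated struct wrapping a single runtime array at offset 0.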
|
||||
}
|
||||
|
||||
auto uniformType =
|
||||
getStructPointerType(spv::StorageClass::StorageBuffer, uniformStruct);
|
||||
auto uniformVariable = getBuilder().createVariable(
|
||||
uniformType, spv::StorageClass::StorageBuffer);
|
||||
|
||||
mInterfaces.push_back(uniformVariable);
|
||||
|
||||
auto &newUniform = mUniforms.emplace_back();
|
||||
newUniform.index = mUniforms.size() - 1;
|
||||
newUniform.typeId = type;
|
||||
newUniform.type = uniformType;
|
||||
newUniform.variable = uniformVariable;
|
||||
newUniform.isBuffer = true;
|
||||
std::printf("new storage buffer %u of type %u\n", newUniform.index,
|
||||
newUniform.typeId.raw);
|
||||
return &newUniform;
|
||||
}
|
||||
|
||||
UniformInfo *ConverterContext::getOrCreateStorageBuffer(std::uint32_t *vbuffer,
|
||||
TypeId type) {
|
||||
for (auto &uniform : mUniforms) {
|
||||
if (std::memcmp(uniform.buffer, vbuffer, sizeof(std::uint32_t) * 4)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (uniform.typeId != type) {
|
||||
util::unreachable("getOrCreateStorageBuffer: access to the uniform with "
|
||||
"different type");
|
||||
}
|
||||
|
||||
if (!uniform.isBuffer) {
|
||||
util::unreachable("getOrCreateStorageBuffer: uniform was constant");
|
||||
}
|
||||
|
||||
// std::printf("reuse storage buffer %u of type %u\n", uniform.index,
|
||||
// uniform.typeId.raw);
|
||||
return &uniform;
|
||||
}
|
||||
|
||||
auto newUniform = createStorageBuffer(type);
|
||||
std::memcpy(newUniform->buffer, vbuffer, sizeof(std::uint32_t) * 4);
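// Cache the four descriptor dwords so later lookups with the same resource reuse this buffer.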
|
||||
return newUniform;
|
||||
}
|
||||
|
||||
UniformInfo *ConverterContext::getOrCreateUniformConstant(std::uint32_t *buffer,
|
||||
std::size_t size,
|
||||
TypeId type) {
|
||||
for (auto &uniform : mUniforms) {
|
||||
if (std::memcmp(uniform.buffer, buffer, sizeof(std::uint32_t) * size)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (uniform.typeId != type) {
|
||||
util::unreachable(
|
||||
"getOrCreateUniformConstant: access to the uniform with "
|
||||
"different type");
|
||||
}
|
||||
|
||||
if (uniform.isBuffer) {
|
||||
util::unreachable("getOrCreateUniformConstant: uniform was buffer");
|
||||
}
|
||||
|
||||
return &uniform;
|
||||
}
|
||||
|
||||
auto uniformType = getPointerType(spv::StorageClass::UniformConstant, type);
|
||||
auto uniformVariable = getBuilder().createVariable(
|
||||
uniformType, spv::StorageClass::UniformConstant);
|
||||
mInterfaces.push_back(uniformVariable);
|
||||
|
||||
auto &newUniform = mUniforms.emplace_back();
|
||||
newUniform.index = mUniforms.size() - 1;
|
||||
newUniform.typeId = type;
|
||||
newUniform.type = uniformType;
|
||||
newUniform.variable = uniformVariable;
|
||||
newUniform.isBuffer = false;
|
||||
std::memcpy(newUniform.buffer, buffer, sizeof(std::uint32_t) * size);
|
||||
|
||||
return &newUniform;
|
||||
}
|
||||
|
||||
spirv::VariableValue ConverterContext::getThreadId() {
|
||||
if (mThreadId) {
|
||||
return mThreadId;
|
||||
}
|
||||
|
||||
auto inputType = getPointerType(spv::StorageClass::Input, TypeId::UInt32);
|
||||
mThreadId = mBuilder.createVariable(inputType, spv::StorageClass::Input);
|
||||
|
||||
if (mStage == Stage::Vertex) {
|
||||
mBuilder.createDecorate(
|
||||
mThreadId, spv::Decoration::BuiltIn,
|
||||
std::array{static_cast<std::uint32_t>(spv::BuiltIn::VertexIndex)});
|
||||
} else {
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
mInterfaces.push_back(mThreadId);
|
||||
|
||||
return mThreadId;
|
||||
}
|
||||
|
||||
spirv::VariableValue ConverterContext::getWorkgroupId() {
|
||||
if (mWorkgroupId) {
|
||||
return mWorkgroupId;
|
||||
}
|
||||
|
||||
if (mStage != Stage::Compute) {
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
auto workgroupIdType =
|
||||
getPointerType(spv::StorageClass::Input, TypeId::UInt32x3);
|
||||
mWorkgroupId =
|
||||
mBuilder.createVariable(workgroupIdType, spv::StorageClass::Input);
|
||||
|
||||
mBuilder.createDecorate(
|
||||
mWorkgroupId, spv::Decoration::BuiltIn,
|
||||
{{static_cast<std::uint32_t>(spv::BuiltIn::WorkgroupId)}});
|
||||
mInterfaces.push_back(mWorkgroupId);
|
||||
|
||||
return mWorkgroupId;
|
||||
}
|
||||
|
||||
spirv::VariableValue ConverterContext::getLocalInvocationId() {
|
||||
if (mLocalInvocationId) {
|
||||
return mLocalInvocationId;
|
||||
}
|
||||
|
||||
if (mStage != Stage::Compute) {
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
auto localInvocationIdType =
|
||||
getPointerType(spv::StorageClass::Input, TypeId::UInt32x3);
|
||||
mLocalInvocationId =
|
||||
mBuilder.createVariable(localInvocationIdType, spv::StorageClass::Input);
|
||||
|
||||
mBuilder.createDecorate(
|
||||
mLocalInvocationId, spv::Decoration::BuiltIn,
|
||||
std::array{static_cast<std::uint32_t>(spv::BuiltIn::LocalInvocationId)});
|
||||
|
||||
mInterfaces.push_back(mLocalInvocationId);
|
||||
|
||||
return mLocalInvocationId;
|
||||
}
|
||||
|
||||
spirv::VariableValue ConverterContext::getPerVertex() {
|
||||
if (mPerVertex) {
|
||||
return mPerVertex;
|
||||
}
|
||||
|
||||
auto floatT = getFloat32Type();
|
||||
auto float4T = getFloat32x4Type();
|
||||
|
||||
auto uintConst1 = getUInt32(1);
|
||||
auto arr1Float = mBuilder.createTypeArray(floatT, uintConst1);
|
||||
|
||||
auto gl_PerVertexStructT = mBuilder.createTypeStruct(std::array{
|
||||
static_cast<spirv::Type>(float4T),
|
||||
static_cast<spirv::Type>(floatT),
|
||||
static_cast<spirv::Type>(arr1Float),
|
||||
static_cast<spirv::Type>(arr1Float),
|
||||
});
|
||||
|
||||
mBuilder.createDecorate(gl_PerVertexStructT, spv::Decoration::Block, {});
|
||||
mBuilder.createMemberDecorate(
|
||||
gl_PerVertexStructT, 0, spv::Decoration::BuiltIn,
|
||||
std::array{static_cast<std::uint32_t>(spv::BuiltIn::Position)});
|
||||
mBuilder.createMemberDecorate(
|
||||
gl_PerVertexStructT, 1, spv::Decoration::BuiltIn,
|
||||
std::array{static_cast<std::uint32_t>(spv::BuiltIn::PointSize)});
|
||||
mBuilder.createMemberDecorate(
|
||||
gl_PerVertexStructT, 2, spv::Decoration::BuiltIn,
|
||||
std::array{static_cast<std::uint32_t>(spv::BuiltIn::ClipDistance)});
|
||||
mBuilder.createMemberDecorate(
|
||||
gl_PerVertexStructT, 3, spv::Decoration::BuiltIn,
|
||||
std::array{static_cast<std::uint32_t>(spv::BuiltIn::CullDistance)});
|
||||
|
||||
auto gl_PerVertexPtrT = mBuilder.createTypePointer(spv::StorageClass::Output,
|
||||
gl_PerVertexStructT);
|
||||
mPerVertex =
|
||||
mBuilder.createVariable(gl_PerVertexPtrT, spv::StorageClass::Output);
|
||||
|
||||
mInterfaces.push_back(mPerVertex);
|
||||
return mPerVertex;
|
||||
}
|
||||
|
||||
spirv::VariableValue ConverterContext::getFragCoord() {
|
||||
if (mFragCoord) {
|
||||
return mFragCoord;
|
||||
}
|
||||
|
||||
auto inputType = getPointerType(spv::StorageClass::Input, TypeId::Float32x4);
|
||||
mFragCoord =
|
||||
mBuilder.createVariable(inputType, spv::StorageClass::Input);
|
||||
|
||||
mBuilder.createDecorate(mFragCoord, spv::Decoration::BuiltIn,
|
||||
{{static_cast<std::uint32_t>(spv::BuiltIn::FragCoord)}});
|
||||
|
||||
mInterfaces.push_back(mFragCoord);
|
||||
return mFragCoord;
|
||||
}
|
||||
|
||||
spirv::VariableValue ConverterContext::getIn(unsigned location) {
|
||||
auto [it, inserted] = mIns.try_emplace(location);
|
||||
if (!inserted) {
|
||||
return it->second;
|
||||
}
|
||||
|
||||
auto inputType = getPointerType(spv::StorageClass::Input, TypeId::Float32x4);
|
||||
auto inputVariable =
|
||||
mBuilder.createVariable(inputType, spv::StorageClass::Input);
|
||||
|
||||
mBuilder.createDecorate(inputVariable, spv::Decoration::Location,
|
||||
{{location}});
|
||||
|
||||
mInterfaces.push_back(inputVariable);
|
||||
it->second = inputVariable;
|
||||
return inputVariable;
|
||||
}
|
||||
|
||||
spirv::VariableValue ConverterContext::getOut(unsigned location) {
|
||||
auto [it, inserted] = mOuts.try_emplace(location);
|
||||
if (!inserted) {
|
||||
return it->second;
|
||||
}
|
||||
auto outputType =
|
||||
getPointerType(spv::StorageClass::Output, TypeId::Float32x4);
|
||||
auto outputVariable =
|
||||
mBuilder.createVariable(outputType, spv::StorageClass::Output);
|
||||
|
||||
mBuilder.createDecorate(outputVariable, spv::Decoration::Location,
|
||||
{{location}});
|
||||
|
||||
mInterfaces.push_back(outputVariable);
|
||||
it->second = outputVariable;
|
||||
return outputVariable;
|
||||
}
|
||||
|
||||
spirv::Function ConverterContext::getDiscardFn() {
|
||||
if (mDiscardFn) {
|
||||
return mDiscardFn;
|
||||
}
|
||||
|
||||
if (mStage != Stage::Fragment) {
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
auto fn = mBuilder.createFunctionBuilder(5);
|
||||
mDiscardFn = fn.id;
|
||||
auto entry = fn.createBlockBuilder(5);
|
||||
entry.createKill();
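// The helper body is a single OpKill, so fragment code can discard by calling this function.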
|
||||
|
||||
fn.insertBlock(entry);
|
||||
mBuilder.insertFunction(fn, getVoidType(), {},
|
||||
getFunctionType(getVoidType(), {}));
|
||||
|
||||
return mDiscardFn;
|
||||
}
|
||||
|
||||
std::optional<std::uint32_t>
|
||||
ConverterContext::findUint32Value(spirv::Value id) const {
|
||||
for (auto [value, constId] : mConstantUint32Map) {
|
||||
if (constId == id) {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
std::optional<std::int32_t>
|
||||
ConverterContext::findSint32Value(spirv::Value id) const {
|
||||
for (auto [value, constId] : mConstantSint32Map) {
|
||||
if (constId == id) {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
std::optional<float> ConverterContext::findFloat32Value(spirv::Value id) const {
|
||||
for (auto [value, constId] : mConstantFloat32Map) {
|
||||
if (constId == id) {
|
||||
return std::bit_cast<float>(value);
|
||||
}
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
spirv::FunctionType
|
||||
ConverterContext::getFunctionType(spirv::Type resultType,
|
||||
std::span<const spirv::Type> params) {
|
||||
for (auto fnType : mFunctionTypes) {
|
||||
if (fnType.resultType != resultType) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (fnType.params.size() != params.size()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
bool match = true;
|
||||
for (std::size_t i = 0, end = params.size(); i < end; ++i) {
|
||||
if (fnType.params[i] != params[i]) {
|
||||
match = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!match) {
|
||||
continue;
|
||||
}
|
||||
|
||||
return fnType.id;
|
||||
}
|
||||
|
||||
auto id = mBuilder.createTypeFunction(resultType, params);
|
||||
|
||||
std::vector<spirv::Type> paramsVec;
|
||||
paramsVec.reserve(params.size());
|
||||
|
||||
for (auto param : params) {
|
||||
paramsVec.push_back(param);
|
||||
}
|
||||
|
||||
mFunctionTypes.push_back(FunctionType{
|
||||
.resultType = resultType, .params = std::move(paramsVec), .id = id});
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
Function *ConverterContext::createFunction(std::size_t expectedSize) {
|
||||
auto result = &mFunctions.emplace_front();
|
||||
|
||||
result->context = this;
|
||||
result->entryFragment.context = this;
|
||||
result->entryFragment.function = result;
|
||||
result->entryFragment.builder = mBuilder.createBlockBuilder(expectedSize);
|
||||
result->entryFragment.entryBlockId = result->entryFragment.builder.id;
|
||||
result->fragments.push_back(&result->entryFragment);
|
||||
|
||||
result->exitFragment.context = this;
|
||||
result->exitFragment.function = result;
|
||||
result->exitFragment.builder = mBuilder.createBlockBuilder(0);
|
||||
result->exitFragment.entryBlockId = result->exitFragment.builder.id;
|
||||
result->builder = mBuilder.createFunctionBuilder(expectedSize);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
Fragment *ConverterContext::createFragment(std::size_t expectedSize) {
|
||||
auto result = &mFragments.emplace_front();
|
||||
|
||||
result->context = this;
|
||||
result->builder = mBuilder.createBlockBuilder(expectedSize);
|
||||
result->entryBlockId = result->builder.id;
|
||||
|
||||
return result;
|
||||
}
|
||||
5380
hw/amdgpu/shader/src/Fragment.cpp
Normal file
File diff suppressed because it is too large
274
hw/amdgpu/shader/src/Function.cpp
Normal file
|
|
@ -0,0 +1,274 @@
|
|||
#include "Function.hpp"
|
||||
#include "ConverterContext.hpp"
|
||||
#include "RegisterId.hpp"
|
||||
|
||||
using namespace amdgpu::shader;
|
||||
|
||||
Value Function::createInput(RegisterId id) {
|
||||
auto [it, inserted] = inputs.try_emplace(id);
|
||||
|
||||
if (!inserted) {
|
||||
assert(it->second);
|
||||
return it->second;
|
||||
}
|
||||
|
||||
auto offset = id.getOffset();
|
||||
|
||||
if (id.isScalar()) {
|
||||
auto uint32T = context->getUInt32Type();
|
||||
|
||||
if (userSgprs.size() > offset) {
|
||||
return ((it->second = {uint32T, context->getUInt32(userSgprs[offset])}));
|
||||
}
|
||||
|
||||
if (stage == Stage::None) {
|
||||
return ((it->second =
|
||||
Value{uint32T, builder.createFunctionParameter(uint32T)}));
|
||||
}
|
||||
|
||||
switch (id.raw) {
|
||||
case RegisterId::ExecLo:
|
||||
return ((it->second = {uint32T, context->getUInt32(1)}));
|
||||
case RegisterId::ExecHi:
|
||||
return ((it->second = {uint32T, context->getUInt32(0)}));
|
||||
|
||||
case RegisterId::Scc:
|
||||
return ((it->second = {context->getBoolType(), context->getFalse()}));
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (stage == Stage::Vertex) {
|
||||
return ((it->second = {uint32T, context->getUInt32(0)}));
|
||||
} else if (stage == Stage::Fragment) {
|
||||
return ((it->second = {uint32T, context->getUInt32(0)}));
|
||||
} else if (stage == Stage::Compute) {
|
||||
std::uint32_t offsetAfterSgprs = offset - userSgprs.size();
|
||||
if (offsetAfterSgprs < 3) {
|
||||
auto workgroupIdVar = context->getWorkgroupId();
|
||||
auto workgroupId = entryFragment.builder.createLoad(
|
||||
context->getUint32x3Type(), workgroupIdVar);
|
||||
for (uint32_t i = 0; i < 3; ++i) {
|
||||
auto input = entryFragment.builder.createCompositeExtract(
|
||||
uint32T, workgroupId, {{i}});
|
||||
|
||||
inputs[RegisterId::Scalar(userSgprs.size() + i)] = {uint32T, input};
|
||||
}
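// The three workgroup-id components occupy the scalar registers right after the user SGPRs.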
|
||||
|
||||
return inputs[id];
|
||||
}
|
||||
|
||||
return ((it->second = {uint32T, context->getUInt32(0)}));
|
||||
}
|
||||
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
if (stage == Stage::None) {
|
||||
auto float32T = context->getFloat32Type();
|
||||
return (
|
||||
(it->second = {float32T, builder.createFunctionParameter(float32T)}));
|
||||
}
|
||||
|
||||
if (stage == Stage::Vertex) {
|
||||
if (id.isVector()) {
|
||||
auto uint32T = context->getUInt32Type();
|
||||
|
||||
if (id.getOffset() == 0) {
|
||||
auto input =
|
||||
entryFragment.builder.createLoad(uint32T, context->getThreadId());
|
||||
|
||||
return ((it->second = {uint32T, input}));
|
||||
}
|
||||
|
||||
return ((it->second = {uint32T, context->getUInt32(0)}));
|
||||
}
|
||||
|
||||
util::unreachable("Unexpected vertex input %u. user sgprs count=%zu",
|
||||
id.raw, userSgprs.size());
|
||||
}
|
||||
|
||||
if (stage == Stage::Fragment) {
|
||||
if (id.isAttr()) {
|
||||
auto float4T = context->getFloat32x4Type();
|
||||
auto input = entryFragment.builder.createLoad(
|
||||
float4T, context->getIn(id.getOffset()));
|
||||
return ((it->second = {float4T, input}));
|
||||
}
|
||||
|
||||
if (id.isVector()) {
|
||||
switch (offset) {
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
case 5: {
|
||||
auto float4T = context->getFloat32x4Type();
|
||||
auto floatT = context->getFloat32Type();
|
||||
auto fragCoord =
|
||||
entryFragment.builder.createLoad(float4T, context->getFragCoord());
|
||||
return (
|
||||
(it->second = {floatT, entryFragment.builder.createCompositeExtract(
|
||||
floatT, fragCoord, {{offset - 2}})}));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ((it->second = {context->getUInt32Type(), context->getUInt32(0)}));
|
||||
}
|
||||
|
||||
if (stage == Stage::Compute) {
|
||||
if (id.isVector() && offset < 3) {
|
||||
auto uint32T = context->getUInt32Type();
|
||||
auto localInvocationIdVar = context->getLocalInvocationId();
|
||||
auto localInvocationId = entryFragment.builder.createLoad(
|
||||
context->getUint32x3Type(), localInvocationIdVar);
|
||||
|
||||
for (uint32_t i = 0; i < 3; ++i) {
|
||||
auto input = entryFragment.builder.createCompositeExtract(
|
||||
uint32T, localInvocationId, {{i}});
|
||||
|
||||
inputs[RegisterId::Vector(i)] = {uint32T, input};
|
||||
}
|
||||
|
||||
return inputs[id];
|
||||
}
|
||||
|
||||
return ((it->second = {context->getUInt32Type(), context->getUInt32(0)}));
|
||||
}
|
||||
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
void Function::createExport(spirv::BlockBuilder &builder, unsigned index,
|
||||
Value value) {
|
||||
if (stage == Stage::Vertex) {
|
||||
switch (index) {
|
||||
case 12: {
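// Export target 12 is the position output, written through gl_PerVertex member 0.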
|
||||
auto float4OutPtrT =
|
||||
context->getPointerType(spv::StorageClass::Output, TypeId::Float32x4);
|
||||
|
||||
auto gl_PerVertexPosition = builder.createAccessChain(
|
||||
float4OutPtrT, context->getPerVertex(), {{context->getSInt32(0)}});
|
||||
|
||||
if (value.type != context->getFloat32x4Type()) {
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
builder.createStore(gl_PerVertexPosition, value.value);
|
||||
return;
|
||||
}
|
||||
|
||||
case 32 ... 64: { // paramN
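// Parameter exports: the output location is index - 32.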
|
||||
if (value.type != context->getFloat32x4Type()) {
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
builder.createStore(context->getOut(index - 32), value.value);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
util::unreachable("Unexpected vartex export target %u", index);
|
||||
}
|
||||
|
||||
if (stage == Stage::Fragment) {
|
||||
switch (index) {
|
||||
case 0 ... 7: {
|
||||
if (value.type != context->getFloat32x4Type()) {
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
builder.createStore(context->getOut(index), value.value);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
util::unreachable("Unexpected fragment export target %u", index);
|
||||
}
|
||||
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
spirv::Type Function::getResultType() {
|
||||
if (exitFragment.outputs.empty()) {
|
||||
return context->getVoidType();
|
||||
}
|
||||
|
||||
if (exitFragment.outputs.size() == 1) {
|
||||
return exitFragment.registers->getRegister(*exitFragment.outputs.begin())
|
||||
.type;
|
||||
}
|
||||
|
||||
std::vector<spirv::Type> members;
|
||||
members.reserve(exitFragment.outputs.size());
|
||||
|
||||
for (auto id : exitFragment.outputs) {
|
||||
members.push_back(exitFragment.registers->getRegister(id).type);
|
||||
}
|
||||
|
||||
return context->getStructType(members);
|
||||
}
|
||||
|
||||
spirv::FunctionType Function::getFunctionType() {
|
||||
if (stage != Stage::None) {
|
||||
return context->getFunctionType(getResultType(), {});
|
||||
}
|
||||
|
||||
std::vector<spirv::Type> params;
|
||||
params.reserve(inputs.size());
|
||||
|
||||
for (auto inp : inputs) {
|
||||
params.push_back(inp.second.type);
|
||||
}
|
||||
|
||||
return context->getFunctionType(getResultType(), params);
|
||||
}
|
||||
|
||||
Fragment *Function::createFragment() {
|
||||
auto result = context->createFragment(0);
|
||||
result->function = this;
|
||||
fragments.push_back(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
void Function::insertReturn() {
|
||||
if (exitFragment.outputs.empty()) {
|
||||
exitFragment.builder.createReturn();
|
||||
return;
|
||||
}
|
||||
|
||||
if (exitFragment.outputs.size() == 1) {
|
||||
auto value =
|
||||
exitFragment.registers->getRegister(*exitFragment.outputs.begin())
|
||||
.value;
|
||||
exitFragment.builder.createReturnValue(value);
|
||||
return;
|
||||
}
|
||||
|
||||
auto resultType = getResultType();
|
||||
|
||||
auto resultTypePointer = context->getBuilder().createTypePointer(
|
||||
spv::StorageClass::Function, resultType);
|
||||
|
||||
auto resultVariable = entryFragment.builder.createVariable(
|
||||
resultTypePointer, spv::StorageClass::Function);
|
||||
|
||||
std::uint32_t member = 0;
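// Store each live output register into the matching member of an aggregate, then load and return it as a whole.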
|
||||
for (auto regId : exitFragment.outputs) {
|
||||
auto value = exitFragment.registers->getRegister(regId);
|
||||
auto valueTypeId = context->getTypeIdOf(value.type);
|
||||
|
||||
auto pointerType =
|
||||
context->getPointerType(spv::StorageClass::Function, *valueTypeId);
|
||||
auto valuePointer = exitFragment.builder.createAccessChain(
|
||||
pointerType, resultVariable,
|
||||
{{exitFragment.context->getUInt32(member++)}});
|
||||
|
||||
exitFragment.builder.createStore(valuePointer, value.value);
|
||||
}
|
||||
|
||||
auto resultValue = exitFragment.builder.createLoad(resultType, resultVariable);
|
||||
|
||||
exitFragment.builder.createReturnValue(resultValue);
|
||||
}
|
||||
3161
hw/amdgpu/shader/src/Instruction.cpp
Normal file
File diff suppressed because it is too large
72
hw/amdgpu/shader/src/RegisterState.cpp
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
#include "RegisterState.hpp"
|
||||
#include "util/unreachable.hpp"
|
||||
|
||||
amdgpu::shader::Value
|
||||
amdgpu::shader::RegisterState::getRegister(RegisterId regId) {
|
||||
auto offset = regId.getOffset();
|
||||
|
||||
if (regId.isScalar()) {
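// Offsets follow the GCN scalar operand encoding: 0..103 SGPRs, 106/107 VCC, 124 M0, 126/127 EXEC, 253 SCC, 254 LDS direct.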
|
||||
switch (offset) {
|
||||
case 0 ... 103:
|
||||
return sgprs[offset];
|
||||
case 106:
|
||||
return vccLo;
|
||||
case 107:
|
||||
return vccHi;
|
||||
case 124:
|
||||
return m0;
|
||||
case 126:
|
||||
return execLo;
|
||||
case 127:
|
||||
return execHi;
|
||||
case 253:
|
||||
return scc;
|
||||
case 254:
|
||||
return ldsDirect;
|
||||
}
|
||||
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
if (regId.isVector()) {
|
||||
return vgprs[offset];
|
||||
}
|
||||
|
||||
if (regId.isAttr()) {
|
||||
return attrs[offset];
|
||||
}
|
||||
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
void amdgpu::shader::RegisterState::setRegister(RegisterId regId,
|
||||
Value value) {
|
||||
auto offset = regId.getOffset();
|
||||
|
||||
if (regId.isScalar()) {
|
||||
switch (offset) {
|
||||
case 0 ... 103: sgprs[offset] = value; return;
|
||||
case 106: vccLo = value; return;
|
||||
case 107: vccHi = value; return;
|
||||
case 124: m0 = value; return;
|
||||
case 126: execLo = value; return;
|
||||
case 127: execHi = value; return;
|
||||
case 253: scc = value; return;
|
||||
case 254: ldsDirect = value; return;
|
||||
}
|
||||
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
if (regId.isVector()) {
|
||||
vgprs[offset] = value;
|
||||
return;
|
||||
}
|
||||
|
||||
if (regId.isAttr()) {
|
||||
attrs[offset] = value;
|
||||
return;
|
||||
}
|
||||
|
||||
util::unreachable();
|
||||
}
|
||||
132
hw/amdgpu/shader/src/TypeId.cpp
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
#include "TypeId.hpp"
|
||||
#include "util/unreachable.hpp"
|
||||
#include <cstdint>
|
||||
|
||||
amdgpu::shader::TypeId amdgpu::shader::TypeId::getBaseType() const {
|
||||
switch (raw) {
|
||||
case TypeId::Void:
|
||||
case TypeId::Bool:
|
||||
case TypeId::SInt8:
|
||||
case TypeId::UInt8:
|
||||
case TypeId::SInt16:
|
||||
case TypeId::UInt16:
|
||||
case TypeId::SInt32:
|
||||
case TypeId::UInt32:
|
||||
case TypeId::SInt64:
|
||||
case TypeId::UInt64:
|
||||
case TypeId::Float16:
|
||||
case TypeId::Float32:
|
||||
case TypeId::Float64:
|
||||
case TypeId::Sampler:
|
||||
case TypeId::Image2D:
|
||||
case TypeId::SampledImage2D:
|
||||
return raw;
|
||||
|
||||
case TypeId::UInt32x2:
|
||||
case TypeId::UInt32x3:
|
||||
case TypeId::UInt32x4:
|
||||
case TypeId::ArrayUInt32x8:
|
||||
case TypeId::ArrayUInt32x16:
|
||||
return TypeId::UInt32;
|
||||
|
||||
case TypeId::Float32x2:
|
||||
case TypeId::Float32x3:
|
||||
case TypeId::Float32x4:
|
||||
case TypeId::ArrayFloat32x8:
|
||||
case TypeId::ArrayFloat32x16:
|
||||
return TypeId::Float32;
|
||||
}
|
||||
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
std::size_t amdgpu::shader::TypeId::getSize() const {
|
||||
switch (raw) {
|
||||
case TypeId::Void:
|
||||
case TypeId::Sampler:
|
||||
case TypeId::Image2D:
|
||||
case TypeId::SampledImage2D:
|
||||
return 0;
|
||||
case TypeId::Bool:
|
||||
return 1;
|
||||
case TypeId::SInt8:
|
||||
case TypeId::UInt8:
|
||||
return 1;
|
||||
case TypeId::SInt16:
|
||||
case TypeId::UInt16:
|
||||
return 2;
|
||||
case TypeId::SInt32:
|
||||
case TypeId::UInt32:
|
||||
return 4;
|
||||
case TypeId::SInt64:
|
||||
case TypeId::UInt64:
|
||||
return 8;
|
||||
case TypeId::Float16:
|
||||
return 2;
|
||||
case TypeId::Float32:
|
||||
return 4;
|
||||
case TypeId::Float64:
|
||||
return 8;
|
||||
|
||||
case TypeId::UInt32x2:
|
||||
case TypeId::UInt32x3:
|
||||
case TypeId::UInt32x4:
|
||||
case TypeId::ArrayUInt32x8:
|
||||
case TypeId::ArrayUInt32x16:
|
||||
case TypeId::Float32x2:
|
||||
case TypeId::Float32x3:
|
||||
case TypeId::Float32x4:
|
||||
case TypeId::ArrayFloat32x8:
|
||||
case TypeId::ArrayFloat32x16:
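// Vector and array types: element count times the scalar element size.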
|
||||
return getElementsCount() * getBaseType().getSize();
|
||||
}
|
||||
|
||||
util::unreachable();
|
||||
}
|
||||
|
||||
std::size_t amdgpu::shader::TypeId::getElementsCount() const {
|
||||
switch (raw) {
|
||||
case TypeId::Bool:
|
||||
case TypeId::SInt8:
|
||||
case TypeId::UInt8:
|
||||
case TypeId::SInt16:
|
||||
case TypeId::UInt16:
|
||||
case TypeId::SInt32:
|
||||
case TypeId::UInt32:
|
||||
case TypeId::SInt64:
|
||||
case TypeId::UInt64:
|
||||
case TypeId::Float16:
|
||||
case TypeId::Float32:
|
||||
case TypeId::Float64:
|
||||
return 1;
|
||||
|
||||
case TypeId::UInt32x2:
|
||||
return 2;
|
||||
case TypeId::UInt32x3:
|
||||
return 3;
|
||||
case TypeId::UInt32x4:
|
||||
return 4;
|
||||
case TypeId::ArrayUInt32x8:
|
||||
return 8;
|
||||
case TypeId::ArrayUInt32x16:
|
||||
return 16;
|
||||
case TypeId::Float32x2:
|
||||
return 2;
|
||||
case TypeId::Float32x3:
|
||||
return 3;
|
||||
case TypeId::Float32x4:
|
||||
return 4;
|
||||
case TypeId::ArrayFloat32x8:
|
||||
return 8;
|
||||
case TypeId::ArrayFloat32x16:
|
||||
return 16;
|
||||
|
||||
case TypeId::Void:
|
||||
case TypeId::Sampler:
|
||||
case TypeId::Image2D:
|
||||
case TypeId::SampledImage2D:
|
||||
return 0;
|
||||
}
|
||||
|
||||
util::unreachable();
|
||||
}
|
||||
117
hw/amdgpu/shader/src/cf.cpp
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
#include "cf.hpp"
|
||||
#include <cassert>
|
||||
#include <fstream>
|
||||
#include <unordered_set>
|
||||
|
||||
void cf::BasicBlock::split(BasicBlock *target) {
|
||||
assert(target->address > address);
|
||||
target->size = size - (target->address - address);
|
||||
size = target->address - address;
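// The tail starting at target->address moves into the target block; this block keeps the leading bytes and branches to it.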
|
||||
|
||||
for (std::size_t i = 0, count = getSuccessorsCount(); i < count; ++i) {
|
||||
auto succ = getSuccessor(i);
|
||||
succ->predecessors.erase(this);
|
||||
succ->predecessors.insert(target);
|
||||
target->successors[i] = successors[i];
|
||||
successors[i] = nullptr;
|
||||
}
|
||||
|
||||
target->terminator = terminator;
|
||||
terminator = TerminatorKind::None;
|
||||
|
||||
createBranch(target);
|
||||
}
|
||||
|
||||
void cf::BasicBlock::createConditionalBranch(BasicBlock *ifTrue,
|
||||
BasicBlock *ifFalse) {
|
||||
assert(terminator == TerminatorKind::None);
|
||||
assert(getSuccessorsCount() == 0);
|
||||
ifTrue->predecessors.insert(this);
|
||||
ifFalse->predecessors.insert(this);
|
||||
|
||||
successors[0] = ifTrue;
|
||||
successors[1] = ifFalse;
|
||||
|
||||
terminator = TerminatorKind::Branch;
|
||||
}
|
||||
|
||||
void cf::BasicBlock::createBranch(BasicBlock *target) {
|
||||
assert(terminator == TerminatorKind::None);
|
||||
assert(getSuccessorsCount() == 0);
|
||||
|
||||
target->predecessors.insert(this);
|
||||
successors[0] = target;
|
||||
|
||||
terminator = TerminatorKind::Branch;
|
||||
}
|
||||
|
||||
void cf::BasicBlock::createBranchToUnknown() {
|
||||
assert(terminator == TerminatorKind::None);
|
||||
assert(getSuccessorsCount() == 0);
|
||||
|
||||
terminator = TerminatorKind::BranchToUnknown;
|
||||
}
|
||||
|
||||
void cf::BasicBlock::createReturn() {
|
||||
assert(terminator == TerminatorKind::None);
|
||||
assert(getSuccessorsCount() == 0);
|
||||
|
||||
terminator = TerminatorKind::Return;
|
||||
}
|
||||
|
||||
void cf::BasicBlock::replaceSuccessor(BasicBlock *origBB, BasicBlock *newBB) {
|
||||
origBB->predecessors.erase(this);
|
||||
newBB->predecessors.insert(this);
|
||||
|
||||
if (origBB == successors[0]) {
|
||||
successors[0] = newBB;
|
||||
return;
|
||||
}
|
||||
|
||||
if (origBB == successors[1]) {
|
||||
successors[1] = newBB;
|
||||
return;
|
||||
}
|
||||
|
||||
std::abort();
|
||||
}
|
||||
|
||||
bool cf::BasicBlock::hasDirectPredecessor(const BasicBlock &block) const {
|
||||
for (auto pred : predecessors) {
|
||||
if (pred == &block) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool cf::BasicBlock::hasPredecessor(const BasicBlock &block) const {
|
||||
if (&block == this) {
|
||||
return hasDirectPredecessor(block);
|
||||
}
|
||||
|
||||
std::vector<const BasicBlock *> workList;
|
||||
std::unordered_set<const BasicBlock *> visited;
|
||||
workList.push_back(this);
|
||||
visited.insert(this);
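// Walk the predecessor graph depth-first; the visited set keeps cycles from looping forever.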
|
||||
|
||||
while (!workList.empty()) {
|
||||
auto node = workList.back();
|
||||
|
||||
if (node == &block) {
|
||||
return true;
|
||||
}
|
||||
|
||||
workList.pop_back();
|
||||
workList.reserve(workList.size() + predecessors.size());
|
||||
|
||||
for (auto pred : predecessors) {
|
||||
if (visited.insert(pred).second) {
|
||||
workList.push_back(pred);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
252
hw/amdgpu/shader/src/scf.cpp
Normal file
|
|
@ -0,0 +1,252 @@
|
|||
#include "scf.hpp"
|
||||
#include "cf.hpp"
|
||||
#include <cassert>
|
||||
#include <fstream>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
|
||||
void scf::Block::eraseFrom(Node *endBefore) {
|
||||
mEnd = endBefore->getPrev();
|
||||
if (mEnd != nullptr) {
|
||||
mEnd->mNext = nullptr;
|
||||
} else {
|
||||
mBegin = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void scf::Block::splitInto(Block *target, Node *splitPoint) {
|
||||
auto targetEnd = std::exchange(mEnd, splitPoint->mPrev);
|
||||
|
||||
if (mEnd != nullptr) {
|
||||
mEnd->mNext = nullptr;
|
||||
} else {
|
||||
mBegin = nullptr;
|
||||
}
|
||||
|
||||
for (auto node = splitPoint; node != nullptr; node = node->getNext()) {
|
||||
node->mParent = target;
|
||||
}
|
||||
|
||||
if (target->mEnd != nullptr) {
|
||||
target->mEnd->mNext = splitPoint;
|
||||
}
|
||||
|
||||
splitPoint->mPrev = target->mEnd;
|
||||
target->mEnd = targetEnd;
|
||||
|
||||
if (target->mBegin == nullptr) {
|
||||
target->mBegin = splitPoint;
|
||||
}
|
||||
}
|
||||
|
||||
scf::Block *scf::Block::split(Context &context, Node *splitPoint) {
|
||||
auto result = context.create<Block>();
|
||||
splitInto(result, splitPoint);
|
||||
return result;
|
||||
}
|
||||
|
||||
static scf::BasicBlock *findJumpTargetIn(scf::Block *parentBlock,
|
||||
scf::Block *testBlock) {
|
||||
auto jumpNode = dynCast<scf::Jump>(testBlock->getLastNode());
|
||||
|
||||
if (jumpNode == nullptr || jumpNode->target->getParent() != parentBlock) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return jumpNode->target;
|
||||
}
|
||||
|
||||
static bool transformJumpToLoop(scf::Context &ctxt, scf::Block *block) {
|
||||
// bb0
|
||||
// bb1
|
||||
// if true {
|
||||
// bb2
|
||||
// jump bb1
|
||||
// } else {
|
||||
// bb3
|
||||
// }
|
||||
//
|
||||
// -->
|
||||
//
|
||||
// bb0
|
||||
// loop {
|
||||
// bb1
|
||||
// if false {
|
||||
// break
|
||||
// }
|
||||
// bb2
|
||||
// }
|
||||
// bb3
|
||||
|
||||
if (block->isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto ifElse = dynCast<scf::IfElse>(block->getLastNode());
|
||||
|
||||
if (ifElse == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto loopTarget = findJumpTargetIn(block, ifElse->ifTrue);
|
||||
auto loopBlock = ifElse->ifTrue;
|
||||
auto invariantBlock = ifElse->ifFalse;
|
||||
|
||||
if (loopTarget == nullptr) {
|
||||
loopTarget = findJumpTargetIn(block, ifElse->ifFalse);
|
||||
loopBlock = ifElse->ifFalse;
|
||||
invariantBlock = ifElse->ifTrue;
|
||||
|
||||
if (loopTarget == nullptr) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
auto loopBody = block->split(ctxt, loopTarget);
|
||||
auto loop = ctxt.create<scf::Loop>(loopBody);
|
||||
block->append(loop);
|
||||
|
||||
for (auto node = invariantBlock->getRootNode(); node != nullptr;) {
|
||||
auto nextNode = node->getNext();
|
||||
invariantBlock->detachNode(node);
|
||||
block->append(node);
|
||||
node = nextNode;
|
||||
}
|
||||
|
||||
loopBlock->detachNode(loopBlock->getLastNode());
|
||||
|
||||
for (auto node = loopBlock->getRootNode(); node != nullptr;) {
|
||||
auto nextNode = node->getNext();
|
||||
loopBlock->detachNode(node);
|
||||
loopBody->append(node);
|
||||
node = nextNode;
|
||||
}
|
||||
|
||||
invariantBlock->append(ctxt.create<scf::Break>());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool moveSameLastBlocksTo(scf::IfElse *ifElse, scf::Block *block) {
|
||||
if (ifElse->ifTrue->isEmpty() || ifElse->ifFalse->isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto ifTrueIt = ifElse->ifTrue->getLastNode();
|
||||
auto ifFalseIt = ifElse->ifFalse->getLastNode();
|
||||
|
||||
while (ifTrueIt != nullptr && ifFalseIt != nullptr) {
|
||||
if (!ifTrueIt->isEqual(*ifFalseIt)) {
|
||||
break;
|
||||
}
|
||||
|
||||
ifTrueIt = ifTrueIt->getPrev();
|
||||
ifFalseIt = ifFalseIt->getPrev();
|
||||
}
|
||||
|
||||
if (ifTrueIt == ifElse->ifTrue->getLastNode()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ifTrueIt == nullptr) {
|
||||
ifTrueIt = ifElse->ifTrue->getRootNode();
|
||||
} else {
|
||||
ifTrueIt = ifTrueIt->getNext();
|
||||
}
|
||||
|
||||
if (ifFalseIt == nullptr) {
|
||||
ifFalseIt = ifElse->ifFalse->getRootNode();
|
||||
} else {
|
||||
ifFalseIt = ifFalseIt->getNext();
|
||||
}
|
||||
|
||||
ifElse->ifTrue->splitInto(block, ifTrueIt);
|
||||
ifElse->ifFalse->eraseFrom(ifFalseIt);
|
||||
return true;
|
||||
}
|
||||
|
||||
class Structurizer {
|
||||
scf::Context &context;
|
||||
|
||||
public:
|
||||
Structurizer(scf::Context &context) : context(context) {}
|
||||
|
||||
scf::Block *structurize(cf::BasicBlock *bb) {
|
||||
return structurizeBlock(bb, {});
|
||||
}
|
||||
|
||||
public:
|
||||
scf::IfElse *structurizeIfElse(
|
||||
cf::BasicBlock *ifTrue, cf::BasicBlock *ifFalse,
|
||||
std::unordered_map<cf::BasicBlock *, scf::BasicBlock *> &visited) {
|
||||
auto ifTrueBlock = structurizeBlock(ifTrue, visited);
|
||||
auto ifFalseBlock = structurizeBlock(ifFalse, visited);
|
||||
|
||||
return context.create<scf::IfElse>(ifTrueBlock, ifFalseBlock);
|
||||
}
|
||||
|
||||
scf::Block *structurizeBlock(
|
||||
cf::BasicBlock *bb,
|
||||
std::unordered_map<cf::BasicBlock *, scf::BasicBlock *> visited) {
|
||||
auto result = context.create<scf::Block>();
|
||||
std::vector<cf::BasicBlock *> workList;
|
||||
workList.push_back(bb);
|
||||
|
||||
while (!workList.empty()) {
|
||||
auto block = workList.back();
|
||||
workList.pop_back();
|
||||
|
||||
auto [it, inserted] = visited.try_emplace(block, nullptr);
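// A block that has already been structurized becomes a Jump to its existing scf::BasicBlock.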
|
||||
if (!inserted) {
|
||||
result->append(context.create<scf::Jump>(it->second));
|
||||
continue;
|
||||
}
|
||||
|
||||
auto scfBlock = context.create<scf::BasicBlock>(block->getAddress(),
|
||||
block->getSize());
|
||||
it->second = scfBlock;
|
||||
result->append(scfBlock);
|
||||
|
||||
switch (block->getTerminator()) {
|
||||
case cf::TerminatorKind::None:
|
||||
std::abort();
|
||||
break;
|
||||
|
||||
case cf::TerminatorKind::Branch:
|
||||
switch (block->getSuccessorsCount()) {
|
||||
case 1:
|
||||
workList.push_back(block->getSuccessor(0));
|
||||
break;
|
||||
|
||||
case 2: {
|
||||
auto ifElse = structurizeIfElse(block->getSuccessor(0),
|
||||
block->getSuccessor(1), visited);
|
||||
result->append(ifElse);
|
||||
|
||||
while (moveSameLastBlocksTo(ifElse, result) ||
|
||||
transformJumpToLoop(context, result)) {
|
||||
;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case cf::TerminatorKind::BranchToUnknown:
|
||||
result->append(context.create<scf::UnknownBlock>());
|
||||
break;
|
||||
|
||||
case cf::TerminatorKind::Return:
|
||||
result->append(context.create<scf::Return>());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
scf::Block *scf::structurize(Context &ctxt, cf::BasicBlock *bb) {
|
||||
return Structurizer{ctxt}.structurize(bb);
|
||||
}
|
||||
|
|
@ -1 +1 @@
|
|||
Subproject commit 05d35b71483880246bc4c1a28f857e9046af7c36
|
||||
Subproject commit 6a093985c4a331661fd47ff9f1c06e4b9b102002
|
||||