mirror of
https://github.com/RPCSX/rpcsx.git
synced 2026-01-08 01:30:13 +01:00
gpu2: initial dispatch implementation
This commit is contained in:
parent
424ce5cf68
commit
239a0645bc
|
|
@ -762,8 +762,6 @@ Cache::Shader Cache::Tag::getShader(const ShaderKey &key,
|
|||
|
||||
std::shared_ptr<Cache::Entry>
|
||||
Cache::Tag::findShader(const ShaderKey &key, const ShaderKey *dependedKey) {
|
||||
auto data = RemoteMemory{mParent->mVmIm}.getPointer(key.address);
|
||||
|
||||
auto cacheIt = mParent->mShaders.queryArea(key.address);
|
||||
|
||||
if (cacheIt == mParent->mShaders.end() ||
|
||||
|
|
@ -1088,10 +1086,9 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
|
|||
key.mipCount, key.pow2pad);
|
||||
|
||||
VkImageUsageFlags usage =
|
||||
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
|
||||
VK_IMAGE_USAGE_SAMPLED_BIT // | VK_IMAGE_USAGE_STORAGE_BIT
|
||||
;
|
||||
|
||||
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
|
||||
if (key.kind == ImageKind::Color) {
|
||||
usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
|
||||
bool isCompressed =
|
||||
key.dfmt == gnm::kDataFormatBc1 || key.dfmt == gnm::kDataFormatBc2 ||
|
||||
key.dfmt == gnm::kDataFormatBc3 || key.dfmt == gnm::kDataFormatBc4 ||
|
||||
|
|
@ -1101,6 +1098,9 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
|
|||
|
||||
if (!isCompressed) {
|
||||
usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
|
||||
}
|
||||
} else {
|
||||
usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
|
||||
}
|
||||
|
||||
auto image = vk::Image::Allocate(
|
||||
|
|
@ -1151,25 +1151,6 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
|
|||
.depth = std::max(key.extent.depth >> mipLevel, 1u),
|
||||
},
|
||||
});
|
||||
|
||||
regions.push_back({
|
||||
.bufferOffset = info.offset,
|
||||
.bufferRowLength =
|
||||
mipLevel > 0 ? 0 : std::max(key.pitch >> mipLevel, 1u),
|
||||
.imageSubresource =
|
||||
{
|
||||
.aspectMask = toAspect(key.kind),
|
||||
.mipLevel = mipLevel,
|
||||
.baseArrayLayer = key.baseArrayLayer,
|
||||
.layerCount = key.arrayLayerCount,
|
||||
},
|
||||
.imageExtent =
|
||||
{
|
||||
.width = std::max(key.extent.width >> mipLevel, 1u),
|
||||
.height = std::max(key.extent.height >> mipLevel, 1u),
|
||||
.depth = std::max(key.extent.depth >> mipLevel, 1u),
|
||||
},
|
||||
});
|
||||
}
|
||||
} else {
|
||||
auto &tiler = mParent->mDevice->tiler;
|
||||
|
|
@ -1434,10 +1415,10 @@ Cache::GraphicsTag::getShader(gcn::Stage stage, const SpiShaderPgm &pgm,
|
|||
gcn::Environment env{
|
||||
.vgprCount = pgm.rsrc1.getVGprCount(),
|
||||
.sgprCount = pgm.rsrc1.getSGprCount(),
|
||||
.userSgprs = std::span(pgm.userData.data(), pgm.rsrc2.userSgpr),
|
||||
.supportsBarycentric = vk::context->supportsBarycentric,
|
||||
.supportsInt8 = vk::context->supportsInt8,
|
||||
.supportsInt64Atomics = vk::context->supportsInt64Atomics,
|
||||
.userSgprs = std::span(pgm.userData.data(), pgm.rsrc2.userSgpr),
|
||||
};
|
||||
|
||||
auto shader = Tag::getShader({
|
||||
|
|
@ -1545,6 +1526,10 @@ Cache::GraphicsTag::getShader(gcn::Stage stage, const SpiShaderPgm &pgm,
|
|||
configPtr[index] = std::bit_cast<std::uint32_t>(
|
||||
context.cbColor[slot.data].info.compSwap);
|
||||
break;
|
||||
|
||||
default:
|
||||
rx::die("unexpected resource slot in graphics shader %u, stage %u",
|
||||
int(slot.type), int(stage));
|
||||
}
|
||||
|
||||
++index;
|
||||
|
|
@ -1575,7 +1560,140 @@ Cache::GraphicsTag::getShader(gcn::Stage stage, const SpiShaderPgm &pgm,
|
|||
|
||||
/// Resolves the compute shader selected by the compute register block `pgm`
/// and fills the configuration buffer that shader reads at dispatch time.
///
/// The shader is looked up through Tag::getShader with the program address
/// (`pgm.address << 8`), the compute stage and an environment built from
/// rsrc1/rsrc2, the thread-group dimensions and the Vulkan feature flags.
/// If the lookup yields no handle, the shader is returned untouched and no
/// further state is modified. Otherwise the shader's resources are loaded,
/// every config slot is written into a host-visible buffer, and that buffer
/// is written as a uniform buffer to binding 0 of the tag's descriptor set.
///
/// \param pgm compute register state (program address, rsrc1/rsrc2, thread
///            counts and user data) describing the shader to run.
/// \return the shader obtained from Tag::getShader; `handle` is empty when
///         the lookup did not produce a usable shader.
Cache::Shader
Cache::ComputeTag::getShader(const Registers::ComputeConfig &pgm) {
  auto descriptorSet = getDescriptorSet();

  // NOTE(review): the thread counts are 32-bit registers narrowed to 8 bits
  // here; values above 255 are truncated — confirm the expected range.
  gcn::Environment env{
      .vgprCount = pgm.rsrc1.getVGprCount(),
      .sgprCount = pgm.rsrc1.getSGprCount(),
      .numThreadX = static_cast<std::uint8_t>(pgm.numThreadX),
      .numThreadY = static_cast<std::uint8_t>(pgm.numThreadY),
      .numThreadZ = static_cast<std::uint8_t>(pgm.numThreadZ),
      .supportsBarycentric = vk::context->supportsBarycentric,
      .supportsInt8 = vk::context->supportsInt8,
      .supportsInt64Atomics = vk::context->supportsInt64Atomics,
      .userSgprs = std::span(pgm.userData.data(), pgm.rsrc2.userSgpr),
  };

  auto shader = Tag::getShader({
      .address = pgm.address << 8,
      .stage = gcn::Stage::Cs,
      .env = env,
  });

  if (!shader.handle) {
    return shader;
  }

  std::uint64_t memoryTableAddress = getMemoryTable().deviceAddress;
  std::uint64_t gdsAddress = mParent->getGdsBuffer().getAddress();

  mStorage->shaderResources.cacheTag = this;

  // Captured before loadResources() so ResourceSlot entries below are
  // expressed relative to where this shader's resources start.
  std::uint32_t slotOffset = mStorage->shaderResources.slotOffset;

  mStorage->shaderResources.loadResources(
      shader.info->resources,
      std::span(pgm.userData.data(), pgm.rsrc2.userSgpr));

  const auto &configSlots = shader.info->configSlots;

  auto configSize = configSlots.size() * sizeof(std::uint32_t);
  auto configBuffer = getInternalHostVisibleBuffer(configSize);
  auto configPtr = reinterpret_cast<std::uint32_t *>(configBuffer.data);

  // Compact list of the SGPR inputs enabled in rsrc2, in the fixed order
  // X, Y, Z, thread-group size, scratch. A CsInputSGpr slot indexes this list.
  std::uint32_t sgprInput[static_cast<std::size_t>(gcn::CsSGprInput::Count)];
  std::uint32_t sgprInputCount = 0;

  auto addSgprInput = [&](bool enabled, gcn::CsSGprInput input) {
    if (enabled) {
      sgprInput[sgprInputCount++] = static_cast<std::uint32_t>(input);
    }
  };

  addSgprInput(pgm.rsrc2.tgIdXEn, gcn::CsSGprInput::ThreadGroupIdX);
  addSgprInput(pgm.rsrc2.tgIdYEn, gcn::CsSGprInput::ThreadGroupIdY);
  addSgprInput(pgm.rsrc2.tgIdZEn, gcn::CsSGprInput::ThreadGroupIdZ);
  addSgprInput(pgm.rsrc2.tgSizeEn, gcn::CsSGprInput::ThreadGroupSize);
  addSgprInput(pgm.rsrc2.scratchEn, gcn::CsSGprInput::Scratch);

  for (std::size_t index = 0; const auto &slot : configSlots) {
    switch (slot.type) {
    case gcn::ConfigType::Imm:
      readMemory(&configPtr[index], slot.data, sizeof(std::uint32_t));
      break;

    case gcn::ConfigType::UserSgpr:
      configPtr[index] = pgm.userData[slot.data];
      break;

    case gcn::ConfigType::ResourceSlot:
      // configPtr is appended to descriptorBuffers after this loop, so the
      // current size is the index it will occupy.
      mStorage->memoryTableConfigSlots.push_back({
          .bufferIndex =
              static_cast<std::uint32_t>(mStorage->descriptorBuffers.size()),
          .configIndex = static_cast<std::uint32_t>(index),
          .resourceSlot = static_cast<std::uint32_t>(slotOffset + slot.data),
      });
      break;

    case gcn::ConfigType::MemoryTable:
      // slot.data == 0 selects the low 32 bits, anything else the high 32.
      configPtr[index] =
          slot.data == 0
              ? static_cast<std::uint32_t>(memoryTableAddress)
              : static_cast<std::uint32_t>(memoryTableAddress >> 32);
      break;

    case gcn::ConfigType::Gds:
      // Same low/high split as MemoryTable.
      configPtr[index] = slot.data == 0
                             ? static_cast<std::uint32_t>(gdsAddress)
                             : static_cast<std::uint32_t>(gdsAddress >> 32);
      break;

    case gcn::ConfigType::CsTgIdCompCnt:
      configPtr[index] = pgm.rsrc2.tidIgCompCount;
      break;

    case gcn::ConfigType::CsInputSGpr:
      // Slots beyond the enabled inputs are marked with all bits set.
      if (slot.data < sgprInputCount) {
        configPtr[index] = sgprInput[slot.data];
      } else {
        configPtr[index] = static_cast<std::uint32_t>(-1);
      }
      break;

    default:
      rx::die("unexpected resource slot in compute shader %u", int(slot.type));
    }

    ++index;
  }

  mStorage->descriptorBuffers.push_back(configPtr);

  VkDescriptorBufferInfo bufferInfo{
      .buffer = configBuffer.handle,
      .offset = configBuffer.offset,
      .range = configSize,
  };

  VkWriteDescriptorSet writeDescSet{
      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
      .dstSet = descriptorSet,
      .dstBinding = 0,
      .descriptorCount = 1,
      .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
      .pBufferInfo = &bufferInfo,
  };

  vkUpdateDescriptorSets(vk::context->device, 1, &writeDescSet, 0, nullptr);
  return shader;
}
|
||||
|
||||
Cache::Cache(Device *device, int vmId) : mDevice(device), mVmIm(vmId) {
|
||||
|
|
|
|||
|
|
@ -157,7 +157,7 @@ struct Cache {
|
|||
VkImageSubresourceRange subresource;
|
||||
};
|
||||
|
||||
class Tag;
|
||||
struct Tag;
|
||||
|
||||
private:
|
||||
struct MemoryTableSlot {
|
||||
|
|
|
|||
|
|
@ -3,8 +3,7 @@
|
|||
#include "shaders/flip_alt.frag.h"
|
||||
#include "shaders/flip_std.frag.h"
|
||||
#include "vk.hpp"
|
||||
#include <atomic>
|
||||
#include <vulkan/vulkan_core.h>
|
||||
#include <vulkan/vulkan.h>
|
||||
|
||||
FlipPipeline::~FlipPipeline() {
|
||||
vkDestroyPipeline(vk::context->device, pipelines[0], vk::context->allocator);
|
||||
|
|
|
|||
|
|
@ -485,7 +485,8 @@ bool GraphicsPipe::dispatchDirect(Queue &queue) {
|
|||
auto dispatchInitiator = queue.rptr[4];
|
||||
sh.compute.computeDispatchInitiator = dispatchInitiator;
|
||||
|
||||
// FIXME
|
||||
amdgpu::dispatch(device->caches[queue.vmId], scheduler, sh.compute, dimX,
|
||||
dimY, dimZ);
|
||||
return true;
|
||||
}
|
||||
bool GraphicsPipe::dispatchIndirect(Queue &queue) {
|
||||
|
|
@ -500,7 +501,8 @@ bool GraphicsPipe::dispatchIndirect(Queue &queue) {
|
|||
auto dimY = buffer[1];
|
||||
auto dimZ = buffer[2];
|
||||
|
||||
// FIXME
|
||||
amdgpu::dispatch(device->caches[queue.vmId], scheduler, sh.compute, dimX,
|
||||
dimY, dimZ);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -551,20 +551,71 @@ struct Registers {
|
|||
|
||||
std::uint32_t computeDispatchInitiator;
|
||||
std::uint32_t _pad0[6];
|
||||
std::uint32_t computeNumThreadX;
|
||||
std::uint32_t computeNumThreadY;
|
||||
std::uint32_t computeNumThreadZ;
|
||||
std::uint32_t numThreadX;
|
||||
std::uint32_t numThreadY;
|
||||
std::uint32_t numThreadZ;
|
||||
std::uint32_t _pad1[2];
|
||||
std::uint32_t computePgmLo;
|
||||
std::uint32_t computePgmHi;
|
||||
std::uint64_t address;
|
||||
std::uint32_t _pad2[4];
|
||||
std::uint32_t computePgmRsrc1;
|
||||
std::uint32_t computePgmRsrc2;
|
||||
// Bitfield view of the rsrc1 register word: `raw` aliases the packed fields
// declared in the anonymous struct below (24 bits are named).
struct {
|
||||
union {
|
||||
std::uint32_t raw;
|
||||
|
||||
struct {
|
||||
std::uint32_t vgprs : 6;
|
||||
std::uint32_t sgprs : 4;
|
||||
std::uint32_t priority : 2;
|
||||
std::uint32_t floatMode : 8;
|
||||
std::uint32_t priv : 1;
|
||||
std::uint32_t dx10Clamp : 1;
|
||||
std::uint32_t debugMode : 1;
|
||||
std::uint32_t ieeeMode : 1;
|
||||
};
|
||||
};
|
||||
|
||||
// Register counts decoded from the encoded fields: (field + 1) * granule.
// NOTE(review): vgprs is 6 bits wide, so (vgprs + 1) * 4 reaches 256 when
// vgprs == 63 and wraps to 0 in the std::uint8_t return type. The SGPR
// variant tops out at (15 + 1) * 8 == 128 and fits.
std::uint8_t getVGprCount() const { return (vgprs + 1) * 4; }
|
||||
std::uint8_t getSGprCount() const { return (sgprs + 1) * 8; }
|
||||
} rsrc1;
|
||||
// Bitfield view of the rsrc2 register word: `raw` aliases the packed fields
// declared in the anonymous struct below (31 bits including the unnamed gap).
// NOTE(review): the fields mix `bool` and `std::uint32_t` bitfield types;
// how such neighbours are packed is implementation-defined — confirm the
// layout on every supported compiler.
struct {
|
||||
union {
|
||||
std::uint32_t raw;
|
||||
|
||||
struct {
|
||||
bool scratchEn : 1;
|
||||
std::uint32_t userSgpr : 5;
|
||||
bool trapPresent : 1;
|
||||
bool tgIdXEn : 1;
|
||||
bool tgIdYEn : 1;
|
||||
bool tgIdZEn : 1;
|
||||
bool tgSizeEn : 1;
|
||||
std::uint32_t tidIgCompCount : 2;
|
||||
std::uint32_t : 2;
|
||||
std::uint32_t ldsSize : 9;
|
||||
std::uint32_t excpEn : 7;
|
||||
};
|
||||
};
|
||||
|
||||
// Returns ldsSize scaled by 64; presumably the field counts 64-dword
// units — TODO confirm against the register documentation.
std::uint32_t getLdsDwordsCount() const { return ldsSize * 64; }
|
||||
} rsrc2;
|
||||
std::uint32_t _pad3[1];
|
||||
std::uint32_t computeResourceLimits;
|
||||
std::uint32_t computeStaticThreadMgmtSe0;
|
||||
std::uint32_t computeStaticThreadMgmtSe1;
|
||||
std::uint32_t computeTmpRingSize;
|
||||
|
||||
// Bitfield view of the resource-limits register word: `raw` aliases the
// packed fields declared in the anonymous struct below.
struct {
|
||||
union {
|
||||
std::uint32_t raw;
|
||||
struct {
|
||||
std::uint32_t wavesPerSh : 6;
|
||||
std::uint32_t : 6;
|
||||
std::uint32_t tgPerCu : 4;
|
||||
std::uint32_t lockThreshold: 6;
|
||||
std::uint32_t simdDestCntl : 1;
|
||||
};
|
||||
|
||||
};
|
||||
// Returns wavesPerSh multiplied by 16 (left shift by 4); presumably the
// field is stored in units of 16 waves — TODO confirm.
std::uint32_t getWavesPerSh() const { return wavesPerSh << 4; }
|
||||
} resourceLimits;
|
||||
std::uint32_t staticThreadMgmtSe0;
|
||||
std::uint32_t staticThreadMgmtSe1;
|
||||
std::uint32_t tmpRingSize;
|
||||
std::uint32_t _pad4[39];
|
||||
std::array<std::uint32_t, 16> userData;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -512,70 +512,23 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
|
|||
pipe.scheduler.submit();
|
||||
}
|
||||
|
||||
// void amdgpu::dispatch(Scheduler &sched,
|
||||
// amdgpu::Registers::ComputeConfig &computeConfig, int
|
||||
// vmId, std::uint32_t groupCountX, std::uint32_t
|
||||
// groupCountY, std::uint32_t groupCountZ) {
|
||||
void amdgpu::dispatch(Cache &cache, Scheduler &sched,
|
||||
Registers::ComputeConfig &computeConfig,
|
||||
std::uint32_t groupCountX, std::uint32_t groupCountY,
|
||||
std::uint32_t groupCountZ) {
|
||||
auto tag = cache.createComputeTag(sched);
|
||||
auto descriptorSet = tag.getDescriptorSet();
|
||||
auto shader = tag.getShader(computeConfig);
|
||||
auto pipelineLayout = tag.getComputePipelineLayout();
|
||||
tag.buildDescriptors(descriptorSet);
|
||||
|
||||
// vkCmdDispatch(sched.getCommandBuffer(), groupCountX, groupCountY,
|
||||
// groupCountZ);
|
||||
|
||||
// sched.submit();
|
||||
// }
|
||||
|
||||
static void
|
||||
transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,
|
||||
VkImageLayout oldLayout, VkImageLayout newLayout,
|
||||
const VkImageSubresourceRange &subresourceRange) {
|
||||
VkImageMemoryBarrier barrier{};
|
||||
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
|
||||
barrier.oldLayout = oldLayout;
|
||||
barrier.newLayout = newLayout;
|
||||
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.image = image;
|
||||
barrier.subresourceRange = subresourceRange;
|
||||
|
||||
auto layoutToStageAccess = [](VkImageLayout layout)
|
||||
-> std::pair<VkPipelineStageFlags, VkAccessFlags> {
|
||||
switch (layout) {
|
||||
case VK_IMAGE_LAYOUT_UNDEFINED:
|
||||
case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
|
||||
case VK_IMAGE_LAYOUT_GENERAL:
|
||||
return {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0};
|
||||
|
||||
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT};
|
||||
|
||||
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT};
|
||||
|
||||
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT};
|
||||
|
||||
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT};
|
||||
|
||||
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
|
||||
return {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT};
|
||||
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
};
|
||||
|
||||
auto [sourceStage, sourceAccess] = layoutToStageAccess(oldLayout);
|
||||
auto [destinationStage, destinationAccess] = layoutToStageAccess(newLayout);
|
||||
|
||||
barrier.srcAccessMask = sourceAccess;
|
||||
barrier.dstAccessMask = destinationAccess;
|
||||
|
||||
vkCmdPipelineBarrier(commandBuffer, sourceStage, destinationStage, 0, 0,
|
||||
nullptr, 0, nullptr, 1, &barrier);
|
||||
auto commandBuffer = sched.getCommandBuffer();
|
||||
VkShaderStageFlagBits stages[]{VK_SHADER_STAGE_COMPUTE_BIT};
|
||||
vk::CmdBindShadersEXT(commandBuffer, 1, stages, &shader.handle);
|
||||
vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
pipelineLayout, 0, 1, &descriptorSet, 0, nullptr);
|
||||
vkCmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ);
|
||||
sched.submit();
|
||||
}
|
||||
|
||||
void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer,
|
||||
|
|
@ -604,12 +557,6 @@ void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer,
|
|||
auto imageView = cacheTag.getImageView(framebuffer, Access::Read);
|
||||
auto sampler = cacheTag.getSampler(framebufferSampler);
|
||||
|
||||
VkDescriptorImageInfo imageInfo{
|
||||
.sampler = sampler.handle,
|
||||
.imageView = imageView.handle,
|
||||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
|
||||
VkRenderingAttachmentInfo colorAttachments[1]{{
|
||||
.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
|
||||
.imageView = target,
|
||||
|
|
|
|||
|
|
@ -11,6 +11,10 @@ void draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
|
|||
std::uint32_t vertexCount, std::uint32_t firstInstance,
|
||||
std::uint32_t instanceCount, std::uint64_t indiciesAddress,
|
||||
std::uint32_t indexCount);
|
||||
// Records and submits a compute dispatch of
// groupCountX x groupCountY x groupCountZ thread groups. The shader and its
// resources are resolved from `computeConfig` through `cache`; commands are
// recorded on the command buffer of `sched`, which is submitted afterwards.
void dispatch(Cache &cache, Scheduler &sched,
|
||||
Registers::ComputeConfig &computeConfig,
|
||||
std::uint32_t groupCountX, std::uint32_t groupCountY,
|
||||
std::uint32_t groupCountZ);
|
||||
void flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer,
|
||||
VkExtent2D targetExtent, std::uint64_t address, VkImageView target,
|
||||
VkExtent2D imageExtent, FlipType type, TileMode tileMode,
|
||||
|
|
|
|||
|
|
@ -7,6 +7,70 @@
|
|||
#include <vector>
|
||||
|
||||
namespace shader::gcn {
|
||||
// Identifiers for SGPR inputs, presumably those of the vertex-shader stage
// (paired with ConfigType::VsInputSGpr) — TODO confirm.
// `Count` is not an input: as the last enumerator it equals the number of
// identifiers declared before it.
enum class VsSGprInput {
|
||||
State,
|
||||
StreamOutWriteIndex,
|
||||
StreamOutBaseOffset0,
|
||||
StreamOutBaseOffset1,
|
||||
StreamOutBaseOffset2,
|
||||
StreamOutBaseOffset3,
|
||||
OffchipLds,
|
||||
WaveId,
|
||||
Scratch,
|
||||
|
||||
Count,
|
||||
};
|
||||
|
||||
// Identifiers for SGPR inputs, presumably those of the pixel-shader stage
// (paired with ConfigType::PsInputSGpr) — TODO confirm.
// `Count` equals the number of identifiers declared before it.
enum class PsSGprInput {
|
||||
State,
|
||||
WaveCount,
|
||||
Scratch,
|
||||
|
||||
Count,
|
||||
};
|
||||
|
||||
// Identifiers for SGPR inputs, presumably those of the geometry-shader stage
// (paired with ConfigType::GsInputSGpr) — TODO confirm.
// `Count` equals the number of identifiers declared before it.
enum class GsSGprInput {
|
||||
GsVsOffset,
|
||||
GsWaveId,
|
||||
Scratch,
|
||||
|
||||
Count,
|
||||
};
|
||||
|
||||
// Identifiers for SGPR inputs, presumably those of the export-shader (ES)
// stage (paired with ConfigType::EsInputSGpr) — TODO confirm.
// `Count` equals the number of identifiers declared before it.
enum class EsSGprInput {
|
||||
OffchipLds,
|
||||
IsOffchip,
|
||||
EsGsOffset,
|
||||
Scratch,
|
||||
|
||||
Count,
|
||||
};
|
||||
|
||||
// Identifiers for SGPR inputs, presumably those of the hull-shader stage
// (paired with ConfigType::HsInputSGpr) — TODO confirm.
// `Count` equals the number of identifiers declared before it.
enum class HsSGprInput {
|
||||
OffchipLds,
|
||||
ThreadGroupSize,
|
||||
TesselationFactorBase,
|
||||
Scratch,
|
||||
|
||||
Count,
|
||||
};
|
||||
|
||||
// Identifiers for SGPR inputs, presumably those of the local-shader (LS)
// stage (paired with ConfigType::LsInputSGpr) — TODO confirm.
// `Count` equals the number of identifiers declared before it.
enum class LsSGprInput {
|
||||
Scratch,
|
||||
|
||||
Count,
|
||||
};
|
||||
|
||||
// Identifiers for the SGPR inputs a compute shader can have enabled.
// Cache::ComputeTag::getShader collects the enabled ones (from the rsrc2
// enable bits) in this declaration order and uses `Count` as the capacity of
// that list. `Count` itself is not an input; it equals the number of
// identifiers declared before it.
enum class CsSGprInput {
|
||||
ThreadGroupIdX,
|
||||
ThreadGroupIdY,
|
||||
ThreadGroupIdZ,
|
||||
ThreadGroupSize,
|
||||
Scratch,
|
||||
|
||||
Count,
|
||||
};
|
||||
|
||||
enum class PsVGprInput {
|
||||
IPerspSample,
|
||||
JPerspSample,
|
||||
|
|
@ -34,6 +98,7 @@ enum class PsVGprInput {
|
|||
|
||||
Count
|
||||
};
|
||||
|
||||
enum class ConfigType {
|
||||
Imm,
|
||||
UserSgpr,
|
||||
|
|
@ -41,7 +106,20 @@ enum class ConfigType {
|
|||
MemoryTable,
|
||||
Gds,
|
||||
PsInputVGpr,
|
||||
VsInputSGpr,
|
||||
PsInputSGpr,
|
||||
GsInputSGpr,
|
||||
EsInputSGpr,
|
||||
HsInputSGpr,
|
||||
LsInputSGpr,
|
||||
CsInputSGpr,
|
||||
GsPrimType,
|
||||
GsInstanceEn,
|
||||
InstanceEn,
|
||||
VsPrimType,
|
||||
PsPrimType,
|
||||
CsTgIdCompCnt,
|
||||
VsInputVgprCount,
|
||||
CbCompSwap,
|
||||
ViewPortOffsetX,
|
||||
ViewPortOffsetY,
|
||||
|
|
|
|||
|
|
@ -27,6 +27,23 @@ enum class Stage {
|
|||
Invalid,
|
||||
};
|
||||
|
||||
enum RegId {
|
||||
Sgpr,
|
||||
Vgpr,
|
||||
M0,
|
||||
Scc,
|
||||
Vcc,
|
||||
Exec,
|
||||
VccZ,
|
||||
ExecZ,
|
||||
LdsDirect,
|
||||
SgprCount,
|
||||
VgprCount,
|
||||
ThreadId,
|
||||
MemoryTable,
|
||||
Gds,
|
||||
};
|
||||
|
||||
struct Import : spv::Import {
|
||||
ir::Node getOrCloneImpl(ir::Context &context, ir::Node node,
|
||||
bool isOperand) override;
|
||||
|
|
@ -55,23 +72,6 @@ struct InstructionRegion : ir::RegionLikeImpl {
|
|||
}
|
||||
};
|
||||
|
||||
enum RegId {
|
||||
Sgpr,
|
||||
Vgpr,
|
||||
M0,
|
||||
Scc,
|
||||
Vcc,
|
||||
Exec,
|
||||
VccZ,
|
||||
ExecZ,
|
||||
LdsDirect,
|
||||
SgprCount,
|
||||
VgprCount,
|
||||
ThreadId,
|
||||
MemoryTable,
|
||||
Gds,
|
||||
};
|
||||
|
||||
struct Context : spv::Context {
|
||||
ir::Region body;
|
||||
rx::MemoryAreaTable<> memoryMap;
|
||||
|
|
@ -113,10 +113,13 @@ struct Context : spv::Context {
|
|||
struct Environment {
|
||||
std::uint8_t vgprCount;
|
||||
std::uint8_t sgprCount;
|
||||
std::span<const std::uint32_t> userSgprs;
|
||||
std::uint8_t numThreadX;
|
||||
std::uint8_t numThreadY;
|
||||
std::uint8_t numThreadZ;
|
||||
bool supportsBarycentric = true;
|
||||
bool supportsInt8 = false;
|
||||
bool supportsInt64Atomics = false;
|
||||
std::span<const std::uint32_t> userSgprs;
|
||||
};
|
||||
|
||||
ir::Region deserialize(Context &context, const Environment &environment,
|
||||
|
|
|
|||
|
|
@ -10,6 +10,9 @@ void rx::die(const char *message, ...) {
|
|||
std::vfprintf(stderr, message, args);
|
||||
std::fprintf(stderr, "\n");
|
||||
va_end(args);
|
||||
|
||||
std::fflush(stdout);
|
||||
std::fflush(stderr);
|
||||
std::abort();
|
||||
}
|
||||
|
||||
|
|
@ -20,6 +23,9 @@ void rx::dieIf(bool condition, const char *message, ...) {
|
|||
std::vfprintf(stderr, message, args);
|
||||
std::fprintf(stderr, "\n");
|
||||
va_end(args);
|
||||
|
||||
std::fflush(stdout);
|
||||
std::fflush(stderr);
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue