From 239a0645bc65e2e8c87570416d59a90974133c75 Mon Sep 17 00:00:00 2001 From: DH Date: Mon, 30 Sep 2024 21:43:34 +0300 Subject: [PATCH] gpu2: initial dispatch implementation --- rpcsx-gpu2/Cache.cpp | 172 +++++++++++++++--- rpcsx-gpu2/Cache.hpp | 2 +- rpcsx-gpu2/FlipPipeline.cpp | 3 +- rpcsx-gpu2/Pipe.cpp | 6 +- rpcsx-gpu2/Registers.hpp | 73 ++++++-- rpcsx-gpu2/Renderer.cpp | 85 ++------- rpcsx-gpu2/Renderer.hpp | 4 + .../include/shader/GcnConverter.hpp | 78 ++++++++ .../lib/gcn-shader/include/shader/gcn.hpp | 39 ++-- rx/src/die.cpp | 6 + 10 files changed, 338 insertions(+), 130 deletions(-) diff --git a/rpcsx-gpu2/Cache.cpp b/rpcsx-gpu2/Cache.cpp index b74ec13fd..b514c8667 100644 --- a/rpcsx-gpu2/Cache.cpp +++ b/rpcsx-gpu2/Cache.cpp @@ -762,8 +762,6 @@ Cache::Shader Cache::Tag::getShader(const ShaderKey &key, std::shared_ptr Cache::Tag::findShader(const ShaderKey &key, const ShaderKey *dependedKey) { - auto data = RemoteMemory{mParent->mVmIm}.getPointer(key.address); - auto cacheIt = mParent->mShaders.queryArea(key.address); if (cacheIt == mParent->mShaders.end() || @@ -1088,10 +1086,9 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) { key.mipCount, key.pow2pad); VkImageUsageFlags usage = - VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT // | VK_IMAGE_USAGE_STORAGE_BIT - ; - + VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + if (key.kind == ImageKind::Color) { + usage |= VK_IMAGE_USAGE_SAMPLED_BIT; bool isCompressed = key.dfmt == gnm::kDataFormatBc1 || key.dfmt == gnm::kDataFormatBc2 || key.dfmt == gnm::kDataFormatBc3 || key.dfmt == gnm::kDataFormatBc4 || @@ -1101,6 +1098,9 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) { if (!isCompressed) { usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + } + } else { + usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; } auto image = vk::Image::Allocate( @@ -1151,25 +1151,6 @@ Cache::Image 
Cache::Tag::getImage(const ImageKey &key, Access access) { .depth = std::max(key.extent.depth >> mipLevel, 1u), }, }); - - regions.push_back({ - .bufferOffset = info.offset, - .bufferRowLength = - mipLevel > 0 ? 0 : std::max(key.pitch >> mipLevel, 1u), - .imageSubresource = - { - .aspectMask = toAspect(key.kind), - .mipLevel = mipLevel, - .baseArrayLayer = key.baseArrayLayer, - .layerCount = key.arrayLayerCount, - }, - .imageExtent = - { - .width = std::max(key.extent.width >> mipLevel, 1u), - .height = std::max(key.extent.height >> mipLevel, 1u), - .depth = std::max(key.extent.depth >> mipLevel, 1u), - }, - }); } } else { auto &tiler = mParent->mDevice->tiler; @@ -1434,10 +1415,10 @@ Cache::GraphicsTag::getShader(gcn::Stage stage, const SpiShaderPgm &pgm, gcn::Environment env{ .vgprCount = pgm.rsrc1.getVGprCount(), .sgprCount = pgm.rsrc1.getSGprCount(), - .userSgprs = std::span(pgm.userData.data(), pgm.rsrc2.userSgpr), .supportsBarycentric = vk::context->supportsBarycentric, .supportsInt8 = vk::context->supportsInt8, .supportsInt64Atomics = vk::context->supportsInt64Atomics, + .userSgprs = std::span(pgm.userData.data(), pgm.rsrc2.userSgpr), }; auto shader = Tag::getShader({ @@ -1545,6 +1526,10 @@ Cache::GraphicsTag::getShader(gcn::Stage stage, const SpiShaderPgm &pgm, configPtr[index] = std::bit_cast( context.cbColor[slot.data].info.compSwap); break; + + default: + rx::die("unexpected resource slot in graphics shader %u, stage %u", + int(slot.type), int(stage)); } ++index; @@ -1575,7 +1560,140 @@ Cache::GraphicsTag::getShader(gcn::Stage stage, const SpiShaderPgm &pgm, Cache::Shader Cache::ComputeTag::getShader(const Registers::ComputeConfig &pgm) { - return {}; + auto descriptorSet = getDescriptorSet(); + gcn::Environment env{ + .vgprCount = pgm.rsrc1.getVGprCount(), + .sgprCount = pgm.rsrc1.getSGprCount(), + .numThreadX = static_cast(pgm.numThreadX), + .numThreadY = static_cast(pgm.numThreadY), + .numThreadZ = static_cast(pgm.numThreadZ), + .supportsBarycentric = 
vk::context->supportsBarycentric, + .supportsInt8 = vk::context->supportsInt8, + .supportsInt64Atomics = vk::context->supportsInt64Atomics, + .userSgprs = std::span(pgm.userData.data(), pgm.rsrc2.userSgpr), + }; + + auto shader = Tag::getShader({ + .address = pgm.address << 8, + .stage = gcn::Stage::Cs, + .env = env, + }); + + if (!shader.handle) { + return shader; + } + + std::uint64_t memoryTableAddress = getMemoryTable().deviceAddress; + + std::uint64_t gdsAddress = mParent->getGdsBuffer().getAddress(); + mStorage->shaderResources.cacheTag = this; + + std::uint32_t slotOffset = mStorage->shaderResources.slotOffset; + + mStorage->shaderResources.loadResources( + shader.info->resources, + std::span(pgm.userData.data(), pgm.rsrc2.userSgpr)); + + const auto &configSlots = shader.info->configSlots; + + auto configSize = configSlots.size() * sizeof(std::uint32_t); + auto configBuffer = getInternalHostVisibleBuffer(configSize); + + auto configPtr = reinterpret_cast(configBuffer.data); + + std::uint32_t sgprInput[static_cast(gcn::CsSGprInput::Count)]; + std::uint32_t sgprInputCount = 0; + + if (pgm.rsrc2.tgIdXEn) { + sgprInput[sgprInputCount++] = static_cast(gcn::CsSGprInput::ThreadGroupIdX); + } + + if (pgm.rsrc2.tgIdYEn) { + sgprInput[sgprInputCount++] = static_cast(gcn::CsSGprInput::ThreadGroupIdY); + } + + if (pgm.rsrc2.tgIdZEn) { + sgprInput[sgprInputCount++] = static_cast(gcn::CsSGprInput::ThreadGroupIdZ); + } + + if (pgm.rsrc2.tgSizeEn) { + sgprInput[sgprInputCount++] = static_cast(gcn::CsSGprInput::ThreadGroupSize); + } + + if (pgm.rsrc2.scratchEn) { + sgprInput[sgprInputCount++] = static_cast(gcn::CsSGprInput::Scratch); + } + + for (std::size_t index = 0; const auto &slot : configSlots) { + switch (slot.type) { + case gcn::ConfigType::Imm: + readMemory(&configPtr[index], slot.data, sizeof(std::uint32_t)); + break; + case gcn::ConfigType::UserSgpr: + configPtr[index] = pgm.userData[slot.data]; + break; + case gcn::ConfigType::ResourceSlot: + 
mStorage->memoryTableConfigSlots.push_back({ + .bufferIndex = + static_cast(mStorage->descriptorBuffers.size()), + .configIndex = static_cast(index), + .resourceSlot = static_cast(slotOffset + slot.data), + }); + break; + + case gcn::ConfigType::MemoryTable: + if (slot.data == 0) { + configPtr[index] = static_cast(memoryTableAddress); + } else { + configPtr[index] = static_cast(memoryTableAddress >> 32); + } + break; + case gcn::ConfigType::Gds: + if (slot.data == 0) { + configPtr[index] = static_cast(gdsAddress); + } else { + configPtr[index] = static_cast(gdsAddress >> 32); + } + break; + + case gcn::ConfigType::CsTgIdCompCnt: + configPtr[index] = pgm.rsrc2.tidIgCompCount; + break; + + case gcn::ConfigType::CsInputSGpr: + if (slot.data < sgprInputCount) { + configPtr[index] = sgprInput[slot.data]; + } else { + configPtr[index] = -1; + } + break; + + default: + rx::die("unexpected resource slot in compute shader %u", int(slot.type)); + } + + ++index; + } + + mStorage->descriptorBuffers.push_back(configPtr); + + VkDescriptorBufferInfo bufferInfo{ + .buffer = configBuffer.handle, + .offset = configBuffer.offset, + .range = configSize, + }; + + VkWriteDescriptorSet writeDescSet{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = descriptorSet, + .dstBinding = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .pBufferInfo = &bufferInfo, + }; + + vkUpdateDescriptorSets(vk::context->device, 1, &writeDescSet, 0, nullptr); + return shader; } Cache::Cache(Device *device, int vmId) : mDevice(device), mVmIm(vmId) { diff --git a/rpcsx-gpu2/Cache.hpp b/rpcsx-gpu2/Cache.hpp index 21f174b34..94802f090 100644 --- a/rpcsx-gpu2/Cache.hpp +++ b/rpcsx-gpu2/Cache.hpp @@ -157,7 +157,7 @@ struct Cache { VkImageSubresourceRange subresource; }; - class Tag; + struct Tag; private: struct MemoryTableSlot { diff --git a/rpcsx-gpu2/FlipPipeline.cpp b/rpcsx-gpu2/FlipPipeline.cpp index 85adec48a..e55bdf774 100644 --- a/rpcsx-gpu2/FlipPipeline.cpp 
+++ b/rpcsx-gpu2/FlipPipeline.cpp @@ -3,8 +3,7 @@ #include "shaders/flip_alt.frag.h" #include "shaders/flip_std.frag.h" #include "vk.hpp" -#include -#include +#include FlipPipeline::~FlipPipeline() { vkDestroyPipeline(vk::context->device, pipelines[0], vk::context->allocator); diff --git a/rpcsx-gpu2/Pipe.cpp b/rpcsx-gpu2/Pipe.cpp index 842c23eaa..cc8868cb0 100644 --- a/rpcsx-gpu2/Pipe.cpp +++ b/rpcsx-gpu2/Pipe.cpp @@ -485,7 +485,8 @@ bool GraphicsPipe::dispatchDirect(Queue &queue) { auto dispatchInitiator = queue.rptr[4]; sh.compute.computeDispatchInitiator = dispatchInitiator; - // FIXME + amdgpu::dispatch(device->caches[queue.vmId], scheduler, sh.compute, dimX, + dimY, dimZ); return true; } bool GraphicsPipe::dispatchIndirect(Queue &queue) { @@ -500,7 +501,8 @@ bool GraphicsPipe::dispatchIndirect(Queue &queue) { auto dimY = buffer[1]; auto dimZ = buffer[2]; - // FIXME + amdgpu::dispatch(device->caches[queue.vmId], scheduler, sh.compute, dimX, + dimY, dimZ); return true; } diff --git a/rpcsx-gpu2/Registers.hpp b/rpcsx-gpu2/Registers.hpp index 5673aea78..cc829e5e6 100644 --- a/rpcsx-gpu2/Registers.hpp +++ b/rpcsx-gpu2/Registers.hpp @@ -551,20 +551,71 @@ struct Registers { std::uint32_t computeDispatchInitiator; std::uint32_t _pad0[6]; - std::uint32_t computeNumThreadX; - std::uint32_t computeNumThreadY; - std::uint32_t computeNumThreadZ; + std::uint32_t numThreadX; + std::uint32_t numThreadY; + std::uint32_t numThreadZ; std::uint32_t _pad1[2]; - std::uint32_t computePgmLo; - std::uint32_t computePgmHi; + std::uint64_t address; std::uint32_t _pad2[4]; - std::uint32_t computePgmRsrc1; - std::uint32_t computePgmRsrc2; + struct { + union { + std::uint32_t raw; + + struct { + std::uint32_t vgprs : 6; + std::uint32_t sgprs : 4; + std::uint32_t priority : 2; + std::uint32_t floatMode : 8; + std::uint32_t priv : 1; + std::uint32_t dx10Clamp : 1; + std::uint32_t debugMode : 1; + std::uint32_t ieeeMode : 1; + }; + }; + + std::uint8_t getVGprCount() const { return (vgprs + 
1) * 4; } + std::uint8_t getSGprCount() const { return (sgprs + 1) * 8; } + } rsrc1; + struct { + union { + std::uint32_t raw; + + struct { + bool scratchEn : 1; + std::uint32_t userSgpr : 5; + bool trapPresent : 1; + bool tgIdXEn : 1; + bool tgIdYEn : 1; + bool tgIdZEn : 1; + bool tgSizeEn : 1; + std::uint32_t tidIgCompCount : 2; + std::uint32_t : 2; + std::uint32_t ldsSize : 9; + std::uint32_t excpEn : 7; + }; + }; + + std::uint32_t getLdsDwordsCount() const { return ldsSize * 64; } + } rsrc2; std::uint32_t _pad3[1]; - std::uint32_t computeResourceLimits; - std::uint32_t computeStaticThreadMgmtSe0; - std::uint32_t computeStaticThreadMgmtSe1; - std::uint32_t computeTmpRingSize; + + struct { + union { + std::uint32_t raw; + struct { + std::uint32_t wavesPerSh : 6; + std::uint32_t : 6; + std::uint32_t tgPerCu : 4; + std::uint32_t lockThreshold: 6; + std::uint32_t simdDestCntl : 1; + }; + + }; + std::uint32_t getWavesPerSh() const { return wavesPerSh << 4; } + } resourceLimits; + std::uint32_t staticThreadMgmtSe0; + std::uint32_t staticThreadMgmtSe1; + std::uint32_t tmpRingSize; std::uint32_t _pad4[39]; std::array userData; }; diff --git a/rpcsx-gpu2/Renderer.cpp b/rpcsx-gpu2/Renderer.cpp index 28ca78eb3..2acf300fa 100644 --- a/rpcsx-gpu2/Renderer.cpp +++ b/rpcsx-gpu2/Renderer.cpp @@ -512,70 +512,23 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, pipe.scheduler.submit(); } -// void amdgpu::dispatch(Scheduler &sched, -// amdgpu::Registers::ComputeConfig &computeConfig, int -// vmId, std::uint32_t groupCountX, std::uint32_t -// groupCountY, std::uint32_t groupCountZ) { +void amdgpu::dispatch(Cache &cache, Scheduler &sched, + Registers::ComputeConfig &computeConfig, + std::uint32_t groupCountX, std::uint32_t groupCountY, + std::uint32_t groupCountZ) { + auto tag = cache.createComputeTag(sched); + auto descriptorSet = tag.getDescriptorSet(); + auto shader = tag.getShader(computeConfig); + auto pipelineLayout = tag.getComputePipelineLayout(); 
+ tag.buildDescriptors(descriptorSet); -// vkCmdDispatch(sched.getCommandBuffer(), groupCountX, groupCountY, -// groupCountZ); - -// sched.submit(); -// } - -static void -transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image, - VkImageLayout oldLayout, VkImageLayout newLayout, - const VkImageSubresourceRange &subresourceRange) { - VkImageMemoryBarrier barrier{}; - barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.oldLayout = oldLayout; - barrier.newLayout = newLayout; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.image = image; - barrier.subresourceRange = subresourceRange; - - auto layoutToStageAccess = [](VkImageLayout layout) - -> std::pair { - switch (layout) { - case VK_IMAGE_LAYOUT_UNDEFINED: - case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: - case VK_IMAGE_LAYOUT_GENERAL: - return {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0}; - - case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT}; - - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT}; - - case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - return {VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT}; - - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - return {VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT}; - - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - return {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT}; - - default: - std::abort(); - } - }; - - auto [sourceStage, sourceAccess] = layoutToStageAccess(oldLayout); - auto [destinationStage, destinationAccess] = layoutToStageAccess(newLayout); - - barrier.srcAccessMask = sourceAccess; - barrier.dstAccessMask = destinationAccess; - - 
vkCmdPipelineBarrier(commandBuffer, sourceStage, destinationStage, 0, 0, - nullptr, 0, nullptr, 1, &barrier); + auto commandBuffer = sched.getCommandBuffer(); + VkShaderStageFlagBits stages[]{VK_SHADER_STAGE_COMPUTE_BIT}; + vk::CmdBindShadersEXT(commandBuffer, 1, stages, &shader.handle); + vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, + pipelineLayout, 0, 1, &descriptorSet, 0, nullptr); + vkCmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ); + sched.submit(); } void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer, @@ -604,12 +557,6 @@ void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer, auto imageView = cacheTag.getImageView(framebuffer, Access::Read); auto sampler = cacheTag.getSampler(framebufferSampler); - VkDescriptorImageInfo imageInfo{ - .sampler = sampler.handle, - .imageView = imageView.handle, - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }; - VkRenderingAttachmentInfo colorAttachments[1]{{ .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, .imageView = target, diff --git a/rpcsx-gpu2/Renderer.hpp b/rpcsx-gpu2/Renderer.hpp index 62cc6e079..ed81c939b 100644 --- a/rpcsx-gpu2/Renderer.hpp +++ b/rpcsx-gpu2/Renderer.hpp @@ -11,6 +11,10 @@ void draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, std::uint32_t vertexCount, std::uint32_t firstInstance, std::uint32_t instanceCount, std::uint64_t indiciesAddress, std::uint32_t indexCount); +void dispatch(Cache &cache, Scheduler &sched, + Registers::ComputeConfig &computeConfig, + std::uint32_t groupCountX, std::uint32_t groupCountY, + std::uint32_t groupCountZ); void flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer, VkExtent2D targetExtent, std::uint64_t address, VkImageView target, VkExtent2D imageExtent, FlipType type, TileMode tileMode, diff --git a/rpcsx-gpu2/lib/gcn-shader/include/shader/GcnConverter.hpp b/rpcsx-gpu2/lib/gcn-shader/include/shader/GcnConverter.hpp index a35ff1096..7581be02b 100644 ---
a/rpcsx-gpu2/lib/gcn-shader/include/shader/GcnConverter.hpp +++ b/rpcsx-gpu2/lib/gcn-shader/include/shader/GcnConverter.hpp @@ -7,6 +7,70 @@ #include namespace shader::gcn { +enum class VsSGprInput { + State, + StreamOutWriteIndex, + StreamOutBaseOffset0, + StreamOutBaseOffset1, + StreamOutBaseOffset2, + StreamOutBaseOffset3, + OffchipLds, + WaveId, + Scratch, + + Count, +}; + +enum class PsSGprInput { + State, + WaveCount, + Scratch, + + Count, +}; + +enum class GsSGprInput { + GsVsOffset, + GsWaveId, + Scratch, + + Count, +}; + +enum class EsSGprInput { + OffchipLds, + IsOffchip, + EsGsOffset, + Scratch, + + Count, +}; + +enum class HsSGprInput { + OffchipLds, + ThreadGroupSize, + TesselationFactorBase, + Scratch, + + Count, +}; + +enum class LsSGprInput { + Scratch, + + Count, +}; + +enum class CsSGprInput { + ThreadGroupIdX, + ThreadGroupIdY, + ThreadGroupIdZ, + ThreadGroupSize, + Scratch, + + Count, +}; + enum class PsVGprInput { IPerspSample, JPerspSample, @@ -34,6 +98,7 @@ enum class PsVGprInput { Count }; + enum class ConfigType { Imm, UserSgpr, @@ -41,7 +106,20 @@ enum class ConfigType { MemoryTable, Gds, PsInputVGpr, + VsInputSGpr, + PsInputSGpr, + GsInputSGpr, + EsInputSGpr, + HsInputSGpr, + LsInputSGpr, + CsInputSGpr, + GsPrimType, + GsInstanceEn, + InstanceEn, VsPrimType, + PsPrimType, + CsTgIdCompCnt, + VsInputVgprCount, CbCompSwap, ViewPortOffsetX, ViewPortOffsetY, diff --git a/rpcsx-gpu2/lib/gcn-shader/include/shader/gcn.hpp b/rpcsx-gpu2/lib/gcn-shader/include/shader/gcn.hpp index 5dc6821b3..8cc395672 100644 --- a/rpcsx-gpu2/lib/gcn-shader/include/shader/gcn.hpp +++ b/rpcsx-gpu2/lib/gcn-shader/include/shader/gcn.hpp @@ -27,6 +27,23 @@ enum class Stage { Invalid, }; +enum RegId { + Sgpr, + Vgpr, + M0, + Scc, + Vcc, + Exec, + VccZ, + ExecZ, + LdsDirect, + SgprCount, + VgprCount, + ThreadId, + MemoryTable, + Gds, +}; + struct Import : spv::Import { ir::Node getOrCloneImpl(ir::Context &context, ir::Node node, bool isOperand) override; @@ -55,23 +72,6 @@ 
struct InstructionRegion : ir::RegionLikeImpl { } }; -enum RegId { - Sgpr, - Vgpr, - M0, - Scc, - Vcc, - Exec, - VccZ, - ExecZ, - LdsDirect, - SgprCount, - VgprCount, - ThreadId, - MemoryTable, - Gds, -}; - struct Context : spv::Context { ir::Region body; rx::MemoryAreaTable<> memoryMap; @@ -113,10 +113,13 @@ struct Context : spv::Context { struct Environment { std::uint8_t vgprCount; std::uint8_t sgprCount; - std::span userSgprs; + std::uint8_t numThreadX; + std::uint8_t numThreadY; + std::uint8_t numThreadZ; bool supportsBarycentric = true; bool supportsInt8 = false; bool supportsInt64Atomics = false; + std::span userSgprs; }; ir::Region deserialize(Context &context, const Environment &environment, diff --git a/rx/src/die.cpp b/rx/src/die.cpp index 5bd109927..3e045a6c5 100644 --- a/rx/src/die.cpp +++ b/rx/src/die.cpp @@ -10,6 +10,9 @@ void rx::die(const char *message, ...) { std::vfprintf(stderr, message, args); std::fprintf(stderr, "\n"); va_end(args); + + std::fflush(stdout); + std::fflush(stderr); std::abort(); } @@ -20,6 +23,9 @@ void rx::dieIf(bool condition, const char *message, ...) { std::vfprintf(stderr, message, args); std::fprintf(stderr, "\n"); va_end(args); + + std::fflush(stdout); + std::fflush(stderr); std::abort(); } }