rpcsx-gpu: fix flip image acquire

fix cmpx
fix cs init ordering
implement s_mulk_i32, s_abs_i32, s_cmovk_i32, s_cmov_b32 and s_cmov_b64
fix s_mul_i32
fix s_cbranch_* for cs
This commit is contained in:
DH 2024-10-04 17:45:56 +03:00
parent 23226c966a
commit 113abf20e5
13 changed files with 126 additions and 160 deletions

View file

@ -1299,21 +1299,21 @@ Cache::ImageView Cache::Tag::getImageView(const ImageKey &key, Access access) {
void Cache::Tag::readMemory(void *target, std::uint64_t address,
std::uint64_t size) {
mParent->flush(*mScheduler, address, size);
// mParent->flush(*mScheduler, address, size);
auto memoryPtr = RemoteMemory{mParent->mVmIm}.getPointer(address);
std::memcpy(target, memoryPtr, size);
}
void Cache::Tag::writeMemory(const void *source, std::uint64_t address,
std::uint64_t size) {
mParent->flush(*mScheduler, address, size);
// mParent->invalidate(*mScheduler, address, size);
auto memoryPtr = RemoteMemory{mParent->mVmIm}.getPointer(address);
std::memcpy(memoryPtr, source, size);
}
int Cache::Tag::compareMemory(const void *source, std::uint64_t address,
std::uint64_t size) {
mParent->flush(*mScheduler, address, size);
// mParent->flush(*mScheduler, address, size);
auto memoryPtr = RemoteMemory{mParent->mVmIm}.getPointer(address);
return std::memcmp(memoryPtr, source, size);
}
@ -1348,14 +1348,18 @@ void Cache::Tag::release() {
return;
}
std::vector<std::shared_ptr<Entry>> tmpResources;
while (!mStorage->mAcquiredResources.empty()) {
auto resource = std::move(mStorage->mAcquiredResources.back());
mStorage->mAcquiredResources.pop_back();
resource->flush(*this, *mScheduler, 0, ~static_cast<std::uint64_t>(0));
tmpResources.push_back(std::move(resource));
}
mScheduler->submit();
mScheduler->wait();
if (!tmpResources.empty()) {
mScheduler->submit();
mScheduler->wait();
}
mStorage->clear();
auto storageIndex = mStorage - mParent->mTagStorages;
@ -1865,6 +1869,10 @@ Cache::Cache(Device *device, int vmId) : mDevice(device), mVmIm(vmId) {
}
Cache::~Cache() {
for (auto &samp : mSamplers) {
vkDestroySampler(vk::context->device, samp.second, vk::context->allocator);
}
vkDestroyDescriptorPool(vk::context->device, mDescriptorPool,
vk::context->allocator);

View file

@ -242,8 +242,7 @@ transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,
}
bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
VkCommandBuffer commandBuffer, VkImage swapchainImage,
VkImageView swapchainImageView, VkFence fence) {
VkImage swapchainImage, VkImageView swapchainImageView) {
auto &pipe = graphicsPipes[0];
auto &scheduler = pipe.scheduler;
auto &process = processInfo[pid];
@ -292,15 +291,11 @@ bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
}
// std::printf("displaying buffer %lx\n", buffer.address);
VkCommandBufferBeginInfo beginInfo{};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
vkBeginCommandBuffer(commandBuffer, &beginInfo);
auto cacheTag = getCacheTag(process.vmId, scheduler);
auto &sched = cacheTag.getScheduler();
transitionImageLayout(commandBuffer, swapchainImage,
transitionImageLayout(sched.getCommandBuffer(), swapchainImage,
VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
{
@ -310,11 +305,11 @@ bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
});
amdgpu::flip(
cacheTag, commandBuffer, vk::context->swapchainExtent, buffer.address,
cacheTag, vk::context->swapchainExtent, buffer.address,
swapchainImageView, {bufferAttr.width, bufferAttr.height}, flipType,
getDefaultTileModes()[bufferAttr.tilingMode == 1 ? 10 : 8], dfmt, nfmt);
transitionImageLayout(commandBuffer, swapchainImage,
transitionImageLayout(sched.getCommandBuffer(), swapchainImage,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
{
@ -323,10 +318,25 @@ bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
.layerCount = 1,
});
sched.submit();
auto submitCompleteTask = scheduler.createExternalSubmit();
{
vkEndCommandBuffer(commandBuffer);
VkSemaphoreSubmitInfo waitSemSubmitInfos[] = {
{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = vk::context->presentCompleteSemaphore,
.value = 1,
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
},
{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = scheduler.getSemaphoreHandle(),
.value = submitCompleteTask - 1,
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
},
};
VkSemaphoreSubmitInfo signalSemSubmitInfos[] = {
{
@ -343,38 +353,15 @@ bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
},
};
VkSemaphoreSubmitInfo waitSemSubmitInfos[] = {
{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = vk::context->presentCompleteSemaphore,
.value = 1,
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
},
{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = scheduler.getSemaphoreHandle(),
.value = submitCompleteTask - 1,
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
},
};
VkCommandBufferSubmitInfo cmdBufferSubmitInfo{
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
.commandBuffer = commandBuffer,
};
VkSubmitInfo2 submitInfo{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
.waitSemaphoreInfoCount = 1,
.waitSemaphoreInfoCount = 2,
.pWaitSemaphoreInfos = waitSemSubmitInfos,
.commandBufferInfoCount = 1,
.pCommandBufferInfos = &cmdBufferSubmitInfo,
.signalSemaphoreInfoCount = 2,
.pSignalSemaphoreInfos = signalSemSubmitInfos,
};
vkQueueSubmit2(vk::context->presentQueue, 1, &submitInfo, fence);
vkQueueWaitIdle(vk::context->presentQueue);
vkQueueSubmit2(vk::context->presentQueue, 1, &submitInfo, VK_NULL_HANDLE);
}
scheduler.then([=, this, cacheTag = std::move(cacheTag)] {

View file

@ -86,8 +86,7 @@ struct Device {
std::uint64_t size);
bool processPipes();
bool flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
VkCommandBuffer commandBuffer, VkImage swapchainImage,
VkImageView swapchainImageView, VkFence fence);
VkImage swapchainImage, VkImageView swapchainImageView);
void mapMemory(std::int64_t pid, std::uint64_t address, std::uint64_t size,
int memoryType, int dmemIndex, int prot, std::int64_t offset);
void registerBuffer(std::int64_t pid, bridge::CmdBuffer buffer);

View file

@ -285,7 +285,8 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
renderTargetInfo.extent.height = vkViewPortScissor.extent.height;
renderTargetInfo.extent.depth = 1;
renderTargetInfo.dfmt = cbColor.info.dfmt;
renderTargetInfo.nfmt = gnm::toNumericFormat(cbColor.info.nfmt, cbColor.info.dfmt);
renderTargetInfo.nfmt =
gnm::toNumericFormat(cbColor.info.nfmt, cbColor.info.dfmt);
renderTargetInfo.mipCount = 1;
renderTargetInfo.arrayLayerCount = 1;
@ -423,6 +424,7 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
cacheTag.buildDescriptors(descriptorSets[0]);
pipe.scheduler.submit();
pipe.scheduler.afterSubmit([cacheTag = std::move(cacheTag)] {});
auto commandBuffer = pipe.scheduler.getCommandBuffer();
@ -479,11 +481,14 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
vkCmdSetStencilReference(commandBuffer, VK_STENCIL_FACE_FRONT_AND_BACK, 0);
VkCullModeFlags cullMode = VK_CULL_MODE_NONE;
if (pipe.uConfig.vgtPrimitiveType != gnm::PrimitiveType::RectList) {
if (pipe.context.paSuScModeCntl.cullBack) {
cullMode |= VK_CULL_MODE_BACK_BIT;
}
if (pipe.context.paSuScModeCntl.cullFront) {
cullMode |= VK_CULL_MODE_FRONT_BIT;
}
}
vkCmdSetCullMode(commandBuffer, cullMode);
@ -512,6 +517,7 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
vkCmdEndRendering(commandBuffer);
pipe.scheduler.submit();
pipe.scheduler.wait();
}
void amdgpu::dispatch(Cache &cache, Scheduler &sched,
@ -530,14 +536,15 @@ void amdgpu::dispatch(Cache &cache, Scheduler &sched,
pipelineLayout, 0, 1, &descriptorSet, 0, nullptr);
vk::CmdBindShadersEXT(commandBuffer, 1, stages, &shader.handle);
vkCmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ);
sched.afterSubmit([tag = std::move(tag)] {});
sched.submit();
sched.wait();
}
void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer,
VkExtent2D targetExtent, std::uint64_t address,
VkImageView target, VkExtent2D imageExtent, FlipType type,
TileMode tileMode, gnm::DataFormat dfmt,
gnm::NumericFormat nfmt) {
void amdgpu::flip(Cache::Tag &cacheTag, VkExtent2D targetExtent,
std::uint64_t address, VkImageView target,
VkExtent2D imageExtent, FlipType type, TileMode tileMode,
gnm::DataFormat dfmt, gnm::NumericFormat nfmt) {
ImageKey framebuffer{};
framebuffer.readAddress = address;
framebuffer.type = gnm::TextureType::Dim2D;
@ -601,8 +608,7 @@ void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer,
.pColorAttachments = colorAttachments,
};
commandBuffer = cacheTag.getScheduler().getCommandBuffer();
auto commandBuffer = cacheTag.getScheduler().getCommandBuffer();
vkCmdBeginRendering(commandBuffer, &renderInfo);
cacheTag.getDevice()->flipPipeline.bind(cacheTag.getScheduler(), type,
@ -613,5 +619,4 @@ void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer,
vkCmdDraw(commandBuffer, 6, 1, 0, 0);
vkCmdEndRendering(commandBuffer);
cacheTag.getScheduler().submit();
}

View file

@ -15,8 +15,7 @@ void dispatch(Cache &cache, Scheduler &sched,
Registers::ComputeConfig &computeConfig,
std::uint32_t groupCountX, std::uint32_t groupCountY,
std::uint32_t groupCountZ);
void flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer,
VkExtent2D targetExtent, std::uint64_t address, VkImageView target,
VkExtent2D imageExtent, FlipType type, TileMode tileMode,
gnm::DataFormat dfmt, gnm::NumericFormat nfmt);
void flip(Cache::Tag &cacheTag, VkExtent2D targetExtent, std::uint64_t address,
VkImageView target, VkExtent2D imageExtent, FlipType type,
TileMode tileMode, gnm::DataFormat dfmt, gnm::NumericFormat nfmt);
} // namespace amdgpu

View file

@ -116,7 +116,7 @@ struct Environment {
std::uint8_t numThreadX;
std::uint8_t numThreadY;
std::uint8_t numThreadZ;
bool supportsBarycentric = true;
bool supportsBarycentric = false;
bool supportsInt8 = false;
bool supportsInt64Atomics = false;
bool supportsNonSemanticInfo = false;

View file

@ -24,7 +24,7 @@
#define ClampInfToFltMax(x) (isinf(x) ? ((x) < 0 ? -FLT_MAX : FLT_MAX) : (x))
#define ConvertInfToZero(x) (isinf(x) ? 0.0 : (x))
#define Rsqrt(x) (1.0 / sqrt(x))
#define Rsqrt(x) (inversesqrt(x))
#define Rcp(x) (1.0 / x)
#define U32ARRAY_FETCH_BITS(ARRAY, START, BITCOUNT) ((ARRAY[(START) >> 5] >> ((START) & 31)) & ((1 << (BITCOUNT)) - 1))
@ -577,10 +577,12 @@ void set_cond_thread_bit(inout uint64_t sdst, bool cond) {
void set_cond_thread_bit_exec(inout uint64_t sdst, bool cond) {
uint64_t bit = uint64_t(1) << thread_id;
if (cond && (exec & bit) != 0) {
if (cond) {
sdst |= bit;
exec |= bit;
} else {
sdst &= ~bit;
exec &= ~bit;
}
}
@ -995,6 +997,23 @@ void s_cmpk_le_u32(uint32_t a, uint32_t b) { scc = a <= b; }
void s_cmpk_lt_u32(uint32_t a, uint32_t b) { scc = a < b; }
void s_cmpk_lg_u32(uint32_t a, uint32_t b) { scc = a != b; }
// Conditional move of a constant: writes `value` to `sdst` only when scc is
// set.
//
// `sdst` is `inout` rather than `out`: an `out` parameter that is not written
// on some path has an undefined value when it is copied back to the caller,
// which would clobber the destination whenever the move is not taken.
// NOTE(review): this relies on the call site passing the current value of the
// destination register in - confirm against the generated call.
void s_cmovk_i32(inout uint32_t sdst, uint32_t value) {
  if (scc) {
    sdst = value;
  }
}
// 32-bit conditional move: writes `value` to `sdst` only when scc is set.
//
// `sdst` is `inout` rather than `out`: an `out` parameter that is not written
// on some path has an undefined value when it is copied back to the caller,
// which would clobber the destination whenever the move is not taken.
// NOTE(review): this relies on the call site passing the current value of the
// destination register in - confirm against the generated call.
void s_cmov_b32(inout uint32_t sdst, uint32_t value) {
  if (scc) {
    sdst = value;
  }
}
// 64-bit conditional move: writes `value` to `sdst` only when scc is set.
//
// `sdst` is `inout` rather than `out`: an `out` parameter that is not written
// on some path has an undefined value when it is copied back to the caller,
// which would clobber the destination whenever the move is not taken.
// NOTE(review): this relies on the call site passing the current value of the
// destination register in - confirm against the generated call.
void s_cmov_b64(inout uint64_t sdst, uint64_t value) {
  if (scc) {
    sdst = value;
  }
}
uint32_t s_not_b32(uint32_t x) {
uint32_t result = ~x;
@ -1236,7 +1255,13 @@ int32_t s_ashr_i32(int32_t x, uint32_t y) { int32_t result = x >> (y & 0x1f); sc
// 64-bit arithmetic shift right. Only the low 6 bits of `y` are used as the
// shift amount; scc is set when the shifted value is non-zero.
int64_t s_ashr_i64(int64_t x, uint32_t y) {
  uint32_t amount = y & 0x3f;
  int64_t shifted = x >> amount;
  scc = shifted != 0;
  return shifted;
}
// 32-bit bitfield mask: builds a run of (x & 31) one-bits and shifts it left
// by (y & 31). scc is set when the resulting mask is non-zero.
uint32_t s_bfm_b32(uint32_t x, uint32_t y) {
  uint32_t width = x & 0x1f;
  uint32_t offset = y & 0x1f;
  uint32_t mask = ((1 << width) - 1) << offset;
  scc = mask != 0;
  return mask;
}
// 64-bit bitfield mask: builds a run of (x & 63) one-bits and shifts it left
// by (y & 63).
//
// The width and offset of the 64-bit variant are 6-bit fields. Masking them
// with 0x1f (as the 32-bit variant does) made it impossible to describe a
// field wider than 31 bits or one that starts in the upper half of the result.
// NOTE(review): the scc update is kept exactly as it was; the ISA listing for
// S_BFM_* does not appear to mention SCC - confirm whether it should be
// written at all.
uint64_t s_bfm_b64(uint64_t x, uint64_t y) {
  uint64_t result = ((uint64_t(1) << (x & 0x3f)) - 1) << (y & 0x3f);
  scc = result != 0;
  return result;
}
int32_t s_mul_i32(int32_t x, int32_t y) { int32_t result = x * y; scc = result != 0; return result; }
int32_t s_mul_i32(int32_t x, int32_t y) { return x * y; }
int32_t s_mulk_i32(int32_t x, int32_t y) { return x * y; }
// Integer absolute value. Returns |x| and sets scc when the result is
// non-zero.
//
// The previous condition (`result == 0`) was inverted relative to the ISA
// definition (SCC = 1 if result is non-zero), so any branch or conditional
// move that consumed scc after this instruction took the wrong path.
int32_t s_abs_i32(int32_t x) {
  int32_t result = abs(x);
  scc = result != 0;
  return result;
}
uint32_t s_bfe_u32(uint32_t x, uint32_t y) {
uint32_t offset = y & 0x1f;
uint32_t width = (y >> 16) & 0x7f;
@ -2168,10 +2193,10 @@ void s_dcache_inv() {
bool s_cbranch_scc0() { return scc == false; }
bool s_cbranch_scc1() { return scc == true; }
bool s_cbranch_vccz() { return vcc == 0; }
bool s_cbranch_vccnz() { return vcc != 0; }
bool s_cbranch_execz() { return exec == 0; }
bool s_cbranch_execnz() { return exec != 0; }
bool s_cbranch_vccz() { return (vcc & (uint64_t(1) << thread_id)) == 0; }
bool s_cbranch_vccnz() { return (vcc & (uint64_t(1) << thread_id)) != 0; }
bool s_cbranch_execz() { return (exec & (uint64_t(1) << thread_id)) == 0; }
bool s_cbranch_execnz() { return (exec & (uint64_t(1) << thread_id)) != 0; }
// DS

View file

@ -1577,12 +1577,6 @@ static void createInitialValues(GcnConverter &converter,
}
}
for (std::int32_t i = 0; i < 3; ++i) {
auto value = builder.createSpvCompositeExtract(loc, uintT,
localInvocationId, {{i}});
context.writeReg(loc, builder, gcn::RegId::Vgpr, i, value);
}
auto workgroupSize = builder.createSpvCompositeConstruct(
loc, uvec3T,
{{context.imm32(env.numThreadX), context.imm32(env.numThreadY),
@ -1590,12 +1584,19 @@ static void createInitialValues(GcnConverter &converter,
auto workgroupSizeLocVar =
converter.createLocalVariable(builder, loc, workgroupSize);
builder.createValue(loc, ir::amdgpu::CS_SET_INITIAL_EXEC,
context.getTypeVoid(), localInvocationIdLocVar,
workgroupSizeLocVar);
builder.createValue(loc, ir::amdgpu::CS_SET_THREAD_ID,
context.getTypeVoid(), localInvocationIdLocVar,
workgroupSizeLocVar);
builder.createValue(loc, ir::amdgpu::CS_SET_INITIAL_EXEC,
context.getTypeVoid(), localInvocationIdLocVar,
workgroupSizeLocVar);
for (std::int32_t i = 0; i < 3; ++i) {
auto value = builder.createSpvCompositeExtract(loc, uintT,
localInvocationId, {{i}});
context.writeReg(loc, builder, gcn::RegId::Vgpr, i, value);
}
}
context.writeReg(loc, builder, gcn::RegId::Vcc, 0, context.imm64(0));

View file

@ -169,9 +169,6 @@ readSopkInst(GcnInstruction &inst, std::uint64_t &address,
inst.addOperand(createSgprGcnOperand(address, sdst).withW());
inst.addOperand(GcnOperand::createConstant(static_cast<std::uint32_t>(simm)));
if (op <= 16) {
inst.addOperand(createImmediateGcnOperand(address));
}
}
static void
@ -264,14 +261,10 @@ readVop3Inst(GcnInstruction &inst, std::uint64_t &address,
auto omod = fetchMaskedValue(words[1], omodMask);
auto neg = fetchMaskedValue(words[1], negMask);
if (op == ir::vop3::Op::MUL_HI_U32) {
std::printf(".");
}
inst.op = op;
bool vop3b = isVop3b(op);
if (!vop3b) {
if (vop3b) {
abs = 0;
clmp = false;
}
@ -291,19 +284,9 @@ readVop3Inst(GcnInstruction &inst, std::uint64_t &address,
inst.addOperand(createSgprGcnOperand(address, sdst).withRW());
}
bool writesVcc = op == ir::vop3::MAD_I64_I32 || op == ir::vop3::MAD_U64_U32 ||
op == ir::vop3::MQSAD_U32_U8 ||
op == ir::vop3::DIV_SCALE_F32 ||
op == ir::vop3::DIV_SCALE_F64;
bool readsVcc = op == ir::vop3::DIV_FMAS_F32 || op == ir::vop3::DIV_FMAS_F64;
bool usesSrc2 =
op >= ir::vop3::MAD_LEGACY_F32 && op <= ir::vop3::DIV_FIXUP_F64;
if (writesVcc) {
inst.addOperand(GcnOperand::createVccLo().withRW());
}
inst.addOperand(createSgprGcnOperand(address, src0)
.withR()
.withAbs((abs & 1) != 0)
@ -347,10 +330,6 @@ readVop3Inst(GcnInstruction &inst, std::uint64_t &address,
.withNeg(((neg >> 2) & 1) != 0));
}
}
if (readsVcc) {
inst.addOperand(GcnOperand::createVccLo().withR());
}
}
static void

View file

@ -16,6 +16,7 @@ class Scheduler {
unsigned mQueueFamily;
vk::CommandPool mCommandPool;
vk::CommandBuffer mCommandBuffer;
bool mIsEmpty = false;
std::uint64_t mNextSignal = 1;
std::mutex mTaskMutex;
@ -40,9 +41,17 @@ public:
unsigned getQueueFamily() const { return mQueueFamily; }
VkQueue getQueue() const { return mQueue; }
VkCommandBuffer getCommandBuffer() const { return mCommandBuffer; }
// Returns the scheduler's command buffer handle.
//
// Handing out the buffer also clears mIsEmpty: the caller is assumed to be
// about to record commands into it, so the next submit() must not take its
// "nothing to submit" early return.
VkCommandBuffer getCommandBuffer() {
mIsEmpty = false;
return mCommandBuffer;
}
Scheduler &submit() {
if (mIsEmpty) {
return *this;
}
mIsEmpty = true;
mCommandBuffer.end();
VkSemaphoreSubmitInfo waitSemSubmitInfo = {
@ -56,7 +65,7 @@ public:
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = mSemaphore.getHandle(),
.value = mNextSignal,
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
.stageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
};
VkCommandBufferSubmitInfo cmdBufferSubmitInfo{
@ -96,11 +105,14 @@ public:
return *this;
}
auto afterSubmit = std::move(mAfterSubmitTasks);
mAfterSubmitTasks.clear();
wait();
while (!mAfterSubmitTasks.empty()) {
auto task = std::move(mAfterSubmitTasks.back());
mAfterSubmitTasks.pop_back();
while (!afterSubmit.empty()) {
auto task = std::move(afterSubmit.back());
afterSubmit.pop_back();
std::move(task)();
}

View file

@ -42,7 +42,6 @@ struct Context {
VkFormat swapchainColorFormat = VK_FORMAT_B8G8R8A8_UNORM;
VkColorSpaceKHR swapchainColorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
std::vector<VkImageView> swapchainImageViews;
std::vector<VkFence> inFlightFences;
VkSemaphore presentCompleteSemaphore = VK_NULL_HANDLE;
VkSemaphore renderCompleteSemaphore = VK_NULL_HANDLE;
VkPhysicalDeviceDescriptorBufferPropertiesEXT descriptorBufferProps;
@ -69,10 +68,6 @@ struct Context {
vkDestroySwapchainKHR(device, swapchain, allocator);
}
for (auto fence : inFlightFences) {
vkDestroyFence(device, fence, allocator);
}
if (presentCompleteSemaphore != VK_NULL_HANDLE) {
vkDestroySemaphore(device, presentCompleteSemaphore, allocator);
}

View file

@ -198,15 +198,6 @@ void vk::Context::createSwapchain() {
recreateSwapchain();
inFlightFences.resize(swapchainImages.size());
for (auto &fence : inFlightFences) {
VkFenceCreateInfo fenceInfo{};
fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
fenceInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT;
VK_VERIFY(vkCreateFence(device, &fenceInfo, allocator, &fence));
}
{
VkSemaphoreCreateInfo semaphoreCreateInfo{};
semaphoreCreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
@ -422,6 +413,10 @@ void vk::Context::createDevice(VkSurfaceKHR surface, int gpuIndex,
storage_8bit.uniformAndStorageBuffer8BitAccess && float16_int8.shaderInt8;
supportsInt64Atomics = phyDevFeatures12.shaderBufferInt64Atomics;
if (!fsBarycentric.fragmentShaderBarycentric) {
shaderObject.pNext = fsBarycentric.pNext;
}
rx::dieIf(!storage_16bit.uniformAndStorageBuffer16BitAccess,
"16-bit storage is unsupported by this GPU");
rx::dieIf(!float16_int8.shaderFloat16,

View file

@ -18,8 +18,6 @@
#include <cstdio>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <print>
#include <span>
#include <thread>
@ -39,20 +37,6 @@
#include "Device.hpp"
static void saveImage(const char *name, const void *data, std::uint32_t width,
std::uint32_t height) {
std::ofstream file(name, std::ios::out | std::ios::binary);
file << "P6\n" << width << "\n" << height << "\n" << 255 << "\n";
auto ptr = (unsigned int *)data;
for (uint32_t y = 0; y < height; y++) {
for (uint32_t x = 0; x < width; x++) {
file.write((char *)ptr++, 3);
}
}
}
void transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,
VkImageLayout oldLayout, VkImageLayout newLayout,
const VkImageSubresourceRange &subresourceRange) {
@ -118,17 +102,6 @@ void transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,
});
}
static void submit(VkQueue queue, VkCommandBuffer cmdBuffer) {
VkSubmitInfo submit{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.commandBufferCount = 1,
.pCommandBuffers = &cmdBuffer,
};
VK_VERIFY(vkQueueSubmit(queue, 1, &submit, nullptr));
vkQueueWaitIdle(queue);
}
static void usage(std::FILE *out, const char *argv0) {
std::fprintf(out, "usage: %s [options...]\n", argv0);
std::fprintf(out, " options:\n");
@ -374,12 +347,6 @@ int main(int argc, const char *argv[]) {
VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT);
vkContext.createSwapchain();
std::vector<vk::CommandBuffer> presentCmdBuffers(
vkContext.swapchainImages.size());
for (auto &cmdBuffer : presentCmdBuffers) {
cmdBuffer = commandPool.createPrimaryBuffer({});
}
amdgpu::bridge::BridgePuller bridgePuller{bridge};
amdgpu::bridge::Command commandsBuffer[1];
@ -597,14 +564,11 @@ int main(int argc, const char *argv[]) {
case amdgpu::bridge::CommandId::Flip: {
if (!isImageAcquired) {
vkWaitForFences(vkContext.device, 1,
&vkContext.inFlightFences[imageIndex], VK_TRUE,
UINT64_MAX);
while (true) {
auto acquireNextImageResult = vkAcquireNextImageKHR(
vkContext.device, vkContext.swapchain, UINT64_MAX,
vkContext.presentCompleteSemaphore, nullptr, &imageIndex);
vkContext.presentCompleteSemaphore, VK_NULL_HANDLE,
&imageIndex);
if (acquireNextImageResult == VK_ERROR_OUT_OF_DATE_KHR) {
vkContext.recreateSwapchain();
continue;
@ -613,18 +577,11 @@ int main(int argc, const char *argv[]) {
VK_VERIFY(acquireNextImageResult);
break;
}
vkResetFences(vkContext.device, 1,
&vkContext.inFlightFences[imageIndex]);
}
vkResetCommandBuffer(presentCmdBuffers[imageIndex], 0);
if (!device.flip(cmd.flip.pid, cmd.flip.bufferIndex, cmd.flip.arg,
presentCmdBuffers[imageIndex],
vkContext.swapchainImages[imageIndex],
vkContext.swapchainImageViews[imageIndex],
vkContext.inFlightFences[imageIndex])) {
vkContext.swapchainImageViews[imageIndex])) {
isImageAcquired = true;
break;
}
@ -641,6 +598,8 @@ int main(int argc, const char *argv[]) {
auto vkQueuePresentResult =
vkQueuePresentKHR(vkContext.presentQueue, &presentInfo);
isImageAcquired = false;
if (vkQueuePresentResult == VK_ERROR_OUT_OF_DATE_KHR) {
vkContext.recreateSwapchain();
} else {
@ -678,4 +637,6 @@ int main(int argc, const char *argv[]) {
}
}
}
vkDeviceWaitIdle(vk::context->device);
}