diff --git a/rpcsx-gpu/Cache.cpp b/rpcsx-gpu/Cache.cpp index 71cf48efe..161a8edb2 100644 --- a/rpcsx-gpu/Cache.cpp +++ b/rpcsx-gpu/Cache.cpp @@ -1299,21 +1299,21 @@ Cache::ImageView Cache::Tag::getImageView(const ImageKey &key, Access access) { void Cache::Tag::readMemory(void *target, std::uint64_t address, std::uint64_t size) { - mParent->flush(*mScheduler, address, size); + // mParent->flush(*mScheduler, address, size); auto memoryPtr = RemoteMemory{mParent->mVmIm}.getPointer(address); std::memcpy(target, memoryPtr, size); } void Cache::Tag::writeMemory(const void *source, std::uint64_t address, std::uint64_t size) { - mParent->flush(*mScheduler, address, size); + // mParent->invalidate(*mScheduler, address, size); auto memoryPtr = RemoteMemory{mParent->mVmIm}.getPointer(address); std::memcpy(memoryPtr, source, size); } int Cache::Tag::compareMemory(const void *source, std::uint64_t address, std::uint64_t size) { - mParent->flush(*mScheduler, address, size); + // mParent->flush(*mScheduler, address, size); auto memoryPtr = RemoteMemory{mParent->mVmIm}.getPointer(address); return std::memcmp(memoryPtr, source, size); } @@ -1348,14 +1348,18 @@ void Cache::Tag::release() { return; } + std::vector> tmpResources; while (!mStorage->mAcquiredResources.empty()) { auto resource = std::move(mStorage->mAcquiredResources.back()); mStorage->mAcquiredResources.pop_back(); resource->flush(*this, *mScheduler, 0, ~static_cast(0)); + tmpResources.push_back(std::move(resource)); } - mScheduler->submit(); - mScheduler->wait(); + if (!tmpResources.empty()) { + mScheduler->submit(); + mScheduler->wait(); + } mStorage->clear(); auto storageIndex = mStorage - mParent->mTagStorages; @@ -1865,6 +1869,10 @@ Cache::Cache(Device *device, int vmId) : mDevice(device), mVmIm(vmId) { } Cache::~Cache() { + for (auto &samp : mSamplers) { + vkDestroySampler(vk::context->device, samp.second, vk::context->allocator); + } + vkDestroyDescriptorPool(vk::context->device, mDescriptorPool, 
vk::context->allocator); diff --git a/rpcsx-gpu/Device.cpp b/rpcsx-gpu/Device.cpp index a55383af7..2b979c511 100644 --- a/rpcsx-gpu/Device.cpp +++ b/rpcsx-gpu/Device.cpp @@ -242,8 +242,7 @@ transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image, } bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg, - VkCommandBuffer commandBuffer, VkImage swapchainImage, - VkImageView swapchainImageView, VkFence fence) { + VkImage swapchainImage, VkImageView swapchainImageView) { auto &pipe = graphicsPipes[0]; auto &scheduler = pipe.scheduler; auto &process = processInfo[pid]; @@ -292,15 +291,11 @@ bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg, } // std::printf("displaying buffer %lx\n", buffer.address); - VkCommandBufferBeginInfo beginInfo{}; - beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - - vkBeginCommandBuffer(commandBuffer, &beginInfo); auto cacheTag = getCacheTag(process.vmId, scheduler); + auto &sched = cacheTag.getScheduler(); - transitionImageLayout(commandBuffer, swapchainImage, + transitionImageLayout(sched.getCommandBuffer(), swapchainImage, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, { @@ -310,11 +305,11 @@ bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg, }); amdgpu::flip( - cacheTag, commandBuffer, vk::context->swapchainExtent, buffer.address, + cacheTag, vk::context->swapchainExtent, buffer.address, swapchainImageView, {bufferAttr.width, bufferAttr.height}, flipType, getDefaultTileModes()[bufferAttr.tilingMode == 1 ? 
10 : 8], dfmt, nfmt); - transitionImageLayout(commandBuffer, swapchainImage, + transitionImageLayout(sched.getCommandBuffer(), swapchainImage, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, { @@ -323,10 +318,25 @@ bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg, .layerCount = 1, }); + sched.submit(); + auto submitCompleteTask = scheduler.createExternalSubmit(); { - vkEndCommandBuffer(commandBuffer); + VkSemaphoreSubmitInfo waitSemSubmitInfos[] = { + { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = vk::context->presentCompleteSemaphore, + .value = 1, + .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + }, + { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = scheduler.getSemaphoreHandle(), + .value = submitCompleteTask - 1, + .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + }, + }; VkSemaphoreSubmitInfo signalSemSubmitInfos[] = { { @@ -343,38 +353,15 @@ bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg, }, }; - VkSemaphoreSubmitInfo waitSemSubmitInfos[] = { - { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, - .semaphore = vk::context->presentCompleteSemaphore, - .value = 1, - .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - }, - { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, - .semaphore = scheduler.getSemaphoreHandle(), - .value = submitCompleteTask - 1, - .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - }, - }; - - VkCommandBufferSubmitInfo cmdBufferSubmitInfo{ - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, - .commandBuffer = commandBuffer, - }; - VkSubmitInfo2 submitInfo{ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, - .waitSemaphoreInfoCount = 1, + .waitSemaphoreInfoCount = 2, .pWaitSemaphoreInfos = waitSemSubmitInfos, - .commandBufferInfoCount = 1, - .pCommandBufferInfos = &cmdBufferSubmitInfo, .signalSemaphoreInfoCount = 2, .pSignalSemaphoreInfos = signalSemSubmitInfos, }; - vkQueueSubmit2(vk::context->presentQueue, 1, 
&submitInfo, fence); - vkQueueWaitIdle(vk::context->presentQueue); + vkQueueSubmit2(vk::context->presentQueue, 1, &submitInfo, VK_NULL_HANDLE); } scheduler.then([=, this, cacheTag = std::move(cacheTag)] { diff --git a/rpcsx-gpu/Device.hpp b/rpcsx-gpu/Device.hpp index 620b0d32d..bb14a0653 100644 --- a/rpcsx-gpu/Device.hpp +++ b/rpcsx-gpu/Device.hpp @@ -86,8 +86,7 @@ struct Device { std::uint64_t size); bool processPipes(); bool flip(std::int64_t pid, int bufferIndex, std::uint64_t arg, - VkCommandBuffer commandBuffer, VkImage swapchainImage, - VkImageView swapchainImageView, VkFence fence); + VkImage swapchainImage, VkImageView swapchainImageView); void mapMemory(std::int64_t pid, std::uint64_t address, std::uint64_t size, int memoryType, int dmemIndex, int prot, std::int64_t offset); void registerBuffer(std::int64_t pid, bridge::CmdBuffer buffer); diff --git a/rpcsx-gpu/Renderer.cpp b/rpcsx-gpu/Renderer.cpp index 9ede8ef78..0e88fb8af 100644 --- a/rpcsx-gpu/Renderer.cpp +++ b/rpcsx-gpu/Renderer.cpp @@ -285,7 +285,8 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, renderTargetInfo.extent.height = vkViewPortScissor.extent.height; renderTargetInfo.extent.depth = 1; renderTargetInfo.dfmt = cbColor.info.dfmt; - renderTargetInfo.nfmt = gnm::toNumericFormat(cbColor.info.nfmt, cbColor.info.dfmt); + renderTargetInfo.nfmt = + gnm::toNumericFormat(cbColor.info.nfmt, cbColor.info.dfmt); renderTargetInfo.mipCount = 1; renderTargetInfo.arrayLayerCount = 1; @@ -423,6 +424,7 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, cacheTag.buildDescriptors(descriptorSets[0]); + pipe.scheduler.submit(); pipe.scheduler.afterSubmit([cacheTag = std::move(cacheTag)] {}); auto commandBuffer = pipe.scheduler.getCommandBuffer(); @@ -479,11 +481,14 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, vkCmdSetStencilReference(commandBuffer, VK_STENCIL_FACE_FRONT_AND_BACK, 0); VkCullModeFlags cullMode = 
VK_CULL_MODE_NONE; + + if (pipe.uConfig.vgtPrimitiveType != gnm::PrimitiveType::RectList) { if (pipe.context.paSuScModeCntl.cullBack) { cullMode |= VK_CULL_MODE_BACK_BIT; } if (pipe.context.paSuScModeCntl.cullFront) { cullMode |= VK_CULL_MODE_FRONT_BIT; + } } vkCmdSetCullMode(commandBuffer, cullMode); @@ -512,6 +517,7 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, vkCmdEndRendering(commandBuffer); pipe.scheduler.submit(); + pipe.scheduler.wait(); } void amdgpu::dispatch(Cache &cache, Scheduler &sched, @@ -530,14 +536,15 @@ void amdgpu::dispatch(Cache &cache, Scheduler &sched, pipelineLayout, 0, 1, &descriptorSet, 0, nullptr); vk::CmdBindShadersEXT(commandBuffer, 1, stages, &shader.handle); vkCmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ); + sched.afterSubmit([tag = std::move(tag)] {}); sched.submit(); + sched.wait(); } -void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer, - VkExtent2D targetExtent, std::uint64_t address, - VkImageView target, VkExtent2D imageExtent, FlipType type, - TileMode tileMode, gnm::DataFormat dfmt, - gnm::NumericFormat nfmt) { +void amdgpu::flip(Cache::Tag &cacheTag, VkExtent2D targetExtent, + std::uint64_t address, VkImageView target, + VkExtent2D imageExtent, FlipType type, TileMode tileMode, + gnm::DataFormat dfmt, gnm::NumericFormat nfmt) { ImageKey framebuffer{}; framebuffer.readAddress = address; framebuffer.type = gnm::TextureType::Dim2D; @@ -601,8 +608,7 @@ void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer, .pColorAttachments = colorAttachments, }; - commandBuffer = cacheTag.getScheduler().getCommandBuffer(); - + auto commandBuffer = cacheTag.getScheduler().getCommandBuffer(); vkCmdBeginRendering(commandBuffer, &renderInfo); cacheTag.getDevice()->flipPipeline.bind(cacheTag.getScheduler(), type, @@ -613,5 +619,4 @@ void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer, vkCmdDraw(commandBuffer, 6, 1, 0, 0); 
vkCmdEndRendering(commandBuffer); - cacheTag.getScheduler().submit(); } diff --git a/rpcsx-gpu/Renderer.hpp b/rpcsx-gpu/Renderer.hpp index ed81c939b..5d6d56e59 100644 --- a/rpcsx-gpu/Renderer.hpp +++ b/rpcsx-gpu/Renderer.hpp @@ -15,8 +15,7 @@ void dispatch(Cache &cache, Scheduler &sched, Registers::ComputeConfig &computeConfig, std::uint32_t groupCountX, std::uint32_t groupCountY, std::uint32_t groupCountZ); -void flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer, - VkExtent2D targetExtent, std::uint64_t address, VkImageView target, - VkExtent2D imageExtent, FlipType type, TileMode tileMode, - gnm::DataFormat dfmt, gnm::NumericFormat nfmt); +void flip(Cache::Tag &cacheTag, VkExtent2D targetExtent, std::uint64_t address, + VkImageView target, VkExtent2D imageExtent, FlipType type, + TileMode tileMode, gnm::DataFormat dfmt, gnm::NumericFormat nfmt); } // namespace amdgpu diff --git a/rpcsx-gpu/lib/gcn-shader/include/shader/gcn.hpp b/rpcsx-gpu/lib/gcn-shader/include/shader/gcn.hpp index 95b190ae7..b92df2b52 100644 --- a/rpcsx-gpu/lib/gcn-shader/include/shader/gcn.hpp +++ b/rpcsx-gpu/lib/gcn-shader/include/shader/gcn.hpp @@ -116,7 +116,7 @@ struct Environment { std::uint8_t numThreadX; std::uint8_t numThreadY; std::uint8_t numThreadZ; - bool supportsBarycentric = true; + bool supportsBarycentric = false; bool supportsInt8 = false; bool supportsInt64Atomics = false; bool supportsNonSemanticInfo = false; diff --git a/rpcsx-gpu/lib/gcn-shader/shaders/rdna.glsl b/rpcsx-gpu/lib/gcn-shader/shaders/rdna.glsl index 2a4f8758d..86802154f 100644 --- a/rpcsx-gpu/lib/gcn-shader/shaders/rdna.glsl +++ b/rpcsx-gpu/lib/gcn-shader/shaders/rdna.glsl @@ -24,7 +24,7 @@ #define ClampInfToFltMax(x) (isinf(x) ? ((x) < 0 ? -FLT_MAX : FLT_MAX) : (x)) #define ConvertInfToZero(x) (isinf(x) ? 
0.0 : (x)) -#define Rsqrt(x) (1.0 / sqrt(x)) +#define Rsqrt(x) (inversesqrt(x)) #define Rcp(x) (1.0 / x) #define U32ARRAY_FETCH_BITS(ARRAY, START, BITCOUNT) ((ARRAY[(START) >> 5] >> ((START) & 31)) & ((1 << (BITCOUNT)) - 1)) @@ -577,10 +577,12 @@ void set_cond_thread_bit(inout uint64_t sdst, bool cond) { void set_cond_thread_bit_exec(inout uint64_t sdst, bool cond) { uint64_t bit = uint64_t(1) << thread_id; - if (cond && (exec & bit) != 0) { + if (cond) { sdst |= bit; + exec |= bit; } else { sdst &= ~bit; + exec &= ~bit; } } @@ -995,6 +997,23 @@ void s_cmpk_le_u32(uint32_t a, uint32_t b) { scc = a <= b; } void s_cmpk_lt_u32(uint32_t a, uint32_t b) { scc = a < b; } void s_cmpk_lg_u32(uint32_t a, uint32_t b) { scc = a != b; } +void s_cmovk_i32(inout uint32_t sdst, uint32_t value) { /* inout: sdst must keep its previous value when scc is false */ + if (scc) { + sdst = value; + } +} + +void s_cmov_b32(inout uint32_t sdst, uint32_t value) { /* inout: sdst must keep its previous value when scc is false */ + if (scc) { + sdst = value; + } +} + +void s_cmov_b64(inout uint64_t sdst, uint64_t value) { /* inout: sdst must keep its previous value when scc is false */ + if (scc) { + sdst = value; + } +} uint32_t s_not_b32(uint32_t x) { uint32_t result = ~x; @@ -1236,7 +1255,13 @@ int32_t s_ashr_i32(int32_t x, uint32_t y) { int32_t result = x >> (y & 0x1f); sc int64_t s_ashr_i64(int64_t x, uint32_t y) { int64_t result = x >> (y & 0x3f); scc = result != 0; return result; } uint32_t s_bfm_b32(uint32_t x, uint32_t y) { uint32_t result = ((1 << (x & 0x1f)) - 1) << (y & 0x1f); scc = result != 0; return result; } uint64_t s_bfm_b64(uint64_t x, uint64_t y) { uint64_t result = ((uint64_t(1) << (x & 0x1f)) - 1) << (y & 0x1f); scc = result != 0; return result; } -int32_t s_mul_i32(int32_t x, int32_t y) { int32_t result = x * y; scc = result != 0; return result; } +int32_t s_mul_i32(int32_t x, int32_t y) { return x * y; } +int32_t s_mulk_i32(int32_t x, int32_t y) { return x * y; } +int32_t s_abs_i32(int32_t x) { + int32_t result = abs(x); + scc = result != 0; /* SCC is set when the result is non-zero, matching the neighbouring SALU helpers */ + return result; +} uint32_t s_bfe_u32(uint32_t x, uint32_t y) { uint32_t offset = y & 0x1f; uint32_t width = (y >> 16) & 0x7f; 
@@ -2168,10 +2193,10 @@ void s_dcache_inv() { bool s_cbranch_scc0() { return scc == false; } bool s_cbranch_scc1() { return scc == true; } -bool s_cbranch_vccz() { return vcc == 0; } -bool s_cbranch_vccnz() { return vcc != 0; } -bool s_cbranch_execz() { return exec == 0; } -bool s_cbranch_execnz() { return exec != 0; } +bool s_cbranch_vccz() { return (vcc & (uint64_t(1) << thread_id)) == 0; } +bool s_cbranch_vccnz() { return (vcc & (uint64_t(1) << thread_id)) != 0; } +bool s_cbranch_execz() { return (exec & (uint64_t(1) << thread_id)) == 0; } +bool s_cbranch_execnz() { return (exec & (uint64_t(1) << thread_id)) != 0; } // DS diff --git a/rpcsx-gpu/lib/gcn-shader/src/GcnConverter.cpp b/rpcsx-gpu/lib/gcn-shader/src/GcnConverter.cpp index c1c801d30..40014dd6f 100644 --- a/rpcsx-gpu/lib/gcn-shader/src/GcnConverter.cpp +++ b/rpcsx-gpu/lib/gcn-shader/src/GcnConverter.cpp @@ -1577,12 +1577,6 @@ static void createInitialValues(GcnConverter &converter, } } - for (std::int32_t i = 0; i < 3; ++i) { - auto value = builder.createSpvCompositeExtract(loc, uintT, - localInvocationId, {{i}}); - context.writeReg(loc, builder, gcn::RegId::Vgpr, i, value); - } - auto workgroupSize = builder.createSpvCompositeConstruct( loc, uvec3T, {{context.imm32(env.numThreadX), context.imm32(env.numThreadY), @@ -1590,12 +1584,19 @@ static void createInitialValues(GcnConverter &converter, auto workgroupSizeLocVar = converter.createLocalVariable(builder, loc, workgroupSize); - builder.createValue(loc, ir::amdgpu::CS_SET_INITIAL_EXEC, - context.getTypeVoid(), localInvocationIdLocVar, - workgroupSizeLocVar); builder.createValue(loc, ir::amdgpu::CS_SET_THREAD_ID, context.getTypeVoid(), localInvocationIdLocVar, workgroupSizeLocVar); + + builder.createValue(loc, ir::amdgpu::CS_SET_INITIAL_EXEC, + context.getTypeVoid(), localInvocationIdLocVar, + workgroupSizeLocVar); + + for (std::int32_t i = 0; i < 3; ++i) { + auto value = builder.createSpvCompositeExtract(loc, uintT, + localInvocationId, {{i}}); + 
context.writeReg(loc, builder, gcn::RegId::Vgpr, i, value); + } } context.writeReg(loc, builder, gcn::RegId::Vcc, 0, context.imm64(0)); diff --git a/rpcsx-gpu/lib/gcn-shader/src/GcnInstruction.cpp b/rpcsx-gpu/lib/gcn-shader/src/GcnInstruction.cpp index 0f2208626..a0cb01021 100644 --- a/rpcsx-gpu/lib/gcn-shader/src/GcnInstruction.cpp +++ b/rpcsx-gpu/lib/gcn-shader/src/GcnInstruction.cpp @@ -169,9 +169,6 @@ readSopkInst(GcnInstruction &inst, std::uint64_t &address, inst.addOperand(createSgprGcnOperand(address, sdst).withW()); inst.addOperand(GcnOperand::createConstant(static_cast(simm))); - if (op <= 16) { - inst.addOperand(createImmediateGcnOperand(address)); - } } static void @@ -264,14 +261,10 @@ readVop3Inst(GcnInstruction &inst, std::uint64_t &address, auto omod = fetchMaskedValue(words[1], omodMask); auto neg = fetchMaskedValue(words[1], negMask); - if (op == ir::vop3::Op::MUL_HI_U32) { - std::printf("."); - } - inst.op = op; bool vop3b = isVop3b(op); - if (!vop3b) { + if (vop3b) { abs = 0; clmp = false; } @@ -291,19 +284,9 @@ readVop3Inst(GcnInstruction &inst, std::uint64_t &address, inst.addOperand(createSgprGcnOperand(address, sdst).withRW()); } - bool writesVcc = op == ir::vop3::MAD_I64_I32 || op == ir::vop3::MAD_U64_U32 || - op == ir::vop3::MQSAD_U32_U8 || - op == ir::vop3::DIV_SCALE_F32 || - op == ir::vop3::DIV_SCALE_F64; - bool readsVcc = op == ir::vop3::DIV_FMAS_F32 || op == ir::vop3::DIV_FMAS_F64; - bool usesSrc2 = op >= ir::vop3::MAD_LEGACY_F32 && op <= ir::vop3::DIV_FIXUP_F64; - if (writesVcc) { - inst.addOperand(GcnOperand::createVccLo().withRW()); - } - inst.addOperand(createSgprGcnOperand(address, src0) .withR() .withAbs((abs & 1) != 0) @@ -347,10 +330,6 @@ readVop3Inst(GcnInstruction &inst, std::uint64_t &address, .withNeg(((neg >> 2) & 1) != 0)); } } - - if (readsVcc) { - inst.addOperand(GcnOperand::createVccLo().withR()); - } } static void diff --git a/rpcsx-gpu/lib/vk/include/Scheduler.hpp b/rpcsx-gpu/lib/vk/include/Scheduler.hpp index 
a88521302..96303bd18 100644 --- a/rpcsx-gpu/lib/vk/include/Scheduler.hpp +++ b/rpcsx-gpu/lib/vk/include/Scheduler.hpp @@ -16,6 +16,7 @@ class Scheduler { unsigned mQueueFamily; vk::CommandPool mCommandPool; vk::CommandBuffer mCommandBuffer; + bool mIsEmpty = false; std::uint64_t mNextSignal = 1; std::mutex mTaskMutex; @@ -40,9 +41,17 @@ public: unsigned getQueueFamily() const { return mQueueFamily; } VkQueue getQueue() const { return mQueue; } - VkCommandBuffer getCommandBuffer() const { return mCommandBuffer; } + VkCommandBuffer getCommandBuffer() { + mIsEmpty = false; + return mCommandBuffer; + } Scheduler &submit() { + if (mIsEmpty) { + return *this; + } + mIsEmpty = true; + mCommandBuffer.end(); VkSemaphoreSubmitInfo waitSemSubmitInfo = { @@ -56,7 +65,7 @@ public: .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, .semaphore = mSemaphore.getHandle(), .value = mNextSignal, - .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + .stageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT, }; VkCommandBufferSubmitInfo cmdBufferSubmitInfo{ @@ -96,11 +105,14 @@ public: return *this; } + auto afterSubmit = std::move(mAfterSubmitTasks); + mAfterSubmitTasks.clear(); + wait(); - while (!mAfterSubmitTasks.empty()) { - auto task = std::move(mAfterSubmitTasks.back()); - mAfterSubmitTasks.pop_back(); + while (!afterSubmit.empty()) { + auto task = std::move(afterSubmit.back()); + afterSubmit.pop_back(); std::move(task)(); } diff --git a/rpcsx-gpu/lib/vk/include/vk.hpp b/rpcsx-gpu/lib/vk/include/vk.hpp index c9f0dd9e3..367d69c2b 100644 --- a/rpcsx-gpu/lib/vk/include/vk.hpp +++ b/rpcsx-gpu/lib/vk/include/vk.hpp @@ -42,7 +42,6 @@ struct Context { VkFormat swapchainColorFormat = VK_FORMAT_B8G8R8A8_UNORM; VkColorSpaceKHR swapchainColorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; std::vector swapchainImageViews; - std::vector inFlightFences; VkSemaphore presentCompleteSemaphore = VK_NULL_HANDLE; VkSemaphore renderCompleteSemaphore = VK_NULL_HANDLE; 
VkPhysicalDeviceDescriptorBufferPropertiesEXT descriptorBufferProps; @@ -69,10 +68,6 @@ struct Context { vkDestroySwapchainKHR(device, swapchain, allocator); } - for (auto fence : inFlightFences) { - vkDestroyFence(device, fence, allocator); - } - if (presentCompleteSemaphore != VK_NULL_HANDLE) { vkDestroySemaphore(device, presentCompleteSemaphore, allocator); } diff --git a/rpcsx-gpu/lib/vk/src/vk.cpp b/rpcsx-gpu/lib/vk/src/vk.cpp index 4dfaf8087..d22e62dc1 100644 --- a/rpcsx-gpu/lib/vk/src/vk.cpp +++ b/rpcsx-gpu/lib/vk/src/vk.cpp @@ -198,15 +198,6 @@ void vk::Context::createSwapchain() { recreateSwapchain(); - inFlightFences.resize(swapchainImages.size()); - - for (auto &fence : inFlightFences) { - VkFenceCreateInfo fenceInfo{}; - fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - fenceInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT; - - VK_VERIFY(vkCreateFence(device, &fenceInfo, allocator, &fence)); - } { VkSemaphoreCreateInfo semaphoreCreateInfo{}; semaphoreCreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; @@ -422,6 +413,10 @@ void vk::Context::createDevice(VkSurfaceKHR surface, int gpuIndex, storage_8bit.uniformAndStorageBuffer8BitAccess && float16_int8.shaderInt8; supportsInt64Atomics = phyDevFeatures12.shaderBufferInt64Atomics; + if (!fsBarycentric.fragmentShaderBarycentric) { + shaderObject.pNext = fsBarycentric.pNext; + } + rx::dieIf(!storage_16bit.uniformAndStorageBuffer16BitAccess, "16-bit storage is unsupported by this GPU"); rx::dieIf(!float16_int8.shaderFloat16, diff --git a/rpcsx-gpu/main.cpp b/rpcsx-gpu/main.cpp index 20805c64b..35ed5605a 100644 --- a/rpcsx-gpu/main.cpp +++ b/rpcsx-gpu/main.cpp @@ -18,8 +18,6 @@ #include #include #include -#include -#include #include #include #include @@ -39,20 +37,6 @@ #include "Device.hpp" -static void saveImage(const char *name, const void *data, std::uint32_t width, - std::uint32_t height) { - std::ofstream file(name, std::ios::out | std::ios::binary); - - file << "P6\n" << width << "\n" << height << 
"\n" << 255 << "\n"; - - auto ptr = (unsigned int *)data; - for (uint32_t y = 0; y < height; y++) { - for (uint32_t x = 0; x < width; x++) { - file.write((char *)ptr++, 3); - } - } -} - void transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout oldLayout, VkImageLayout newLayout, const VkImageSubresourceRange &subresourceRange) { @@ -118,17 +102,6 @@ void transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image, }); } -static void submit(VkQueue queue, VkCommandBuffer cmdBuffer) { - VkSubmitInfo submit{ - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .commandBufferCount = 1, - .pCommandBuffers = &cmdBuffer, - }; - - VK_VERIFY(vkQueueSubmit(queue, 1, &submit, nullptr)); - vkQueueWaitIdle(queue); -} - static void usage(std::FILE *out, const char *argv0) { std::fprintf(out, "usage: %s [options...]\n", argv0); std::fprintf(out, " options:\n"); @@ -374,12 +347,6 @@ int main(int argc, const char *argv[]) { VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT); vkContext.createSwapchain(); - std::vector presentCmdBuffers( - vkContext.swapchainImages.size()); - - for (auto &cmdBuffer : presentCmdBuffers) { - cmdBuffer = commandPool.createPrimaryBuffer({}); - } amdgpu::bridge::BridgePuller bridgePuller{bridge}; amdgpu::bridge::Command commandsBuffer[1]; @@ -597,14 +564,11 @@ int main(int argc, const char *argv[]) { case amdgpu::bridge::CommandId::Flip: { if (!isImageAcquired) { - vkWaitForFences(vkContext.device, 1, - &vkContext.inFlightFences[imageIndex], VK_TRUE, - UINT64_MAX); - while (true) { auto acquireNextImageResult = vkAcquireNextImageKHR( vkContext.device, vkContext.swapchain, UINT64_MAX, - vkContext.presentCompleteSemaphore, nullptr, &imageIndex); + vkContext.presentCompleteSemaphore, VK_NULL_HANDLE, + &imageIndex); if (acquireNextImageResult == VK_ERROR_OUT_OF_DATE_KHR) { vkContext.recreateSwapchain(); continue; @@ -613,18 +577,11 @@ int main(int argc, const char *argv[]) { VK_VERIFY(acquireNextImageResult); break; } - - 
vkResetFences(vkContext.device, 1, - &vkContext.inFlightFences[imageIndex]); } - vkResetCommandBuffer(presentCmdBuffers[imageIndex], 0); - if (!device.flip(cmd.flip.pid, cmd.flip.bufferIndex, cmd.flip.arg, - presentCmdBuffers[imageIndex], vkContext.swapchainImages[imageIndex], - vkContext.swapchainImageViews[imageIndex], - vkContext.inFlightFences[imageIndex])) { + vkContext.swapchainImageViews[imageIndex])) { isImageAcquired = true; break; } @@ -641,6 +598,8 @@ int main(int argc, const char *argv[]) { auto vkQueuePresentResult = vkQueuePresentKHR(vkContext.presentQueue, &presentInfo); + isImageAcquired = false; + if (vkQueuePresentResult == VK_ERROR_OUT_OF_DATE_KHR) { vkContext.recreateSwapchain(); } else { @@ -678,4 +637,6 @@ int main(int argc, const char *argv[]) { } } } + + vkDeviceWaitIdle(vk::context->device); }