From 61d58b696fcacb006f6328f93089e1691a7ea161 Mon Sep 17 00:00:00 2001 From: DH Date: Fri, 27 Sep 2024 20:04:24 +0300 Subject: [PATCH] gpu2: fix resource overlapping fixed sampler lod implement shader printf support --- rpcsx-gpu2/Cache.cpp | 46 +++++++- rpcsx-gpu2/Device.cpp | 2 +- rpcsx-gpu2/Renderer.cpp | 32 +++++- .../lib/gcn-shader/include/shader/spv.hpp | 4 + .../lib/gcn-shader/src/GcnConverter.cpp | 5 +- .../lib/gcn-shader/src/SpvConverter.cpp | 8 ++ .../lib/gnm/include/gnm/descriptors.hpp | 8 +- rpcsx-gpu2/lib/vk/include/vk.hpp | 10 +- rpcsx-gpu2/lib/vk/src/vk.cpp | 107 +++++++++++++----- rpcsx-gpu2/main.cpp | 40 ++++++- 10 files changed, 213 insertions(+), 49 deletions(-) diff --git a/rpcsx-gpu2/Cache.cpp b/rpcsx-gpu2/Cache.cpp index 29d4d732a..de343adc6 100644 --- a/rpcsx-gpu2/Cache.cpp +++ b/rpcsx-gpu2/Cache.cpp @@ -103,7 +103,8 @@ static VkShaderStageFlagBits shaderStageToVk(shader::gcn::Stage stage) { } static void fillStageBindings(VkDescriptorSetLayoutBinding *bindings, - VkShaderStageFlagBits stage, int setIndex, std::uint32_t setCount) { + VkShaderStageFlagBits stage, int setIndex, + std::uint32_t setCount) { auto createDescriptorBinding = [&](VkDescriptorType type, uint32_t count, int dim = 0) { @@ -728,15 +729,39 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) { .layerCount = key.arrayLayerCount, }; + bool isLinear = key.tileMode.arrayMode() == kArrayModeLinearGeneral || + key.tileMode.arrayMode() == kArrayModeLinearAligned; + std::vector regions; regions.reserve(key.mipCount); + std::vector bufferRegions; + + std::uint64_t dstAddress = 0; + std::uint64_t srcAddress = 0; + + if (isLinear) { + regions.reserve(key.mipCount); + } else { + dstAddress = detiledBuffer.getAddress(); + srcAddress = tiledBuffer.deviceAddress; + } for (unsigned mipLevel = key.baseMipLevel; mipLevel < key.baseMipLevel + key.mipCount; ++mipLevel) { - tiler.detile(*mScheduler, surfaceInfo, key.tileMode, - tiledBuffer.deviceAddress, detiledBuffer.getAddress(), - mipLevel, key.baseArrayLayer, key.arrayLayerCount); + auto &info = surfaceInfo.getSubresourceInfo(mipLevel); + if (isLinear) { + bufferRegions.push_back({ + .srcOffset = info.offset, + .dstOffset = dstAddress, + .size = info.linearSize * key.arrayLayerCount, + }); + } else { + tiler.detile(*mScheduler, surfaceInfo, key.tileMode, srcAddress, + dstAddress, mipLevel, 0, key.arrayLayerCount); + } + regions.push_back({ + .bufferOffset = info.offset, .bufferRowLength = mipLevel > 0 ? 0 : std::max(key.pitch >> mipLevel, 1u), .imageSubresource = @@ -753,6 +778,15 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) { .depth = std::max(key.extent.depth >> mipLevel, 1u), }, }); + + dstAddress += info.linearSize * key.arrayLayerCount; + srcAddress += info.tiledSize * key.arrayLayerCount; + } + + if (!bufferRegions.empty()) { + vkCmdCopyBuffer(mScheduler->getCommandBuffer(), tiledBuffer.handle, + detiledBuffer.getHandle(), bufferRegions.size(), + bufferRegions.data()); } transitionImageLayout( @@ -765,8 +799,8 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) { regions.data()); transitionImageLayout(mScheduler->getCommandBuffer(), image, - VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, - subresourceRange); + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_IMAGE_LAYOUT_GENERAL, subresourceRange); mScheduler->afterSubmit([detiledBuffer = std::move(detiledBuffer)] {}); } diff --git a/rpcsx-gpu2/Device.cpp b/rpcsx-gpu2/Device.cpp index d82030a23..6fe886a6f 100644 --- a/rpcsx-gpu2/Device.cpp +++ b/rpcsx-gpu2/Device.cpp @@ -291,7 +291,7 @@ bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg, auto cacheTag = getCacheTag(process.vmId, scheduler); - if (true) { + if (false) { transitionImageLayout(commandBuffer, swapchainImage, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, diff --git a/rpcsx-gpu2/Renderer.cpp b/rpcsx-gpu2/Renderer.cpp index e73483c99..ed5391ec9 100644 --- a/rpcsx-gpu2/Renderer.cpp +++ b/rpcsx-gpu2/Renderer.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -201,6 +202,7 @@ struct ShaderResources : eval::Evaluator { std::map slotResources; std::span userSgprs; + std::uint32_t slotOffset = 0; rx::MemoryTableWithPayload bufferMemoryTable; std::vector> resourceSlotToAddress; std::vector samplerResources; @@ -225,7 +227,7 @@ struct ShaderResources : eval::Evaluator { bufferMemoryTable.map(*pointerBase, *pointerBase + *pointerOffset + pointer.size, Access::Read); - resourceSlotToAddress.push_back({pointer.resourceSlot, *pointerBase}); + resourceSlotToAddress.push_back({slotOffset + pointer.resourceSlot, *pointerBase}); } for (auto &bufferRes : res.buffers) { @@ -252,7 +254,7 @@ struct ShaderResources : eval::Evaluator { bufferMemoryTable.map(buffer.address(), buffer.address() + buffer.size(), bufferRes.access); resourceSlotToAddress.push_back( - {bufferRes.resourceSlot, buffer.address()}); + {slotOffset + bufferRes.resourceSlot, buffer.address()}); } for (auto &texture : res.textures) { @@ -320,7 +322,7 @@ struct ShaderResources : eval::Evaluator { "ShaderResources: unexpected texture type %u", static_cast(buffer.type)); - slotResources[texture.resourceSlot] = resources->size(); + slotResources[slotOffset + texture.resourceSlot] = resources->size(); resources->push_back(cacheTag->getImageView( amdgpu::ImageViewKey::createFrom(buffer), texture.access)); } @@ -350,10 +352,12 @@ struct ShaderResources : eval::Evaluator { sSampler.force_unorm_coords = true; } - slotResources[sampler.resourceSlot] = samplerResources.size(); + slotResources[slotOffset + sampler.resourceSlot] = samplerResources.size(); samplerResources.push_back( cacheTag->getSampler(amdgpu::SamplerKey::createFrom(sSampler))); } + + slotOffset += res.slots; } void buildMemoryTable(MemoryTable &memoryTable) { @@ -474,6 +478,20 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, return; } + if (pipe.context.cbColorControl.mode == gnm::CbMode::Disable) { + return; + } + + if (pipe.context.cbColorControl.mode != gnm::CbMode::Normal) { + std::println("unimplemented context.cbColorControl.mode = {}", + static_cast(pipe.context.cbColorControl.mode)); + return; + } + + if (pipe.context.cbTargetMask.raw == 0) { + return; + } + auto cacheTag = pipe.device->getCacheTag(vmId, pipe.scheduler); auto targetMask = pipe.context.cbTargetMask.raw; @@ -515,7 +533,7 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, ImageViewKey renderTargetInfo{}; renderTargetInfo.type = gnm::TextureType::Dim2D; renderTargetInfo.pitch = vkViewPortScissor.extent.width; - renderTargetInfo.address = cbColor.base << 8; + renderTargetInfo.address = static_cast(cbColor.base) << 8; renderTargetInfo.extent.width = vkViewPortScissor.extent.width; renderTargetInfo.extent.height = vkViewPortScissor.extent.height; renderTargetInfo.extent.depth = 1; @@ -647,6 +665,8 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, .env = env, }); + std::uint32_t slotOffset = shaderResources.slotOffset; + shaderResources.loadResources( shader.info->resources, std::span(pgm.userData.data(), pgm.rsrc2.userSgpr)); @@ -780,7 +800,7 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, memoryTableConfigSlots.push_back({ .bufferIndex = static_cast(descriptorBuffers.size()), .configIndex = static_cast(index), - .resourceSlot = static_cast(slot.data), + .resourceSlot = static_cast(slotOffset + slot.data), }); break; diff --git a/rpcsx-gpu2/lib/gcn-shader/include/shader/spv.hpp b/rpcsx-gpu2/lib/gcn-shader/include/shader/spv.hpp index 10586d21b..f1e217242 100644 --- a/rpcsx-gpu2/lib/gcn-shader/include/shader/spv.hpp +++ b/rpcsx-gpu2/lib/gcn-shader/include/shader/spv.hpp @@ -16,6 +16,7 @@ struct BinaryLayout { kMemoryModels, kEntryPoints, kExecutionModes, + kDebugStrings, kDebugs, kAnnotations, kGlobals, @@ -53,6 +54,9 @@ struct BinaryLayout { ir::Region getOrCreateExecutionModes(ir::Context &context) { return getOrCreateRegion(context, kExecutionModes); } + ir::Region getOrCreateDebugStrings(ir::Context &context) { + return getOrCreateRegion(context, kDebugStrings); + } ir::Region getOrCreateDebugs(ir::Context &context) { return getOrCreateRegion(context, kDebugs); } diff --git a/rpcsx-gpu2/lib/gcn-shader/src/GcnConverter.cpp b/rpcsx-gpu2/lib/gcn-shader/src/GcnConverter.cpp index c7f705f17..25a108cb6 100644 --- a/rpcsx-gpu2/lib/gcn-shader/src/GcnConverter.cpp +++ b/rpcsx-gpu2/lib/gcn-shader/src/GcnConverter.cpp @@ -1223,7 +1223,8 @@ static void instructionsToSpv(GcnConverter &converter, gcn::Import &importer, } if (!toAnalyze.empty()) { - auto &cfg = context.analysis.get([&] { return buildCFG(body.getFirst()); }); + auto &cfg = + context.analysis.get([&] { return buildCFG(body.getFirst()); }); ModuleInfo moduleInfo; collectModuleInfo(moduleInfo, context.layout); @@ -1618,6 +1619,8 @@ gcn::convertToSpv(Context &context, ir::Region body, extensions.createSpvExtension(context.getUnknownLocation(), "SPV_EXT_physical_storage_buffer"); + extensions.createSpvExtension(context.getUnknownLocation(), + "SPV_KHR_non_semantic_info"); auto merged = context.layout.merge(context); result.spv = spv::serialize(merged); diff --git a/rpcsx-gpu2/lib/gcn-shader/src/SpvConverter.cpp b/rpcsx-gpu2/lib/gcn-shader/src/SpvConverter.cpp index e01ded15f..b4e458e74 100644 --- a/rpcsx-gpu2/lib/gcn-shader/src/SpvConverter.cpp +++ b/rpcsx-gpu2/lib/gcn-shader/src/SpvConverter.cpp @@ -253,6 +253,14 @@ ir::Node spv::Import::getOrCloneImpl(ir::Context &context, ir::Node node, return redefine(result); } + if (isOperand && inst == ir::spv::OpString) { + auto debugs = spvContext.layout.getOrCreateDebugStrings(context); + auto result = CloneMap::getOrCloneImpl(context, node, isOperand); + debugs.addChild(result.staticCast()); + cloneDecorationsAndDebugs(); + return result; + } + if (isOperand && inst == ir::spv::OpVariable) { if (inst == ir::spv::OpVariable) { auto storage = inst.getOperand(1).getAsInt32(); diff --git a/rpcsx-gpu2/lib/gnm/include/gnm/descriptors.hpp b/rpcsx-gpu2/lib/gnm/include/gnm/descriptors.hpp index e1903889b..3a55c89e4 100644 --- a/rpcsx-gpu2/lib/gnm/include/gnm/descriptors.hpp +++ b/rpcsx-gpu2/lib/gnm/include/gnm/descriptors.hpp @@ -33,7 +33,9 @@ struct VBuffer { std::uint32_t type : 2; std::uint64_t address() const { return base; } - std::uint64_t size() const { return stride ? num_records * stride : num_records; } + std::uint64_t size() const { + return stride ? num_records * stride : num_records; + } auto operator<=>(const VBuffer &) const = default; }; @@ -101,8 +103,8 @@ struct SSampler { int32_t disable_cube_wrap : 1; FilterMode filter_mode : 2; int32_t : 1; - int32_t min_lod : 12; - int32_t max_lod : 12; + uint32_t min_lod : 12; + uint32_t max_lod : 12; int32_t perf_mip : 4; int32_t perf_z : 4; int32_t lod_bias : 14; diff --git a/rpcsx-gpu2/lib/vk/include/vk.hpp b/rpcsx-gpu2/lib/vk/include/vk.hpp index e5e331e8b..b1ab2d52a 100644 --- a/rpcsx-gpu2/lib/vk/include/vk.hpp +++ b/rpcsx-gpu2/lib/vk/include/vk.hpp @@ -994,7 +994,7 @@ void CmdSetLogicOpEXT(VkCommandBuffer commandBuffer, VkLogicOp logicOp); void CmdSetPolygonModeEXT(VkCommandBuffer commandBuffer, VkPolygonMode polygonMode); void CmdSetAlphaToOneEnableEXT(VkCommandBuffer commandBuffer, - VkBool32 alphaToOneEnable); + VkBool32 alphaToOneEnable); void CmdSetLogicOpEnableEXT(VkCommandBuffer commandBuffer, VkBool32 logicOpEnable); void CmdSetRasterizationSamplesEXT(VkCommandBuffer commandBuffer, @@ -1040,4 +1040,12 @@ void CmdSetDescriptorBufferOffsetsEXT(VkCommandBuffer commandBuffer, void CmdBindDescriptorBufferEmbeddedSamplersEXT( VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t set); + +VkResult CreateDebugUtilsMessengerEXT( + VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDebugUtilsMessengerEXT *pMessenger); +void DestroyDebugUtilsMessengerEXT(VkInstance instance, + VkDebugUtilsMessengerEXT messenger, + const VkAllocationCallbacks *pAllocator); } // namespace vk diff --git a/rpcsx-gpu2/lib/vk/src/vk.cpp b/rpcsx-gpu2/lib/vk/src/vk.cpp index 7cc6e5a49..03639cab1 100644 --- a/rpcsx-gpu2/lib/vk/src/vk.cpp +++ b/rpcsx-gpu2/lib/vk/src/vk.cpp @@ -1,10 +1,10 @@ #include "vk.hpp" #include +#include #include #include #include #include -#include vk::Context *vk::context; static vk::MemoryResource g_hostVisibleMemory; @@ -729,6 +729,25 @@ vk::Context vk::Context::create(std::vector requiredLayers, instanceCreateInfo.ppEnabledLayerNames = requiredLayers.data(); instanceCreateInfo.enabledLayerCount = requiredLayers.size(); + std::vector validation_feature_enables = { + VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT}; + VkValidationFeaturesEXT validationFeatures{ + .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT, + .enabledValidationFeatureCount = + static_cast(validation_feature_enables.size()), + .pEnabledValidationFeatures = validation_feature_enables.data(), + }; + + bool validationPresent = + std::find_if( + requiredLayers.begin(), requiredLayers.end(), [](const char *layer) { + return layer == std::string_view("VK_LAYER_KHRONOS_validation"); + }) != requiredLayers.end(); + + if (validationPresent) { + instanceCreateInfo.pNext = &validationFeatures; + } + Context result; VK_VERIFY(vkCreateInstance(&instanceCreateInfo, nullptr, &result.instance)); return result; @@ -756,26 +775,33 @@ vk::Context::findPhysicalMemoryTypeIndex(std::uint32_t typeBits, vk::MemoryResource &vk::getHostVisibleMemory() { return g_hostVisibleMemory; } vk::MemoryResource &vk::getDeviceLocalMemory() { return g_deviceLocalMemory; } -static auto importVkProc(VkDevice device, const char *name) { +static auto importDeviceVkProc(VkDevice device, const char *name) { auto result = vkGetDeviceProcAddr(device, name); rx::dieIf(result == nullptr, "vkGetDeviceProcAddr: failed to get address of '%s'", name); return result; } +static auto importInstanceVkProc(VkInstance instance, const char *name) { + auto result = vkGetInstanceProcAddr(instance, name); + rx::dieIf(result == nullptr, + "vkGetInstanceProcAddr: failed to get address of '%s'", name); + return result; +} + VkResult vk::CreateShadersEXT(VkDevice device, uint32_t createInfoCount, const VkShaderCreateInfoEXT *pCreateInfos, const VkAllocationCallbacks *pAllocator, VkShaderEXT *pShaders) { - static auto fn = (PFN_vkCreateShadersEXT)importVkProc(context->device, - "vkCreateShadersEXT"); + static auto fn = (PFN_vkCreateShadersEXT)importDeviceVkProc( + context->device, "vkCreateShadersEXT"); return fn(device, createInfoCount, pCreateInfos, pAllocator, pShaders); } void vk::DestroyShaderEXT(VkDevice device, VkShaderEXT shader, const VkAllocationCallbacks *pAllocator) { - static auto fn = (PFN_vkDestroyShaderEXT)importVkProc(context->device, - "vkDestroyShaderEXT"); + static auto fn = (PFN_vkDestroyShaderEXT)importDeviceVkProc( + context->device, "vkDestroyShaderEXT"); fn(device, shader, pAllocator); } @@ -783,8 +809,9 @@ void vk::DestroyShaderEXT(VkDevice device, VkShaderEXT shader, void vk::CmdBindShadersEXT(VkCommandBuffer commandBuffer, uint32_t stageCount, const VkShaderStageFlagBits *pStages, const VkShaderEXT *pShaders) { - static PFN_vkCmdBindShadersEXT fn = (PFN_vkCmdBindShadersEXT)importVkProc( - context->device, "vkCmdBindShadersEXT"); + static PFN_vkCmdBindShadersEXT fn = + (PFN_vkCmdBindShadersEXT)importDeviceVkProc(context->device, + "vkCmdBindShadersEXT"); return fn(commandBuffer, stageCount, pStages, pShaders); } @@ -793,7 +820,7 @@ void vk::CmdSetColorBlendEnableEXT(VkCommandBuffer commandBuffer, uint32_t firstAttachment, uint32_t attachmentCount, const VkBool32 *pColorBlendEnables) { - static auto fn = (PFN_vkCmdSetColorBlendEnableEXT)importVkProc( + static auto fn = (PFN_vkCmdSetColorBlendEnableEXT)importDeviceVkProc( context->device, "vkCmdSetColorBlendEnableEXT"); return fn(commandBuffer, firstAttachment, attachmentCount, @@ -803,7 +830,7 @@ void vk::CmdSetColorBlendEquationEXT( VkCommandBuffer commandBuffer, uint32_t firstAttachment, uint32_t attachmentCount, const VkColorBlendEquationEXT *pColorBlendEquations) { - static auto fn = (PFN_vkCmdSetColorBlendEquationEXT)importVkProc( + static auto fn = (PFN_vkCmdSetColorBlendEquationEXT)importDeviceVkProc( context->device, "vkCmdSetColorBlendEquationEXT"); return fn(commandBuffer, firstAttachment, attachmentCount, @@ -812,22 +839,22 @@ void vk::CmdSetColorBlendEquationEXT( void vk::CmdSetDepthClampEnableEXT(VkCommandBuffer commandBuffer, VkBool32 depthClampEnable) { - static auto fn = (PFN_vkCmdSetDepthClampEnableEXT)importVkProc( + static auto fn = (PFN_vkCmdSetDepthClampEnableEXT)importDeviceVkProc( context->device, "vkCmdSetDepthClampEnableEXT"); return fn(commandBuffer, depthClampEnable); } void vk::CmdSetLogicOpEXT(VkCommandBuffer commandBuffer, VkLogicOp logicOp) { - static auto fn = (PFN_vkCmdSetLogicOpEXT)importVkProc(context->device, - "vkCmdSetLogicOpEXT"); + static auto fn = (PFN_vkCmdSetLogicOpEXT)importDeviceVkProc( + context->device, "vkCmdSetLogicOpEXT"); return fn(commandBuffer, logicOp); } void vk::CmdSetPolygonModeEXT(VkCommandBuffer commandBuffer, VkPolygonMode polygonMode) { - static auto fn = (PFN_vkCmdSetPolygonModeEXT)importVkProc( + static auto fn = (PFN_vkCmdSetPolygonModeEXT)importDeviceVkProc( context->device, "vkCmdSetPolygonModeEXT"); return fn(commandBuffer, polygonMode); @@ -835,7 +862,7 @@ void vk::CmdSetPolygonModeEXT(VkCommandBuffer commandBuffer, void vk::CmdSetAlphaToOneEnableEXT(VkCommandBuffer commandBuffer, VkBool32 alphaToOneEnable) { - static auto fn = (PFN_vkCmdSetAlphaToOneEnableEXT)importVkProc( + static auto fn = (PFN_vkCmdSetAlphaToOneEnableEXT)importDeviceVkProc( context->device, "vkCmdSetAlphaToOneEnableEXT"); return fn(commandBuffer, alphaToOneEnable); @@ -843,14 +870,14 @@ void vk::CmdSetAlphaToOneEnableEXT(VkCommandBuffer commandBuffer, void vk::CmdSetLogicOpEnableEXT(VkCommandBuffer commandBuffer, VkBool32 logicOpEnable) { - static auto fn = (PFN_vkCmdSetLogicOpEnableEXT)importVkProc( + static auto fn = (PFN_vkCmdSetLogicOpEnableEXT)importDeviceVkProc( context->device, "vkCmdSetLogicOpEnableEXT"); return fn(commandBuffer, logicOpEnable); } void vk::CmdSetRasterizationSamplesEXT( VkCommandBuffer commandBuffer, VkSampleCountFlagBits rasterizationSamples) { - static auto fn = (PFN_vkCmdSetRasterizationSamplesEXT)importVkProc( + static auto fn = (PFN_vkCmdSetRasterizationSamplesEXT)importDeviceVkProc( context->device, "vkCmdSetRasterizationSamplesEXT"); return fn(commandBuffer, rasterizationSamples); @@ -858,21 +885,21 @@ void vk::CmdSetRasterizationSamplesEXT( void vk::CmdSetSampleMaskEXT(VkCommandBuffer commandBuffer, VkSampleCountFlagBits samples, const VkSampleMask *pSampleMask) { - static auto fn = (PFN_vkCmdSetSampleMaskEXT)importVkProc( + static auto fn = (PFN_vkCmdSetSampleMaskEXT)importDeviceVkProc( context->device, "vkCmdSetSampleMaskEXT"); return fn(commandBuffer, samples, pSampleMask); } void vk::CmdSetTessellationDomainOriginEXT( VkCommandBuffer commandBuffer, VkTessellationDomainOrigin domainOrigin) { - static auto fn = (PFN_vkCmdSetTessellationDomainOriginEXT)importVkProc( + static auto fn = (PFN_vkCmdSetTessellationDomainOriginEXT)importDeviceVkProc( context->device, "vkCmdSetTessellationDomainOriginEXT"); return fn(commandBuffer, domainOrigin); } void vk::CmdSetAlphaToCoverageEnableEXT(VkCommandBuffer commandBuffer, VkBool32 alphaToCoverageEnable) { - static auto fn = (PFN_vkCmdSetAlphaToCoverageEnableEXT)importVkProc( + static auto fn = (PFN_vkCmdSetAlphaToCoverageEnableEXT)importDeviceVkProc( context->device, "vkCmdSetAlphaToCoverageEnableEXT"); return fn(commandBuffer, alphaToCoverageEnable); @@ -882,7 +909,7 @@ void vk::CmdSetVertexInputEXT( const VkVertexInputBindingDescription2EXT *pVertexBindingDescriptions, uint32_t vertexAttributeDescriptionCount, const VkVertexInputAttributeDescription2EXT *pVertexAttributeDescriptions) { - static auto fn = (PFN_vkCmdSetVertexInputEXT)importVkProc( + static auto fn = (PFN_vkCmdSetVertexInputEXT)importDeviceVkProc( context->device, "vkCmdSetVertexInputEXT"); return fn(commandBuffer, vertexBindingDescriptionCount, @@ -892,7 +919,7 @@ void vk::CmdSetVertexInputEXT( void vk::CmdSetColorWriteMaskEXT( VkCommandBuffer commandBuffer, uint32_t firstAttachment, uint32_t attachmentCount, const VkColorComponentFlags *pColorWriteMasks) { - static auto fn = (PFN_vkCmdSetColorWriteMaskEXT)importVkProc( + static auto fn = (PFN_vkCmdSetColorWriteMaskEXT)importDeviceVkProc( context->device, "vkCmdSetColorWriteMaskEXT"); return fn(commandBuffer, firstAttachment, attachmentCount, pColorWriteMasks); @@ -901,7 +928,7 @@ void vk::CmdSetColorWriteMaskEXT( void vk::GetDescriptorSetLayoutSizeEXT(VkDevice device, VkDescriptorSetLayout layout, VkDeviceSize *pLayoutSizeInBytes) { - static auto fn = (PFN_vkGetDescriptorSetLayoutSizeEXT)importVkProc( + static auto fn = (PFN_vkGetDescriptorSetLayoutSizeEXT)importDeviceVkProc( context->device, "vkGetDescriptorSetLayoutSizeEXT"); return fn(device, layout, pLayoutSizeInBytes); @@ -911,16 +938,17 @@ void vk::GetDescriptorSetLayoutBindingOffsetEXT(VkDevice device, VkDescriptorSetLayout layout, uint32_t binding, VkDeviceSize *pOffset) { - static auto fn = (PFN_vkGetDescriptorSetLayoutBindingOffsetEXT)importVkProc( - context->device, "vkGetDescriptorSetLayoutBindingOffsetEXT"); + static auto fn = + (PFN_vkGetDescriptorSetLayoutBindingOffsetEXT)importDeviceVkProc( + context->device, "vkGetDescriptorSetLayoutBindingOffsetEXT"); return fn(device, layout, binding, pOffset); } void vk::GetDescriptorEXT(VkDevice device, const VkDescriptorGetInfoEXT *pDescriptorInfo, size_t dataSize, void *pDescriptor) { - static auto fn = (PFN_vkGetDescriptorEXT)importVkProc(context->device, - "vkGetDescriptorEXT"); + static auto fn = (PFN_vkGetDescriptorEXT)importDeviceVkProc( + context->device, "vkGetDescriptorEXT"); return fn(device, pDescriptorInfo, dataSize, pDescriptor); } @@ -928,7 +956,7 @@ void vk::GetDescriptorEXT(VkDevice device, void vk::CmdBindDescriptorBuffersEXT( VkCommandBuffer commandBuffer, uint32_t bufferCount, const VkDescriptorBufferBindingInfoEXT *pBindingInfos) { - static auto fn = (PFN_vkCmdBindDescriptorBuffersEXT)importVkProc( + static auto fn = (PFN_vkCmdBindDescriptorBuffersEXT)importDeviceVkProc( context->device, "vkCmdBindDescriptorBuffersEXT"); return fn(commandBuffer, bufferCount, pBindingInfos); @@ -940,7 +968,7 @@ void vk::CmdSetDescriptorBufferOffsetsEXT(VkCommandBuffer commandBuffer, uint32_t firstSet, uint32_t setCount, const uint32_t *pBufferIndices, const VkDeviceSize *pOffsets) { - static auto fn = (PFN_vkCmdSetDescriptorBufferOffsetsEXT)importVkProc( + static auto fn = (PFN_vkCmdSetDescriptorBufferOffsetsEXT)importDeviceVkProc( context->device, "vkCmdSetDescriptorBufferOffsetsEXT"); return fn(commandBuffer, pipelineBindPoint, layout, firstSet, setCount, @@ -951,8 +979,27 @@ void vk::CmdBindDescriptorBufferEmbeddedSamplersEXT( VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t set) { static auto fn = - (PFN_vkCmdBindDescriptorBufferEmbeddedSamplersEXT)importVkProc( + (PFN_vkCmdBindDescriptorBufferEmbeddedSamplersEXT)importDeviceVkProc( context->device, "vkCmdBindDescriptorBufferEmbeddedSamplersEXT"); return fn(commandBuffer, pipelineBindPoint, layout, set); } + +VkResult vk::CreateDebugUtilsMessengerEXT( + VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDebugUtilsMessengerEXT *pMessenger) { + static auto fn = (PFN_vkCreateDebugUtilsMessengerEXT)importInstanceVkProc( + instance, "vkCreateDebugUtilsMessengerEXT"); + + return fn(instance, pCreateInfo, pAllocator, pMessenger); +} + +void vk::DestroyDebugUtilsMessengerEXT( + VkInstance instance, VkDebugUtilsMessengerEXT messenger, + const VkAllocationCallbacks *pAllocator) { + static auto fn = (PFN_vkDestroyDebugUtilsMessengerEXT)importInstanceVkProc( + instance, "vkDestroyDebugUtilsMessengerEXT"); + + return fn(instance, messenger, pAllocator); +} diff --git a/rpcsx-gpu2/main.cpp b/rpcsx-gpu2/main.cpp index 543a29fee..f097d157b 100644 --- a/rpcsx-gpu2/main.cpp +++ b/rpcsx-gpu2/main.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -145,6 +146,17 @@ static void usage(std::FILE *out, const char *argv0) { std::fprintf(out, " window - create and use native window (default)\n"); } +static VKAPI_ATTR VkBool32 VKAPI_CALL debug_utils_message_callback( + VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, + VkDebugUtilsMessageTypeFlagsEXT messageType, + const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData, + void *pUserData) { + if (pCallbackData->pMessage) { + std::println("{}", pCallbackData->pMessage); + } + return VK_FALSE; +} + int main(int argc, const char *argv[]) { const char *cmdBridgeName = "/rpcsx-gpu-cmds"; const char *shmName = "/rpcsx-os-memory"; @@ -266,12 +278,36 @@ int main(int argc, const char *argv[]) { if (enableValidation) { optionalLayers.push_back("VK_LAYER_KHRONOS_validation"); + requiredExtensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); } auto vkContext = vk::Context::create({}, optionalLayers, requiredExtensions, {}); vk::context = &vkContext; + VkDebugUtilsMessengerEXT debugMessenger = VK_NULL_HANDLE; + + if (enableValidation) { + VkDebugUtilsMessengerCreateInfoEXT debug_utils_messenger_create_info{ + VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT}; + debug_utils_messenger_create_info.messageSeverity = + VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT; + debug_utils_messenger_create_info.messageType = + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT; + debug_utils_messenger_create_info.pfnUserCallback = + debug_utils_message_callback; + VK_VERIFY(vk::CreateDebugUtilsMessengerEXT( + vkContext.instance, &debug_utils_messenger_create_info, + vk::context->allocator, &debugMessenger)); + } + + rx::atScopeExit _debugMessenger{[=] { + if (debugMessenger != VK_NULL_HANDLE) { + vk::DestroyDebugUtilsMessengerEXT(vk::context->instance, debugMessenger, + vk::context->allocator); + } + }}; + VkSurfaceKHR vkSurface; glfwCreateWindowSurface(vkContext.instance, window, nullptr, &vkSurface); @@ -289,6 +325,7 @@ int main(int argc, const char *argv[]) { VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME, VK_EXT_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME, + VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME, }, {VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME}); @@ -315,7 +352,8 @@ int main(int argc, const char *argv[]) { vk::getHostVisibleMemory().initHostVisible( std::min(hostVisibleMemoryTotalSize / 2, 1ul * 1024 * 1024 * 1024)); - vk::getDeviceLocalMemory().initDeviceLocal(std::min(localMemoryTotalSize / 2, 4ul * 1024 * 1024 * 1024)); + vk::getDeviceLocalMemory().initDeviceLocal( + std::min(localMemoryTotalSize / 2, 4ul * 1024 * 1024 * 1024)); auto commandPool = vk::CommandPool::Create(vkContext.presentQueueFamily,