From 4e83c9e12145db2a0f1edee57c6c70a664fea9db Mon Sep 17 00:00:00 2001 From: DH Date: Sat, 28 Sep 2024 18:07:24 +0300 Subject: [PATCH] gpu2: fix flipper static pipeline for flip engine optimize linear tiler fixed out of bound in the tiler implement swizzling for sampled images --- rpcsx-gpu2/CMakeLists.txt | 4 +- rpcsx-gpu2/Cache.cpp | 295 +++++++++++------ rpcsx-gpu2/Cache.hpp | 4 + rpcsx-gpu2/Device.cpp | 126 ++----- rpcsx-gpu2/Device.hpp | 4 +- rpcsx-gpu2/FlipPipeline.cpp | 309 ++++++++++++++++++ rpcsx-gpu2/FlipPipeline.hpp | 30 ++ rpcsx-gpu2/Renderer.cpp | 202 +----------- rpcsx-gpu2/Renderer.hpp | 3 +- .../lib/amdgpu-tiler/src/tiler_vulkan.cpp | 10 +- rpcsx-gpu2/lib/gcn-shader/shaders/rdna.glsl | 24 ++ rpcsx-gpu2/main.cpp | 2 +- rpcsx-gpu2/shaders/flip.frag.glsl | 11 - rpcsx-gpu2/shaders/flip.vert.glsl | 1 - rpcsx-gpu2/shaders/flip_alt.frag.glsl | 11 + rpcsx-gpu2/shaders/flip_std.frag.glsl | 11 + 16 files changed, 637 insertions(+), 410 deletions(-) create mode 100644 rpcsx-gpu2/FlipPipeline.cpp create mode 100644 rpcsx-gpu2/FlipPipeline.hpp delete mode 100644 rpcsx-gpu2/shaders/flip.frag.glsl create mode 100644 rpcsx-gpu2/shaders/flip_alt.frag.glsl create mode 100644 rpcsx-gpu2/shaders/flip_std.frag.glsl diff --git a/rpcsx-gpu2/CMakeLists.txt b/rpcsx-gpu2/CMakeLists.txt index f15d09074..3f19f549d 100644 --- a/rpcsx-gpu2/CMakeLists.txt +++ b/rpcsx-gpu2/CMakeLists.txt @@ -2,7 +2,8 @@ find_package(glfw3 3.3 REQUIRED) add_precompiled_vulkan_spirv(rpcsx-gpu-shaders shaders/fill_red.frag.glsl - shaders/flip.frag.glsl + shaders/flip_std.frag.glsl + shaders/flip_alt.frag.glsl shaders/flip.vert.glsl shaders/rect_list.geom.glsl ) @@ -11,6 +12,7 @@ add_executable(rpcsx-gpu2 Cache.cpp main.cpp Device.cpp + FlipPipeline.cpp Pipe.cpp Registers.cpp Renderer.cpp diff --git a/rpcsx-gpu2/Cache.cpp b/rpcsx-gpu2/Cache.cpp index ef57179e1..74f30c235 100644 --- a/rpcsx-gpu2/Cache.cpp +++ b/rpcsx-gpu2/Cache.cpp @@ -103,8 +103,7 @@ static VkShaderStageFlagBits shaderStageToVk(shader::gcn::Stage stage) { } static void fillStageBindings(VkDescriptorSetLayoutBinding *bindings, - VkShaderStageFlagBits stage, int setIndex, - std::uint32_t setCount) { + VkShaderStageFlagBits stage, int setIndex) { auto createDescriptorBinding = [&](VkDescriptorType type, uint32_t count, int dim = 0) { @@ -113,7 +112,7 @@ static void fillStageBindings(VkDescriptorSetLayoutBinding *bindings, bindings[binding] = VkDescriptorSetLayoutBinding{ .binding = static_cast(binding), .descriptorType = type, - .descriptorCount = count * setCount, + .descriptorCount = count, .stageFlags = VkShaderStageFlags( stage | (binding > 0 && stage != VK_SHADER_STAGE_COMPUTE_BIT ? VK_SHADER_STAGE_ALL_GRAPHICS @@ -268,51 +267,102 @@ struct CachedImage : Cache::Entry { .layerCount = image.getArrayLayers(), }; - auto transferBuffer = vk::Buffer::Allocate( - vk::getDeviceLocalMemory(), info.totalSize, - VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT); - auto tiledBuffer = - tag.getBuffer(baseAddress, info.totalSize, Access::Write); - auto &tiler = tag.getDevice()->tiler; - transitionImageLayout( scheduler.getCommandBuffer(), image, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresourceRange); - for (unsigned mipLevel = 0; mipLevel < image.getMipLevels(); ++mipLevel) { - VkBufferImageCopy region = { - .bufferRowLength = - mipLevel > 0 ? 0 : std::max(info.pitch >> mipLevel, 1u), - .imageSubresource = - { - .aspectMask = toAspect(kind), - .mipLevel = mipLevel, - .baseArrayLayer = 0, - .layerCount = image.getArrayLayers(), - }, - .imageExtent = - { - .width = std::max(image.getWidth() >> mipLevel, 1u), - .height = std::max(image.getHeight() >> mipLevel, 1u), - .depth = std::max(image.getDepth() >> mipLevel, 1u), - }, - }; + bool isLinear = acquiredTileMode.arrayMode() == kArrayModeLinearGeneral || + acquiredTileMode.arrayMode() == kArrayModeLinearAligned; + + std::vector regions; + regions.reserve(image.getMipLevels()); + + auto tiledBuffer = + tag.getBuffer(baseAddress, info.totalSize, Access::Write); + + if (isLinear) { + for (unsigned mipLevel = 0; mipLevel < image.getMipLevels(); ++mipLevel) { + auto ®ionInfo = info.getSubresourceInfo(mipLevel); + + regions.push_back({ + .bufferOffset = regionInfo.offset, + .bufferRowLength = + mipLevel > 0 ? 0 : std::max(info.pitch >> mipLevel, 1u), + .imageSubresource = + { + .aspectMask = toAspect(kind), + .mipLevel = mipLevel, + .baseArrayLayer = 0, + .layerCount = image.getArrayLayers(), + }, + .imageExtent = + { + .width = std::max(image.getWidth() >> mipLevel, 1u), + .height = std::max(image.getHeight() >> mipLevel, 1u), + .depth = std::max(image.getDepth() >> mipLevel, 1u), + }, + }); + } vkCmdCopyImageToBuffer(scheduler.getCommandBuffer(), image.getHandle(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - transferBuffer.getHandle(), 1, ®ion); + tiledBuffer.handle, regions.size(), + regions.data()); + } else { + std::uint64_t linearOffset = 0; + for (unsigned mipLevel = 0; mipLevel < image.getMipLevels(); ++mipLevel) { + auto ®ionInfo = info.getSubresourceInfo(mipLevel); + regions.push_back({ + .bufferOffset = linearOffset, + .bufferRowLength = + mipLevel > 0 ? 0 : std::max(info.pitch >> mipLevel, 1u), + .imageSubresource = + { + .aspectMask = toAspect(kind), + .mipLevel = mipLevel, + .baseArrayLayer = 0, + .layerCount = image.getArrayLayers(), + }, + .imageExtent = + { + .width = std::max(image.getWidth() >> mipLevel, 1u), + .height = std::max(image.getHeight() >> mipLevel, 1u), + .depth = std::max(image.getDepth() >> mipLevel, 1u), + }, + }); - tiler.tile(scheduler, info, acquiredTileMode, acquiredDfmt, - transferBuffer.getAddress(), tiledBuffer.deviceAddress, - mipLevel, 0, image.getArrayLayers()); + linearOffset += regionInfo.linearSize * image.getArrayLayers(); + } + + auto transferBuffer = vk::Buffer::Allocate( + vk::getDeviceLocalMemory(), linearOffset, + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT); + + vkCmdCopyImageToBuffer(scheduler.getCommandBuffer(), image.getHandle(), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + transferBuffer.getHandle(), regions.size(), + regions.data()); + + auto &tiler = tag.getDevice()->tiler; + + linearOffset = 0; + for (unsigned mipLevel = 0; mipLevel < image.getMipLevels(); ++mipLevel) { + auto ®ionInfo = info.getSubresourceInfo(mipLevel); + tiler.tile(scheduler, info, acquiredTileMode, acquiredDfmt, + transferBuffer.getAddress() + linearOffset, + tiledBuffer.deviceAddress, mipLevel, 0, + image.getArrayLayers()); + linearOffset += regionInfo.linearSize * image.getArrayLayers(); + } + + scheduler.afterSubmit([transferBuffer = std::move(transferBuffer)] {}); } transitionImageLayout(scheduler.getCommandBuffer(), image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, subresourceRange); - // scheduler.afterSubmit([transferBuffer = std::move(transferBuffer)] {}); + scheduler.submit(); - scheduler.wait(); } }; @@ -729,97 +779,133 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) { key.mipCount, key.arrayLayerCount, gnm::toVkFormat(key.dfmt, key.nfmt), VK_SAMPLE_COUNT_1_BIT, usage); + VkImageSubresourceRange subresourceRange{ + .aspectMask = toAspect(key.kind), + .baseMipLevel = key.baseMipLevel, + .levelCount = key.mipCount, + .baseArrayLayer = key.baseArrayLayer, + .layerCount = key.arrayLayerCount, + }; + if ((access & Access::Read) != Access::None) { - auto tiledBuffer = - getBuffer(key.readAddress, surfaceInfo.totalSize, Access::Read); - - auto &tiler = mParent->mDevice->tiler; - auto detiledBuffer = - vk::Buffer::Allocate(vk::getDeviceLocalMemory(), surfaceInfo.totalSize, - VK_BUFFER_USAGE_2_TRANSFER_DST_BIT_KHR | - VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT_KHR); - VkImageSubresourceRange subresourceRange{ - .aspectMask = toAspect(key.kind), - .baseMipLevel = key.baseMipLevel, - .levelCount = key.mipCount, - .baseArrayLayer = key.baseArrayLayer, - .layerCount = key.arrayLayerCount, - }; - bool isLinear = key.tileMode.arrayMode() == kArrayModeLinearGeneral || key.tileMode.arrayMode() == kArrayModeLinearAligned; std::vector regions; regions.reserve(key.mipCount); - std::vector bufferRegions; - std::uint64_t dstAddress = 0; - std::uint64_t srcAddress = 0; + VkBuffer sourceBuffer; + + auto tiledBuffer = + getBuffer(key.readAddress, surfaceInfo.totalSize, Access::Read); if (isLinear) { - regions.reserve(key.mipCount); - } else { - dstAddress = detiledBuffer.getAddress(); - srcAddress = tiledBuffer.deviceAddress; - } - - for (unsigned mipLevel = key.baseMipLevel; - mipLevel < key.baseMipLevel + key.mipCount; ++mipLevel) { - auto &info = surfaceInfo.getSubresourceInfo(mipLevel); - if (isLinear) { - bufferRegions.push_back({ - .srcOffset = info.offset, - .dstOffset = dstAddress, - .size = info.linearSize * key.arrayLayerCount, + sourceBuffer = tiledBuffer.handle; + for (unsigned mipLevel = key.baseMipLevel; + mipLevel < key.baseMipLevel + key.mipCount; ++mipLevel) { + auto &info = surfaceInfo.getSubresourceInfo(mipLevel); + regions.push_back({ + .bufferOffset = info.offset, + .bufferRowLength = + mipLevel > 0 ? 0 : std::max(key.pitch >> mipLevel, 1u), + .imageSubresource = + { + .aspectMask = toAspect(key.kind), + .mipLevel = mipLevel, + .baseArrayLayer = key.baseArrayLayer, + .layerCount = key.arrayLayerCount, + }, + .imageExtent = + { + .width = std::max(key.extent.width >> mipLevel, 1u), + .height = std::max(key.extent.height >> mipLevel, 1u), + .depth = std::max(key.extent.depth >> mipLevel, 1u), + }, }); - } else { - tiler.detile(*mScheduler, surfaceInfo, key.tileMode, key.dfmt, - srcAddress, dstAddress, mipLevel, 0, key.arrayLayerCount); + + regions.push_back({ + .bufferOffset = info.offset, + .bufferRowLength = + mipLevel > 0 ? 0 : std::max(key.pitch >> mipLevel, 1u), + .imageSubresource = + { + .aspectMask = toAspect(key.kind), + .mipLevel = mipLevel, + .baseArrayLayer = key.baseArrayLayer, + .layerCount = key.arrayLayerCount, + }, + .imageExtent = + { + .width = std::max(key.extent.width >> mipLevel, 1u), + .height = std::max(key.extent.height >> mipLevel, 1u), + .depth = std::max(key.extent.depth >> mipLevel, 1u), + }, + }); + } + } else { + auto &tiler = mParent->mDevice->tiler; + + std::uint64_t linearOffset = 0; + for (unsigned mipLevel = key.baseMipLevel; + mipLevel < key.baseMipLevel + key.mipCount; ++mipLevel) { + auto &info = surfaceInfo.getSubresourceInfo(mipLevel); + + regions.push_back({ + .bufferOffset = linearOffset, + .bufferRowLength = + mipLevel > 0 ? 0 : std::max(key.pitch >> mipLevel, 1u), + .imageSubresource = + { + .aspectMask = toAspect(key.kind), + .mipLevel = mipLevel, + .baseArrayLayer = key.baseArrayLayer, + .layerCount = key.arrayLayerCount, + }, + .imageExtent = + { + .width = std::max(key.extent.width >> mipLevel, 1u), + .height = std::max(key.extent.height >> mipLevel, 1u), + .depth = std::max(key.extent.depth >> mipLevel, 1u), + }, + }); + + linearOffset += info.linearSize * key.arrayLayerCount; } - regions.push_back({ - .bufferOffset = info.offset, - .bufferRowLength = - mipLevel > 0 ? 0 : std::max(key.pitch >> mipLevel, 1u), - .imageSubresource = - { - .aspectMask = toAspect(key.kind), - .mipLevel = mipLevel, - .baseArrayLayer = key.baseArrayLayer, - .layerCount = key.arrayLayerCount, - }, - .imageExtent = - { - .width = std::max(key.extent.width >> mipLevel, 1u), - .height = std::max(key.extent.height >> mipLevel, 1u), - .depth = std::max(key.extent.depth >> mipLevel, 1u), - }, - }); + auto detiledBuffer = + vk::Buffer::Allocate(vk::getDeviceLocalMemory(), linearOffset, + VK_BUFFER_USAGE_2_TRANSFER_DST_BIT_KHR | + VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT_KHR); - dstAddress += info.linearSize * key.arrayLayerCount; - srcAddress += info.tiledSize * key.arrayLayerCount; - } + sourceBuffer = detiledBuffer.getHandle(); + std::uint64_t dstAddress = detiledBuffer.getAddress(); - if (!bufferRegions.empty()) { - vkCmdCopyBuffer(mScheduler->getCommandBuffer(), tiledBuffer.handle, - detiledBuffer.getHandle(), bufferRegions.size(), - bufferRegions.data()); + mScheduler->afterSubmit([detiledBuffer = std::move(detiledBuffer)] {}); + + + for (unsigned mipLevel = key.baseMipLevel; + mipLevel < key.baseMipLevel + key.mipCount; ++mipLevel) { + auto &info = surfaceInfo.getSubresourceInfo(mipLevel); + + tiler.detile(*mScheduler, surfaceInfo, key.tileMode, key.dfmt, + tiledBuffer.deviceAddress, dstAddress, mipLevel, 0, + key.arrayLayerCount); + + dstAddress += info.linearSize * key.arrayLayerCount; + } } transitionImageLayout( mScheduler->getCommandBuffer(), image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresourceRange); - vkCmdCopyBufferToImage(mScheduler->getCommandBuffer(), - detiledBuffer.getHandle(), image.getHandle(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, regions.size(), - regions.data()); + vkCmdCopyBufferToImage( + mScheduler->getCommandBuffer(), sourceBuffer, image.getHandle(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, regions.size(), regions.data()); transitionImageLayout(mScheduler->getCommandBuffer(), image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, subresourceRange); - - mScheduler->afterSubmit([detiledBuffer = std::move(detiledBuffer)] {}); } auto cached = std::make_shared(); @@ -834,7 +920,7 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) { cached->acquiredDfmt = key.dfmt; mAcquiredResources.push_back(cached); - return {.handle = cached->image.getHandle()}; + return {.handle = cached->image.getHandle(), .subresource = subresourceRange}; } Cache::ImageView Cache::Tag::getImageView(const ImageViewKey &key, @@ -867,6 +953,7 @@ Cache::ImageView Cache::Tag::getImageView(const ImageViewKey &key, return { .handle = cached->view.getHandle(), .imageHandle = image.handle, + .subresource = image.subresource, }; } @@ -933,7 +1020,7 @@ Cache::Cache(Device *device, int vmId) : mDevice(device), mVmIm(vmId) { [kDescriptorBindings.size()]; for (std::size_t index = 0; auto stage : kGraphicsStages) { - fillStageBindings(bindings[index], stage, index, 128); + fillStageBindings(bindings[index], stage, index); ++index; } @@ -956,7 +1043,7 @@ Cache::Cache(Device *device, int vmId) : mDevice(device), mVmIm(vmId) { { VkDescriptorSetLayoutBinding bindings[kDescriptorBindings.size()]; - fillStageBindings(bindings, VK_SHADER_STAGE_COMPUTE_BIT, 0, 128); + fillStageBindings(bindings, VK_SHADER_STAGE_COMPUTE_BIT, 0); VkDescriptorSetLayoutCreateInfo layoutInfo{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, diff --git a/rpcsx-gpu2/Cache.hpp b/rpcsx-gpu2/Cache.hpp index be197226a..c703af75f 100644 --- a/rpcsx-gpu2/Cache.hpp +++ b/rpcsx-gpu2/Cache.hpp @@ -151,11 +151,13 @@ struct Cache { struct Image { VkImage handle; + VkImageSubresourceRange subresource; }; struct ImageView { VkImageView handle; VkImage imageHandle; + VkImageSubresourceRange subresource; }; class Tag { @@ -185,6 +187,8 @@ struct Cache { mScheduler->wait(); } + Scheduler &getScheduler() const { return *mScheduler; } + ~Tag() { release(); } TagId getReadId() const { return TagId{std::uint64_t(mTagId) - 1}; } diff --git a/rpcsx-gpu2/Device.cpp b/rpcsx-gpu2/Device.cpp index ebdd3b06a..1a17e438e 100644 --- a/rpcsx-gpu2/Device.cpp +++ b/rpcsx-gpu2/Device.cpp @@ -1,4 +1,5 @@ #include "Device.hpp" +#include "FlipPipeline.hpp" #include "Renderer.hpp" #include "amdgpu/tiler.hpp" #include "gnm/constants.hpp" @@ -255,27 +256,28 @@ bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg, gnm::DataFormat dfmt; gnm::NumericFormat nfmt; - CbCompSwap compSwap; + auto flipType = FlipType::Alt; switch (bufferAttr.pixelFormat) { case 0x80000000: - // bgra dfmt = gnm::kDataFormat8_8_8_8; - nfmt = gnm::kNumericFormatSNormNoZero; - compSwap = CbCompSwap::Alt; + nfmt = gnm::kNumericFormatSrgb; break; case 0x80002200: - // rgba dfmt = gnm::kDataFormat8_8_8_8; - nfmt = gnm::kNumericFormatSNormNoZero; - compSwap = CbCompSwap::Std; + nfmt = gnm::kNumericFormatUNorm; + flipType = FlipType::Std; break; + case 0x88740000: case 0x88060000: - // bgra dfmt = gnm::kDataFormat2_10_10_10; - nfmt = gnm::kNumericFormatSNormNoZero; - compSwap = CbCompSwap::Alt; + nfmt = gnm::kNumericFormatUNorm; + break; + + case 0xc1060000: + dfmt = gnm::kDataFormat16_16_16_16; + nfmt = gnm::kNumericFormatSrgb; break; default: @@ -291,92 +293,28 @@ bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg, auto cacheTag = getCacheTag(process.vmId, scheduler); - if (false) { - transitionImageLayout(commandBuffer, swapchainImage, - VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .levelCount = 1, - .layerCount = 1, - }); + transitionImageLayout(commandBuffer, swapchainImage, + VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .levelCount = 1, + .layerCount = 1, + }); - amdgpu::flip( - cacheTag, commandBuffer, vk::context->swapchainExtent, buffer.address, - swapchainImageView, {bufferAttr.width, bufferAttr.height}, compSwap, - getDefaultTileModes()[bufferAttr.tilingMode == 1 ? 10 : 8], dfmt, nfmt); + amdgpu::flip( + cacheTag, commandBuffer, vk::context->swapchainExtent, buffer.address, + swapchainImageView, {bufferAttr.width, bufferAttr.height}, flipType, + getDefaultTileModes()[bufferAttr.tilingMode == 1 ? 10 : 8], dfmt, nfmt); - transitionImageLayout(commandBuffer, swapchainImage, - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, - VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .levelCount = 1, - .layerCount = 1, - }); - } else { - ImageKey frameKey{ - .readAddress = buffer.address, - .type = gnm::TextureType::Dim2D, - .dfmt = dfmt, - .nfmt = nfmt, - .tileMode = getDefaultTileModes()[bufferAttr.tilingMode == 1 ? 10 : 8], - .extent = - { - .width = bufferAttr.width, - .height = bufferAttr.height, - .depth = 1, - }, - .pitch = bufferAttr.width, - .mipCount = 1, - .arrayLayerCount = 1, - }; - - auto image = cacheTag.getImage(frameKey, Access::Read); - - scheduler.submit(); - scheduler.wait(); - - transitionImageLayout(commandBuffer, swapchainImage, - VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .levelCount = 1, - .layerCount = 1, - }); - - VkImageBlit region{ - .srcSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1}, - .srcOffsets = {{}, - {static_cast(bufferAttr.width), - static_cast(bufferAttr.height), 1}}, - .dstSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1}, - .dstOffsets = - {{}, - {static_cast(vk::context->swapchainExtent.width), - static_cast(vk::context->swapchainExtent.height), 1}}, - }; - - vkCmdBlitImage(commandBuffer, image.handle, VK_IMAGE_LAYOUT_GENERAL, - swapchainImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, - ®ion, VK_FILTER_LINEAR); - - transitionImageLayout(commandBuffer, swapchainImage, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .levelCount = 1, - .layerCount = 1, - }); - } + transitionImageLayout(commandBuffer, swapchainImage, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .levelCount = 1, + .layerCount = 1, + }); auto submitCompleteTask = scheduler.createExternalSubmit(); diff --git a/rpcsx-gpu2/Device.hpp b/rpcsx-gpu2/Device.hpp index b4f9db789..6759c826c 100644 --- a/rpcsx-gpu2/Device.hpp +++ b/rpcsx-gpu2/Device.hpp @@ -3,7 +3,7 @@ #include "Pipe.hpp" #include "amdgpu/bridge/bridge.hpp" #include "amdgpu/tiler_vulkan.hpp" -#include "gnm/descriptors.hpp" +#include "FlipPipeline.hpp" #include "rx/MemoryTable.hpp" #include "shader/SemanticInfo.hpp" #include "shader/SpvConverter.hpp" @@ -52,9 +52,9 @@ struct Device { Registers::Config config; GpuTiler tiler; - GraphicsPipe graphicsPipes[kGfxPipeCount]{0, 1}; // ComputePipe computePipes[kComputePipeCount]{0, 1, 2, 3, 4, 5, 6, 7}; + FlipPipeline flipPipeline; int dmemFd[3] = {-1, -1, -1}; std::unordered_map processInfo; diff --git a/rpcsx-gpu2/FlipPipeline.cpp b/rpcsx-gpu2/FlipPipeline.cpp new file mode 100644 index 000000000..c170b519f --- /dev/null +++ b/rpcsx-gpu2/FlipPipeline.cpp @@ -0,0 +1,309 @@ +#include "FlipPipeline.hpp" +#include "shaders/flip.vert.h" +#include "shaders/flip_alt.frag.h" +#include "shaders/flip_std.frag.h" +#include "vk.hpp" +#include +#include + +FlipPipeline::~FlipPipeline() { + vkDestroyPipeline(vk::context->device, pipelines[0], vk::context->allocator); + vkDestroyPipeline(vk::context->device, pipelines[1], vk::context->allocator); + vkDestroyPipelineLayout(vk::context->device, pipelineLayout, + vk::context->allocator); + vkDestroyDescriptorPool(vk::context->device, descriptorPool, + vk::context->allocator); + vkDestroyDescriptorSetLayout(vk::context->device, descriptorSetLayout, + vk::context->allocator); + vkDestroyShaderModule(vk::context->device, flipVertShaderModule, + vk::context->allocator); + vkDestroyShaderModule(vk::context->device, flipFragStdShaderModule, + vk::context->allocator); + vkDestroyShaderModule(vk::context->device, flipFragAltShaderModule, + vk::context->allocator); +} + +FlipPipeline::FlipPipeline() { + + VkShaderModuleCreateInfo flipVertexModuleInfo{ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .codeSize = std::size(spirv_flip_vert) * sizeof(*spirv_flip_std_frag), + .pCode = spirv_flip_vert, + }; + + VkShaderModuleCreateInfo flipFragmentStdInfo{ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .codeSize = std::size(spirv_flip_std_frag) * sizeof(*spirv_flip_std_frag), + .pCode = spirv_flip_std_frag, + }; + + VkShaderModuleCreateInfo flipFragmentAltInfo{ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .codeSize = std::size(spirv_flip_alt_frag) * sizeof(*spirv_flip_std_frag), + .pCode = spirv_flip_alt_frag, + }; + + VK_VERIFY(vkCreateShaderModule(vk::context->device, &flipVertexModuleInfo, + vk::context->allocator, + &flipVertShaderModule)); + + VK_VERIFY(vkCreateShaderModule(vk::context->device, &flipFragmentStdInfo, + vk::context->allocator, + &flipFragStdShaderModule)); + + VK_VERIFY(vkCreateShaderModule(vk::context->device, &flipFragmentAltInfo, + vk::context->allocator, + &flipFragAltShaderModule)); + + { + VkDescriptorSetLayoutBinding bindings[] = { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + }, + }; + + VkDescriptorSetLayoutCreateInfo descriptorSetLayoutCreateInfo = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = std::size(bindings), + .pBindings = bindings, + }; + + vkCreateDescriptorSetLayout(vk::context->device, + &descriptorSetLayoutCreateInfo, + vk::context->allocator, &descriptorSetLayout); + } + + { + VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &descriptorSetLayout, + }; + + VK_VERIFY(vkCreatePipelineLayout(vk::context->device, + &pipelineLayoutCreateInfo, + vk::context->allocator, &pipelineLayout)); + } + + { + VkPipelineShaderStageCreateInfo stagesStd[]{ + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + // .pNext = &flipVertexModuleInfo, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = flipVertShaderModule, + .pName = "main", + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + // .pNext = &flipFragmentStdInfo, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = flipFragStdShaderModule, + .pName = "main", + }}; + VkPipelineShaderStageCreateInfo stagesAlt[]{ + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + // .pNext = &flipVertexModuleInfo, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = flipVertShaderModule, + .pName = "main", + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + // .pNext = &flipFragmentAltInfo, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = flipFragAltShaderModule, + .pName = "main", + }}; + + VkPipelineVertexInputStateCreateInfo vertexInputState{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + }; + VkPipelineInputAssemblyStateCreateInfo inputAssemblyState{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, + }; + VkPipelineTessellationStateCreateInfo tessellationState{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, + }; + VkPipelineRasterizationStateCreateInfo rasterizationState{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + }; + + VkSampleMask sampleMask = -1; + VkPipelineMultisampleStateCreateInfo multisampleState{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + .pSampleMask = &sampleMask, + }; + + VkPipelineDepthStencilStateCreateInfo depthStencilState{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + }; + VkPipelineColorBlendStateCreateInfo colorBlendState{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + }; + + VkDynamicState dynamicStates[] = { + VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT, + VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT, + }; + + VkPipelineDynamicStateCreateInfo dynamicState{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = std::size(dynamicStates), + .pDynamicStates = dynamicStates, + }; + + VkGraphicsPipelineCreateInfo pipelineCreateInfos[]{ + { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = std::size(stagesStd), + .pStages = stagesStd, + .pVertexInputState = &vertexInputState, + .pInputAssemblyState = &inputAssemblyState, + .pTessellationState = &tessellationState, + .pRasterizationState = &rasterizationState, + .pMultisampleState = &multisampleState, + .pDepthStencilState = &depthStencilState, + .pColorBlendState = &colorBlendState, + .pDynamicState = &dynamicState, + .layout = pipelineLayout, + }, + { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = std::size(stagesAlt), + .pStages = stagesAlt, + .pVertexInputState = &vertexInputState, + .pInputAssemblyState = &inputAssemblyState, + .pTessellationState = &tessellationState, + .pRasterizationState = &rasterizationState, + .pMultisampleState = &multisampleState, + .pDepthStencilState = &depthStencilState, + .pColorBlendState = &colorBlendState, + .pDynamicState = &dynamicState, + .layout = pipelineLayout, + }, + }; + + VK_VERIFY(vkCreateGraphicsPipelines( + vk::context->device, VK_NULL_HANDLE, std::size(pipelines), + pipelineCreateInfos, vk::context->allocator, pipelines)); + } + + { + VkDescriptorPoolSize poolSizes[]{ + { + .type = VK_DESCRIPTOR_TYPE_SAMPLER, + .descriptorCount = + static_cast(std::size(descriptorSets) * 2), + }, + { + .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = + static_cast(std::size(descriptorSets) * 2), + }}; + + VkDescriptorPoolCreateInfo descriptorPoolCreateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .maxSets = static_cast(std::size(descriptorSets) * 2), + .poolSizeCount = std::size(poolSizes), + .pPoolSizes = poolSizes, + }; + + VK_VERIFY(vkCreateDescriptorPool(vk::context->device, + &descriptorPoolCreateInfo, + vk::context->allocator, &descriptorPool)); + } + + for (auto &set : descriptorSets) { + VkDescriptorSetAllocateInfo descriptorSetAllocateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = descriptorPool, + .descriptorSetCount = 1, + .pSetLayouts = &descriptorSetLayout, + }; + + VK_VERIFY(vkAllocateDescriptorSets(vk::context->device, + &descriptorSetAllocateInfo, &set)); + } +} + +void FlipPipeline::bind(Scheduler &sched, FlipType type, VkImageView imageView, + VkSampler sampler) { + auto cmdBuffer = sched.getCommandBuffer(); + + auto allocateDescriptorSetIndex = [this] { + auto mask = freeDescriptorSets.load(std::memory_order::acquire); + + while (true) { + auto index = std::countr_one(mask); + if (index >= std::size(descriptorSets)) { + mask = freeDescriptorSets.load(std::memory_order::relaxed); + continue; + } + + if (!freeDescriptorSets.compare_exchange_weak( + mask, mask | (1 << index), std::memory_order::release, + std::memory_order::relaxed)) { + continue; + } + + return index; + } + }; + + auto descriptorIndex = allocateDescriptorSetIndex(); + + sched.afterSubmit([this, descriptorIndex] { + decltype(freeDescriptorSets)::value_type mask = 1 << descriptorIndex; + + while (!freeDescriptorSets.compare_exchange_weak( + mask, mask & ~(1 << descriptorIndex), std::memory_order::release, + std::memory_order::acquire)) { + } + }); + + auto descriptorSet = descriptorSets[descriptorIndex]; + VkDescriptorImageInfo imageInfo = { + .sampler = sampler, + .imageView = imageView, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + + VkWriteDescriptorSet writeDescSets[]{ + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = descriptorSet, + .dstBinding = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = &imageInfo, + }, + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = descriptorSet, + .dstBinding = 1, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, + .pImageInfo = &imageInfo, + }, + }; + + vkUpdateDescriptorSets(vk::context->device, std::size(writeDescSets), + writeDescSets, 0, nullptr); + vkCmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipelineLayout, 0, 1, &descriptorSet, 0, nullptr); + vkCmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipelines[static_cast(type)]); +} diff --git a/rpcsx-gpu2/FlipPipeline.hpp b/rpcsx-gpu2/FlipPipeline.hpp new file mode 100644 index 000000000..8e286d45a --- /dev/null +++ b/rpcsx-gpu2/FlipPipeline.hpp @@ -0,0 +1,30 @@ +#pragma once + +#include "Scheduler.hpp" +#include +#include +#include + +enum class FlipType { + Std, + Alt, +}; + +struct FlipPipeline { + VkShaderModule flipVertShaderModule{}; + VkShaderModule flipFragStdShaderModule{}; + VkShaderModule flipFragAltShaderModule{}; + VkPipelineLayout pipelineLayout{}; + VkDescriptorSetLayout descriptorSetLayout{}; + VkPipeline pipelines[2]{}; + VkDescriptorPool descriptorPool{}; + VkDescriptorSet descriptorSets[8]{}; + std::atomic freeDescriptorSets{0}; + + FlipPipeline(const FlipPipeline &) = delete; + FlipPipeline(); + ~FlipPipeline(); + + void bind(Scheduler &sched, FlipType type, VkImageView imageView, + VkSampler sampler); +}; diff --git a/rpcsx-gpu2/Renderer.cpp b/rpcsx-gpu2/Renderer.cpp index 064dea5c8..df21dd8f4 100644 --- a/rpcsx-gpu2/Renderer.cpp +++ b/rpcsx-gpu2/Renderer.cpp @@ -12,8 +12,6 @@ #include #include #include -#include -#include #include #include @@ -114,50 +112,6 @@ static VkShaderEXT getFillRedFragShader(amdgpu::Cache &cache) { return shader; } -static VkShaderEXT getFlipVertexShader(amdgpu::Cache &cache) { - static VkShaderEXT shader = VK_NULL_HANDLE; - if (shader != VK_NULL_HANDLE) { - return shader; - } - - VkShaderCreateInfoEXT createInfo{ - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT, - .codeSize = sizeof(spirv_flip_vert), - .pCode = spirv_flip_vert, - .pName = "main", - .setLayoutCount = - static_cast(cache.getGraphicsDescriptorSetLayouts().size()), - .pSetLayouts = cache.getGraphicsDescriptorSetLayouts().data()}; - - VK_VERIFY(vk::CreateShadersEXT(vk::context->device, 1, &createInfo, - vk::context->allocator, &shader)); - return shader; -} - -static VkShaderEXT getFlipFragmentShader(amdgpu::Cache &cache) { - static VkShaderEXT shader = VK_NULL_HANDLE; - if (shader != VK_NULL_HANDLE) { - return shader; - } - - VkShaderCreateInfoEXT createInfo{ - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT, - .codeSize = sizeof(spirv_flip_frag), - .pCode = spirv_flip_frag, - .pName = "main", - .setLayoutCount = - static_cast(cache.getGraphicsDescriptorSetLayouts().size()), - .pSetLayouts = cache.getGraphicsDescriptorSetLayouts().data()}; - - VK_VERIFY(vk::CreateShadersEXT(vk::context->device, 1, &createInfo, - vk::context->allocator, &shader)); - return shader; -} - static VkPrimitiveTopology toVkPrimitiveType(gnm::PrimitiveType type) { switch (type) { case gnm::PrimitiveType::PointList: @@ -728,7 +682,7 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, .vgprCount = pgm.rsrc1.getVGprCount(), .sgprCount = pgm.rsrc1.getSGprCount(), .userSgprs = std::span(pgm.userData.data(), pgm.rsrc2.userSgpr), - // .supportsBarycentric = vk::context->supportsBarycentric, + .supportsBarycentric = vk::context->supportsBarycentric, .supportsInt8 = vk::context->supportsInt8, .supportsInt64Atomics = vk::context->supportsInt64Atomics, }; @@ -1162,50 +1116,20 @@ transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image, void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer, VkExtent2D targetExtent, std::uint64_t address, VkImageView target, VkExtent2D imageExtent, - CbCompSwap compSwap, TileMode tileMode, gnm::DataFormat dfmt, + FlipType type, TileMode tileMode, gnm::DataFormat dfmt, gnm::NumericFormat nfmt) { - auto pipelineLayout = cacheTag.getGraphicsPipelineLayout(); - auto descriptorSets = cacheTag.createGraphicsDescriptorSets(); - ImageViewKey framebuffer{}; - framebuffer.type = gnm::TextureType::Dim2D; - framebuffer.pitch = imageExtent.width; framebuffer.readAddress = address; + framebuffer.type = gnm::TextureType::Dim2D; + framebuffer.dfmt = dfmt; + framebuffer.nfmt = nfmt; + framebuffer.tileMode = tileMode; framebuffer.extent.width = imageExtent.width; framebuffer.extent.height = imageExtent.height; framebuffer.extent.depth = 1; - framebuffer.dfmt = dfmt; - framebuffer.nfmt = nfmt; + framebuffer.pitch = imageExtent.width; framebuffer.mipCount = 1; framebuffer.arrayLayerCount = 1; - framebuffer.tileMode = tileMode; - - switch (compSwap) { - case CbCompSwap::Std: - framebuffer.R = gnm::Swizzle::R; - framebuffer.G = gnm::Swizzle::G; - framebuffer.B = gnm::Swizzle::B; - framebuffer.A = gnm::Swizzle::A; - break; - case CbCompSwap::Alt: - framebuffer.R = gnm::Swizzle::B; - framebuffer.G = gnm::Swizzle::G; - framebuffer.B = gnm::Swizzle::R; - framebuffer.A = gnm::Swizzle::A; - break; - case CbCompSwap::StdRev: - framebuffer.R = gnm::Swizzle::A; - framebuffer.G = gnm::Swizzle::B; - framebuffer.B = gnm::Swizzle::G; - framebuffer.A = gnm::Swizzle::R; - break; - case CbCompSwap::AltRev: - framebuffer.R = gnm::Swizzle::A; - framebuffer.G = gnm::Swizzle::R; - framebuffer.B = gnm::Swizzle::G; - framebuffer.A = gnm::Swizzle::B; - break; - } SamplerKey framebufferSampler = { .magFilter = VK_FILTER_LINEAR, @@ -1215,49 +1139,20 @@ void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer, auto imageView = cacheTag.getImageView(framebuffer, Access::Read); auto sampler = cacheTag.getSampler(framebufferSampler); - cacheTag.submitAndWait(); - VkDescriptorImageInfo imageInfo{ .sampler = sampler.handle, .imageView = imageView.handle, .imageLayout = VK_IMAGE_LAYOUT_GENERAL, }; - VkWriteDescriptorSet writeDescSet[]{ - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = descriptorSets[0], - .dstBinding = - Cache::getDescriptorBinding(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 2), - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = &imageInfo, - }, - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = descriptorSets[0], - .dstBinding = Cache::getDescriptorBinding(VK_DESCRIPTOR_TYPE_SAMPLER), - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, - .pImageInfo = &imageInfo, - }}; - - vkUpdateDescriptorSets(vk::context->device, std::size(writeDescSet), - writeDescSet, 0, nullptr); - VkRenderingAttachmentInfo colorAttachments[1]{{ .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, .imageView = target, .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .clearValue = {}, }}; - VkBool32 colorBlendEnable[1]{VK_FALSE}; - VkColorBlendEquationEXT colorBlendEquation[1]{}; - VkColorComponentFlags colorWriteMask[1]{ - VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT}; + VkViewport viewPorts[1]{ { .width = float(targetExtent.width), @@ -1282,87 +1177,16 @@ void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer, .pColorAttachments = colorAttachments, }; + commandBuffer = cacheTag.getScheduler().getCommandBuffer(); + vkCmdBeginRendering(commandBuffer, &renderInfo); - vkCmdSetRasterizerDiscardEnable(commandBuffer, VK_FALSE); + + cacheTag.getDevice()->flipPipeline.bind(cacheTag.getScheduler(), type, imageView.handle, sampler.handle); vkCmdSetViewportWithCount(commandBuffer, 1, viewPorts); vkCmdSetScissorWithCount(commandBuffer, 1, viewPortScissors); - vk::CmdSetColorBlendEnableEXT(commandBuffer, 0, 1, colorBlendEnable); - vk::CmdSetColorBlendEquationEXT(commandBuffer, 0, 1, colorBlendEquation); - - vk::CmdSetDepthClampEnableEXT(commandBuffer, VK_FALSE); - vkCmdSetDepthTestEnable(commandBuffer, VK_FALSE); - vkCmdSetDepthWriteEnable(commandBuffer, VK_FALSE); - vkCmdSetDepthBounds(commandBuffer, 0.0f, 1.0f); - vkCmdSetDepthBoundsTestEnable(commandBuffer, VK_FALSE); - - vkCmdSetDepthBiasEnable(commandBuffer, VK_FALSE); - vkCmdSetDepthBias(commandBuffer, 0, 1, 1); - vkCmdSetPrimitiveRestartEnable(commandBuffer, VK_FALSE); - - vk::CmdSetAlphaToOneEnableEXT(commandBuffer, VK_FALSE); - - vk::CmdSetLogicOpEnableEXT(commandBuffer, VK_FALSE); - vk::CmdSetLogicOpEXT(commandBuffer, VK_LOGIC_OP_AND); - vk::CmdSetPolygonModeEXT(commandBuffer, VK_POLYGON_MODE_FILL); - vk::CmdSetRasterizationSamplesEXT(commandBuffer, VK_SAMPLE_COUNT_1_BIT); - VkSampleMask sampleMask = ~0; - vk::CmdSetSampleMaskEXT(commandBuffer, VK_SAMPLE_COUNT_1_BIT, &sampleMask); - vk::CmdSetTessellationDomainOriginEXT( - commandBuffer, VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT); - vk::CmdSetAlphaToCoverageEnableEXT(commandBuffer, VK_FALSE); - vk::CmdSetVertexInputEXT(commandBuffer, 0, nullptr, 0, nullptr); - vk::CmdSetColorWriteMaskEXT(commandBuffer, 0, 1, colorWriteMask); - - vkCmdSetStencilCompareMask(commandBuffer, VK_STENCIL_FACE_FRONT_AND_BACK, 0); - vkCmdSetStencilWriteMask(commandBuffer, VK_STENCIL_FACE_FRONT_AND_BACK, 0); - vkCmdSetStencilReference(commandBuffer, VK_STENCIL_FACE_FRONT_AND_BACK, 0); - - vkCmdSetCullMode(commandBuffer, VK_CULL_MODE_NONE); - vkCmdSetFrontFace(commandBuffer, VK_FRONT_FACE_CLOCKWISE); - - vkCmdSetPrimitiveTopology(commandBuffer, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST); - vkCmdSetStencilTestEnable(commandBuffer, VK_FALSE); - - auto stages = Cache::kGraphicsStages; - VkShaderEXT shaders[stages.size()]{}; - - shaders[Cache::getStageIndex(VK_SHADER_STAGE_VERTEX_BIT)] = - getFlipVertexShader(*cacheTag.getCache()); - - shaders[Cache::getStageIndex(VK_SHADER_STAGE_FRAGMENT_BIT)] = - getFlipFragmentShader(*cacheTag.getCache()); - - vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipelineLayout, 0, descriptorSets.size(), - descriptorSets.data(), 0, nullptr); - - vk::CmdBindShadersEXT(commandBuffer, stages.size(), stages.data(), shaders); - vkCmdDraw(commandBuffer, 6, 1, 0, 0); - vkCmdEndRendering(commandBuffer); - - // { - // VkImageMemoryBarrier barrier{ - // .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - // .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT, - // .dstAccessMask = VK_ACCESS_NONE, - // .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - // .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - // .image = imageView.imageHandle, - // .subresourceRange = - // { - // .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - // .levelCount = 1, - // .layerCount = 1, - // }, - // }; - - // vkCmdPipelineBarrier(commandBuffer, - // VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - // VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, nullptr, - // 0, nullptr, 1, &barrier); - // } + cacheTag.getScheduler().submit(); } diff --git a/rpcsx-gpu2/Renderer.hpp b/rpcsx-gpu2/Renderer.hpp index 20102091d..62cc6e079 100644 --- a/rpcsx-gpu2/Renderer.hpp +++ b/rpcsx-gpu2/Renderer.hpp @@ -1,6 +1,7 @@ #pragma once #include "Cache.hpp" +#include "FlipPipeline.hpp" #include "Pipe.hpp" #include #include @@ -12,6 +13,6 @@ void draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, std::uint32_t indexCount); void flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer, VkExtent2D targetExtent, std::uint64_t address, VkImageView target, - VkExtent2D imageExtent, CbCompSwap compSwap, TileMode tileMode, + VkExtent2D imageExtent, FlipType type, TileMode tileMode, gnm::DataFormat dfmt, gnm::NumericFormat nfmt); } // namespace amdgpu diff --git a/rpcsx-gpu2/lib/amdgpu-tiler/src/tiler_vulkan.cpp b/rpcsx-gpu2/lib/amdgpu-tiler/src/tiler_vulkan.cpp index 12c8a7399..efdba029c 100644 --- a/rpcsx-gpu2/lib/amdgpu-tiler/src/tiler_vulkan.cpp +++ b/rpcsx-gpu2/lib/amdgpu-tiler/src/tiler_vulkan.cpp @@ -190,9 +190,8 @@ void amdgpu::GpuTiler::detile(Scheduler &scheduler, configOffset); auto &subresource = info.getSubresourceInfo(mipLevel); - config->srcAddress = srcTiledAddress + subresource.offset + - (subresource.tiledSize * baseArray); - config->dstAddress = dstLinearAddress + (subresource.linearSize * baseArray); + config->srcAddress = srcTiledAddress + subresource.offset; + config->dstAddress = dstLinearAddress; config->dataWidth = subresource.dataWidth; config->dataHeight = subresource.dataHeight; config->tileMode = tileMode.raw; @@ -287,9 +286,8 @@ void amdgpu::GpuTiler::tile(Scheduler &scheduler, configOffset); auto &subresource = info.getSubresourceInfo(mipLevel); - config->srcAddress = srcLinearAddress + subresource.offset + - subresource.linearSize * baseArray; - config->dstAddress = dstTiledAddress; + config->srcAddress = srcLinearAddress; + config->dstAddress = dstTiledAddress + subresource.offset; config->dataWidth = subresource.dataWidth; config->dataHeight = subresource.dataHeight; config->tileMode = tileMode.raw; diff --git a/rpcsx-gpu2/lib/gcn-shader/shaders/rdna.glsl b/rpcsx-gpu2/lib/gcn-shader/shaders/rdna.glsl index 6fe9f8f69..fdb64f5ea 100644 --- a/rpcsx-gpu2/lib/gcn-shader/shaders/rdna.glsl +++ b/rpcsx-gpu2/lib/gcn-shader/shaders/rdna.glsl @@ -2542,6 +2542,22 @@ int findTexture3DIndex(int32_t textureIndexHint, uint32_t tbuffer[8]) { return textureIndexHint; } +float32_t swizzle(f32vec4 comp, int sel) { + switch (sel) { + case 0: return 0; + case 1: return 1; + case 4: return comp.x; + case 5: return comp.y; + case 6: return comp.z; + case 7: return comp.w; + } + + return 1; +} + +f32vec4 swizzle(f32vec4 comp, int selX, int selY, int selZ, int selW) { + return f32vec4(swizzle(comp, selX), swizzle(comp, selY), swizzle(comp, selZ), swizzle(comp, selW)); +} void image_sample(inout f32vec4 vdata, f32vec3 vaddr, int32_t textureIndexHint, uint32_t tbuffer[8], int32_t samplerIndexHint, u32vec4 ssampler, uint32_t dmask) { uint8_t textureType = tbuffer_type(tbuffer); @@ -2582,6 +2598,14 @@ void image_sample(inout f32vec4 vdata, f32vec3 vaddr, int32_t textureIndexHint, // debugPrintfEXT("image_sample: textureType: %u, coord: %v3f, result: %v4f, dmask: %u", textureType, vaddr, result, dmask); + + result = swizzle(result, + tbuffer_dst_sel_x(tbuffer), + tbuffer_dst_sel_y(tbuffer), + tbuffer_dst_sel_z(tbuffer), + tbuffer_dst_sel_w(tbuffer)); + + int vdataIndex = 0; for (int i = 0; i < 4; ++i) { if ((dmask & (1 << i)) != 0) { diff --git a/rpcsx-gpu2/main.cpp b/rpcsx-gpu2/main.cpp index f097d157b..0ad1b8ac2 100644 --- a/rpcsx-gpu2/main.cpp +++ b/rpcsx-gpu2/main.cpp @@ -263,7 +263,7 @@ int main(int argc, const char *argv[]) { glfwInit(); glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API); - auto window = glfwCreateWindow(1280, 720, "RPCSX", nullptr, nullptr); + auto window = glfwCreateWindow(1920, 1080, "RPCSX", nullptr, nullptr); rx::atScopeExit _{[window] { glfwDestroyWindow(window); }}; diff --git a/rpcsx-gpu2/shaders/flip.frag.glsl b/rpcsx-gpu2/shaders/flip.frag.glsl deleted file mode 100644 index 3670b384b..000000000 --- a/rpcsx-gpu2/shaders/flip.frag.glsl +++ /dev/null @@ -1,11 +0,0 @@ -#version 450 - -layout(location = 0) in vec2 coord; -layout(location = 0) out vec4 color; -layout(set = 0, binding = 1) uniform sampler samp[]; -layout(set = 0, binding = 3) uniform texture2D tex[]; - -void main() -{ - color = vec4(texture(sampler2D(tex[0], samp[0]), coord.xy).xyz, 1); -} diff --git a/rpcsx-gpu2/shaders/flip.vert.glsl b/rpcsx-gpu2/shaders/flip.vert.glsl index 6d0451496..2f7071731 100644 --- a/rpcsx-gpu2/shaders/flip.vert.glsl +++ b/rpcsx-gpu2/shaders/flip.vert.glsl @@ -2,7 +2,6 @@ layout(location = 0) out vec2 coord; - void main() { float x = float(((gl_VertexIndex + 2) / 3) & 1) * 2 - 1; diff --git a/rpcsx-gpu2/shaders/flip_alt.frag.glsl b/rpcsx-gpu2/shaders/flip_alt.frag.glsl new file mode 100644 index 000000000..6c45d02b8 --- /dev/null +++ b/rpcsx-gpu2/shaders/flip_alt.frag.glsl @@ -0,0 +1,11 @@ +#version 450 + +layout(location = 0) in vec2 coord; +layout(location = 0) out vec4 color; +layout(binding = 0) uniform texture2D tex; +layout(binding = 1) uniform sampler samp; + +void main() +{ + color = vec4(texture(sampler2D(tex, samp), coord.xy).xyz, 1).bgra; +} diff --git a/rpcsx-gpu2/shaders/flip_std.frag.glsl b/rpcsx-gpu2/shaders/flip_std.frag.glsl new file mode 100644 index 000000000..ba6a251ca --- /dev/null +++ b/rpcsx-gpu2/shaders/flip_std.frag.glsl @@ -0,0 +1,11 @@ +#version 450 + +layout(location = 0) in vec2 coord; +layout(location = 0) out vec4 color; +layout(binding = 0) uniform texture2D tex; +layout(binding = 1) uniform sampler samp; + +void main() +{ + color = vec4(texture(sampler2D(tex, samp), coord.xy).xyz, 1).rgba; +}