From dcc4943812b0379c19caebea1962177fe93d3b4a Mon Sep 17 00:00:00 2001 From: DH Date: Tue, 1 Oct 2024 21:51:43 +0300 Subject: [PATCH] gpu2: safe gpu tiler api --- rpcsx-gpu2/Cache.cpp | 13 +++++++++---- .../include/amdgpu/tiler_vulkan.hpp | 8 +++++--- .../amdgpu-tiler/shaders/detiler1d.comp.glsl | 15 ++++++++++++++- .../amdgpu-tiler/shaders/detiler2d.comp.glsl | 15 ++++++++++++++- .../lib/amdgpu-tiler/shaders/tiler.glsl | 2 ++ .../amdgpu-tiler/shaders/tiler1d.comp.glsl | 15 ++++++++++++++- .../amdgpu-tiler/shaders/tiler2d.comp.glsl | 15 ++++++++++++++- .../lib/amdgpu-tiler/src/tiler_vulkan.cpp | 19 ++++++++++++++----- 8 files changed, 86 insertions(+), 16 deletions(-) diff --git a/rpcsx-gpu2/Cache.cpp b/rpcsx-gpu2/Cache.cpp index b514c8667..2d0d99ffc 100644 --- a/rpcsx-gpu2/Cache.cpp +++ b/rpcsx-gpu2/Cache.cpp @@ -565,6 +565,7 @@ struct CachedImage : Cache::Entry { tiledBuffer.handle, regions.size(), regions.data()); } else { + auto tiledSize = info.totalSize; std::uint64_t linearOffset = 0; for (unsigned mipLevel = 0; mipLevel < image.getMipLevels(); ++mipLevel) { auto ®ionInfo = info.getSubresourceInfo(mipLevel); @@ -590,6 +591,7 @@ struct CachedImage : Cache::Entry { linearOffset += regionInfo.linearSize * image.getArrayLayers(); } + auto linearSize = linearOffset; auto transferBuffer = vk::Buffer::Allocate( vk::getDeviceLocalMemory(), linearOffset, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT); @@ -605,8 +607,8 @@ struct CachedImage : Cache::Entry { for (unsigned mipLevel = 0; mipLevel < image.getMipLevels(); ++mipLevel) { auto ®ionInfo = info.getSubresourceInfo(mipLevel); tiler.tile(scheduler, info, acquiredTileMode, acquiredDfmt, - transferBuffer.getAddress() + linearOffset, - tiledBuffer.deviceAddress, mipLevel, 0, + transferBuffer.getAddress() + linearOffset, linearSize - linearOffset, + tiledBuffer.deviceAddress, tiledSize, mipLevel, 0, image.getArrayLayers()); linearOffset += regionInfo.linearSize * image.getArrayLayers(); } @@ -1182,8 +1184,10 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) { linearOffset += info.linearSize * key.arrayLayerCount; } + auto detiledSize = linearOffset; + auto detiledBuffer = - vk::Buffer::Allocate(vk::getDeviceLocalMemory(), linearOffset, + vk::Buffer::Allocate(vk::getDeviceLocalMemory(), detiledSize, VK_BUFFER_USAGE_2_TRANSFER_DST_BIT_KHR | VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT_KHR); @@ -1197,9 +1201,10 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) { auto &info = surfaceInfo.getSubresourceInfo(mipLevel); tiler.detile(*mScheduler, surfaceInfo, key.tileMode, key.dfmt, - tiledBuffer.deviceAddress, dstAddress, mipLevel, 0, + tiledBuffer.deviceAddress, surfaceInfo.totalSize, dstAddress, detiledSize, mipLevel, 0, key.arrayLayerCount); + detiledSize -= info.linearSize * key.arrayLayerCount; dstAddress += info.linearSize * key.arrayLayerCount; } } diff --git a/rpcsx-gpu2/lib/amdgpu-tiler/include/amdgpu/tiler_vulkan.hpp b/rpcsx-gpu2/lib/amdgpu-tiler/include/amdgpu/tiler_vulkan.hpp index 718fb15c1..8fae711e3 100644 --- a/rpcsx-gpu2/lib/amdgpu-tiler/include/amdgpu/tiler_vulkan.hpp +++ b/rpcsx-gpu2/lib/amdgpu-tiler/include/amdgpu/tiler_vulkan.hpp @@ -12,12 +12,14 @@ struct GpuTiler { void detile(Scheduler &scheduler, const amdgpu::SurfaceInfo &info, amdgpu::TileMode tileMode, gnm::DataFormat dfmt, - std::uint64_t srcTiledAddress, std::uint64_t dstLinearAddress, + std::uint64_t srcTiledAddress, std::uint64_t srcSize, + std::uint64_t dstLinearAddress, std::uint64_t dstSize, int mipLevel, int baseArray, int arrayCount); void tile(Scheduler &scheduler, const amdgpu::SurfaceInfo &info, amdgpu::TileMode tileMode, gnm::DataFormat dfmt, - std::uint64_t srcLinearAddress, std::uint64_t dstTiledAddress, - int mipLevel, int baseArray, int arrayCount); + std::uint64_t srcLinearAddress, std::uint64_t srcSize, + std::uint64_t dstTiledAddress, std::uint64_t dstSize, int mipLevel, + int baseArray, int arrayCount); private: std::unique_ptr mImpl; diff --git a/rpcsx-gpu2/lib/amdgpu-tiler/shaders/detiler1d.comp.glsl b/rpcsx-gpu2/lib/amdgpu-tiler/shaders/detiler1d.comp.glsl index 2f65b404e..e60dc02d9 100644 --- a/rpcsx-gpu2/lib/amdgpu-tiler/shaders/detiler1d.comp.glsl +++ b/rpcsx-gpu2/lib/amdgpu-tiler/shaders/detiler1d.comp.glsl @@ -11,6 +11,7 @@ #extension GL_EXT_null_initializer : enable #extension GL_EXT_buffer_reference2 : enable #extension GL_EXT_buffer_reference_uvec2 : enable +#extension GL_EXT_debug_printf : enable #include "tiler.glsl" @@ -44,7 +45,19 @@ void main() { linearByteOffset += linearSliceOffset; - switch ((config.bitsPerElement + 7) / 8) { + uint32_t bpp = (config.bitsPerElement + 7) / 8; + + if (config.srcAddress + tiledByteOffset + bpp > config.srcEndAddress) { + debugPrintfEXT("detiler1d: out of src buffer %d x %d x %d", pos.x, pos.y, pos.z); + return; + } + + if (config.dstAddress + linearByteOffset + bpp > config.dstEndAddress) { + debugPrintfEXT("detiler1d: out of dst buffer %d x %d x %d", pos.x, pos.y, pos.z); + return; + } + + switch (bpp) { case 1: buffer_reference_uint8_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint8_t(config.srcAddress + tiledByteOffset).data; break; diff --git a/rpcsx-gpu2/lib/amdgpu-tiler/shaders/detiler2d.comp.glsl b/rpcsx-gpu2/lib/amdgpu-tiler/shaders/detiler2d.comp.glsl index ea01560ba..db2b450b6 100644 --- a/rpcsx-gpu2/lib/amdgpu-tiler/shaders/detiler2d.comp.glsl +++ b/rpcsx-gpu2/lib/amdgpu-tiler/shaders/detiler2d.comp.glsl @@ -11,6 +11,7 @@ #extension GL_EXT_null_initializer : enable #extension GL_EXT_buffer_reference2 : enable #extension GL_EXT_buffer_reference_uvec2 : enable +#extension GL_EXT_debug_printf : enable #include "tiler.glsl" @@ -51,7 +52,19 @@ void main() { linearByteOffset += linearSliceOffset; - switch ((config.bitsPerElement + 7) / 8) { + uint32_t bpp = (config.bitsPerElement + 7) / 8; + + if (config.srcAddress + tiledByteOffset + bpp > config.srcEndAddress) { + debugPrintfEXT("detiler2d: out of src buffer %d x %d x %d", pos.x, pos.y, pos.z); + return; + } + + if (config.dstAddress + linearByteOffset + bpp > config.dstEndAddress) { + debugPrintfEXT("detiler2d: out of dst buffer %d x %d x %d", pos.x, pos.y, pos.z); + return; + } + + switch (bpp) { case 1: buffer_reference_uint8_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint8_t(config.srcAddress + tiledByteOffset).data; break; diff --git a/rpcsx-gpu2/lib/amdgpu-tiler/shaders/tiler.glsl b/rpcsx-gpu2/lib/amdgpu-tiler/shaders/tiler.glsl index bcb6f70ff..10507d5c3 100644 --- a/rpcsx-gpu2/lib/amdgpu-tiler/shaders/tiler.glsl +++ b/rpcsx-gpu2/lib/amdgpu-tiler/shaders/tiler.glsl @@ -992,7 +992,9 @@ uint64_t getTiledBitOffset2D(uint32_t dfmt, uint32_t tileMode, uint32_t macroTil layout(binding=0) uniform Config { uint64_t srcAddress; + uint64_t srcEndAddress; uint64_t dstAddress; + uint64_t dstEndAddress; uvec2 dataSize; uint32_t tileMode; uint32_t macroTileMode; diff --git a/rpcsx-gpu2/lib/amdgpu-tiler/shaders/tiler1d.comp.glsl b/rpcsx-gpu2/lib/amdgpu-tiler/shaders/tiler1d.comp.glsl index db92aae0b..e0b383219 100644 --- a/rpcsx-gpu2/lib/amdgpu-tiler/shaders/tiler1d.comp.glsl +++ b/rpcsx-gpu2/lib/amdgpu-tiler/shaders/tiler1d.comp.glsl @@ -11,6 +11,7 @@ #extension GL_EXT_null_initializer : enable #extension GL_EXT_buffer_reference2 : enable #extension GL_EXT_buffer_reference_uvec2 : enable +#extension GL_EXT_debug_printf : enable #include "tiler.glsl" @@ -44,7 +45,19 @@ void main() { linearByteOffset += linearSliceOffset; - switch ((config.bitsPerElement + 7) / 8) { + uint32_t bpp = (config.bitsPerElement + 7) / 8; + + if (config.srcAddress + linearByteOffset + bpp > config.srcEndAddress) { + debugPrintfEXT("tiler1d: out of src buffer %d x %d x %d", pos.x, pos.y, pos.z); + return; + } + + if (config.dstAddress + tiledByteOffset + bpp > config.dstEndAddress) { + debugPrintfEXT("tiler1d: out of dst buffer %d x %d x %d", pos.x, pos.y, pos.z); + return; + } + + switch (bpp) { case 1: buffer_reference_uint8_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint8_t(config.srcAddress + linearByteOffset).data; break; diff --git a/rpcsx-gpu2/lib/amdgpu-tiler/shaders/tiler2d.comp.glsl b/rpcsx-gpu2/lib/amdgpu-tiler/shaders/tiler2d.comp.glsl index 6fc258307..709150cfb 100644 --- a/rpcsx-gpu2/lib/amdgpu-tiler/shaders/tiler2d.comp.glsl +++ b/rpcsx-gpu2/lib/amdgpu-tiler/shaders/tiler2d.comp.glsl @@ -11,6 +11,7 @@ #extension GL_EXT_null_initializer : enable #extension GL_EXT_buffer_reference2 : enable #extension GL_EXT_buffer_reference_uvec2 : enable +#extension GL_EXT_debug_printf : enable #include "tiler.glsl" @@ -50,7 +51,19 @@ void main() { linearByteOffset += linearSliceOffset; - switch ((config.bitsPerElement + 7) / 8) { + uint32_t bpp = (config.bitsPerElement + 7) / 8; + + if (config.srcAddress + linearByteOffset + bpp > config.srcEndAddress) { + debugPrintfEXT("tiler2d: out of src buffer %d x %d x %d", pos.x, pos.y, pos.z); + return; + } + + if (config.dstAddress + tiledByteOffset + bpp > config.dstEndAddress) { + debugPrintfEXT("tiler2d: out of dst buffer %d x %d x %d", pos.x, pos.y, pos.z); + return; + } + + switch (bpp) { case 1: buffer_reference_uint8_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint8_t(config.srcAddress + linearByteOffset).data; break; diff --git a/rpcsx-gpu2/lib/amdgpu-tiler/src/tiler_vulkan.cpp b/rpcsx-gpu2/lib/amdgpu-tiler/src/tiler_vulkan.cpp index b720fe504..43ae86e0e 100644 --- a/rpcsx-gpu2/lib/amdgpu-tiler/src/tiler_vulkan.cpp +++ b/rpcsx-gpu2/lib/amdgpu-tiler/src/tiler_vulkan.cpp @@ -87,9 +87,11 @@ struct amdgpu::GpuTiler::Impl { TilerShader tiler2d{descriptorSetLayout, spirv_tiler2d_comp}; VkPipelineLayout pipelineLayout; - struct Config { + struct alignas(64) Config { uint64_t srcAddress; + uint64_t srcEndAddress; uint64_t dstAddress; + uint64_t dstEndAddress; uint32_t dataWidth; uint32_t dataHeight; uint32_t tileMode; @@ -99,7 +101,6 @@ struct amdgpu::GpuTiler::Impl { uint32_t bitsPerElement; uint32_t tiledSurfaceSize; uint32_t linearSurfaceSize; - uint32_t padding[2]; }; Impl() { @@ -170,7 +171,9 @@ void amdgpu::GpuTiler::detile(Scheduler &scheduler, const amdgpu::SurfaceInfo &info, amdgpu::TileMode tileMode, gnm::DataFormat dfmt, std::uint64_t srcTiledAddress, - std::uint64_t dstLinearAddress, int mipLevel, + std::uint64_t srcSize, + std::uint64_t dstLinearAddress, + std::uint64_t dstSize, int mipLevel, int baseArray, int arrayCount) { auto commandBuffer = scheduler.getCommandBuffer(); auto slot = mImpl->allocateDescriptorSlot(); @@ -181,7 +184,9 @@ void amdgpu::GpuTiler::detile(Scheduler &scheduler, auto &subresource = info.getSubresourceInfo(mipLevel); config->srcAddress = srcTiledAddress + subresource.offset; + config->srcEndAddress = srcTiledAddress + srcSize; config->dstAddress = dstLinearAddress; + config->dstEndAddress = dstLinearAddress + dstSize; config->dataWidth = subresource.dataWidth; config->dataHeight = subresource.dataHeight; config->tileMode = tileMode.raw; @@ -266,8 +271,10 @@ void amdgpu::GpuTiler::tile(Scheduler &scheduler, const amdgpu::SurfaceInfo &info, amdgpu::TileMode tileMode, gnm::DataFormat dfmt, std::uint64_t srcLinearAddress, - std::uint64_t dstTiledAddress, int mipLevel, - int baseArray, int arrayCount) { + std::uint64_t srcSize, + std::uint64_t dstTiledAddress, + std::uint64_t dstSize, int mipLevel, int baseArray, + int arrayCount) { auto commandBuffer = scheduler.getCommandBuffer(); auto slot = mImpl->allocateDescriptorSlot(); @@ -277,7 +284,9 @@ void amdgpu::GpuTiler::tile(Scheduler &scheduler, auto &subresource = info.getSubresourceInfo(mipLevel); config->srcAddress = srcLinearAddress; + config->srcEndAddress = srcLinearAddress + srcSize; config->dstAddress = dstTiledAddress + subresource.offset; + config->dstEndAddress = dstTiledAddress + dstSize; config->dataWidth = subresource.dataWidth; config->dataHeight = subresource.dataHeight; config->tileMode = tileMode.raw;