gpu2: safe gpu tiler api

This commit is contained in:
DH 2024-10-01 21:51:43 +03:00
parent 66234b5b0b
commit dcc4943812
8 changed files with 86 additions and 16 deletions

View file

@ -565,6 +565,7 @@ struct CachedImage : Cache::Entry {
tiledBuffer.handle, regions.size(),
regions.data());
} else {
auto tiledSize = info.totalSize;
std::uint64_t linearOffset = 0;
for (unsigned mipLevel = 0; mipLevel < image.getMipLevels(); ++mipLevel) {
auto &regionInfo = info.getSubresourceInfo(mipLevel);
@ -590,6 +591,7 @@ struct CachedImage : Cache::Entry {
linearOffset += regionInfo.linearSize * image.getArrayLayers();
}
auto linearSize = linearOffset;
auto transferBuffer = vk::Buffer::Allocate(
vk::getDeviceLocalMemory(), linearOffset,
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
@ -605,8 +607,8 @@ struct CachedImage : Cache::Entry {
for (unsigned mipLevel = 0; mipLevel < image.getMipLevels(); ++mipLevel) {
auto &regionInfo = info.getSubresourceInfo(mipLevel);
tiler.tile(scheduler, info, acquiredTileMode, acquiredDfmt,
transferBuffer.getAddress() + linearOffset,
tiledBuffer.deviceAddress, mipLevel, 0,
transferBuffer.getAddress() + linearOffset, linearSize - linearOffset,
tiledBuffer.deviceAddress, tiledSize, mipLevel, 0,
image.getArrayLayers());
linearOffset += regionInfo.linearSize * image.getArrayLayers();
}
@ -1182,8 +1184,10 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
linearOffset += info.linearSize * key.arrayLayerCount;
}
auto detiledSize = linearOffset;
auto detiledBuffer =
vk::Buffer::Allocate(vk::getDeviceLocalMemory(), linearOffset,
vk::Buffer::Allocate(vk::getDeviceLocalMemory(), detiledSize,
VK_BUFFER_USAGE_2_TRANSFER_DST_BIT_KHR |
VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT_KHR);
@ -1197,9 +1201,10 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
auto &info = surfaceInfo.getSubresourceInfo(mipLevel);
tiler.detile(*mScheduler, surfaceInfo, key.tileMode, key.dfmt,
tiledBuffer.deviceAddress, dstAddress, mipLevel, 0,
tiledBuffer.deviceAddress, surfaceInfo.totalSize, dstAddress, detiledSize, mipLevel, 0,
key.arrayLayerCount);
detiledSize -= info.linearSize * key.arrayLayerCount;
dstAddress += info.linearSize * key.arrayLayerCount;
}
}

View file

@ -12,12 +12,14 @@ struct GpuTiler {
void detile(Scheduler &scheduler, const amdgpu::SurfaceInfo &info,
amdgpu::TileMode tileMode, gnm::DataFormat dfmt,
std::uint64_t srcTiledAddress, std::uint64_t dstLinearAddress,
std::uint64_t srcTiledAddress, std::uint64_t srcSize,
std::uint64_t dstLinearAddress, std::uint64_t dstSize,
int mipLevel, int baseArray, int arrayCount);
void tile(Scheduler &scheduler, const amdgpu::SurfaceInfo &info,
amdgpu::TileMode tileMode, gnm::DataFormat dfmt,
std::uint64_t srcLinearAddress, std::uint64_t dstTiledAddress,
int mipLevel, int baseArray, int arrayCount);
std::uint64_t srcLinearAddress, std::uint64_t srcSize,
std::uint64_t dstTiledAddress, std::uint64_t dstSize, int mipLevel,
int baseArray, int arrayCount);
private:
std::unique_ptr<Impl> mImpl;

View file

@ -11,6 +11,7 @@
#extension GL_EXT_null_initializer : enable
#extension GL_EXT_buffer_reference2 : enable
#extension GL_EXT_buffer_reference_uvec2 : enable
#extension GL_EXT_debug_printf : enable
#include "tiler.glsl"
@ -44,7 +45,19 @@ void main() {
linearByteOffset += linearSliceOffset;
switch ((config.bitsPerElement + 7) / 8) {
uint32_t bpp = (config.bitsPerElement + 7) / 8;
if (config.srcAddress + tiledByteOffset + bpp > config.srcEndAddress) {
debugPrintfEXT("detiler1d: out of src buffer %d x %d x %d", pos.x, pos.y, pos.z);
return;
}
if (config.dstAddress + linearByteOffset + bpp > config.dstEndAddress) {
debugPrintfEXT("detiler1d: out of dst buffer %d x %d x %d", pos.x, pos.y, pos.z);
return;
}
switch (bpp) {
case 1:
buffer_reference_uint8_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint8_t(config.srcAddress + tiledByteOffset).data;
break;

View file

@ -11,6 +11,7 @@
#extension GL_EXT_null_initializer : enable
#extension GL_EXT_buffer_reference2 : enable
#extension GL_EXT_buffer_reference_uvec2 : enable
#extension GL_EXT_debug_printf : enable
#include "tiler.glsl"
@ -51,7 +52,19 @@ void main() {
linearByteOffset += linearSliceOffset;
switch ((config.bitsPerElement + 7) / 8) {
uint32_t bpp = (config.bitsPerElement + 7) / 8;
if (config.srcAddress + tiledByteOffset + bpp > config.srcEndAddress) {
debugPrintfEXT("detiler2d: out of src buffer %d x %d x %d", pos.x, pos.y, pos.z);
return;
}
if (config.dstAddress + linearByteOffset + bpp > config.dstEndAddress) {
debugPrintfEXT("detiler2d: out of dst buffer %d x %d x %d", pos.x, pos.y, pos.z);
return;
}
switch (bpp) {
case 1:
buffer_reference_uint8_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint8_t(config.srcAddress + tiledByteOffset).data;
break;

View file

@ -992,7 +992,9 @@ uint64_t getTiledBitOffset2D(uint32_t dfmt, uint32_t tileMode, uint32_t macroTil
layout(binding=0) uniform Config {
uint64_t srcAddress;
uint64_t srcEndAddress;
uint64_t dstAddress;
uint64_t dstEndAddress;
uvec2 dataSize;
uint32_t tileMode;
uint32_t macroTileMode;

View file

@ -11,6 +11,7 @@
#extension GL_EXT_null_initializer : enable
#extension GL_EXT_buffer_reference2 : enable
#extension GL_EXT_buffer_reference_uvec2 : enable
#extension GL_EXT_debug_printf : enable
#include "tiler.glsl"
@ -44,7 +45,19 @@ void main() {
linearByteOffset += linearSliceOffset;
switch ((config.bitsPerElement + 7) / 8) {
uint32_t bpp = (config.bitsPerElement + 7) / 8;
if (config.srcAddress + linearByteOffset + bpp > config.srcEndAddress) {
debugPrintfEXT("tiler1d: out of src buffer %d x %d x %d", pos.x, pos.y, pos.z);
return;
}
if (config.dstAddress + tiledByteOffset + bpp > config.dstEndAddress) {
debugPrintfEXT("tiler1d: out of dst buffer %d x %d x %d", pos.x, pos.y, pos.z);
return;
}
switch (bpp) {
case 1:
buffer_reference_uint8_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint8_t(config.srcAddress + linearByteOffset).data;
break;

View file

@ -11,6 +11,7 @@
#extension GL_EXT_null_initializer : enable
#extension GL_EXT_buffer_reference2 : enable
#extension GL_EXT_buffer_reference_uvec2 : enable
#extension GL_EXT_debug_printf : enable
#include "tiler.glsl"
@ -50,7 +51,19 @@ void main() {
linearByteOffset += linearSliceOffset;
switch ((config.bitsPerElement + 7) / 8) {
uint32_t bpp = (config.bitsPerElement + 7) / 8;
if (config.srcAddress + linearByteOffset + bpp > config.srcEndAddress) {
debugPrintfEXT("tiler2d: out of src buffer %d x %d x %d", pos.x, pos.y, pos.z);
return;
}
if (config.dstAddress + tiledByteOffset + bpp > config.dstEndAddress) {
debugPrintfEXT("tiler2d: out of dst buffer %d x %d x %d", pos.x, pos.y, pos.z);
return;
}
switch (bpp) {
case 1:
buffer_reference_uint8_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint8_t(config.srcAddress + linearByteOffset).data;
break;

View file

@ -87,9 +87,11 @@ struct amdgpu::GpuTiler::Impl {
TilerShader tiler2d{descriptorSetLayout, spirv_tiler2d_comp};
VkPipelineLayout pipelineLayout;
struct Config {
struct alignas(64) Config {
uint64_t srcAddress;
uint64_t srcEndAddress;
uint64_t dstAddress;
uint64_t dstEndAddress;
uint32_t dataWidth;
uint32_t dataHeight;
uint32_t tileMode;
@ -99,7 +101,6 @@ struct amdgpu::GpuTiler::Impl {
uint32_t bitsPerElement;
uint32_t tiledSurfaceSize;
uint32_t linearSurfaceSize;
uint32_t padding[2];
};
Impl() {
@ -170,7 +171,9 @@ void amdgpu::GpuTiler::detile(Scheduler &scheduler,
const amdgpu::SurfaceInfo &info,
amdgpu::TileMode tileMode, gnm::DataFormat dfmt,
std::uint64_t srcTiledAddress,
std::uint64_t dstLinearAddress, int mipLevel,
std::uint64_t srcSize,
std::uint64_t dstLinearAddress,
std::uint64_t dstSize, int mipLevel,
int baseArray, int arrayCount) {
auto commandBuffer = scheduler.getCommandBuffer();
auto slot = mImpl->allocateDescriptorSlot();
@ -181,7 +184,9 @@ void amdgpu::GpuTiler::detile(Scheduler &scheduler,
auto &subresource = info.getSubresourceInfo(mipLevel);
config->srcAddress = srcTiledAddress + subresource.offset;
config->srcEndAddress = srcTiledAddress + srcSize;
config->dstAddress = dstLinearAddress;
config->dstEndAddress = dstLinearAddress + dstSize;
config->dataWidth = subresource.dataWidth;
config->dataHeight = subresource.dataHeight;
config->tileMode = tileMode.raw;
@ -266,8 +271,10 @@ void amdgpu::GpuTiler::tile(Scheduler &scheduler,
const amdgpu::SurfaceInfo &info,
amdgpu::TileMode tileMode, gnm::DataFormat dfmt,
std::uint64_t srcLinearAddress,
std::uint64_t dstTiledAddress, int mipLevel,
int baseArray, int arrayCount) {
std::uint64_t srcSize,
std::uint64_t dstTiledAddress,
std::uint64_t dstSize, int mipLevel, int baseArray,
int arrayCount) {
auto commandBuffer = scheduler.getCommandBuffer();
auto slot = mImpl->allocateDescriptorSlot();
@ -277,7 +284,9 @@ void amdgpu::GpuTiler::tile(Scheduler &scheduler,
auto &subresource = info.getSubresourceInfo(mipLevel);
config->srcAddress = srcLinearAddress;
config->srcEndAddress = srcLinearAddress + srcSize;
config->dstAddress = dstTiledAddress + subresource.offset;
config->dstEndAddress = dstTiledAddress + dstSize;
config->dataWidth = subresource.dataWidth;
config->dataHeight = subresource.dataHeight;
config->tileMode = tileMode.raw;