mirror of
https://github.com/RPCSX/rpcsx.git
synced 2025-12-06 07:12:14 +01:00
rpcsx-gpu: implement 2d tiler
This commit is contained in:
parent
5f23121d33
commit
7bea1e354f
|
|
@ -534,14 +534,14 @@ struct CachedImage : Cache::Entry {
|
|||
regions.reserve(image.getMipLevels());
|
||||
|
||||
auto tiledBuffer =
|
||||
tag.getBuffer(baseAddress, info.totalSize, Access::Write);
|
||||
tag.getBuffer(baseAddress, info.totalTiledSize, Access::Write);
|
||||
|
||||
if (isLinear) {
|
||||
for (unsigned mipLevel = 0; mipLevel < image.getMipLevels(); ++mipLevel) {
|
||||
auto &regionInfo = info.getSubresourceInfo(mipLevel);
|
||||
|
||||
regions.push_back({
|
||||
.bufferOffset = regionInfo.offset,
|
||||
.bufferOffset = regionInfo.tiledOffset,
|
||||
.bufferRowLength =
|
||||
mipLevel > 0 ? 0 : std::max(info.pitch >> mipLevel, 1u),
|
||||
.imageSubresource =
|
||||
|
|
@ -565,14 +565,11 @@ struct CachedImage : Cache::Entry {
|
|||
tiledBuffer.handle, regions.size(),
|
||||
regions.data());
|
||||
} else {
|
||||
auto tiledSize = info.totalSize;
|
||||
std::uint64_t linearOffset = 0;
|
||||
for (unsigned mipLevel = 0; mipLevel < image.getMipLevels(); ++mipLevel) {
|
||||
auto &regionInfo = info.getSubresourceInfo(mipLevel);
|
||||
regions.push_back({
|
||||
.bufferOffset = linearOffset,
|
||||
.bufferRowLength =
|
||||
mipLevel > 0 ? 0 : std::max(info.pitch >> mipLevel, 1u),
|
||||
.bufferOffset = regionInfo.linearOffset,
|
||||
.bufferRowLength = regionInfo.linearPitch,
|
||||
.imageSubresource =
|
||||
{
|
||||
.aspectMask = toAspect(kind),
|
||||
|
|
@ -582,18 +579,15 @@ struct CachedImage : Cache::Entry {
|
|||
},
|
||||
.imageExtent =
|
||||
{
|
||||
.width = std::max(image.getWidth() >> mipLevel, 1u),
|
||||
.height = std::max(image.getHeight() >> mipLevel, 1u),
|
||||
.depth = std::max(image.getDepth() >> mipLevel, 1u),
|
||||
.width = regionInfo.linearWidth,
|
||||
.height = regionInfo.linearHeight,
|
||||
.depth = regionInfo.linearDepth,
|
||||
},
|
||||
});
|
||||
|
||||
linearOffset += regionInfo.linearSize * image.getArrayLayers();
|
||||
}
|
||||
|
||||
auto linearSize = linearOffset;
|
||||
auto transferBuffer = vk::Buffer::Allocate(
|
||||
vk::getDeviceLocalMemory(), linearOffset,
|
||||
vk::getDeviceLocalMemory(), info.totalLinearSize,
|
||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
|
||||
|
||||
vkCmdCopyImageToBuffer(scheduler.getCommandBuffer(), image.getHandle(),
|
||||
|
|
@ -603,14 +597,11 @@ struct CachedImage : Cache::Entry {
|
|||
|
||||
auto &tiler = tag.getDevice()->tiler;
|
||||
|
||||
linearOffset = 0;
|
||||
for (unsigned mipLevel = 0; mipLevel < image.getMipLevels(); ++mipLevel) {
|
||||
auto &regionInfo = info.getSubresourceInfo(mipLevel);
|
||||
tiler.tile(scheduler, info, acquiredTileMode, acquiredDfmt,
|
||||
transferBuffer.getAddress() + linearOffset,
|
||||
linearSize - linearOffset, tiledBuffer.deviceAddress,
|
||||
tiledSize, mipLevel, 0, image.getArrayLayers());
|
||||
linearOffset += regionInfo.linearSize * image.getArrayLayers();
|
||||
tiler.tile(scheduler, info, acquiredTileMode,
|
||||
transferBuffer.getAddress(), info.totalLinearSize,
|
||||
tiledBuffer.deviceAddress, info.totalTiledSize, mipLevel, 0,
|
||||
image.getArrayLayers());
|
||||
}
|
||||
|
||||
scheduler.afterSubmit([transferBuffer = std::move(transferBuffer)] {});
|
||||
|
|
@ -1157,7 +1148,7 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
|
|||
VkBuffer sourceBuffer;
|
||||
|
||||
auto tiledBuffer =
|
||||
getBuffer(key.readAddress, surfaceInfo.totalSize, Access::Read);
|
||||
getBuffer(key.readAddress, surfaceInfo.totalTiledSize, Access::Read);
|
||||
|
||||
if (isLinear) {
|
||||
sourceBuffer = tiledBuffer.handle;
|
||||
|
|
@ -1165,7 +1156,7 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
|
|||
mipLevel < key.baseMipLevel + key.mipCount; ++mipLevel) {
|
||||
auto &info = surfaceInfo.getSubresourceInfo(mipLevel);
|
||||
regions.push_back({
|
||||
.bufferOffset = info.offset,
|
||||
.bufferOffset = info.tiledOffset,
|
||||
.bufferRowLength =
|
||||
mipLevel > 0 ? 0 : std::max(key.pitch >> mipLevel, 1u),
|
||||
.imageSubresource =
|
||||
|
|
@ -1186,15 +1177,13 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
|
|||
} else {
|
||||
auto &tiler = mParent->mDevice->tiler;
|
||||
|
||||
std::uint64_t linearOffset = 0;
|
||||
for (unsigned mipLevel = key.baseMipLevel;
|
||||
mipLevel < key.baseMipLevel + key.mipCount; ++mipLevel) {
|
||||
auto &info = surfaceInfo.getSubresourceInfo(mipLevel);
|
||||
|
||||
regions.push_back({
|
||||
.bufferOffset = linearOffset,
|
||||
.bufferRowLength =
|
||||
mipLevel > 0 ? 0 : std::max(key.pitch >> mipLevel, 1u),
|
||||
.bufferOffset = info.linearOffset,
|
||||
.bufferRowLength = info.linearPitch,
|
||||
.imageSubresource =
|
||||
{
|
||||
.aspectMask = toAspect(key.kind),
|
||||
|
|
@ -1204,37 +1193,29 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
|
|||
},
|
||||
.imageExtent =
|
||||
{
|
||||
.width = std::max(key.extent.width >> mipLevel, 1u),
|
||||
.height = std::max(key.extent.height >> mipLevel, 1u),
|
||||
.depth = std::max(key.extent.depth >> mipLevel, 1u),
|
||||
.width = info.linearWidth,
|
||||
.height = info.linearHeight,
|
||||
.depth = info.linearDepth,
|
||||
},
|
||||
});
|
||||
|
||||
linearOffset += info.linearSize * key.arrayLayerCount;
|
||||
}
|
||||
|
||||
auto detiledSize = linearOffset;
|
||||
|
||||
auto detiledBuffer =
|
||||
vk::Buffer::Allocate(vk::getDeviceLocalMemory(), detiledSize,
|
||||
VK_BUFFER_USAGE_2_TRANSFER_DST_BIT_KHR |
|
||||
VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT_KHR);
|
||||
auto detiledBuffer = vk::Buffer::Allocate(
|
||||
vk::getDeviceLocalMemory(), surfaceInfo.totalLinearSize,
|
||||
VK_BUFFER_USAGE_2_TRANSFER_DST_BIT_KHR |
|
||||
VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT_KHR);
|
||||
|
||||
sourceBuffer = detiledBuffer.getHandle();
|
||||
std::uint64_t dstAddress = detiledBuffer.getAddress();
|
||||
auto linearAddress = detiledBuffer.getAddress();
|
||||
|
||||
mScheduler->afterSubmit([detiledBuffer = std::move(detiledBuffer)] {});
|
||||
|
||||
for (unsigned mipLevel = key.baseMipLevel;
|
||||
mipLevel < key.baseMipLevel + key.mipCount; ++mipLevel) {
|
||||
auto &info = surfaceInfo.getSubresourceInfo(mipLevel);
|
||||
|
||||
tiler.detile(*mScheduler, surfaceInfo, key.tileMode, key.dfmt,
|
||||
tiledBuffer.deviceAddress, surfaceInfo.totalSize,
|
||||
dstAddress, detiledSize, mipLevel, 0, key.arrayLayerCount);
|
||||
|
||||
detiledSize -= info.linearSize * key.arrayLayerCount;
|
||||
dstAddress += info.linearSize * key.arrayLayerCount;
|
||||
tiler.detile(*mScheduler, surfaceInfo, key.tileMode,
|
||||
tiledBuffer.deviceAddress, surfaceInfo.totalTiledSize,
|
||||
linearAddress, surfaceInfo.totalLinearSize, mipLevel, 0,
|
||||
key.arrayLayerCount);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -115,27 +115,27 @@ struct TileMode {
|
|||
|
||||
constexpr TileMode &arrayMode(ArrayMode mode) {
|
||||
raw = (raw & ~0x0000003c) |
|
||||
(static_cast<std::uint32_t>(mode) << 2) & 0x0000003c;
|
||||
((static_cast<std::uint32_t>(mode) << 2) & 0x0000003c);
|
||||
return *this;
|
||||
}
|
||||
constexpr TileMode &pipeConfig(PipeConfig mode) {
|
||||
raw = (raw & ~0x000007c0) |
|
||||
(static_cast<std::uint32_t>(mode) << 6) & 0x000007c0;
|
||||
((static_cast<std::uint32_t>(mode) << 6) & 0x000007c0);
|
||||
return *this;
|
||||
}
|
||||
constexpr TileMode &tileSplit(TileSplit mode) {
|
||||
raw = (raw & ~0x00003800) |
|
||||
(static_cast<std::uint32_t>(mode) << 11) & 0x00003800;
|
||||
((static_cast<std::uint32_t>(mode) << 11) & 0x00003800);
|
||||
return *this;
|
||||
}
|
||||
constexpr TileMode &microTileMode(MicroTileMode mode) {
|
||||
raw = (raw & ~0x01c00000) |
|
||||
(static_cast<std::uint32_t>(mode) << 22) & 0x01c00000;
|
||||
((static_cast<std::uint32_t>(mode) << 22) & 0x01c00000);
|
||||
return *this;
|
||||
}
|
||||
constexpr TileMode &sampleSplit(SampleSplit mode) {
|
||||
raw = (raw & ~0x06000000) |
|
||||
(static_cast<std::uint32_t>(mode) << 25) & 0x06000000;
|
||||
((static_cast<std::uint32_t>(mode) << 25) & 0x06000000);
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
|
@ -166,17 +166,24 @@ struct SurfaceInfo {
|
|||
std::uint32_t height;
|
||||
std::uint32_t depth;
|
||||
std::uint32_t pitch;
|
||||
MacroTileMode macroTileMode;
|
||||
int arrayLayerCount;
|
||||
int numFragments;
|
||||
int bitsPerElement;
|
||||
std::uint64_t totalSize;
|
||||
std::uint64_t totalTiledSize;
|
||||
std::uint64_t totalLinearSize;
|
||||
|
||||
struct SubresourceInfo {
|
||||
std::uint32_t dataWidth;
|
||||
std::uint32_t dataHeight;
|
||||
std::uint32_t dataDepth;
|
||||
std::uint64_t offset;
|
||||
std::uint32_t tiledWidth;
|
||||
std::uint32_t tiledHeight;
|
||||
std::uint32_t tiledDepth;
|
||||
std::uint64_t tiledOffset;
|
||||
std::uint64_t tiledSize;
|
||||
std::uint32_t linearPitch;
|
||||
std::uint32_t linearWidth;
|
||||
std::uint32_t linearHeight;
|
||||
std::uint32_t linearDepth;
|
||||
std::uint64_t linearOffset;
|
||||
std::uint64_t linearSize;
|
||||
};
|
||||
|
||||
|
|
@ -524,5 +531,4 @@ SurfaceInfo computeSurfaceInfo(TileMode tileMode, gnm::TextureType type,
|
|||
std::uint32_t pitch, int baseArrayLayer,
|
||||
int arrayCount, int baseMipLevel, int mipCount,
|
||||
bool pow2pad);
|
||||
SurfaceInfo computeSurfaceInfo(const gnm::TBuffer &tbuffer, TileMode tileMode);
|
||||
} // namespace amdgpu
|
||||
|
|
|
|||
|
|
@ -11,15 +11,14 @@ struct GpuTiler {
|
|||
~GpuTiler();
|
||||
|
||||
void detile(Scheduler &scheduler, const amdgpu::SurfaceInfo &info,
|
||||
amdgpu::TileMode tileMode, gnm::DataFormat dfmt,
|
||||
std::uint64_t srcTiledAddress, std::uint64_t srcSize,
|
||||
std::uint64_t dstLinearAddress, std::uint64_t dstSize,
|
||||
int mipLevel, int baseArray, int arrayCount);
|
||||
amdgpu::TileMode tileMode, std::uint64_t srcTiledAddress,
|
||||
std::uint64_t srcSize, std::uint64_t dstLinearAddress,
|
||||
std::uint64_t dstSize, int mipLevel, int baseArray,
|
||||
int arrayCount);
|
||||
void tile(Scheduler &scheduler, const amdgpu::SurfaceInfo &info,
|
||||
amdgpu::TileMode tileMode, gnm::DataFormat dfmt,
|
||||
std::uint64_t srcLinearAddress, std::uint64_t srcSize,
|
||||
std::uint64_t dstTiledAddress, std::uint64_t dstSize, int mipLevel,
|
||||
int baseArray, int arrayCount);
|
||||
amdgpu::TileMode tileMode, std::uint64_t srcLinearAddress,
|
||||
std::uint64_t srcSize, std::uint64_t dstTiledAddress,
|
||||
std::uint64_t dstSize, int mipLevel, int baseArray, int arrayCount);
|
||||
|
||||
private:
|
||||
std::unique_ptr<Impl> mImpl;
|
||||
|
|
|
|||
|
|
@ -37,8 +37,8 @@ void main() {
|
|||
uint64_t linearByteOffset = computeLinearElementByteOffset(
|
||||
pos,
|
||||
0,
|
||||
config.dataSize.x,
|
||||
config.dataSize.x * config.dataSize.y,
|
||||
config.linearDataSize.x,
|
||||
config.linearDataSize.x * config.linearDataSize.y,
|
||||
config.bitsPerElement,
|
||||
1 << config.numFragments
|
||||
);
|
||||
|
|
@ -63,6 +63,9 @@ void main() {
|
|||
|
||||
switch (bpp) {
|
||||
case 1:
|
||||
// buffer_reference_uint8_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint8_t(config.srcAddress + tiledByteOffset).data;
|
||||
// break;
|
||||
|
||||
case 2:
|
||||
buffer_reference_uint16_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint16_t(config.srcAddress + tiledByteOffset).data;
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
#version 460
|
||||
|
||||
#define DEBUG
|
||||
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types : enable
|
||||
#extension GL_EXT_shader_atomic_int64 : enable
|
||||
|
|
@ -32,12 +34,12 @@ void main() {
|
|||
}
|
||||
|
||||
uint64_t tiledByteOffset = getTiledBitOffset2D(
|
||||
config.dfmt,
|
||||
config.tileMode,
|
||||
config.macroTileMode,
|
||||
config.dataSize,
|
||||
arraySlice,
|
||||
config.numFragments,
|
||||
config.bitsPerElement,
|
||||
pos,
|
||||
fragmentIndex
|
||||
) / 8;
|
||||
|
|
@ -47,8 +49,8 @@ void main() {
|
|||
uint64_t linearByteOffset = computeLinearElementByteOffset(
|
||||
pos,
|
||||
0,
|
||||
config.dataSize.x,
|
||||
config.dataSize.x * config.dataSize.y,
|
||||
config.linearDataSize.x,
|
||||
config.linearDataSize.x * config.linearDataSize.y,
|
||||
config.bitsPerElement,
|
||||
1 << config.numFragments
|
||||
);
|
||||
|
|
@ -57,6 +59,10 @@ void main() {
|
|||
|
||||
uint32_t bpp = (config.bitsPerElement + 7) / 8;
|
||||
|
||||
if (bpp == 1 && (linearByteOffset & 1) != 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
if (config.srcAddress + tiledByteOffset + bpp > config.srcEndAddress) {
|
||||
debugPrintfEXT("detiler2d: out of src buffer %d x %d x %d", pos.x, pos.y, pos.z);
|
||||
|
|
@ -71,9 +77,8 @@ void main() {
|
|||
|
||||
switch (bpp) {
|
||||
case 1:
|
||||
buffer_reference_uint8_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint8_t(config.srcAddress + tiledByteOffset).data;
|
||||
break;
|
||||
|
||||
// buffer_reference_uint8_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint8_t(config.srcAddress + tiledByteOffset).data;
|
||||
// break;
|
||||
case 2:
|
||||
buffer_reference_uint16_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint16_t(config.srcAddress + tiledByteOffset).data;
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -36,19 +36,22 @@ void main() {
|
|||
uint64_t linearByteOffset = computeLinearElementByteOffset(
|
||||
pos,
|
||||
0,
|
||||
config.dataSize.x,
|
||||
config.dataSize.x * config.dataSize.y,
|
||||
config.linearDataSize.x,
|
||||
config.linearDataSize.x * config.linearDataSize.y,
|
||||
config.bitsPerElement,
|
||||
1 << config.numFragments
|
||||
);
|
||||
|
||||
linearByteOffset += linearSliceOffset;
|
||||
|
||||
switch ((config.bitsPerElement + 7) / 8) {
|
||||
case 1:
|
||||
buffer_reference_uint8_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint8_t(config.srcAddress + tiledByteOffset).data;
|
||||
break;
|
||||
uint32_t bpp = (config.bitsPerElement + 7) / 8;
|
||||
|
||||
if (bpp == 1 && (linearByteOffset & 1) != 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
switch (bpp) {
|
||||
case 1:
|
||||
case 2:
|
||||
buffer_reference_uint16_t(config.dstAddress + linearByteOffset).data = buffer_reference_uint16_t(config.srcAddress + tiledByteOffset).data;
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
|
||||
#define FOR_ALL_BASE_TYPES(OP) \
|
||||
OP(int8_t) \
|
||||
OP(uint8_t) \
|
||||
OP(int16_t) \
|
||||
OP(uint16_t) \
|
||||
OP(float16_t) \
|
||||
|
|
@ -785,11 +783,8 @@ uint64_t getTiledBitOffset1D(uint32_t tileMode, uvec3 pos, uvec2 dataSize, uint3
|
|||
}
|
||||
|
||||
|
||||
uint64_t getTiledBitOffset2D(uint32_t dfmt, uint32_t tileMode, uint32_t macroTileMode,
|
||||
uvec2 dataSize, int arraySlice, uint32_t numFragments, u32vec3 pos, int fragmentIndex) {
|
||||
uint32_t bitsPerFragment = getBitsPerElement(dfmt);
|
||||
|
||||
bool isBlockCompressed = getTexelsPerElement(dfmt) > 1;
|
||||
uint64_t getTiledBitOffset2D(uint32_t tileMode, uint32_t macroTileMode,
|
||||
uvec2 dataSize, int arraySlice, uint32_t numFragments, uint32_t bitsPerElement, u32vec3 pos, int fragmentIndex) {
|
||||
uint32_t tileSwizzleMask = 0;
|
||||
uint32_t numFragmentsPerPixel = 1 << numFragments;
|
||||
uint32_t arrayMode = tileMode_getArrayMode(tileMode);
|
||||
|
|
@ -820,7 +815,6 @@ uint64_t getTiledBitOffset2D(uint32_t dfmt, uint32_t tileMode, uint32_t macroTil
|
|||
break;
|
||||
}
|
||||
|
||||
uint32_t bitsPerElement = bitsPerFragment;
|
||||
uint32_t paddedWidth = dataSize.x;
|
||||
uint32_t paddedHeight = dataSize.y;
|
||||
|
||||
|
|
@ -849,7 +843,8 @@ uint64_t getTiledBitOffset2D(uint32_t dfmt, uint32_t tileMode, uint32_t macroTil
|
|||
|
||||
uint32_t tileSplitBytes = min(kDramRowSize, tileSplitC);
|
||||
|
||||
uint32_t numPipes = getPipeCount(tileMode_getPipeConfig(tileMode));
|
||||
uint32_t pipeConfig = tileMode_getPipeConfig(tileMode);
|
||||
uint32_t numPipes = getPipeCount(pipeConfig);
|
||||
uint32_t pipeInterleaveBits = findLSB(kPipeInterleaveBytes);
|
||||
uint32_t pipeInterleaveMask = (1 << pipeInterleaveBits) - 1;
|
||||
uint32_t pipeBits = findLSB(numPipes);
|
||||
|
|
@ -873,7 +868,7 @@ uint64_t getTiledBitOffset2D(uint32_t dfmt, uint32_t tileMode, uint32_t macroTil
|
|||
xh %= macroTileWidth;
|
||||
yh %= macroTileHeight;
|
||||
}
|
||||
uint64_t pipe = getPipeIndex(xh, yh, tileMode_getPipeConfig(tileMode));
|
||||
uint64_t pipe = getPipeIndex(xh, yh, pipeConfig);
|
||||
uint64_t bank =
|
||||
getBankIndex(xh, yh, bankWidth, bankHeight, numBanks, numPipes);
|
||||
|
||||
|
|
@ -989,16 +984,15 @@ uint64_t getTiledBitOffset2D(uint32_t dfmt, uint32_t tileMode, uint32_t macroTil
|
|||
return (finalByteOffset << 3) | bitOffset;
|
||||
}
|
||||
|
||||
|
||||
layout(push_constant) uniform Config {
|
||||
uint64_t srcAddress;
|
||||
uint64_t srcEndAddress;
|
||||
uint64_t dstAddress;
|
||||
uint64_t dstEndAddress;
|
||||
uvec2 dataSize;
|
||||
uvec2 linearDataSize;
|
||||
uint32_t tileMode;
|
||||
uint32_t macroTileMode;
|
||||
uint32_t dfmt;
|
||||
uint32_t numFragments;
|
||||
uint32_t bitsPerElement;
|
||||
uint32_t tiledSurfaceSize;
|
||||
|
|
|
|||
|
|
@ -40,8 +40,8 @@ void main() {
|
|||
uint64_t linearByteOffset = computeLinearElementByteOffset(
|
||||
pos,
|
||||
0,
|
||||
config.dataSize.x,
|
||||
config.dataSize.x * config.dataSize.y,
|
||||
config.linearDataSize.x,
|
||||
config.linearDataSize.x * config.linearDataSize.y,
|
||||
config.bitsPerElement,
|
||||
1 << config.numFragments
|
||||
);
|
||||
|
|
@ -69,6 +69,8 @@ void main() {
|
|||
|
||||
switch (bpp) {
|
||||
case 1:
|
||||
// buffer_reference_uint8_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint8_t(config.srcAddress + linearByteOffset).data;
|
||||
// break;
|
||||
case 2:
|
||||
buffer_reference_uint16_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint16_t(config.srcAddress + linearByteOffset).data;
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#version 460
|
||||
#define DEBUG
|
||||
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types : enable
|
||||
|
|
@ -31,12 +32,12 @@ void main() {
|
|||
}
|
||||
|
||||
uint64_t tiledByteOffset = getTiledBitOffset2D(
|
||||
config.dfmt,
|
||||
config.tileMode,
|
||||
config.macroTileMode,
|
||||
config.dataSize,
|
||||
arraySlice,
|
||||
config.numFragments,
|
||||
config.bitsPerElement,
|
||||
pos,
|
||||
fragmentIndex
|
||||
) / 8;
|
||||
|
|
@ -46,8 +47,8 @@ void main() {
|
|||
uint64_t linearByteOffset = computeLinearElementByteOffset(
|
||||
pos,
|
||||
0,
|
||||
config.dataSize.x,
|
||||
config.dataSize.x * config.dataSize.y,
|
||||
config.linearDataSize.x,
|
||||
config.linearDataSize.x * config.linearDataSize.y,
|
||||
config.bitsPerElement,
|
||||
1 << config.numFragments
|
||||
);
|
||||
|
|
@ -55,24 +56,27 @@ void main() {
|
|||
linearByteOffset += linearSliceOffset;
|
||||
|
||||
uint32_t bpp = (config.bitsPerElement + 7) / 8;
|
||||
if (bpp == 1 && (linearByteOffset & 1) != 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
if (config.srcAddress + linearByteOffset + bpp > config.srcEndAddress) {
|
||||
debugPrintfEXT("tiler2d: out of src buffer %d x %d x %d", pos.x, pos.y, pos.z);
|
||||
debugPrintfEXT("tiler2d: out of src buffer %d x %d x %d, src offset: %lu, src size: %lu", pos.x, pos.y, pos.z,
|
||||
linearByteOffset, config.srcEndAddress - config.srcAddress);
|
||||
return;
|
||||
}
|
||||
|
||||
if (config.dstAddress + tiledByteOffset + bpp > config.dstEndAddress) {
|
||||
debugPrintfEXT("tiler2d: out of dst buffer %d x %d x %d", pos.x, pos.y, pos.z);
|
||||
debugPrintfEXT("tiler2d: out of dst buffer %d x %d x %d, offset %lx, size %lx", pos.x, pos.y, pos.z, tiledByteOffset, config.dstEndAddress - config.dstAddress);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
switch (bpp) {
|
||||
case 1:
|
||||
buffer_reference_uint8_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint8_t(config.srcAddress + linearByteOffset).data;
|
||||
break;
|
||||
|
||||
// buffer_reference_uint8_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint8_t(config.srcAddress + linearByteOffset).data;
|
||||
// break;
|
||||
case 2:
|
||||
buffer_reference_uint16_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint16_t(config.srcAddress + linearByteOffset).data;
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -36,19 +36,21 @@ void main() {
|
|||
uint64_t linearByteOffset = computeLinearElementByteOffset(
|
||||
pos,
|
||||
0,
|
||||
config.dataSize.x,
|
||||
config.dataSize.x * config.dataSize.y,
|
||||
config.linearDataSize.x,
|
||||
config.linearDataSize.x * config.linearDataSize.y,
|
||||
config.bitsPerElement,
|
||||
1 << config.numFragments
|
||||
);
|
||||
|
||||
linearByteOffset += linearSliceOffset;
|
||||
|
||||
switch ((config.bitsPerElement + 7) / 8) {
|
||||
case 1:
|
||||
buffer_reference_uint8_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint8_t(config.srcAddress + linearByteOffset).data;
|
||||
break;
|
||||
uint32_t bpp = (config.bitsPerElement + 7) / 8;
|
||||
if (bpp == 1 && (linearByteOffset & 1) != 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
switch (bpp) {
|
||||
case 1:
|
||||
case 2:
|
||||
buffer_reference_uint16_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint16_t(config.srcAddress + linearByteOffset).data;
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -5,9 +5,8 @@
|
|||
|
||||
using namespace amdgpu;
|
||||
|
||||
// FIXME: should be properly implemented
|
||||
static SurfaceInfo
|
||||
computeTexture2dInfo(ArrayMode arrayMode, gnm::TextureType type,
|
||||
computeTexture2dInfo(TileMode tileMode, gnm::TextureType type,
|
||||
gnm::DataFormat dfmt, std::uint32_t width,
|
||||
std::uint32_t height, std::uint32_t depth,
|
||||
std::uint32_t pitch, int baseArrayLayer, int arrayCount,
|
||||
|
|
@ -32,7 +31,7 @@ computeTexture2dInfo(ArrayMode arrayMode, gnm::TextureType type,
|
|||
auto numFragmentsPerPixel = 1 << numFragments;
|
||||
auto isBlockCompressed = getTexelsPerElement(dfmt) > 1;
|
||||
|
||||
auto bitsPerElement = bitsPerFragment;
|
||||
std::uint32_t bitsPerElement = bitsPerFragment;
|
||||
depth = isVolume ? depth : 1;
|
||||
|
||||
if (isBlockCompressed) {
|
||||
|
|
@ -60,6 +59,10 @@ computeTexture2dInfo(ArrayMode arrayMode, gnm::TextureType type,
|
|||
|
||||
std::uint64_t surfaceOffset = 0;
|
||||
std::uint64_t surfaceSize = 0;
|
||||
std::uint64_t linearOffset = 0;
|
||||
|
||||
auto macroTileMode = getDefaultMacroTileModes()[computeMacroTileIndex(
|
||||
tileMode, bitsPerElement, 1 << numFragments)];
|
||||
|
||||
SurfaceInfo result;
|
||||
result.width = width;
|
||||
|
|
@ -69,8 +72,10 @@ computeTexture2dInfo(ArrayMode arrayMode, gnm::TextureType type,
|
|||
result.numFragments = numFragments;
|
||||
result.bitsPerElement = bitsPerElement;
|
||||
result.arrayLayerCount = arraySliceCount;
|
||||
result.macroTileMode = macroTileMode;
|
||||
|
||||
auto thickness = getMicroTileThickness(arrayMode);
|
||||
auto arrayMode = tileMode.arrayMode();
|
||||
auto numPipes = getPipeCount(tileMode.pipeConfig());
|
||||
|
||||
for (int mipLevel = 0; mipLevel < baseMipLevel + mipCount; mipLevel++) {
|
||||
std::uint32_t elemWidth = std::max<std::uint64_t>(width >> mipLevel, 1);
|
||||
|
|
@ -116,55 +121,91 @@ computeTexture2dInfo(ArrayMode arrayMode, gnm::TextureType type,
|
|||
linearPitch = linearWidth;
|
||||
}
|
||||
|
||||
std::uint32_t paddedPitch =
|
||||
(linearPitch + kMicroTileWidth - 1) & ~(kMicroTileWidth - 1);
|
||||
std::uint32_t paddedHeight =
|
||||
(linearHeight + kMicroTileHeight - 1) & ~(kMicroTileHeight - 1);
|
||||
std::uint32_t paddedDepth = linearDepth;
|
||||
auto thickness = getMicroTileThickness(arrayMode);
|
||||
|
||||
if (!isCubemap || (mipLevel > 0 && linearDepth > 1)) {
|
||||
if (isCubemap) {
|
||||
linearDepth = std::bit_ceil(linearDepth);
|
||||
}
|
||||
uint32_t numBanks = 2 << macroTileMode.numBanks();
|
||||
uint32_t macroAspect = 1 << macroTileMode.macroTileAspect();
|
||||
uint32_t tileBytes1x =
|
||||
(thickness * bitsPerElement * kMicroTileWidth * kMicroTileHeight + 7) /
|
||||
8;
|
||||
auto microTileMode = tileMode.microTileMode();
|
||||
uint32_t tileSplit =
|
||||
(microTileMode == kMicroTileModeDepth)
|
||||
? (64 << tileMode.sampleSplit())
|
||||
: std::max(256U, (1 << tileMode.sampleSplit()) * tileBytes1x);
|
||||
uint32_t tileSplitC = std::min(kDramRowSize, tileSplit);
|
||||
uint32_t bankWidth = 1 << macroTileMode.bankWidth();
|
||||
uint32_t bankHeight = 1 << macroTileMode.bankHeight();
|
||||
|
||||
paddedDepth = (linearDepth + thickness - 1) & ~(thickness - 1);
|
||||
uint32_t tileSize = std::min(
|
||||
tileSplitC, (thickness * bitsPerElement * numFragmentsPerPixel *
|
||||
kMicroTileWidth * kMicroTileHeight +
|
||||
7) /
|
||||
8);
|
||||
uint32_t bankHeightAlign =
|
||||
std::max(1U, kPipeInterleaveBytes / (tileSize * bankWidth));
|
||||
|
||||
bankHeight = (bankHeight + bankHeightAlign - 1) & ~(bankHeightAlign - 1);
|
||||
|
||||
if (numFragmentsPerPixel == 1) {
|
||||
uint32_t macroAspectAlign = std::max(
|
||||
1U, kPipeInterleaveBytes / (tileSize * numPipes * bankWidth));
|
||||
macroAspect =
|
||||
(macroAspect + macroAspectAlign - 1) & ~(macroAspectAlign - 1);
|
||||
}
|
||||
|
||||
std::uint32_t tempPitch = paddedPitch;
|
||||
std::uint64_t logicalSliceSizeBytes = std::uint64_t(tempPitch) *
|
||||
paddedHeight * bitsPerElement *
|
||||
numFragmentsPerPixel;
|
||||
auto depthAlign = thickness;
|
||||
|
||||
// FIXME: rotate tile mode for mipLevel > 0
|
||||
|
||||
uint32_t outPitch = linearPitch;
|
||||
uint32_t outHeight = linearHeight;
|
||||
uint32_t outDepth = linearDepth;
|
||||
|
||||
uint32_t macroTileWidth =
|
||||
kMicroTileWidth * bankWidth * numPipes * macroAspect;
|
||||
uint32_t macroTileHeight =
|
||||
kMicroTileHeight * bankHeight * numBanks / macroAspect;
|
||||
|
||||
uint32_t heightAlign = macroTileHeight;
|
||||
auto pitchAlign = macroTileWidth;
|
||||
|
||||
outPitch = (outPitch + pitchAlign - 1) & ~(pitchAlign - 1);
|
||||
outDepth = (outDepth + depthAlign - 1) & ~(depthAlign - 1);
|
||||
outHeight = (outHeight + heightAlign - 1) & ~(heightAlign - 1);
|
||||
|
||||
std::uint64_t logicalSliceSizeBytes = std::uint64_t(outPitch) * outHeight *
|
||||
bitsPerElement * numFragmentsPerPixel;
|
||||
logicalSliceSizeBytes = (logicalSliceSizeBytes + 7) / 8;
|
||||
|
||||
uint64_t physicalSliceSizeBytes = logicalSliceSizeBytes * thickness;
|
||||
while ((physicalSliceSizeBytes % kPipeInterleaveBytes) != 0) {
|
||||
tempPitch += kMicroTileWidth;
|
||||
logicalSliceSizeBytes = std::uint64_t(tempPitch) * paddedHeight *
|
||||
bitsPerElement * numFragmentsPerPixel;
|
||||
logicalSliceSizeBytes = (logicalSliceSizeBytes + 7) / 8;
|
||||
physicalSliceSizeBytes = logicalSliceSizeBytes * thickness;
|
||||
}
|
||||
surfaceSize = static_cast<uint64_t>(outPitch) * outHeight *
|
||||
std::bit_ceil(bitsPerElement) * numFragmentsPerPixel;
|
||||
surfaceSize = (surfaceSize + 7) / 8;
|
||||
|
||||
surfaceSize = logicalSliceSizeBytes * paddedDepth;
|
||||
auto linearSize =
|
||||
linearDepth *
|
||||
(linearPitch * linearHeight * bitsPerElement * numFragmentsPerPixel +
|
||||
7) /
|
||||
8;
|
||||
auto linearSize = uint64_t(linearPitch) * linearHeight * bitsPerElement *
|
||||
numFragmentsPerPixel;
|
||||
linearSize = linearDepth * ((linearSize + 7) / 8);
|
||||
|
||||
result.setSubresourceInfo(mipLevel, {
|
||||
.dataWidth = linearPitch,
|
||||
.dataHeight = linearHeight,
|
||||
.dataDepth = linearDepth,
|
||||
.offset = surfaceOffset,
|
||||
.tiledWidth = outPitch,
|
||||
.tiledHeight = outHeight,
|
||||
.tiledDepth = outDepth,
|
||||
.tiledOffset = surfaceOffset,
|
||||
.tiledSize = surfaceSize,
|
||||
.linearPitch = linearPitch,
|
||||
.linearWidth = linearWidth,
|
||||
.linearHeight = linearHeight,
|
||||
.linearDepth = linearDepth,
|
||||
.linearOffset = linearOffset,
|
||||
.linearSize = linearSize,
|
||||
});
|
||||
|
||||
linearOffset += arraySliceCount * linearSize;
|
||||
surfaceOffset += arraySliceCount * surfaceSize;
|
||||
}
|
||||
|
||||
result.totalSize = surfaceOffset;
|
||||
result.totalTiledSize = surfaceOffset;
|
||||
result.totalLinearSize = linearOffset;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -222,6 +263,7 @@ computeTexture1dInfo(ArrayMode arrayMode, gnm::TextureType type,
|
|||
|
||||
std::uint64_t surfaceOffset = 0;
|
||||
std::uint64_t surfaceSize = 0;
|
||||
std::uint64_t linearOffset = 0;
|
||||
|
||||
SurfaceInfo result;
|
||||
result.width = width;
|
||||
|
|
@ -308,25 +350,30 @@ computeTexture1dInfo(ArrayMode arrayMode, gnm::TextureType type,
|
|||
}
|
||||
|
||||
surfaceSize = logicalSliceSizeBytes * paddedDepth;
|
||||
auto linearSize =
|
||||
linearDepth *
|
||||
(linearPitch * linearHeight * bitsPerElement * numFragmentsPerPixel +
|
||||
7) /
|
||||
8;
|
||||
auto linearSize = uint64_t(linearPitch) * linearHeight * bitsPerElement *
|
||||
numFragmentsPerPixel;
|
||||
linearSize = linearDepth * ((linearSize + 7) / 8);
|
||||
|
||||
result.setSubresourceInfo(mipLevel, {
|
||||
.dataWidth = linearPitch,
|
||||
.dataHeight = linearHeight,
|
||||
.dataDepth = linearDepth,
|
||||
.offset = surfaceOffset,
|
||||
.tiledWidth = linearPitch,
|
||||
.tiledHeight = linearHeight,
|
||||
.tiledDepth = linearDepth,
|
||||
.tiledOffset = surfaceOffset,
|
||||
.tiledSize = surfaceSize,
|
||||
.linearPitch = linearPitch,
|
||||
.linearWidth = linearWidth,
|
||||
.linearHeight = linearHeight,
|
||||
.linearDepth = linearDepth,
|
||||
.linearOffset = linearOffset,
|
||||
.linearSize = linearSize,
|
||||
});
|
||||
|
||||
surfaceOffset += arraySliceCount * surfaceSize;
|
||||
linearOffset += arraySliceCount * linearSize;
|
||||
}
|
||||
|
||||
result.totalSize = surfaceOffset;
|
||||
result.totalTiledSize = surfaceOffset;
|
||||
result.totalLinearSize = linearOffset;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -383,6 +430,7 @@ static SurfaceInfo computeTextureLinearInfo(
|
|||
|
||||
std::uint64_t surfaceOffset = 0;
|
||||
std::uint64_t surfaceSize = 0;
|
||||
std::uint64_t linearOffset = 0;
|
||||
|
||||
SurfaceInfo result;
|
||||
result.width = width;
|
||||
|
|
@ -437,20 +485,25 @@ static SurfaceInfo computeTextureLinearInfo(
|
|||
linearPitch = linearWidth;
|
||||
}
|
||||
|
||||
if (arrayMode == kArrayModeLinearGeneral) {
|
||||
surfaceSize = (static_cast<uint64_t>(linearPitch) *
|
||||
(linearHeight)*bitsPerElement * numFragmentsPerPixel +
|
||||
7) /
|
||||
8;
|
||||
surfaceSize *= linearDepth;
|
||||
auto linearSize = static_cast<uint64_t>(linearPitch) *
|
||||
(linearHeight)*bitsPerElement * numFragmentsPerPixel;
|
||||
|
||||
linearSize = linearDepth * ((linearSize + 7) / 8);
|
||||
|
||||
if (arrayMode == kArrayModeLinearGeneral) {
|
||||
surfaceSize = linearSize;
|
||||
result.setSubresourceInfo(mipLevel, {
|
||||
.dataWidth = linearPitch,
|
||||
.dataHeight = linearHeight,
|
||||
.dataDepth = linearDepth,
|
||||
.offset = surfaceOffset,
|
||||
.tiledWidth = linearPitch,
|
||||
.tiledHeight = linearHeight,
|
||||
.tiledDepth = linearDepth,
|
||||
.tiledOffset = surfaceOffset,
|
||||
.tiledSize = surfaceSize,
|
||||
.linearSize = surfaceSize,
|
||||
.linearPitch = linearPitch,
|
||||
.linearWidth = linearWidth,
|
||||
.linearHeight = linearHeight,
|
||||
.linearDepth = linearDepth,
|
||||
.linearOffset = linearOffset,
|
||||
.linearSize = linearSize,
|
||||
});
|
||||
} else {
|
||||
if (mipLevel > 0 && pitch > 0) {
|
||||
|
|
@ -487,19 +540,26 @@ static SurfaceInfo computeTextureLinearInfo(
|
|||
surfaceSize = (pixelsPerSlice * bitsPerElement + 7) / 8 * paddedDepth;
|
||||
|
||||
result.setSubresourceInfo(mipLevel, {
|
||||
.dataWidth = paddedPitch,
|
||||
.dataHeight = paddedHeight,
|
||||
.dataDepth = paddedDepth,
|
||||
.offset = surfaceOffset,
|
||||
.tiledWidth = paddedPitch,
|
||||
.tiledHeight = paddedHeight,
|
||||
.tiledDepth = paddedDepth,
|
||||
.tiledOffset = surfaceOffset,
|
||||
.tiledSize = surfaceSize,
|
||||
.linearSize = surfaceSize,
|
||||
.linearPitch = linearPitch,
|
||||
.linearWidth = linearWidth,
|
||||
.linearHeight = linearHeight,
|
||||
.linearDepth = linearDepth,
|
||||
.linearOffset = linearOffset,
|
||||
.linearSize = linearSize,
|
||||
});
|
||||
}
|
||||
|
||||
surfaceOffset += arraySliceCount * surfaceSize;
|
||||
surfaceOffset += arraySliceCount * linearSize;
|
||||
}
|
||||
|
||||
result.totalSize = surfaceOffset;
|
||||
result.totalTiledSize = surfaceOffset;
|
||||
result.totalLinearSize = linearOffset;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -533,20 +593,10 @@ SurfaceInfo amdgpu::computeSurfaceInfo(
|
|||
case kArrayMode2dTiledThickPrt:
|
||||
case kArrayMode3dTiledThinPrt:
|
||||
case kArrayMode3dTiledThickPrt:
|
||||
return computeTexture2dInfo(tileMode.arrayMode(), type, dfmt, width, height,
|
||||
depth, pitch, baseArrayLayer, arrayCount,
|
||||
baseMipLevel, mipCount, pow2pad);
|
||||
return computeTexture2dInfo(tileMode, type, dfmt, width, height, depth,
|
||||
pitch, baseArrayLayer, arrayCount, baseMipLevel,
|
||||
mipCount, pow2pad);
|
||||
}
|
||||
|
||||
std::abort();
|
||||
}
|
||||
|
||||
// Convenience overload: unpacks the fields of a texture descriptor and
// forwards them to the explicit-parameter computeSurfaceInfo overload.
//
// tbuffer  - descriptor whose type, format, extents and array/mip ranges are
//            read.
// tileMode - tile mode passed through unchanged.
//
// Returns whatever the explicit-parameter overload computes.
SurfaceInfo amdgpu::computeSurfaceInfo(const gnm::TBuffer &tbuffer,
                                       TileMode tileMode) {
  // Every extent is forwarded as the stored field value plus one.
  const auto width = tbuffer.width + 1;
  const auto height = tbuffer.height + 1;
  const auto depth = tbuffer.depth + 1;
  const auto pitch = tbuffer.pitch + 1;

  // Counts are derived from the [base, last] pairs, both ends included.
  const auto arrayCount = tbuffer.last_array - tbuffer.base_array + 1;
  const auto mipCount = tbuffer.last_level - tbuffer.base_level + 1;

  const bool padToPow2 = tbuffer.pow2pad != 0;

  return computeSurfaceInfo(tileMode, tbuffer.type, tbuffer.dfmt, width,
                            height, depth, pitch, tbuffer.base_array,
                            arrayCount, tbuffer.base_level, mipCount,
                            padToPow2);
}
|
||||
|
|
|
|||
|
|
@ -21,9 +21,10 @@ struct Config {
|
|||
uint64_t dstEndAddress;
|
||||
uint32_t dataWidth;
|
||||
uint32_t dataHeight;
|
||||
uint32_t linearDataWidth;
|
||||
uint32_t linearDataHeight;
|
||||
uint32_t tileMode;
|
||||
uint32_t macroTileMode;
|
||||
uint32_t dfmt;
|
||||
uint32_t numFragments;
|
||||
uint32_t bitsPerElement;
|
||||
uint32_t tiledSurfaceSize;
|
||||
|
|
@ -66,7 +67,7 @@ struct TilerShader {
|
|||
struct amdgpu::GpuTiler::Impl {
|
||||
TilerShader detilerLinear{spirv_detilerLinear_comp};
|
||||
TilerShader detiler1d{spirv_detiler1d_comp};
|
||||
TilerShader detiler2d{spirv_detilerLinear_comp};
|
||||
TilerShader detiler2d{spirv_detiler2d_comp};
|
||||
TilerShader tilerLinear{spirv_tiler2d_comp};
|
||||
TilerShader tiler1d{spirv_tiler1d_comp};
|
||||
TilerShader tiler2d{spirv_tiler2d_comp};
|
||||
|
|
@ -98,29 +99,28 @@ struct amdgpu::GpuTiler::Impl {
|
|||
// Constructs the tiler and allocates its private Impl state.
amdgpu::GpuTiler::GpuTiler() : mImpl(std::make_unique<Impl>()) {}
|
||||
// Out-of-line defaulted destructor.
// NOTE(review): presumably defined in this file because Impl is only a
// complete type here, which mImpl's destruction needs - confirm against the
// header.
amdgpu::GpuTiler::~GpuTiler() = default;
|
||||
|
||||
void amdgpu::GpuTiler::detile(Scheduler &scheduler,
|
||||
const amdgpu::SurfaceInfo &info,
|
||||
amdgpu::TileMode tileMode, gnm::DataFormat dfmt,
|
||||
std::uint64_t srcTiledAddress,
|
||||
std::uint64_t srcSize,
|
||||
std::uint64_t dstLinearAddress,
|
||||
std::uint64_t dstSize, int mipLevel,
|
||||
int baseArray, int arrayCount) {
|
||||
void amdgpu::GpuTiler::detile(
|
||||
Scheduler &scheduler, const amdgpu::SurfaceInfo &info,
|
||||
amdgpu::TileMode tileMode, std::uint64_t srcTiledAddress,
|
||||
std::uint64_t srcSize, std::uint64_t dstLinearAddress,
|
||||
std::uint64_t dstSize, int mipLevel, int baseArray, int arrayCount) {
|
||||
auto commandBuffer = scheduler.getCommandBuffer();
|
||||
|
||||
Config config{};
|
||||
auto &subresource = info.getSubresourceInfo(mipLevel);
|
||||
config.srcAddress = srcTiledAddress + subresource.offset;
|
||||
config.srcAddress = srcTiledAddress + subresource.tiledOffset +
|
||||
baseArray * subresource.tiledSize;
|
||||
config.srcEndAddress = srcTiledAddress + srcSize;
|
||||
config.dstAddress = dstLinearAddress;
|
||||
config.dstAddress = dstLinearAddress + subresource.linearOffset +
|
||||
baseArray * subresource.linearSize;
|
||||
config.dstEndAddress = dstLinearAddress + dstSize;
|
||||
config.dataWidth = subresource.dataWidth;
|
||||
config.dataHeight = subresource.dataHeight;
|
||||
config.dataWidth = subresource.tiledWidth;
|
||||
config.dataHeight = subresource.tiledHeight;
|
||||
config.tileMode = tileMode.raw;
|
||||
config.dfmt = dfmt;
|
||||
config.macroTileMode = info.macroTileMode.raw;
|
||||
config.numFragments = info.numFragments;
|
||||
config.bitsPerElement = info.bitsPerElement;
|
||||
uint32_t groupCountZ = subresource.dataDepth;
|
||||
uint32_t groupCountZ = subresource.tiledDepth;
|
||||
|
||||
if (arrayCount > 1) {
|
||||
config.tiledSurfaceSize = subresource.tiledSize;
|
||||
|
|
@ -131,6 +131,9 @@ void amdgpu::GpuTiler::detile(Scheduler &scheduler,
|
|||
config.linearSurfaceSize = 0;
|
||||
}
|
||||
|
||||
config.linearDataWidth = subresource.linearPitch;
|
||||
config.linearDataHeight = subresource.linearHeight;
|
||||
|
||||
VkShaderStageFlagBits stages[]{VK_SHADER_STAGE_COMPUTE_BIT};
|
||||
|
||||
switch (tileMode.arrayMode()) {
|
||||
|
|
@ -157,46 +160,39 @@ void amdgpu::GpuTiler::detile(Scheduler &scheduler,
|
|||
case amdgpu::kArrayMode3dTiledThick:
|
||||
case amdgpu::kArrayMode3dTiledXThick:
|
||||
case amdgpu::kArrayMode3dTiledThickPrt:
|
||||
config.macroTileMode =
|
||||
getDefaultMacroTileModes()[computeMacroTileIndex(
|
||||
tileMode, info.bitsPerElement,
|
||||
1 << info.numFragments)]
|
||||
.raw;
|
||||
|
||||
vk::CmdBindShadersEXT(commandBuffer, 1, stages, &mImpl->detiler1d.shader);
|
||||
vk::CmdBindShadersEXT(commandBuffer, 1, stages, &mImpl->detiler2d.shader);
|
||||
break;
|
||||
}
|
||||
|
||||
vkCmdPushConstants(commandBuffer, mImpl->pipelineLayout,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(config), &config);
|
||||
vkCmdDispatch(commandBuffer, subresource.dataWidth, subresource.dataHeight,
|
||||
groupCountZ);
|
||||
vkCmdDispatch(commandBuffer, subresource.linearWidth,
|
||||
subresource.linearHeight, groupCountZ);
|
||||
}
|
||||
|
||||
void amdgpu::GpuTiler::tile(Scheduler &scheduler,
|
||||
const amdgpu::SurfaceInfo &info,
|
||||
amdgpu::TileMode tileMode, gnm::DataFormat dfmt,
|
||||
std::uint64_t srcLinearAddress,
|
||||
std::uint64_t srcSize,
|
||||
std::uint64_t dstTiledAddress,
|
||||
std::uint64_t dstSize, int mipLevel, int baseArray,
|
||||
int arrayCount) {
|
||||
void amdgpu::GpuTiler::tile(
|
||||
Scheduler &scheduler, const amdgpu::SurfaceInfo &info,
|
||||
amdgpu::TileMode tileMode, std::uint64_t srcLinearAddress,
|
||||
std::uint64_t srcSize, std::uint64_t dstTiledAddress, std::uint64_t dstSize,
|
||||
int mipLevel, int baseArray, int arrayCount) {
|
||||
auto commandBuffer = scheduler.getCommandBuffer();
|
||||
|
||||
Config config{};
|
||||
|
||||
auto &subresource = info.getSubresourceInfo(mipLevel);
|
||||
config.srcAddress = srcLinearAddress;
|
||||
config.srcAddress = srcLinearAddress + subresource.linearOffset +
|
||||
baseArray * subresource.linearSize;
|
||||
config.srcEndAddress = srcLinearAddress + srcSize;
|
||||
config.dstAddress = dstTiledAddress + subresource.offset;
|
||||
config.dstAddress = dstTiledAddress + subresource.tiledOffset +
|
||||
baseArray * subresource.tiledSize;
|
||||
config.dstEndAddress = dstTiledAddress + dstSize;
|
||||
config.dataWidth = subresource.dataWidth;
|
||||
config.dataHeight = subresource.dataHeight;
|
||||
config.dataWidth = subresource.tiledWidth;
|
||||
config.dataHeight = subresource.tiledHeight;
|
||||
config.tileMode = tileMode.raw;
|
||||
config.dfmt = dfmt;
|
||||
config.macroTileMode = info.macroTileMode.raw;
|
||||
config.numFragments = info.numFragments;
|
||||
config.bitsPerElement = info.bitsPerElement;
|
||||
uint32_t groupCountZ = subresource.dataDepth;
|
||||
uint32_t groupCountZ = subresource.tiledDepth;
|
||||
|
||||
if (arrayCount > 1) {
|
||||
config.tiledSurfaceSize = subresource.tiledSize;
|
||||
|
|
@ -207,6 +203,9 @@ void amdgpu::GpuTiler::tile(Scheduler &scheduler,
|
|||
config.linearSurfaceSize = 0;
|
||||
}
|
||||
|
||||
config.linearDataWidth = subresource.linearPitch;
|
||||
config.linearDataHeight = subresource.linearHeight;
|
||||
|
||||
VkShaderStageFlagBits stages[]{VK_SHADER_STAGE_COMPUTE_BIT};
|
||||
|
||||
switch (tileMode.arrayMode()) {
|
||||
|
|
@ -232,18 +231,13 @@ void amdgpu::GpuTiler::tile(Scheduler &scheduler,
|
|||
case amdgpu::kArrayMode3dTiledThick:
|
||||
case amdgpu::kArrayMode3dTiledXThick:
|
||||
case amdgpu::kArrayMode3dTiledThickPrt:
|
||||
config.macroTileMode =
|
||||
getDefaultMacroTileModes()[computeMacroTileIndex(
|
||||
tileMode, info.bitsPerElement,
|
||||
1 << info.numFragments)]
|
||||
.raw;
|
||||
vk::CmdBindShadersEXT(commandBuffer, 1, stages, &mImpl->tiler1d.shader);
|
||||
vk::CmdBindShadersEXT(commandBuffer, 1, stages, &mImpl->tiler2d.shader);
|
||||
break;
|
||||
}
|
||||
|
||||
vkCmdPushConstants(commandBuffer, mImpl->pipelineLayout,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(config), &config);
|
||||
|
||||
vkCmdDispatch(commandBuffer, subresource.dataWidth, subresource.dataHeight,
|
||||
groupCountZ);
|
||||
vkCmdDispatch(commandBuffer, subresource.linearWidth,
|
||||
subresource.linearHeight, groupCountZ);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue