mirror of
https://github.com/RPCSX/rpcsx.git
synced 2026-01-20 23:50:46 +01:00
gpu2: implement depth textures
initial 2d tiler implementation; fixed mtbuf index order; simplify v_mac_*_f32 instructions
This commit is contained in:
parent
61d58b696f
commit
4185b1aa40
|
|
@ -232,10 +232,25 @@ struct CachedIndexBuffer : Cache::Entry {
|
|||
gnm::PrimitiveType primType;
|
||||
};
|
||||
|
||||
constexpr VkImageAspectFlags toAspect(ImageKind kind) {
|
||||
switch (kind) {
|
||||
case ImageKind::Color:
|
||||
return VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
case ImageKind::Depth:
|
||||
return VK_IMAGE_ASPECT_DEPTH_BIT;
|
||||
case ImageKind::Stencil:
|
||||
return VK_IMAGE_ASPECT_STENCIL_BIT;
|
||||
}
|
||||
|
||||
return VK_IMAGE_ASPECT_NONE;
|
||||
}
|
||||
|
||||
struct CachedImage : Cache::Entry {
|
||||
vk::Image image;
|
||||
ImageKind kind;
|
||||
SurfaceInfo info;
|
||||
TileMode acquiredTileMode;
|
||||
gnm::DataFormat acquiredDfmt{};
|
||||
|
||||
void flush(Cache::Tag &tag, Scheduler &scheduler, std::uint64_t beginAddress,
|
||||
std::uint64_t endAddress) override {
|
||||
|
|
@ -246,7 +261,7 @@ struct CachedImage : Cache::Entry {
|
|||
// std::printf("writing image to buffer to %lx\n", baseAddress);
|
||||
|
||||
VkImageSubresourceRange subresourceRange{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.aspectMask = toAspect(kind),
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = image.getMipLevels(),
|
||||
.baseArrayLayer = 0,
|
||||
|
|
@ -270,7 +285,7 @@ struct CachedImage : Cache::Entry {
|
|||
mipLevel > 0 ? 0 : std::max(info.pitch >> mipLevel, 1u),
|
||||
.imageSubresource =
|
||||
{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.aspectMask = toAspect(kind),
|
||||
.mipLevel = mipLevel,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = image.getArrayLayers(),
|
||||
|
|
@ -287,9 +302,9 @@ struct CachedImage : Cache::Entry {
|
|||
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
|
||||
transferBuffer.getHandle(), 1, &region);
|
||||
|
||||
tiler.tile(scheduler, info, acquiredTileMode, transferBuffer.getAddress(),
|
||||
tiledBuffer.deviceAddress, mipLevel, 0,
|
||||
image.getArrayLayers());
|
||||
tiler.tile(scheduler, info, acquiredTileMode, acquiredDfmt,
|
||||
transferBuffer.getAddress(), tiledBuffer.deviceAddress,
|
||||
mipLevel, 0, image.getArrayLayers());
|
||||
}
|
||||
|
||||
transitionImageLayout(scheduler.getCommandBuffer(), image,
|
||||
|
|
@ -307,7 +322,8 @@ struct CachedImageView : Cache::Entry {
|
|||
|
||||
ImageKey ImageKey::createFrom(const gnm::TBuffer &buffer) {
|
||||
return {
|
||||
.address = buffer.address(),
|
||||
.readAddress = buffer.address(),
|
||||
.writeAddress = buffer.address(),
|
||||
.type = buffer.type,
|
||||
.dfmt = buffer.dfmt,
|
||||
.nfmt = buffer.nfmt,
|
||||
|
|
@ -324,6 +340,7 @@ ImageKey ImageKey::createFrom(const gnm::TBuffer &buffer) {
|
|||
.mipCount = buffer.last_level - buffer.base_level + 1u,
|
||||
.baseArrayLayer = static_cast<std::uint32_t>(buffer.base_array),
|
||||
.arrayLayerCount = buffer.last_array - buffer.base_array + 1u,
|
||||
.kind = ImageKind::Color,
|
||||
.pow2pad = buffer.pow2pad != 0,
|
||||
};
|
||||
}
|
||||
|
|
@ -714,7 +731,7 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
|
|||
|
||||
if ((access & Access::Read) != Access::None) {
|
||||
auto tiledBuffer =
|
||||
getBuffer(key.address, surfaceInfo.totalSize, Access::Read);
|
||||
getBuffer(key.readAddress, surfaceInfo.totalSize, Access::Read);
|
||||
|
||||
auto &tiler = mParent->mDevice->tiler;
|
||||
auto detiledBuffer =
|
||||
|
|
@ -722,7 +739,7 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
|
|||
VK_BUFFER_USAGE_2_TRANSFER_DST_BIT_KHR |
|
||||
VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT_KHR);
|
||||
VkImageSubresourceRange subresourceRange{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.aspectMask = toAspect(key.kind),
|
||||
.baseMipLevel = key.baseMipLevel,
|
||||
.levelCount = key.mipCount,
|
||||
.baseArrayLayer = key.baseArrayLayer,
|
||||
|
|
@ -756,8 +773,8 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
|
|||
.size = info.linearSize * key.arrayLayerCount,
|
||||
});
|
||||
} else {
|
||||
tiler.detile(*mScheduler, surfaceInfo, key.tileMode, srcAddress,
|
||||
dstAddress, mipLevel, 0, key.arrayLayerCount);
|
||||
tiler.detile(*mScheduler, surfaceInfo, key.tileMode, key.dfmt,
|
||||
srcAddress, dstAddress, mipLevel, 0, key.arrayLayerCount);
|
||||
}
|
||||
|
||||
regions.push_back({
|
||||
|
|
@ -766,7 +783,7 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
|
|||
mipLevel > 0 ? 0 : std::max(key.pitch >> mipLevel, 1u),
|
||||
.imageSubresource =
|
||||
{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.aspectMask = toAspect(key.kind),
|
||||
.mipLevel = mipLevel,
|
||||
.baseArrayLayer = key.baseArrayLayer,
|
||||
.layerCount = key.arrayLayerCount,
|
||||
|
|
@ -808,8 +825,13 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
|
|||
auto cached = std::make_shared<CachedImage>();
|
||||
cached->image = std::move(image);
|
||||
cached->info = std::move(surfaceInfo);
|
||||
cached->baseAddress = key.address;
|
||||
cached->baseAddress = (access & Access::Write) != Access::None
|
||||
? key.writeAddress
|
||||
: key.readAddress;
|
||||
cached->kind = key.kind;
|
||||
cached->acquiredAccess = access;
|
||||
cached->acquiredTileMode = key.tileMode;
|
||||
cached->acquiredDfmt = key.dfmt;
|
||||
mAcquiredResources.push_back(cached);
|
||||
|
||||
return {.handle = cached->image.getHandle()};
|
||||
|
|
@ -827,14 +849,16 @@ Cache::ImageView Cache::Tag::getImageView(const ImageViewKey &key,
|
|||
.a = gnm::toVkComponentSwizzle(key.A),
|
||||
},
|
||||
{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.aspectMask = toAspect(key.kind),
|
||||
.baseMipLevel = key.baseMipLevel,
|
||||
.levelCount = key.mipCount,
|
||||
.baseArrayLayer = key.baseArrayLayer,
|
||||
.layerCount = key.arrayLayerCount,
|
||||
});
|
||||
auto cached = std::make_shared<CachedImageView>();
|
||||
cached->baseAddress = key.address;
|
||||
cached->baseAddress = (access & Access::Write) != Access::None
|
||||
? key.writeAddress
|
||||
: key.readAddress;
|
||||
cached->acquiredAccess = access;
|
||||
cached->view = std::move(result);
|
||||
|
||||
|
|
|
|||
|
|
@ -22,8 +22,15 @@ struct ShaderKey {
|
|||
shader::gcn::Environment env;
|
||||
};
|
||||
|
||||
enum class ImageKind {
|
||||
Color,
|
||||
Depth,
|
||||
Stencil
|
||||
};
|
||||
|
||||
struct ImageKey {
|
||||
std::uint64_t address;
|
||||
std::uint64_t readAddress;
|
||||
std::uint64_t writeAddress;
|
||||
gnm::TextureType type;
|
||||
gnm::DataFormat dfmt;
|
||||
gnm::NumericFormat nfmt;
|
||||
|
|
@ -35,6 +42,7 @@ struct ImageKey {
|
|||
unsigned mipCount = 1;
|
||||
unsigned baseArrayLayer = 0;
|
||||
unsigned arrayLayerCount = 1;
|
||||
ImageKind kind = ImageKind::Color;
|
||||
bool pow2pad = false;
|
||||
|
||||
static ImageKey createFrom(const gnm::TBuffer &tbuffer);
|
||||
|
|
|
|||
|
|
@ -301,10 +301,10 @@ bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
|
|||
.layerCount = 1,
|
||||
});
|
||||
|
||||
amdgpu::flip(cacheTag, commandBuffer, vk::context->swapchainExtent,
|
||||
buffer.address, swapchainImageView,
|
||||
{bufferAttr.width, bufferAttr.height}, compSwap,
|
||||
getDefaultTileModes()[13], dfmt, nfmt);
|
||||
amdgpu::flip(
|
||||
cacheTag, commandBuffer, vk::context->swapchainExtent, buffer.address,
|
||||
swapchainImageView, {bufferAttr.width, bufferAttr.height}, compSwap,
|
||||
getDefaultTileModes()[bufferAttr.tilingMode == 1 ? 10 : 8], dfmt, nfmt);
|
||||
|
||||
transitionImageLayout(commandBuffer, swapchainImage,
|
||||
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
|
||||
|
|
@ -316,11 +316,11 @@ bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
|
|||
});
|
||||
} else {
|
||||
ImageKey frameKey{
|
||||
.address = buffer.address,
|
||||
.readAddress = buffer.address,
|
||||
.type = gnm::TextureType::Dim2D,
|
||||
.dfmt = dfmt,
|
||||
.nfmt = nfmt,
|
||||
.tileMode = getDefaultTileModes()[13],
|
||||
.tileMode = getDefaultTileModes()[bufferAttr.tilingMode == 1 ? 10 : 8],
|
||||
.extent =
|
||||
{
|
||||
.width = bufferAttr.width,
|
||||
|
|
@ -429,7 +429,7 @@ bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
|
|||
};
|
||||
|
||||
vkQueueSubmit2(vk::context->presentQueue, 1, &submitInfo, fence);
|
||||
// vkQueueWaitIdle(queue);
|
||||
vkQueueWaitIdle(vk::context->presentQueue);
|
||||
}
|
||||
|
||||
scheduler.then([=, this, cacheTag = std::move(cacheTag)] {
|
||||
|
|
|
|||
|
|
@ -405,6 +405,20 @@ struct DbRenderControl {
|
|||
};
|
||||
};
|
||||
|
||||
struct DbDepthView {
|
||||
union {
|
||||
struct {
|
||||
std::uint32_t sliceStart : 11;
|
||||
std::uint32_t : 2;
|
||||
std::uint32_t sliceMax : 11;
|
||||
bool zReadOnly : 1;
|
||||
bool stencilReadOnly : 1;
|
||||
};
|
||||
|
||||
std::uint32_t raw;
|
||||
};
|
||||
};
|
||||
|
||||
struct CbBlendControl {
|
||||
union {
|
||||
struct {
|
||||
|
|
@ -574,7 +588,7 @@ struct Registers {
|
|||
union {
|
||||
Register<0x0, DbRenderControl> dbRenderControl;
|
||||
Register<0x1> dbCountControl;
|
||||
Register<0x2> dbDepthView;
|
||||
Register<0x2, DbDepthView> dbDepthView;
|
||||
Register<0x3> dbRenderOverride;
|
||||
Register<0x4> dbRenderOverride2;
|
||||
Register<0x5> dbHTileDataBase;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include "Renderer.hpp"
|
||||
#include "Device.hpp"
|
||||
#include "gnm/descriptors.hpp"
|
||||
#include "gnm/gnm.hpp"
|
||||
#include "rx/MemoryTable.hpp"
|
||||
|
||||
#include <amdgpu/tiler.hpp>
|
||||
|
|
@ -227,7 +228,8 @@ struct ShaderResources : eval::Evaluator {
|
|||
bufferMemoryTable.map(*pointerBase,
|
||||
*pointerBase + *pointerOffset + pointer.size,
|
||||
Access::Read);
|
||||
resourceSlotToAddress.push_back({slotOffset + pointer.resourceSlot, *pointerBase});
|
||||
resourceSlotToAddress.push_back(
|
||||
{slotOffset + pointer.resourceSlot, *pointerBase});
|
||||
}
|
||||
|
||||
for (auto &bufferRes : res.buffers) {
|
||||
|
|
@ -352,7 +354,8 @@ struct ShaderResources : eval::Evaluator {
|
|||
sSampler.force_unorm_coords = true;
|
||||
}
|
||||
|
||||
slotResources[slotOffset + sampler.resourceSlot] = samplerResources.size();
|
||||
slotResources[slotOffset + sampler.resourceSlot] =
|
||||
samplerResources.size();
|
||||
samplerResources.push_back(
|
||||
cacheTag->getSampler(amdgpu::SamplerKey::createFrom(sSampler)));
|
||||
}
|
||||
|
|
@ -503,11 +506,78 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
|
|||
VkRect2D viewPortScissors[8]{};
|
||||
unsigned renderTargets = 0;
|
||||
|
||||
VkRenderingAttachmentInfo depthAttachment{};
|
||||
VkRenderingAttachmentInfo stencilAttachment{};
|
||||
|
||||
auto depthAccess = Access::None;
|
||||
auto stencilAccess = Access::None;
|
||||
|
||||
if (pipe.context.dbDepthControl.depthEnable) {
|
||||
if (!pipe.context.dbRenderControl.depthClearEnable) {
|
||||
depthAccess |= Access::Read;
|
||||
}
|
||||
if (!pipe.context.dbDepthView.zReadOnly) {
|
||||
depthAccess |= Access::Write;
|
||||
}
|
||||
}
|
||||
|
||||
if (pipe.context.dbDepthControl.stencilEnable) {
|
||||
if (!pipe.context.dbRenderControl.stencilClearEnable) {
|
||||
stencilAccess |= Access::Read;
|
||||
}
|
||||
if (!pipe.context.dbDepthView.stencilReadOnly) {
|
||||
stencilAccess |= Access::Write;
|
||||
}
|
||||
}
|
||||
|
||||
if (depthAccess != Access::None) {
|
||||
auto viewPortScissor = pipe.context.paScScreenScissor;
|
||||
auto viewPortRect = gnm::toVkRect2D(viewPortScissor);
|
||||
|
||||
auto imageView = cacheTag.getImageView(
|
||||
{{
|
||||
.readAddress = pipe.context.dbZReadBase,
|
||||
.writeAddress = pipe.context.dbZWriteBase,
|
||||
.dfmt = gnm::getDataFormat(pipe.context.dbZInfo.format),
|
||||
.nfmt = gnm::getNumericFormat(pipe.context.dbZInfo.format),
|
||||
.extent =
|
||||
{
|
||||
.width = viewPortRect.extent.width,
|
||||
.height = viewPortRect.extent.height,
|
||||
.depth = 1,
|
||||
},
|
||||
.pitch = viewPortRect.extent.width,
|
||||
.kind = ImageKind::Depth,
|
||||
}},
|
||||
depthAccess);
|
||||
|
||||
depthAttachment = {
|
||||
.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
|
||||
.imageView = imageView.handle,
|
||||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
|
||||
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
|
||||
};
|
||||
|
||||
if ((depthAccess & Access::Read) == Access::None) {
|
||||
depthAttachment.clearValue.depthStencil.depth = pipe.context.dbDepthClear;
|
||||
depthAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
|
||||
}
|
||||
|
||||
if ((depthAccess & Access::Write) == Access::None) {
|
||||
depthAttachment.storeOp = VK_ATTACHMENT_STORE_OP_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &cbColor : pipe.context.cbColor) {
|
||||
if (targetMask == 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (cbColor.info.dfmt == gnm::kDataFormatInvalid) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto viewPortScissor = pipe.context.paScScreenScissor;
|
||||
// viewPortScissor = gnm::intersection(
|
||||
// viewPortScissor, pipe.context.paScVportScissor[renderTargets]);
|
||||
|
|
@ -533,7 +603,9 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
|
|||
ImageViewKey renderTargetInfo{};
|
||||
renderTargetInfo.type = gnm::TextureType::Dim2D;
|
||||
renderTargetInfo.pitch = vkViewPortScissor.extent.width;
|
||||
renderTargetInfo.address = static_cast<std::uint64_t>(cbColor.base) << 8;
|
||||
renderTargetInfo.readAddress = static_cast<std::uint64_t>(cbColor.base)
|
||||
<< 8;
|
||||
renderTargetInfo.writeAddress = renderTargetInfo.readAddress;
|
||||
renderTargetInfo.extent.width = vkViewPortScissor.extent.width;
|
||||
renderTargetInfo.extent.height = vkViewPortScissor.extent.height;
|
||||
renderTargetInfo.extent.depth = 1;
|
||||
|
|
@ -545,9 +617,7 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
|
|||
renderTargetInfo.tileMode =
|
||||
cbColor.info.linearGeneral
|
||||
? TileMode{.raw = 0}
|
||||
: getDefaultTileModes()[/*cbColor.attrib.tileModeIndex*/
|
||||
13];
|
||||
|
||||
: getDefaultTileModes()[cbColor.attrib.tileModeIndex];
|
||||
// std::printf("draw to %lx\n", renderTargetInfo.address);
|
||||
|
||||
auto access = Access::None;
|
||||
|
|
@ -613,6 +683,10 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
|
|||
targetMask >>= 4;
|
||||
}
|
||||
|
||||
if (renderTargets == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// if (pipe.context.cbTargetMask == 0) {
|
||||
// return;
|
||||
// }
|
||||
|
|
@ -654,7 +728,7 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
|
|||
.vgprCount = pgm.rsrc1.getVGprCount(),
|
||||
.sgprCount = pgm.rsrc1.getSGprCount(),
|
||||
.userSgprs = std::span(pgm.userData.data(), pgm.rsrc2.userSgpr),
|
||||
.supportsBarycentric = vk::context->supportsBarycentric,
|
||||
// .supportsBarycentric = vk::context->supportsBarycentric,
|
||||
.supportsInt8 = vk::context->supportsInt8,
|
||||
.supportsInt64Atomics = vk::context->supportsInt64Atomics,
|
||||
};
|
||||
|
|
@ -754,29 +828,33 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
|
|||
break;
|
||||
case shader::gcn::ConfigType::ViewPortOffsetX:
|
||||
configPtr[index] = std::bit_cast<std::uint32_t>(
|
||||
pipe.context.paClVports[0].xOffset / (viewPorts[0].width / 2.f) -
|
||||
pipe.context.paClVports[slot.data].xOffset /
|
||||
(viewPorts[0].width / 2.f) -
|
||||
1);
|
||||
break;
|
||||
case shader::gcn::ConfigType::ViewPortOffsetY:
|
||||
configPtr[index] = std::bit_cast<std::uint32_t>(
|
||||
pipe.context.paClVports[0].yOffset / (viewPorts[0].height / 2.f) -
|
||||
pipe.context.paClVports[slot.data].yOffset /
|
||||
(viewPorts[slot.data].height / 2.f) -
|
||||
1);
|
||||
break;
|
||||
case shader::gcn::ConfigType::ViewPortOffsetZ:
|
||||
configPtr[index] =
|
||||
std::bit_cast<std::uint32_t>(pipe.context.paClVports[0].zOffset);
|
||||
configPtr[index] = std::bit_cast<std::uint32_t>(
|
||||
pipe.context.paClVports[slot.data].zOffset);
|
||||
break;
|
||||
case shader::gcn::ConfigType::ViewPortScaleX:
|
||||
configPtr[index] = std::bit_cast<std::uint32_t>(
|
||||
pipe.context.paClVports[0].xScale / (viewPorts[0].width / 2.f));
|
||||
pipe.context.paClVports[slot.data].xScale /
|
||||
(viewPorts[slot.data].width / 2.f));
|
||||
break;
|
||||
case shader::gcn::ConfigType::ViewPortScaleY:
|
||||
configPtr[index] = std::bit_cast<std::uint32_t>(
|
||||
pipe.context.paClVports[0].yScale / (viewPorts[0].height / 2.f));
|
||||
pipe.context.paClVports[slot.data].yScale /
|
||||
(viewPorts[slot.data].height / 2.f));
|
||||
break;
|
||||
case shader::gcn::ConfigType::ViewPortScaleZ:
|
||||
configPtr[index] =
|
||||
std::bit_cast<std::uint32_t>(pipe.context.paClVports[0].zScale);
|
||||
configPtr[index] = std::bit_cast<std::uint32_t>(
|
||||
pipe.context.paClVports[slot.data].zScale);
|
||||
break;
|
||||
case shader::gcn::ConfigType::PsInputVGpr:
|
||||
if (slot.data > psVgprInputs) {
|
||||
|
|
@ -882,8 +960,8 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
|
|||
.layerCount = 1,
|
||||
.colorAttachmentCount = renderTargets,
|
||||
.pColorAttachments = colorAttachments,
|
||||
// .pDepthAttachment = &depthAttachment,
|
||||
// .pStencilAttachment = &stencilAttachment,
|
||||
.pDepthAttachment = &depthAttachment,
|
||||
// .pStencilAttachment = &stencilAttachment,
|
||||
};
|
||||
|
||||
vkCmdBeginRendering(commandBuffer, &renderInfo);
|
||||
|
|
@ -1092,7 +1170,7 @@ void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer,
|
|||
ImageViewKey framebuffer{};
|
||||
framebuffer.type = gnm::TextureType::Dim2D;
|
||||
framebuffer.pitch = imageExtent.width;
|
||||
framebuffer.address = address;
|
||||
framebuffer.readAddress = address;
|
||||
framebuffer.extent.width = imageExtent.width;
|
||||
framebuffer.extent.height = imageExtent.height;
|
||||
framebuffer.extent.depth = 1;
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <bit>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <gnm/constants.hpp>
|
||||
#include <gnm/descriptors.hpp>
|
||||
#include <bit>
|
||||
|
||||
namespace amdgpu {
|
||||
inline constexpr uint32_t kMicroTileWidth = 8;
|
||||
|
|
@ -496,6 +496,28 @@ constexpr std::uint32_t getPipeCount(PipeConfig pipeConfig) {
|
|||
}
|
||||
}
|
||||
|
||||
constexpr int computeMacroTileIndex(amdgpu::TileMode tileMode,
|
||||
uint32_t bitsPerElement,
|
||||
uint32_t numFragmentsPerPixel) {
|
||||
auto arrayMode = tileMode.arrayMode();
|
||||
auto microTileMode = tileMode.microTileMode();
|
||||
auto sampleSplitHw = tileMode.sampleSplit();
|
||||
auto tileSplitHw = tileMode.tileSplit();
|
||||
|
||||
uint32_t tileThickness = getMicroTileThickness(arrayMode);
|
||||
uint32_t tileBytes1x =
|
||||
bitsPerElement * kMicroTileWidth * kMicroTileHeight * tileThickness / 8;
|
||||
uint32_t sampleSplit = 1 << sampleSplitHw;
|
||||
uint32_t colorTileSplit = std::max(256U, sampleSplit * tileBytes1x);
|
||||
uint32_t tileSplit = (microTileMode == amdgpu::kMicroTileModeDepth)
|
||||
? (64UL << tileSplitHw)
|
||||
: colorTileSplit;
|
||||
uint32_t tileSplitC = std::min(kDramRowSize, tileSplit);
|
||||
uint32_t tileBytes = std::min(tileSplitC, numFragmentsPerPixel * tileBytes1x);
|
||||
uint32_t mtmIndex = std::countr_zero(tileBytes / 64);
|
||||
return isPrt(arrayMode) ? mtmIndex + 8 : mtmIndex;
|
||||
}
|
||||
|
||||
SurfaceInfo computeSurfaceInfo(TileMode tileMode, gnm::TextureType type,
|
||||
gnm::DataFormat dfmt, std::uint32_t width,
|
||||
std::uint32_t height, std::uint32_t depth,
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#pragma once
|
||||
#include "gnm/constants.hpp"
|
||||
#include "tiler.hpp"
|
||||
#include <Scheduler.hpp>
|
||||
#include <memory>
|
||||
|
|
@ -10,13 +11,13 @@ struct GpuTiler {
|
|||
~GpuTiler();
|
||||
|
||||
void detile(Scheduler &scheduler, const amdgpu::SurfaceInfo &info,
|
||||
amdgpu::TileMode tileMode, std::uint64_t srcTiledAddress,
|
||||
std::uint64_t dstLinearAddress, int mipLevel, int baseArray,
|
||||
int arrayCount);
|
||||
amdgpu::TileMode tileMode, gnm::DataFormat dfmt,
|
||||
std::uint64_t srcTiledAddress, std::uint64_t dstLinearAddress,
|
||||
int mipLevel, int baseArray, int arrayCount);
|
||||
void tile(Scheduler &scheduler, const amdgpu::SurfaceInfo &info,
|
||||
amdgpu::TileMode tileMode, std::uint64_t srcLinearAddress,
|
||||
std::uint64_t dstTiledAddress, int mipLevel, int baseArray,
|
||||
int arrayCount);
|
||||
amdgpu::TileMode tileMode, gnm::DataFormat dfmt,
|
||||
std::uint64_t srcLinearAddress, std::uint64_t dstTiledAddress,
|
||||
int mipLevel, int baseArray, int arrayCount);
|
||||
|
||||
private:
|
||||
std::unique_ptr<Impl> mImpl;
|
||||
|
|
|
|||
|
|
@ -18,17 +18,24 @@ void main() {
|
|||
uvec3 pos = gl_GlobalInvocationID;
|
||||
uint64_t tiledSliceOffset = 0;
|
||||
uint64_t linearSliceOffset = 0;
|
||||
int arraySlice = 0;
|
||||
int fragmentIndex = 0;
|
||||
|
||||
if (config.tiledSurfaceSize != 0) {
|
||||
tiledSliceOffset = pos.z * config.tiledSurfaceSize;
|
||||
linearSliceOffset = pos.z * config.linearSurfaceSize;
|
||||
pos.z = 0;
|
||||
}
|
||||
|
||||
uint64_t tiledByteOffset = getTiledBitOffset1D(
|
||||
uint64_t tiledByteOffset = getTiledBitOffset2D(
|
||||
config.dfmt,
|
||||
config.tileMode,
|
||||
pos,
|
||||
config.macroTileMode,
|
||||
config.dataSize,
|
||||
config.bitsPerElement
|
||||
arraySlice,
|
||||
config.numFragments,
|
||||
pos,
|
||||
fragmentIndex
|
||||
) / 8;
|
||||
|
||||
tiledByteOffset += tiledSliceOffset;
|
||||
|
|
|
|||
|
|
@ -537,6 +537,86 @@ uint32_t tileMode_getSampleSplit(uint32_t tileMode) {
|
|||
return (tileMode & 0x06000000) >> 25;
|
||||
}
|
||||
|
||||
uint32_t macroTileMode_getBankWidth(uint32_t tileMode) {
|
||||
return (tileMode & 0x00000003) >> 0;
|
||||
}
|
||||
uint32_t macroTileMode_getBankHeight(uint32_t tileMode) {
|
||||
return (tileMode & 0x0000000c) >> 2;
|
||||
}
|
||||
uint32_t macroTileMode_getMacroTileAspect(uint32_t tileMode) {
|
||||
return (tileMode & 0x00000030) >> 4;
|
||||
}
|
||||
uint32_t macroTileMode_getNumBanks(uint32_t tileMode) {
|
||||
return (tileMode & 0x000000c0) >> 6;
|
||||
}
|
||||
|
||||
uint32_t getPipeCount(uint32_t pipeConfig) {
|
||||
switch (pipeConfig) {
|
||||
case kPipeConfigP8_32x32_8x16:
|
||||
case kPipeConfigP8_32x32_16x16:
|
||||
return 8;
|
||||
case kPipeConfigP16:
|
||||
return 16;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t getPipeIndex(uint32_t x, uint32_t y, uint32_t pipeCfg) {
|
||||
uint32_t pipe = 0;
|
||||
switch (pipeCfg) {
|
||||
case kPipeConfigP8_32x32_8x16:
|
||||
pipe |= (((x >> 4) ^ (y >> 3) ^ (x >> 5)) & 0x1) << 0;
|
||||
pipe |= (((x >> 3) ^ (y >> 4)) & 0x1) << 1;
|
||||
pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
|
||||
break;
|
||||
case kPipeConfigP8_32x32_16x16:
|
||||
pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0;
|
||||
pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1;
|
||||
pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
|
||||
break;
|
||||
case kPipeConfigP16:
|
||||
pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0;
|
||||
pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1;
|
||||
pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2;
|
||||
pipe |= (((x >> 6) ^ (y >> 5)) & 0x1) << 3;
|
||||
break;
|
||||
}
|
||||
return pipe;
|
||||
}
|
||||
|
||||
uint32_t getBankIndex(uint32_t x, uint32_t y, uint32_t bank_width, uint32_t bank_height, uint32_t num_banks, uint32_t num_pipes) {
|
||||
uint32_t x_shift_offset = findLSB(bank_width * num_pipes);
|
||||
uint32_t y_shift_offset = findLSB(bank_height);
|
||||
uint32_t xs = x >> x_shift_offset;
|
||||
uint32_t ys = y >> y_shift_offset;
|
||||
uint32_t bank = 0;
|
||||
switch (num_banks) {
|
||||
case 2:
|
||||
bank |= (((xs >> 3) ^ (ys >> 3)) & 0x1) << 0;
|
||||
break;
|
||||
case 4:
|
||||
bank |= (((xs >> 3) ^ (ys >> 4)) & 0x1) << 0;
|
||||
bank |= (((xs >> 4) ^ (ys >> 3)) & 0x1) << 1;
|
||||
break;
|
||||
case 8:
|
||||
bank |= (((xs >> 3) ^ (ys >> 5)) & 0x1) << 0;
|
||||
bank |= (((xs >> 4) ^ (ys >> 4) ^ (ys >> 5)) & 0x1) << 1;
|
||||
bank |= (((xs >> 5) ^ (ys >> 3)) & 0x1) << 2;
|
||||
break;
|
||||
case 16:
|
||||
bank |= (((xs >> 3) ^ (ys >> 6)) & 0x1) << 0;
|
||||
bank |= (((xs >> 4) ^ (ys >> 5) ^ (ys >> 6)) & 0x1) << 1;
|
||||
bank |= (((xs >> 5) ^ (ys >> 4)) & 0x1) << 2;
|
||||
bank |= (((xs >> 6) ^ (ys >> 3)) & 0x1) << 3;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return bank;
|
||||
}
|
||||
|
||||
uint32_t bit_ceil(uint32_t x) {
|
||||
x = x - 1;
|
||||
x |= x >> 1;
|
||||
|
|
@ -704,13 +784,223 @@ uint64_t getTiledBitOffset1D(uint32_t tileMode, uvec3 pos, uvec2 dataSize, uint3
|
|||
return (sliceOffset + tileOffset) * 8 + elementOffset;
|
||||
}
|
||||
|
||||
|
||||
uint64_t getTiledBitOffset2D(uint32_t dfmt, uint32_t tileMode, uint32_t macroTileMode,
|
||||
uvec2 dataSize, int arraySlice, uint32_t numFragments, u32vec3 pos, int fragmentIndex) {
|
||||
uint32_t bitsPerFragment = getBitsPerElement(dfmt);
|
||||
|
||||
bool isBlockCompressed = getTexelsPerElement(dfmt) > 1;
|
||||
uint32_t tileSwizzleMask = 0;
|
||||
uint32_t numFragmentsPerPixel = 1 << numFragments;
|
||||
uint32_t arrayMode = tileMode_getArrayMode(tileMode);
|
||||
|
||||
uint32_t tileThickness = 1;
|
||||
|
||||
switch (arrayMode) {
|
||||
case kArrayMode2dTiledThin:
|
||||
case kArrayMode3dTiledThin:
|
||||
case kArrayModeTiledThinPrt:
|
||||
case kArrayMode2dTiledThinPrt:
|
||||
case kArrayMode3dTiledThinPrt:
|
||||
tileThickness = 1;
|
||||
break;
|
||||
case kArrayMode1dTiledThick:
|
||||
case kArrayMode2dTiledThick:
|
||||
case kArrayMode3dTiledThick:
|
||||
case kArrayModeTiledThickPrt:
|
||||
case kArrayMode2dTiledThickPrt:
|
||||
case kArrayMode3dTiledThickPrt:
|
||||
tileThickness = 4;
|
||||
break;
|
||||
case kArrayMode2dTiledXThick:
|
||||
case kArrayMode3dTiledXThick:
|
||||
tileThickness = 8;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
uint32_t bitsPerElement = bitsPerFragment;
|
||||
uint32_t paddedWidth = dataSize.x;
|
||||
uint32_t paddedHeight = dataSize.y;
|
||||
|
||||
uint32_t bankWidthHW = macroTileMode_getBankWidth(macroTileMode);
|
||||
uint32_t bankHeightHW = macroTileMode_getBankHeight(macroTileMode);
|
||||
uint32_t macroAspectHW = macroTileMode_getMacroTileAspect(macroTileMode);
|
||||
uint32_t numBanksHW = macroTileMode_getNumBanks(macroTileMode);
|
||||
|
||||
uint32_t bankWidth = 1 << bankWidthHW;
|
||||
uint32_t bankHeight = 1 << bankHeightHW;
|
||||
uint32_t numBanks = 2 << numBanksHW;
|
||||
uint32_t macroTileAspect = 1 << macroAspectHW;
|
||||
|
||||
uint32_t tileBytes1x =
|
||||
(tileThickness * bitsPerElement * kMicroTileWidth * kMicroTileHeight +
|
||||
7) /
|
||||
8;
|
||||
|
||||
uint32_t sampleSplitHw = tileMode_getSampleSplit(tileMode);
|
||||
uint32_t tileSplitHw = tileMode_getTileSplit(tileMode);
|
||||
uint32_t sampleSplit = 1 << sampleSplitHw;
|
||||
uint32_t tileSplitC =
|
||||
(tileMode_getMicroTileMode(tileMode) == kMicroTileModeDepth)
|
||||
? (64 << tileSplitHw)
|
||||
: max(256U, tileBytes1x * sampleSplit);
|
||||
|
||||
uint32_t tileSplitBytes = min(kDramRowSize, tileSplitC);
|
||||
|
||||
uint32_t numPipes = getPipeCount(tileMode_getPipeConfig(tileMode));
|
||||
uint32_t pipeInterleaveBits = findLSB(kPipeInterleaveBytes);
|
||||
uint32_t pipeInterleaveMask = (1 << pipeInterleaveBits) - 1;
|
||||
uint32_t pipeBits = findLSB(numPipes);
|
||||
uint32_t bankBits = findLSB(numBanks);
|
||||
uint32_t bankSwizzleMask = tileSwizzleMask;
|
||||
uint32_t pipeSwizzleMask = 0;
|
||||
uint32_t macroTileWidth =
|
||||
(kMicroTileWidth * bankWidth * numPipes) * macroTileAspect;
|
||||
uint32_t macroTileHeight =
|
||||
(kMicroTileHeight * bankHeight * numBanks) / macroTileAspect;
|
||||
|
||||
uint32_t microTileMode = tileMode_getMicroTileMode(tileMode);
|
||||
|
||||
uint64_t elementIndex =
|
||||
getElementIndex(pos, bitsPerElement, microTileMode, arrayMode);
|
||||
|
||||
uint32_t xh = pos.x;
|
||||
uint32_t yh = pos.y;
|
||||
if (arrayMode == kArrayModeTiledThinPrt ||
|
||||
arrayMode == kArrayModeTiledThickPrt) {
|
||||
xh %= macroTileWidth;
|
||||
yh %= macroTileHeight;
|
||||
}
|
||||
uint64_t pipe = getPipeIndex(xh, yh, tileMode_getPipeConfig(tileMode));
|
||||
uint64_t bank =
|
||||
getBankIndex(xh, yh, bankWidth, bankHeight, numBanks, numPipes);
|
||||
|
||||
uint32_t tileBytes = (kMicroTileWidth * kMicroTileHeight * tileThickness *
|
||||
bitsPerElement * numFragmentsPerPixel +
|
||||
7) /
|
||||
8;
|
||||
|
||||
uint64_t elementOffset = 0;
|
||||
if (microTileMode == kMicroTileModeDepth) {
|
||||
uint64_t pixelOffset = elementIndex * bitsPerElement * numFragmentsPerPixel;
|
||||
elementOffset = pixelOffset + (fragmentIndex * bitsPerElement);
|
||||
} else {
|
||||
uint64_t fragmentOffset =
|
||||
fragmentIndex * (tileBytes / numFragmentsPerPixel) * 8;
|
||||
elementOffset = fragmentOffset + (elementIndex * bitsPerElement);
|
||||
}
|
||||
|
||||
uint64_t slicesPerTile = 1;
|
||||
uint64_t tileSplitSlice = 0;
|
||||
if (tileBytes > tileSplitBytes && tileThickness == 1) {
|
||||
slicesPerTile = tileBytes / tileSplitBytes;
|
||||
tileSplitSlice = elementOffset / (tileSplitBytes * 8);
|
||||
elementOffset %= (tileSplitBytes * 8);
|
||||
tileBytes = tileSplitBytes;
|
||||
}
|
||||
|
||||
uint64_t macroTileBytes = (macroTileWidth / kMicroTileWidth) *
|
||||
(macroTileHeight / kMicroTileHeight) * tileBytes /
|
||||
(numPipes * numBanks);
|
||||
uint64_t macroTilesPerRow = paddedWidth / macroTileWidth;
|
||||
uint64_t macroTileRowIndex = pos.y / macroTileHeight;
|
||||
uint64_t macroTileColumnIndex = pos.x / macroTileWidth;
|
||||
uint64_t macroTileIndex =
|
||||
(macroTileRowIndex * macroTilesPerRow) + macroTileColumnIndex;
|
||||
uint64_t macro_tile_offset = macroTileIndex * macroTileBytes;
|
||||
uint64_t macroTilesPerSlice =
|
||||
macroTilesPerRow * (paddedHeight / macroTileHeight);
|
||||
uint64_t sliceBytes = macroTilesPerSlice * macroTileBytes;
|
||||
|
||||
uint32_t slice = pos.z;
|
||||
uint64_t sliceOffset =
|
||||
(tileSplitSlice + slicesPerTile * slice / tileThickness) * sliceBytes;
|
||||
if (arraySlice != 0) {
|
||||
slice = arraySlice;
|
||||
}
|
||||
|
||||
uint64_t tileRowIndex = (pos.y / kMicroTileHeight) % bankHeight;
|
||||
uint64_t tileColumnIndex = ((pos.x / kMicroTileWidth) / numPipes) % bankWidth;
|
||||
uint64_t tileIndex = (tileRowIndex * bankWidth) + tileColumnIndex;
|
||||
uint64_t tileOffset = tileIndex * tileBytes;
|
||||
|
||||
uint64_t bankSwizzle = bankSwizzleMask;
|
||||
uint64_t pipeSwizzle = pipeSwizzleMask;
|
||||
|
||||
uint64_t pipeSliceRotation = 0;
|
||||
switch (arrayMode) {
|
||||
case kArrayMode3dTiledThin:
|
||||
case kArrayMode3dTiledThick:
|
||||
case kArrayMode3dTiledXThick:
|
||||
pipeSliceRotation =
|
||||
max(1UL, (numPipes / 2UL) - 1UL) * (slice / tileThickness);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
pipeSwizzle += pipeSliceRotation;
|
||||
pipeSwizzle &= (numPipes - 1);
|
||||
pipe = pipe ^ pipeSwizzle;
|
||||
|
||||
uint64_t sliceRotation = 0;
|
||||
switch (arrayMode) {
|
||||
case kArrayMode2dTiledThin:
|
||||
case kArrayMode2dTiledThick:
|
||||
case kArrayMode2dTiledXThick:
|
||||
sliceRotation = ((numBanks / 2) - 1) * (slice / tileThickness);
|
||||
break;
|
||||
case kArrayMode3dTiledThin:
|
||||
case kArrayMode3dTiledThick:
|
||||
case kArrayMode3dTiledXThick:
|
||||
sliceRotation = max(1UL, (numPipes / 2UL) - 1UL) * (slice / tileThickness) / numPipes;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
uint64_t tileSplitSliceRotation = 0;
|
||||
switch (arrayMode) {
|
||||
case kArrayMode2dTiledThin:
|
||||
case kArrayMode3dTiledThin:
|
||||
case kArrayMode2dTiledThinPrt:
|
||||
case kArrayMode3dTiledThinPrt:
|
||||
tileSplitSliceRotation = ((numBanks / 2) + 1) * tileSplitSlice;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
bank ^= bankSwizzle + sliceRotation;
|
||||
bank ^= tileSplitSliceRotation;
|
||||
bank &= (numBanks - 1);
|
||||
|
||||
uint64_t totalOffset =
|
||||
(sliceOffset + macro_tile_offset + tileOffset) * 8 + elementOffset;
|
||||
uint64_t bitOffset = totalOffset & 0x7;
|
||||
totalOffset /= 8;
|
||||
|
||||
uint64_t pipeInterleaveOffset = totalOffset & pipeInterleaveMask;
|
||||
uint64_t offset = totalOffset >> pipeInterleaveBits;
|
||||
|
||||
uint64_t finalByteOffset =
|
||||
pipeInterleaveOffset | (pipe << (pipeInterleaveBits)) |
|
||||
(bank << (pipeInterleaveBits + pipeBits)) |
|
||||
(offset << (pipeInterleaveBits + pipeBits + bankBits));
|
||||
return (finalByteOffset << 3) | bitOffset;
|
||||
}
|
||||
|
||||
|
||||
layout(binding=0) uniform Config {
|
||||
uint64_t srcAddress;
|
||||
uint64_t dstAddress;
|
||||
uvec2 dataSize;
|
||||
uint32_t tileMode;
|
||||
uint32_t macroTileMode;
|
||||
uint32_t dfmt;
|
||||
uint32_t numFragments;
|
||||
uint32_t bitsPerElement;
|
||||
uint32_t tiledSurfaceSize;
|
||||
uint32_t linearSurfaceSize;
|
||||
uint32_t padding0;
|
||||
uint32_t padding1;
|
||||
} config;
|
||||
|
|
|
|||
|
|
@ -18,17 +18,23 @@ void main() {
|
|||
uvec3 pos = gl_GlobalInvocationID;
|
||||
uint64_t tiledSliceOffset = 0;
|
||||
uint64_t linearSliceOffset = 0;
|
||||
int arraySlice = 0;
|
||||
int fragmentIndex = 0;
|
||||
if (config.tiledSurfaceSize != 0) {
|
||||
tiledSliceOffset = pos.z * config.tiledSurfaceSize;
|
||||
linearSliceOffset = pos.z * config.linearSurfaceSize;
|
||||
pos.z = 0;
|
||||
}
|
||||
|
||||
uint64_t tiledByteOffset = getTiledBitOffset1D(
|
||||
uint64_t tiledByteOffset = getTiledBitOffset2D(
|
||||
config.dfmt,
|
||||
config.tileMode,
|
||||
pos,
|
||||
config.macroTileMode,
|
||||
config.dataSize,
|
||||
config.bitsPerElement
|
||||
arraySlice,
|
||||
config.numFragments,
|
||||
pos,
|
||||
fragmentIndex
|
||||
) / 8;
|
||||
|
||||
tiledByteOffset += tiledSliceOffset;
|
||||
|
|
|
|||
|
|
@ -5,6 +5,169 @@
|
|||
|
||||
using namespace amdgpu;
|
||||
|
||||
// FIXME: should be properly implemented
|
||||
static SurfaceInfo
|
||||
computeTexture2dInfo(ArrayMode arrayMode, gnm::TextureType type,
|
||||
gnm::DataFormat dfmt, std::uint32_t width,
|
||||
std::uint32_t height, std::uint32_t depth,
|
||||
std::uint32_t pitch, int baseArrayLayer, int arrayCount,
|
||||
int baseMipLevel, int mipCount, bool pow2pad) {
|
||||
bool isCubemap = type == gnm::TextureType::Cube;
|
||||
bool isVolume = type == gnm::TextureType::Dim3D;
|
||||
|
||||
auto bitsPerFragment = getBitsPerElement(dfmt);
|
||||
std::uint32_t arraySliceCount = depth;
|
||||
|
||||
if (isCubemap) {
|
||||
arraySliceCount *= 6;
|
||||
} else if (isVolume) {
|
||||
arraySliceCount = 1;
|
||||
}
|
||||
|
||||
int numFragments = (type == gnm::TextureType::Msaa2D ||
|
||||
type == gnm::TextureType::MsaaArray2D)
|
||||
? (baseArrayLayer + arrayCount - 1)
|
||||
: 0;
|
||||
|
||||
auto numFragmentsPerPixel = 1 << numFragments;
|
||||
auto isBlockCompressed = getTexelsPerElement(dfmt) > 1;
|
||||
|
||||
auto bitsPerElement = bitsPerFragment;
|
||||
depth = isVolume ? depth : 1;
|
||||
|
||||
if (isBlockCompressed) {
|
||||
switch (bitsPerFragment) {
|
||||
case 1:
|
||||
bitsPerElement *= 8;
|
||||
break;
|
||||
case 4:
|
||||
case 8:
|
||||
bitsPerElement *= 16;
|
||||
break;
|
||||
case 16:
|
||||
std::abort();
|
||||
break;
|
||||
|
||||
default:
|
||||
std::abort();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (pow2pad) {
|
||||
arraySliceCount = std::bit_ceil(arraySliceCount);
|
||||
}
|
||||
|
||||
std::uint64_t surfaceOffset = 0;
|
||||
std::uint64_t surfaceSize = 0;
|
||||
|
||||
SurfaceInfo result;
|
||||
result.width = width;
|
||||
result.height = height;
|
||||
result.depth = depth;
|
||||
result.pitch = pitch;
|
||||
result.numFragments = numFragments;
|
||||
result.bitsPerElement = bitsPerElement;
|
||||
result.arrayLayerCount = arraySliceCount;
|
||||
|
||||
auto thickness = getMicroTileThickness(arrayMode);
|
||||
|
||||
for (int mipLevel = 0; mipLevel < baseMipLevel + mipCount; mipLevel++) {
|
||||
std::uint32_t elemWidth = std::max<std::uint64_t>(width >> mipLevel, 1);
|
||||
std::uint32_t elemPitch = std::max<std::uint64_t>(pitch >> mipLevel, 1);
|
||||
std::uint32_t elemHeight = std::max<std::uint64_t>(height >> mipLevel, 1);
|
||||
std::uint32_t elemDepth = std::max<std::uint64_t>(depth >> mipLevel, 1);
|
||||
|
||||
std::uint32_t linearPitch = elemPitch;
|
||||
std::uint32_t linearWidth = elemWidth;
|
||||
std::uint32_t linearHeight = elemHeight;
|
||||
std::uint32_t linearDepth = elemDepth;
|
||||
|
||||
if (isBlockCompressed) {
|
||||
switch (bitsPerFragment) {
|
||||
case 1:
|
||||
linearWidth = std::max<std::uint64_t>((linearWidth + 7) / 8, 1);
|
||||
linearPitch = std::max<std::uint64_t>((linearPitch + 7) / 8, 1);
|
||||
break;
|
||||
case 4:
|
||||
case 8:
|
||||
linearWidth = std::max<std::uint64_t>((linearWidth + 3) / 4, 1);
|
||||
linearPitch = std::max<std::uint64_t>((linearPitch + 3) / 4, 1);
|
||||
linearHeight = std::max<std::uint64_t>((linearHeight + 3) / 4, 1);
|
||||
break;
|
||||
case 16:
|
||||
std::abort();
|
||||
break;
|
||||
|
||||
default:
|
||||
std::abort();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (pow2pad) {
|
||||
linearPitch = std::bit_ceil(linearPitch);
|
||||
linearWidth = std::bit_ceil(linearWidth);
|
||||
linearHeight = std::bit_ceil(linearHeight);
|
||||
linearDepth = std::bit_ceil(linearDepth);
|
||||
}
|
||||
|
||||
if (mipLevel > 0 && pitch > 0) {
|
||||
linearPitch = linearWidth;
|
||||
}
|
||||
|
||||
std::uint32_t paddedPitch =
|
||||
(linearPitch + kMicroTileWidth - 1) & ~(kMicroTileWidth - 1);
|
||||
std::uint32_t paddedHeight =
|
||||
(linearHeight + kMicroTileHeight - 1) & ~(kMicroTileHeight - 1);
|
||||
std::uint32_t paddedDepth = linearDepth;
|
||||
|
||||
if (!isCubemap || (mipLevel > 0 && linearDepth > 1)) {
|
||||
if (isCubemap) {
|
||||
linearDepth = std::bit_ceil(linearDepth);
|
||||
}
|
||||
|
||||
paddedDepth = (linearDepth + thickness - 1) & ~(thickness - 1);
|
||||
}
|
||||
|
||||
std::uint32_t tempPitch = paddedPitch;
|
||||
std::uint64_t logicalSliceSizeBytes = std::uint64_t(tempPitch) *
|
||||
paddedHeight * bitsPerElement *
|
||||
numFragmentsPerPixel;
|
||||
logicalSliceSizeBytes = (logicalSliceSizeBytes + 7) / 8;
|
||||
|
||||
uint64_t physicalSliceSizeBytes = logicalSliceSizeBytes * thickness;
|
||||
while ((physicalSliceSizeBytes % kPipeInterleaveBytes) != 0) {
|
||||
tempPitch += kMicroTileWidth;
|
||||
logicalSliceSizeBytes = std::uint64_t(tempPitch) * paddedHeight *
|
||||
bitsPerElement * numFragmentsPerPixel;
|
||||
logicalSliceSizeBytes = (logicalSliceSizeBytes + 7) / 8;
|
||||
physicalSliceSizeBytes = logicalSliceSizeBytes * thickness;
|
||||
}
|
||||
|
||||
surfaceSize = logicalSliceSizeBytes * paddedDepth;
|
||||
auto linearSize =
|
||||
linearDepth *
|
||||
(linearPitch * linearHeight * bitsPerElement * numFragmentsPerPixel +
|
||||
7) /
|
||||
8;
|
||||
|
||||
result.setSubresourceInfo(mipLevel, {
|
||||
.dataWidth = linearPitch,
|
||||
.dataHeight = linearHeight,
|
||||
.dataDepth = linearDepth,
|
||||
.offset = surfaceOffset,
|
||||
.tiledSize = surfaceSize,
|
||||
.linearSize = linearSize,
|
||||
});
|
||||
|
||||
surfaceOffset += arraySliceCount * surfaceSize;
|
||||
}
|
||||
|
||||
result.totalSize = surfaceOffset;
|
||||
return result;
|
||||
}
|
||||
|
||||
static SurfaceInfo
|
||||
computeTexture1dInfo(ArrayMode arrayMode, gnm::TextureType type,
|
||||
gnm::DataFormat dfmt, std::uint32_t width,
|
||||
|
|
@ -370,7 +533,9 @@ SurfaceInfo amdgpu::computeSurfaceInfo(
|
|||
case kArrayMode2dTiledThickPrt:
|
||||
case kArrayMode3dTiledThinPrt:
|
||||
case kArrayMode3dTiledThickPrt:
|
||||
std::abort();
|
||||
return computeTexture2dInfo(tileMode.arrayMode(), type, dfmt, width, height,
|
||||
depth, pitch, baseArrayLayer, arrayCount,
|
||||
baseMipLevel, mipCount, pow2pad);
|
||||
}
|
||||
|
||||
std::abort();
|
||||
|
|
|
|||
|
|
@ -175,9 +175,9 @@ getTiledOffset2D(gnm::TextureType texType, bool isPow2Padded,
|
|||
|
||||
bool isCubemap = texType == gnm::TextureType::Cube;
|
||||
bool isVolume = texType == gnm::TextureType::Dim3D;
|
||||
auto m_bitsPerFragment = getBitsPerElement(dfmt);
|
||||
auto bitsPerFragment = getBitsPerElement(dfmt);
|
||||
|
||||
auto m_isBlockCompressed = getTexelsPerElement(dfmt) > 1;
|
||||
auto isBlockCompressed = getTexelsPerElement(dfmt) > 1;
|
||||
auto tileSwizzleMask = 0;
|
||||
auto numFragmentsPerPixel = 1 << numFragments;
|
||||
auto arrayMode = tileMode.arrayMode();
|
||||
|
|
@ -208,12 +208,12 @@ getTiledOffset2D(gnm::TextureType texType, bool isPow2Padded,
|
|||
break;
|
||||
}
|
||||
|
||||
auto bitsPerElement = m_bitsPerFragment;
|
||||
auto bitsPerElement = bitsPerFragment;
|
||||
auto paddedWidth = pitch;
|
||||
auto paddedHeight = height;
|
||||
|
||||
if (m_isBlockCompressed) {
|
||||
switch (m_bitsPerFragment) {
|
||||
if (isBlockCompressed) {
|
||||
switch (bitsPerFragment) {
|
||||
case 1:
|
||||
bitsPerElement *= 8;
|
||||
paddedWidth = std::max((paddedWidth + 7) / 8, 1);
|
||||
|
|
|
|||
|
|
@ -93,10 +93,13 @@ struct amdgpu::GpuTiler::Impl {
|
|||
uint32_t dataWidth;
|
||||
uint32_t dataHeight;
|
||||
uint32_t tileMode;
|
||||
uint32_t macroTileMode;
|
||||
uint32_t dfmt;
|
||||
uint32_t numFragments;
|
||||
uint32_t bitsPerElement;
|
||||
uint32_t tiledSurfaceSize;
|
||||
uint32_t linearSurfaceSize;
|
||||
uint32_t padding[2];
|
||||
};
|
||||
|
||||
Impl() {
|
||||
|
|
@ -119,7 +122,8 @@ struct amdgpu::GpuTiler::Impl {
|
|||
{
|
||||
VkDescriptorPoolSize poolSizes[]{{
|
||||
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
.descriptorCount = static_cast<std::uint32_t>(std::size(descriptorSets)) * 2,
|
||||
.descriptorCount =
|
||||
static_cast<std::uint32_t>(std::size(descriptorSets)) * 2,
|
||||
}};
|
||||
|
||||
VkDescriptorPoolCreateInfo info{
|
||||
|
|
@ -174,7 +178,7 @@ amdgpu::GpuTiler::~GpuTiler() = default;
|
|||
|
||||
void amdgpu::GpuTiler::detile(Scheduler &scheduler,
|
||||
const amdgpu::SurfaceInfo &info,
|
||||
amdgpu::TileMode tileMode,
|
||||
amdgpu::TileMode tileMode, gnm::DataFormat dfmt,
|
||||
std::uint64_t srcTiledAddress,
|
||||
std::uint64_t dstLinearAddress, int mipLevel,
|
||||
int baseArray, int arrayCount) {
|
||||
|
|
@ -192,6 +196,7 @@ void amdgpu::GpuTiler::detile(Scheduler &scheduler,
|
|||
config->dataWidth = subresource.dataWidth;
|
||||
config->dataHeight = subresource.dataHeight;
|
||||
config->tileMode = tileMode.raw;
|
||||
config->dfmt = dfmt;
|
||||
config->numFragments = info.numFragments;
|
||||
config->bitsPerElement = info.bitsPerElement;
|
||||
uint32_t groupCountZ = subresource.dataDepth;
|
||||
|
|
@ -231,8 +236,13 @@ void amdgpu::GpuTiler::detile(Scheduler &scheduler,
|
|||
case amdgpu::kArrayMode3dTiledThick:
|
||||
case amdgpu::kArrayMode3dTiledXThick:
|
||||
case amdgpu::kArrayMode3dTiledThickPrt:
|
||||
std::abort();
|
||||
vk::CmdBindShadersEXT(commandBuffer, 1, stages, &mImpl->detiler2d.shader);
|
||||
config->macroTileMode =
|
||||
getDefaultMacroTileModes()[computeMacroTileIndex(
|
||||
tileMode, info.bitsPerElement,
|
||||
1 << info.numFragments)]
|
||||
.raw;
|
||||
|
||||
vk::CmdBindShadersEXT(commandBuffer, 1, stages, &mImpl->detiler1d.shader);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -265,7 +275,7 @@ void amdgpu::GpuTiler::detile(Scheduler &scheduler,
|
|||
|
||||
void amdgpu::GpuTiler::tile(Scheduler &scheduler,
|
||||
const amdgpu::SurfaceInfo &info,
|
||||
amdgpu::TileMode tileMode,
|
||||
amdgpu::TileMode tileMode, gnm::DataFormat dfmt,
|
||||
std::uint64_t srcLinearAddress,
|
||||
std::uint64_t dstTiledAddress, int mipLevel,
|
||||
int baseArray, int arrayCount) {
|
||||
|
|
@ -283,6 +293,7 @@ void amdgpu::GpuTiler::tile(Scheduler &scheduler,
|
|||
config->dataWidth = subresource.dataWidth;
|
||||
config->dataHeight = subresource.dataHeight;
|
||||
config->tileMode = tileMode.raw;
|
||||
config->dfmt = dfmt;
|
||||
config->numFragments = info.numFragments;
|
||||
config->bitsPerElement = info.bitsPerElement;
|
||||
uint32_t groupCountZ = subresource.dataDepth;
|
||||
|
|
@ -321,8 +332,12 @@ void amdgpu::GpuTiler::tile(Scheduler &scheduler,
|
|||
case amdgpu::kArrayMode3dTiledThick:
|
||||
case amdgpu::kArrayMode3dTiledXThick:
|
||||
case amdgpu::kArrayMode3dTiledThickPrt:
|
||||
std::abort();
|
||||
vk::CmdBindShadersEXT(commandBuffer, 1, stages, &mImpl->tiler2d.shader);
|
||||
config->macroTileMode =
|
||||
getDefaultMacroTileModes()[computeMacroTileIndex(
|
||||
tileMode, info.bitsPerElement,
|
||||
1 << info.numFragments)]
|
||||
.raw;
|
||||
vk::CmdBindShadersEXT(commandBuffer, 1, stages, &mImpl->tiler1d.shader);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@
|
|||
#extension GL_EXT_shader_atomic_float2 : require
|
||||
#extension GL_EXT_nonuniform_qualifier: require
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
#extension GL_EXT_debug_printf : enable
|
||||
|
||||
#define FLT_MAX 3.402823466e+38
|
||||
#define FLT_MIN 1.175494351e-38
|
||||
|
|
@ -235,6 +236,8 @@ float32_t ps_input_vgpr(int32_t index, f32vec4 fragCoord, bool frontFace) {
|
|||
case kPsVGprInputPosFixed:
|
||||
return 0;
|
||||
}
|
||||
|
||||
// debugPrintfEXT("ps_input_vgpr: invalid index %d", index);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -385,8 +388,10 @@ uint32_t v_cndmask_b32(uint32_t x, uint32_t y, uint64_t mask) {
|
|||
// Returns the sum x + y.
float32_t v_add_f32(float32_t x, float32_t y) { return x + y; }
|
||||
// Returns the difference x - y.
float32_t v_sub_f32(float32_t x, float32_t y) { return x - y; }
|
||||
// Reversed subtract: returns y - x (first operand is subtracted from the second).
float32_t v_subrev_f32(float32_t x, float32_t y) { return y - x; }
|
||||
float32_t v_mac_legacy_f32(float32_t x, float32_t y, float32_t dst) {
|
||||
return x == 0 || y == 0 ? dst : fma(x, y, dst);
|
||||
// Legacy multiply-accumulate: adds x * y to dst in place, but leaves dst
// untouched when either factor compares equal to zero.
void v_mac_legacy_f32(inout float32_t dst, float32_t x, float32_t y) {
  if (x == 0 || y == 0) {
    return;
  }

  dst = fma(x, y, dst);
}
|
||||
float32_t v_mul_legacy_f32(float32_t x, float32_t y) {
|
||||
return x == 0 || y == 0 ? 0 : x * y;
|
||||
|
|
@ -425,7 +430,7 @@ uint32_t v_and_b32(uint32_t x, uint32_t y) { return x & y; }
|
|||
// Returns the bitwise OR of x and y.
uint32_t v_or_b32(uint32_t x, uint32_t y) { return x | y; }
|
||||
// Returns the bitwise XOR of x and y.
uint32_t v_xor_b32(uint32_t x, uint32_t y) { return x ^ y; }
|
||||
// Builds a bit-field mask: (x & 31) consecutive one bits, shifted left by
// (y & 31) bit positions.
uint32_t v_bfm_b32(uint32_t x, uint32_t y) { return ((1 << (x & 0x1f)) - 1) << (y & 0x1f); }
|
||||
float32_t v_mac_f32(float32_t x, float32_t y, float32_t dst) { return fma(x, y, dst); }
|
||||
// Multiply-accumulate in place: dst = fma(x, y, dst), i.e. x * y + dst.
void v_mac_f32(inout float32_t dst, float32_t x, float32_t y) { dst = fma(x, y, dst); }
|
||||
// Returns fma(x, k, y), i.e. x * k + y: the constant k is the multiplier.
float32_t v_madmk_f32(float32_t x, float32_t y, float32_t k) { return fma(x, k, y); }
|
||||
// Returns fma(x, y, k), i.e. x * y + k: the constant k is the addend.
float32_t v_madak_f32(float32_t x, float32_t y, float32_t k) { return fma(x, y, k); }
|
||||
// Returns the number of set bits in x.
uint32_t v_bcnt_u32_b32(uint32_t x) { return bitCount(x); }
|
||||
|
|
@ -2575,6 +2580,8 @@ void image_sample(inout f32vec4 vdata, f32vec3 vaddr, int32_t textureIndexHint,
|
|||
return;
|
||||
}
|
||||
|
||||
// debugPrintfEXT("image_sample: textureType: %u, coord: %v3f, result: %v4f, dmask: %u", textureType, vaddr, result, dmask);
|
||||
|
||||
int vdataIndex = 0;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if ((dmask & (1 << i)) != 0) {
|
||||
|
|
|
|||
|
|
@ -1422,12 +1422,6 @@ static void createInitialValues(GcnConverter &converter,
|
|||
|
||||
if (stage != gcn::Stage::Cs) {
|
||||
context.writeReg(loc, builder, gcn::RegId::Exec, 0, context.imm64(1));
|
||||
// context.writeReg(loc, builder, gcn::RegId::ThreadId, 0,
|
||||
// context.imm32(0));
|
||||
|
||||
replaceVariableWithConstant(
|
||||
context.getOrCreateRegisterVariable(gcn::RegId::ThreadId),
|
||||
context.imm32(0));
|
||||
}
|
||||
|
||||
if (stage == gcn::Stage::VsVs || stage == gcn::Stage::GsVs ||
|
||||
|
|
@ -1561,6 +1555,12 @@ gcn::convertToSpv(Context &context, ir::Region body,
|
|||
createInitialValues(converter, env, stage, result.info, body);
|
||||
instructionsToSpv(converter, importer, stage, env, semanticInfo, result.info,
|
||||
body);
|
||||
if (stage != gcn::Stage::Cs) {
|
||||
replaceVariableWithConstant(
|
||||
context.getOrCreateRegisterVariable(gcn::RegId::ThreadId),
|
||||
context.imm32(0));
|
||||
}
|
||||
|
||||
createEntryPoint(context, stage, std::move(body));
|
||||
|
||||
for (int userSgpr = std::countr_zero(context.requiredUserSgprs);
|
||||
|
|
|
|||
|
|
@ -127,8 +127,6 @@ readVop2Inst(GcnInstruction &inst, std::uint64_t &address,
|
|||
|
||||
if (op == ir::vop2::MADMK_F32 || op == ir::vop2::MADAK_F32) {
|
||||
inst.addOperand(createImmediateGcnOperand(address));
|
||||
} else if (op == ir::vop2::MAC_F32) {
|
||||
inst.addOperand(createVgprGcnOperand(vdst).withR());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -343,8 +341,6 @@ readVop3Inst(GcnInstruction &inst, std::uint64_t &address,
|
|||
.withNeg(((neg >> 2) & 1) != 0));
|
||||
} else if (op == ir::vop3::MADMK_F32 || op == ir::vop3::MADAK_F32) {
|
||||
inst.addOperand(createImmediateGcnOperand(address));
|
||||
} else if (op == ir::vop3::MAC_F32) {
|
||||
inst.addOperand(createSgprGcnOperand(address, vdst).withRW());
|
||||
}
|
||||
} else if (op >= 384 && op < ir::vop1::OpCount + 384) {
|
||||
// vop1
|
||||
|
|
@ -527,14 +523,14 @@ readMtbufInst(GcnInstruction &inst, std::uint64_t &address,
|
|||
inst.op = op;
|
||||
inst.addOperand(createVgprGcnOperand(vdata).withAccess(dataAccess));
|
||||
|
||||
if (idxen) {
|
||||
inst.addOperand(createVgprGcnOperand(vaddr).withR());
|
||||
if (offen) {
|
||||
inst.addOperand(createVgprGcnOperand(vaddr + (idxen ? 1 : 0)).withR());
|
||||
} else {
|
||||
inst.addOperand(GcnOperand::createConstant(0u));
|
||||
}
|
||||
|
||||
if (offen) {
|
||||
inst.addOperand(createVgprGcnOperand(vaddr + (idxen ? 1 : 0)).withR());
|
||||
if (idxen) {
|
||||
inst.addOperand(createVgprGcnOperand(vaddr).withR());
|
||||
} else {
|
||||
inst.addOperand(GcnOperand::createConstant(0u));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1081,6 +1081,23 @@ static ir::Value deserializeGcnRegion(
|
|||
auto instSem =
|
||||
semInfo.findSemantic(ir::getInstructionId(isaInst.kind, isaInst.op));
|
||||
|
||||
// Wraps the instruction emitted since instrBegin in a conditional region:
// an EXEC_TEST value is created and used to branch either into the
// instruction's block or straight to a merge label placed after it.
// NOTE(review): relies on builder, converter, loc, instrBegin and execTestSem
// captured by reference from the enclosing scope; the exact insertion-point
// semantics of createInsertBefore/createInsertAfter are not visible here.
auto createExecTest = [&] {
|
||||
// label that execution continues at after the guarded instruction
auto mergeBlock = builder.createSpvLabel(loc);
|
||||
// terminate the guarded code with a branch placed just before the merge label
gcn::Builder::createInsertBefore(converter, mergeBlock)
|
||||
.createSpvBranch(loc, mergeBlock);
|
||||
// label opening the guarded block, placed after instrBegin
auto instBlock = gcn::Builder::createInsertAfter(converter, instrBegin)
|
||||
.createSpvLabel(loc);
|
||||
// everything below is inserted in front of the guarded block's label
auto prependInstBuilder =
|
||||
gcn::Builder::createInsertBefore(converter, instBlock);
|
||||
auto exec = prependInstBuilder.createValue(
|
||||
loc, ir::amdgpu::EXEC_TEST,
|
||||
converter.getType(execTestSem->returnType));
|
||||
prependInstBuilder.createSpvSelectionMerge(
|
||||
loc, mergeBlock, ir::spv::SelectionControl::None);
|
||||
// take the guarded block when exec holds, otherwise skip to the merge label
prependInstBuilder.createSpvBranchConditional(loc, exec, instBlock,
|
||||
mergeBlock);
|
||||
};
|
||||
|
||||
if (instSem == nullptr) {
|
||||
if (isaInst == ir::sopp::BRANCH) {
|
||||
auto target =
|
||||
|
|
@ -1268,6 +1285,9 @@ static ir::Value deserializeGcnRegion(
|
|||
inst.addOperand(createOperandRead(loc, paramBuilder, uint32TV, op));
|
||||
}
|
||||
|
||||
if (isaInst == ir::exp::EXP) {
|
||||
createExecTest();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -1400,20 +1420,7 @@ static ir::Value deserializeGcnRegion(
|
|||
}
|
||||
|
||||
if (!hasDestination && injectExecTest) {
|
||||
auto mergeBlock = builder.createSpvLabel(loc);
|
||||
gcn::Builder::createInsertBefore(converter, mergeBlock)
|
||||
.createSpvBranch(loc, mergeBlock);
|
||||
auto instBlock = gcn::Builder::createInsertAfter(converter, instrBegin)
|
||||
.createSpvLabel(loc);
|
||||
auto prependInstBuilder =
|
||||
gcn::Builder::createInsertBefore(converter, instBlock);
|
||||
auto exec = prependInstBuilder.createValue(
|
||||
loc, ir::amdgpu::EXEC_TEST,
|
||||
converter.getType(execTestSem->returnType));
|
||||
prependInstBuilder.createSpvSelectionMerge(
|
||||
loc, mergeBlock, ir::spv::SelectionControl::None);
|
||||
prependInstBuilder.createSpvBranchConditional(loc, exec, instBlock,
|
||||
mergeBlock);
|
||||
createExecTest();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -252,5 +252,56 @@ constexpr ZFormat getZFormat(DataFormat dfmt) {
|
|||
constexpr StencilFormat getStencilFormat(DataFormat dfmt) {
|
||||
return dfmt == kDataFormat8 ? kStencil8 : kStencilInvalid;
|
||||
}
|
||||
} // namespace gnm
|
||||
|
||||
// Maps a depth buffer format to the data format of its texels;
// unrecognised or invalid formats yield kDataFormatInvalid.
constexpr DataFormat getDataFormat(ZFormat format) {
  if (format == kZFormat32Float) {
    return kDataFormat32;
  }

  if (format == kZFormat16) {
    return kDataFormat16;
  }

  return kDataFormatInvalid;
}
|
||||
|
||||
// Maps a depth buffer format to its numeric format: float for 32-bit float
// depth, uint for 16-bit depth, and UNorm as the fallback for anything else.
constexpr NumericFormat getNumericFormat(ZFormat format) {
  if (format == kZFormat32Float) {
    return kNumericFormatFloat;
  }

  if (format == kZFormat16) {
    return kNumericFormatUInt;
  }

  return kNumericFormatUNorm;
}
|
||||
|
||||
// Maps a stencil buffer format to the data format of its texels;
// anything other than 8-bit stencil yields kDataFormatInvalid.
constexpr DataFormat getDataFormat(StencilFormat format) {
  if (format == kStencil8) {
    return kDataFormat8;
  }

  return kDataFormatInvalid;
}
|
||||
|
||||
// Maps a stencil buffer format to its numeric format: sint for 8-bit
// stencil, and UNorm as the fallback for anything else.
constexpr NumericFormat getNumericFormat(StencilFormat format) {
  if (format == kStencil8) {
    return kNumericFormatSInt;
  }

  return kNumericFormatUNorm;
}
|
||||
} // namespace gnm
|
||||
|
|
|
|||
Loading…
Reference in a new issue