mirror of
https://github.com/RPCSX/rpcsx.git
synced 2025-12-06 07:12:14 +01:00
rpcsx-gpu: fix linear tiler offset
implement depth clear; implemented shader resource merge; fix smrd offset; fix discard export
This commit is contained in:
parent
7bea1e354f
commit
fb64f8b4f8
|
|
@ -99,8 +99,8 @@ void Cache::ShaderResources::loadResources(
|
|||
bufferMemoryTable.map(*pointerBase,
|
||||
*pointerBase + *pointerOffset + pointer.size,
|
||||
Access::Read);
|
||||
resourceSlotToAddress.push_back(
|
||||
{slotOffset + pointer.resourceSlot, *pointerBase});
|
||||
resourceSlotToAddress.emplace_back(slotOffset + pointer.resourceSlot,
|
||||
*pointerBase);
|
||||
}
|
||||
|
||||
for (auto &bufferRes : res.buffers) {
|
||||
|
|
@ -124,10 +124,16 @@ void Cache::ShaderResources::loadResources(
|
|||
std::memcpy(reinterpret_cast<std::uint32_t *>(&buffer) + 3, &*word3,
|
||||
sizeof(std::uint32_t));
|
||||
|
||||
bufferMemoryTable.map(buffer.address(), buffer.address() + buffer.size(),
|
||||
bufferRes.access);
|
||||
resourceSlotToAddress.push_back(
|
||||
{slotOffset + bufferRes.resourceSlot, buffer.address()});
|
||||
if (auto it = bufferMemoryTable.queryArea(buffer.address());
|
||||
it != bufferMemoryTable.end() &&
|
||||
it.beginAddress() == buffer.address() && it.size() == buffer.size()) {
|
||||
it.get() |= bufferRes.access;
|
||||
} else {
|
||||
bufferMemoryTable.map(buffer.address(), buffer.address() + buffer.size(),
|
||||
bufferRes.access);
|
||||
}
|
||||
resourceSlotToAddress.emplace_back(slotOffset + bufferRes.resourceSlot,
|
||||
buffer.address());
|
||||
}
|
||||
|
||||
for (auto &texture : res.textures) {
|
||||
|
|
@ -569,7 +575,6 @@ struct CachedImage : Cache::Entry {
|
|||
auto &regionInfo = info.getSubresourceInfo(mipLevel);
|
||||
regions.push_back({
|
||||
.bufferOffset = regionInfo.linearOffset,
|
||||
.bufferRowLength = regionInfo.linearPitch,
|
||||
.imageSubresource =
|
||||
{
|
||||
.aspectMask = toAspect(kind),
|
||||
|
|
@ -579,9 +584,9 @@ struct CachedImage : Cache::Entry {
|
|||
},
|
||||
.imageExtent =
|
||||
{
|
||||
.width = regionInfo.linearWidth,
|
||||
.height = regionInfo.linearHeight,
|
||||
.depth = regionInfo.linearDepth,
|
||||
.width = std::max(image.getWidth() >> mipLevel, 1u),
|
||||
.height = std::max(image.getHeight() >> mipLevel, 1u),
|
||||
.depth = std::max(image.getDepth() >> mipLevel, 1u),
|
||||
},
|
||||
});
|
||||
}
|
||||
|
|
@ -1183,7 +1188,6 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
|
|||
|
||||
regions.push_back({
|
||||
.bufferOffset = info.linearOffset,
|
||||
.bufferRowLength = info.linearPitch,
|
||||
.imageSubresource =
|
||||
{
|
||||
.aspectMask = toAspect(key.kind),
|
||||
|
|
@ -1193,9 +1197,9 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
|
|||
},
|
||||
.imageExtent =
|
||||
{
|
||||
.width = info.linearWidth,
|
||||
.height = info.linearHeight,
|
||||
.depth = info.linearDepth,
|
||||
.width = std::max(key.extent.width >> mipLevel, 1u),
|
||||
.height = std::max(key.extent.height >> mipLevel, 1u),
|
||||
.depth = std::max(key.extent.depth >> mipLevel, 1u),
|
||||
},
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,14 +3,11 @@
|
|||
#include "Pipe.hpp"
|
||||
#include "amdgpu/tiler.hpp"
|
||||
#include "gnm/constants.hpp"
|
||||
#include "rx/die.hpp"
|
||||
#include "shader/Access.hpp"
|
||||
#include "shader/Evaluator.hpp"
|
||||
#include "shader/GcnConverter.hpp"
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <ostream>
|
||||
#include <print>
|
||||
#include <rx/ConcurrentBitPool.hpp>
|
||||
#include <rx/MemoryTable.hpp>
|
||||
|
|
@ -27,7 +24,7 @@ struct ShaderKey {
|
|||
shader::gcn::Environment env;
|
||||
};
|
||||
|
||||
enum class ImageKind { Color, Depth, Stencil };
|
||||
enum class ImageKind : std::uint8_t { Color, Depth, Stencil };
|
||||
|
||||
struct ImageKey {
|
||||
std::uint64_t readAddress;
|
||||
|
|
@ -265,7 +262,7 @@ public:
|
|||
}
|
||||
~Tag() { release(); }
|
||||
|
||||
void swap(Tag &other) {
|
||||
void swap(Tag &other) noexcept {
|
||||
std::swap(static_cast<TagData &>(*this), static_cast<TagData &>(other));
|
||||
}
|
||||
|
||||
|
|
@ -364,7 +361,7 @@ public:
|
|||
std::span<const VkViewport> viewPorts);
|
||||
void release();
|
||||
|
||||
void swap(GraphicsTag &other) {
|
||||
void swap(GraphicsTag &other) noexcept {
|
||||
Tag::swap(other);
|
||||
std::swap(mAcquiredGraphicsDescriptorSet,
|
||||
other.mAcquiredGraphicsDescriptorSet);
|
||||
|
|
@ -396,7 +393,7 @@ public:
|
|||
|
||||
void release();
|
||||
|
||||
void swap(ComputeTag &other) {
|
||||
void swap(ComputeTag &other) noexcept {
|
||||
Tag::swap(other);
|
||||
std::swap(mAcquiredComputeDescriptorSet,
|
||||
other.mAcquiredComputeDescriptorSet);
|
||||
|
|
|
|||
|
|
@ -139,10 +139,6 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
|
|||
std::uint32_t vertexCount, std::uint32_t firstInstance,
|
||||
std::uint32_t instanceCount, std::uint64_t indiciesAddress,
|
||||
std::uint32_t indexCount) {
|
||||
if (pipe.uConfig.vgtPrimitiveType == gnm::PrimitiveType::None) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (pipe.context.cbColorControl.mode == gnm::CbMode::Disable) {
|
||||
return;
|
||||
}
|
||||
|
|
@ -153,10 +149,6 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
|
|||
return;
|
||||
}
|
||||
|
||||
if (pipe.context.cbTargetMask.raw == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto cacheTag = pipe.device->getGraphicsTag(vmId, pipe.scheduler);
|
||||
auto targetMask = pipe.context.cbTargetMask.raw;
|
||||
|
||||
|
|
@ -200,50 +192,6 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
|
|||
// FIXME
|
||||
stencilAccess = Access::None;
|
||||
|
||||
if (depthAccess != Access::None) {
|
||||
auto viewPortScissor = pipe.context.paScScreenScissor;
|
||||
auto viewPortRect = gnm::toVkRect2D(viewPortScissor);
|
||||
|
||||
auto imageView = cacheTag.getImageView(
|
||||
{
|
||||
.readAddress = static_cast<std::uint64_t>(pipe.context.dbZReadBase)
|
||||
<< 8,
|
||||
.writeAddress =
|
||||
static_cast<std::uint64_t>(pipe.context.dbZWriteBase) << 8,
|
||||
.type = gnm::TextureType::Dim2D,
|
||||
.dfmt = gnm::getDataFormat(pipe.context.dbZInfo.format),
|
||||
.nfmt = gnm::getNumericFormat(pipe.context.dbZInfo.format),
|
||||
.extent =
|
||||
{
|
||||
.width = viewPortRect.extent.width,
|
||||
.height = viewPortRect.extent.height,
|
||||
.depth = 1,
|
||||
},
|
||||
.pitch = viewPortRect.extent.width,
|
||||
.mipCount = 1,
|
||||
.arrayLayerCount = 1,
|
||||
.kind = ImageKind::Depth,
|
||||
},
|
||||
depthAccess);
|
||||
|
||||
depthAttachment = {
|
||||
.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
|
||||
.imageView = imageView.handle,
|
||||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
|
||||
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
|
||||
};
|
||||
|
||||
if ((depthAccess & Access::Read) == Access::None) {
|
||||
depthAttachment.clearValue.depthStencil.depth = pipe.context.dbDepthClear;
|
||||
depthAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
|
||||
}
|
||||
|
||||
if ((depthAccess & Access::Write) == Access::None) {
|
||||
depthAttachment.storeOp = VK_ATTACHMENT_STORE_OP_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &cbColor : pipe.context.cbColor) {
|
||||
if (targetMask == 0) {
|
||||
break;
|
||||
|
|
@ -304,6 +252,26 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
|
|||
access |= Access::Write;
|
||||
}
|
||||
|
||||
if (pipe.uConfig.vgtPrimitiveType == gnm::PrimitiveType::None) {
|
||||
if (cbColor.info.fastClear) {
|
||||
auto image = cacheTag.getImage(renderTargetInfo, access);
|
||||
VkClearColorValue clearValue = {
|
||||
.uint32 =
|
||||
{
|
||||
cbColor.clearWord0,
|
||||
cbColor.clearWord1,
|
||||
cbColor.clearWord2,
|
||||
},
|
||||
};
|
||||
|
||||
vkCmdClearColorImage(cacheTag.getScheduler().getCommandBuffer(),
|
||||
image.handle, VK_IMAGE_LAYOUT_GENERAL, &clearValue,
|
||||
1, &image.subresource);
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
auto imageView = cacheTag.getImageView(renderTargetInfo, access);
|
||||
|
||||
colorAttachments[renderTargets] = {
|
||||
|
|
@ -359,8 +327,94 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
|
|||
}
|
||||
|
||||
if (renderTargets == 0) {
|
||||
if ((depthAccess & Access::Write) != Access::None) {
|
||||
auto screenRect = gnm::toVkRect2D(pipe.context.paScScreenScissor);
|
||||
|
||||
auto image = cacheTag.getImage(
|
||||
{
|
||||
.readAddress =
|
||||
static_cast<std::uint64_t>(pipe.context.dbZReadBase) << 8,
|
||||
.writeAddress =
|
||||
static_cast<std::uint64_t>(pipe.context.dbZWriteBase) << 8,
|
||||
.type = gnm::TextureType::Dim2D,
|
||||
.dfmt = gnm::getDataFormat(pipe.context.dbZInfo.format),
|
||||
.nfmt = gnm::getNumericFormat(pipe.context.dbZInfo.format),
|
||||
.extent =
|
||||
{
|
||||
.width = screenRect.extent.width,
|
||||
.height = screenRect.extent.height,
|
||||
.depth = 1,
|
||||
},
|
||||
.pitch = screenRect.extent.width,
|
||||
.mipCount = 1,
|
||||
.arrayLayerCount = 1,
|
||||
.kind = ImageKind::Depth,
|
||||
},
|
||||
Access::Write);
|
||||
|
||||
VkClearDepthStencilValue depthStencil = {
|
||||
.depth = pipe.context.dbDepthClear,
|
||||
};
|
||||
|
||||
vkCmdClearDepthStencilImage(cacheTag.getScheduler().getCommandBuffer(),
|
||||
image.handle, VK_IMAGE_LAYOUT_GENERAL,
|
||||
&depthStencil, 1, &image.subresource);
|
||||
pipe.scheduler.submit();
|
||||
pipe.scheduler.wait();
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (pipe.uConfig.vgtPrimitiveType == gnm::PrimitiveType::None) {
|
||||
pipe.scheduler.submit();
|
||||
pipe.scheduler.wait();
|
||||
return;
|
||||
}
|
||||
|
||||
if (depthAccess != Access::None) {
|
||||
auto screenRect = gnm::toVkRect2D(pipe.context.paScScreenScissor);
|
||||
|
||||
auto imageView = cacheTag.getImageView(
|
||||
{
|
||||
.readAddress = static_cast<std::uint64_t>(pipe.context.dbZReadBase)
|
||||
<< 8,
|
||||
.writeAddress =
|
||||
static_cast<std::uint64_t>(pipe.context.dbZWriteBase) << 8,
|
||||
.type = gnm::TextureType::Dim2D,
|
||||
.dfmt = gnm::getDataFormat(pipe.context.dbZInfo.format),
|
||||
.nfmt = gnm::getNumericFormat(pipe.context.dbZInfo.format),
|
||||
.extent =
|
||||
{
|
||||
.width = screenRect.extent.width,
|
||||
.height = screenRect.extent.height,
|
||||
.depth = 1,
|
||||
},
|
||||
.pitch = screenRect.extent.width,
|
||||
.mipCount = 1,
|
||||
.arrayLayerCount = 1,
|
||||
.kind = ImageKind::Depth,
|
||||
},
|
||||
depthAccess);
|
||||
|
||||
depthAttachment = {
|
||||
.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
|
||||
.imageView = imageView.handle,
|
||||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
|
||||
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
|
||||
};
|
||||
|
||||
if ((depthAccess & Access::Read) == Access::None) {
|
||||
depthAttachment.clearValue.depthStencil.depth = pipe.context.dbDepthClear;
|
||||
depthAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
|
||||
}
|
||||
|
||||
if ((depthAccess & Access::Write) == Access::None) {
|
||||
depthAttachment.storeOp = VK_ATTACHMENT_STORE_OP_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
if (indiciesAddress == 0) {
|
||||
indexCount = vertexCount;
|
||||
}
|
||||
|
|
@ -483,11 +537,11 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
|
|||
VkCullModeFlags cullMode = VK_CULL_MODE_NONE;
|
||||
|
||||
if (pipe.uConfig.vgtPrimitiveType != gnm::PrimitiveType::RectList) {
|
||||
if (pipe.context.paSuScModeCntl.cullBack) {
|
||||
cullMode |= VK_CULL_MODE_BACK_BIT;
|
||||
}
|
||||
if (pipe.context.paSuScModeCntl.cullFront) {
|
||||
cullMode |= VK_CULL_MODE_FRONT_BIT;
|
||||
if (pipe.context.paSuScModeCntl.cullBack) {
|
||||
cullMode |= VK_CULL_MODE_BACK_BIT;
|
||||
}
|
||||
if (pipe.context.paSuScModeCntl.cullFront) {
|
||||
cullMode |= VK_CULL_MODE_FRONT_BIT;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -521,12 +575,11 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
|
|||
}
|
||||
|
||||
void amdgpu::dispatch(Cache &cache, Scheduler &sched,
|
||||
Registers::ComputeConfig &computeConfig,
|
||||
std::uint32_t groupCountX, std::uint32_t groupCountY,
|
||||
std::uint32_t groupCountZ) {
|
||||
Registers::ComputeConfig &pgm, std::uint32_t groupCountX,
|
||||
std::uint32_t groupCountY, std::uint32_t groupCountZ) {
|
||||
auto tag = cache.createComputeTag(sched);
|
||||
auto descriptorSet = tag.getDescriptorSet();
|
||||
auto shader = tag.getShader(computeConfig);
|
||||
auto shader = tag.getShader(pgm);
|
||||
auto pipelineLayout = tag.getComputePipelineLayout();
|
||||
tag.buildDescriptors(descriptorSet);
|
||||
|
||||
|
|
|
|||
|
|
@ -555,7 +555,7 @@ static SurfaceInfo computeTextureLinearInfo(
|
|||
}
|
||||
|
||||
surfaceOffset += arraySliceCount * surfaceSize;
|
||||
surfaceOffset += arraySliceCount * linearSize;
|
||||
linearOffset += arraySliceCount * linearSize;
|
||||
}
|
||||
|
||||
result.totalTiledSize = surfaceOffset;
|
||||
|
|
|
|||
|
|
@ -2101,7 +2101,7 @@ void tbuffer_store_format_xyzw(u32vec4 vdata, uint32_t vOFFSET, uint32_t vINDEX,
|
|||
#define S_LOAD_DWORD(dest, memoryLocationHint, sbase, offset, N) \
|
||||
int32_t _offset = 0; \
|
||||
uint64_t deviceAreaSize = 0; \
|
||||
uint64_t deviceAddress = findMemoryAddress(sbase + offset, SIZEOF(uint32_t) * N, memoryLocationHint, deviceAreaSize); \
|
||||
uint64_t deviceAddress = findMemoryAddress((sbase & ~uint64_t(3)) + (offset & ~3), SIZEOF(uint32_t) * N, memoryLocationHint, deviceAreaSize); \
|
||||
if (deviceAddress == kInvalidAddress || deviceAreaSize < SIZEOF(uint32_t) * N) { \
|
||||
for (int i = 0; i < (N); ++i) { \
|
||||
dest[i] = 0; \
|
||||
|
|
|
|||
|
|
@ -133,7 +133,7 @@ struct ResourcesBuilder {
|
|||
std::printf("failed to resolve function call to %s\n",
|
||||
ns->getNameOf(call.getOperand(1).getAsValue()).c_str());
|
||||
|
||||
for (auto op : call.getOperands().subspan(2)) {
|
||||
for (auto &op : call.getOperands().subspan(2)) {
|
||||
std::cerr << "arg: ";
|
||||
op.print(std::cerr, *ns);
|
||||
auto argValue = op.getAsValue();
|
||||
|
|
@ -728,6 +728,20 @@ static void expToSpv(GcnConverter &converter, gcn::Stage stage,
|
|||
|
||||
auto channelType = context.getTypeFloat32();
|
||||
|
||||
if (swizzle == 0 && done) {
|
||||
auto termBuilder = gcn::Builder::createAppend(
|
||||
context, context.layout.getOrCreateFunctions(context));
|
||||
auto terminateFn = termBuilder.createSpvFunction(
|
||||
loc, context.getTypeVoid(), ir::spv::FunctionControl::None,
|
||||
context.getTypeFunction(context.getTypeVoid(), {}));
|
||||
termBuilder.createSpvLabel(loc);
|
||||
termBuilder.createSpvKill(loc);
|
||||
termBuilder.createSpvFunctionEnd(loc);
|
||||
|
||||
builder.createSpvFunctionCall(loc, context.getTypeVoid(), terminateFn,
|
||||
{});
|
||||
}
|
||||
|
||||
for (int channel = 0; channel < 4; ++channel) {
|
||||
if (~swizzle & (1 << channel)) {
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -207,8 +207,7 @@ readSmrdInst(GcnInstruction &inst, std::uint64_t &address,
|
|||
|
||||
if (op != ir::smrd::MEMTIME) {
|
||||
auto baseOperand = createSgprGcnOperand(address, sbase);
|
||||
auto offsetOperand = imm ? GcnOperand::createConstant(
|
||||
std::uint32_t(std::int8_t(offset << 2)))
|
||||
auto offsetOperand = imm ? GcnOperand::createConstant(offset << 2)
|
||||
: createSgprGcnOperand(address, offset).withR();
|
||||
|
||||
if (isBuffer) {
|
||||
|
|
|
|||
|
|
@ -1283,7 +1283,7 @@ static ir::Value deserializeGcnRegion(
|
|||
inst.addOperand(createOperandRead(loc, paramBuilder, uint32TV, op));
|
||||
}
|
||||
|
||||
if (isaInst == ir::exp::EXP) {
|
||||
if (isaInst == ir::exp::EXP && isaInst.getOperand(1).value != 0) {
|
||||
createExecTest();
|
||||
}
|
||||
continue;
|
||||
|
|
|
|||
Loading…
Reference in a new issue