rpcsx-gpu: fix drawIndexOffset2

This commit is contained in:
DH 2024-10-06 04:06:44 +03:00
parent 930cf2a86b
commit e4866cd2eb
8 changed files with 71 additions and 42 deletions

View file

@ -488,6 +488,7 @@ struct CachedBuffer : Cache::Entry {
struct CachedIndexBuffer : Cache::Entry {
vk::Buffer buffer;
std::uint64_t offset;
std::size_t size;
gnm::IndexType indexType;
gnm::PrimitiveType primType;
@ -966,6 +967,7 @@ void Cache::Tag::buildDescriptors(VkDescriptorSet descriptorSet) {
}
Cache::IndexBuffer Cache::Tag::getIndexBuffer(std::uint64_t address,
std::uint32_t indexOffset,
std::uint32_t indexCount,
gnm::PrimitiveType primType,
gnm::IndexType indexType) {
@ -980,14 +982,16 @@ Cache::IndexBuffer Cache::Tag::getIndexBuffer(std::uint64_t address,
return {
.handle = VK_NULL_HANDLE,
.offset = 0,
.offset = indexOffset,
.indexCount = indexCount,
.primType = primType,
.indexType = indexType,
};
}
auto indexBuffer = getBuffer(address, size, Access::Read);
auto indexBuffer = getBuffer(
address + static_cast<std::uint64_t>(indexOffset) * origIndexSize, size,
Access::Read);
if (!isPrimRequiresConversion(primType)) {
return {
@ -1010,7 +1014,7 @@ Cache::IndexBuffer Cache::Tag::getIndexBuffer(std::uint64_t address,
return {
.handle = indexBuffer->buffer.getHandle(),
.offset = 0,
.offset = indexBuffer->offset,
.indexCount = indexCount,
.primType = indexBuffer->primType,
.indexType = indexBuffer->indexType,
@ -1059,6 +1063,7 @@ Cache::IndexBuffer Cache::Tag::getIndexBuffer(std::uint64_t address,
cached->baseAddress = address;
cached->acquiredAccess = Access::Read;
cached->buffer = std::move(convertedIndexBuffer);
cached->offset = indexBuffer.offset;
cached->size = size;
cached->tagId = indexBuffer.tagId;
cached->primType = primType;
@ -1234,11 +1239,15 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
transitionImageLayout(mScheduler->getCommandBuffer(), image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
VK_IMAGE_LAYOUT_GENERAL, subresourceRange);
} else {
transitionImageLayout(mScheduler->getCommandBuffer(), image,
VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
subresourceRange);
}
auto cached = std::make_shared<CachedImage>();
cached->image = std::move(image);
cached->info = std::move(surfaceInfo);
cached->info = surfaceInfo;
cached->baseAddress = (access & Access::Write) != Access::None
? key.writeAddress
: key.readAddress;
@ -1420,24 +1429,22 @@ Cache::GraphicsTag::getPixelShader(const SpiShaderPgm &pgm,
psVgprInput[psVgprInputs++] = gcn::PsVGprInput::PosFixed;
}
return getShader(gcn::Stage::Ps, pgm, context, {}, viewPorts,
return getShader(gcn::Stage::Ps, pgm, context, 0, {}, viewPorts,
{psVgprInput, psVgprInputs});
}
Cache::Shader
Cache::GraphicsTag::getVertexShader(gcn::Stage stage, const SpiShaderPgm &pgm,
const Registers::Context &context,
gnm::PrimitiveType vsPrimType,
std::span<const VkViewport> viewPorts) {
return getShader(stage, pgm, context, vsPrimType, viewPorts, {});
// Resolves the shader for a vertex-processing stage by delegating to
// getShader(). indexOffset and vsPrimType are passed through unchanged.
Cache::Shader Cache::GraphicsTag::getVertexShader(
    gcn::Stage stage, const SpiShaderPgm &pgm,
    const Registers::Context &context, std::uint32_t indexOffset,
    gnm::PrimitiveType vsPrimType, std::span<const VkViewport> viewPorts) {
  // This entry point supplies no pixel-shader VGPR inputs, so an empty span
  // is forwarded in that position.
  const std::span<const gcn::PsVGprInput> noPsVgprInputs{};
  return getShader(stage, pgm, context, indexOffset, vsPrimType, viewPorts,
                   noPsVgprInputs);
}
Cache::Shader
Cache::GraphicsTag::getShader(gcn::Stage stage, const SpiShaderPgm &pgm,
const Registers::Context &context,
gnm::PrimitiveType vsPrimType,
std::span<const VkViewport> viewPorts,
std::span<const gcn::PsVGprInput> psVgprInput) {
Cache::Shader Cache::GraphicsTag::getShader(
gcn::Stage stage, const SpiShaderPgm &pgm,
const Registers::Context &context, std::uint32_t indexOffset,
gnm::PrimitiveType vsPrimType, std::span<const VkViewport> viewPorts,
std::span<const gcn::PsVGprInput> psVgprInput) {
auto descriptorSets = getDescriptorSets();
gcn::Environment env{
.vgprCount = pgm.rsrc1.getVGprCount(),
@ -1522,6 +1529,10 @@ Cache::GraphicsTag::getShader(gcn::Stage stage, const SpiShaderPgm &pgm,
configPtr[index] = static_cast<std::uint32_t>(vsPrimType);
break;
case gcn::ConfigType::VsIndexOffset:
configPtr[index] = static_cast<std::uint32_t>(indexOffset);
break;
case gcn::ConfigType::ResourceSlot:
mStorage->memoryTableConfigSlots.push_back({
.bufferIndex =

View file

@ -284,7 +284,8 @@ public:
Sampler getSampler(const SamplerKey &key);
Buffer getBuffer(std::uint64_t address, std::uint64_t size, Access access);
IndexBuffer getIndexBuffer(std::uint64_t address, std::uint32_t indexCount,
// Returns the index buffer to use for a draw.
//   address     - base address of the index data.
//   indexOffset - position of the first index to read. The definition
//                 multiplies it by the per-index size before fetching, so it
//                 is counted in indices rather than bytes. The name matches
//                 the out-of-class definition.
//   indexCount  - number of indices requested by the draw.
//   primType    - primitive type of the draw; the definition checks it to
//                 decide whether the indices need conversion.
//   indexType   - index element type of the source data.
IndexBuffer getIndexBuffer(std::uint64_t address, std::uint32_t indexOffset,
std::uint32_t indexCount,
gnm::PrimitiveType primType,
gnm::IndexType indexType);
Image getImage(const ImageKey &key, Access access);
@ -347,7 +348,7 @@ public:
Shader getShader(shader::gcn::Stage stage, const SpiShaderPgm &pgm,
const Registers::Context &context,
gnm::PrimitiveType vsPrimType,
std::uint32_t indexOffset, gnm::PrimitiveType vsPrimType,
std::span<const VkViewport> viewPorts,
std::span<const shader::gcn::PsVGprInput> psVgprInput);
@ -357,6 +358,7 @@ public:
// Returns the shader for the given vertex-processing stage. The definition
// forwards every argument to getShader() with an empty psVgprInput span.
// indexOffset is the value getShader() stores into the shader's
// VsIndexOffset config entry.
Shader getVertexShader(shader::gcn::Stage stage, const SpiShaderPgm &pgm,
const Registers::Context &context,
std::uint32_t indexOffset,
gnm::PrimitiveType vsPrimType,
std::span<const VkViewport> viewPorts);
void release();

View file

@ -545,7 +545,7 @@ bool GraphicsPipe::drawIndirect(Queue &queue) {
std::uint32_t startInstanceLocation = buffer[3];
draw(*this, queue.vmId, startVertexLocation, vertexCountPerInstance,
startInstanceLocation, instanceCount, 0, 0);
startInstanceLocation, instanceCount, 0, 0, 0);
return true;
}
bool GraphicsPipe::drawIndexIndirect(Queue &queue) {
@ -565,7 +565,7 @@ bool GraphicsPipe::drawIndexIndirect(Queue &queue) {
std::uint32_t startInstanceLocation = buffer[4];
draw(*this, queue.vmId, baseVertexLocation, indexCountPerInstance,
startInstanceLocation, instanceCount, vgtIndexBase + startIndexLocation,
startInstanceLocation, instanceCount, vgtIndexBase, startIndexLocation,
indexCountPerInstance);
return true;
}
@ -586,7 +586,7 @@ bool GraphicsPipe::drawIndex2(Queue &queue) {
uConfig.vgtNumIndices = indexCount;
draw(*this, queue.vmId, 0, indexCount, 0, uConfig.vgtNumInstances,
vgtIndexBase + indexOffset, maxSize);
vgtIndexBase + indexOffset, 0, maxSize);
return true;
}
bool GraphicsPipe::indexType(Queue &queue) {
@ -600,7 +600,7 @@ bool GraphicsPipe::drawIndexAuto(Queue &queue) {
uConfig.vgtNumIndices = indexCount;
context.vgtDrawInitiator = drawInitiator;
draw(*this, queue.vmId, 0, indexCount, 0, uConfig.vgtNumInstances, 0, 0);
draw(*this, queue.vmId, 0, indexCount, 0, uConfig.vgtNumInstances, 0, 0, 0);
return true;
}
bool GraphicsPipe::numInstances(Queue &queue) {
@ -620,8 +620,8 @@ bool GraphicsPipe::drawIndexMultiAuto(Queue &queue) {
uConfig.vgtPrimitiveType = static_cast<gnm::PrimitiveType>(primType);
uConfig.vgtNumIndices = indexCount;
draw(*this, queue.vmId, 0, indexCount, 0, uConfig.vgtNumInstances,
vgtIndexBase + indexOffset, primCount);
draw(*this, queue.vmId, 0, primCount, 0, uConfig.vgtNumInstances,
vgtIndexBase, indexOffset, indexCount);
return true;
}
bool GraphicsPipe::drawIndexOffset2(Queue &queue) {
@ -632,7 +632,7 @@ bool GraphicsPipe::drawIndexOffset2(Queue &queue) {
context.vgtDrawInitiator = drawInitiator;
draw(*this, queue.vmId, 0, indexCount, 0, uConfig.vgtNumInstances,
vgtIndexBase + indexOffset, maxSize);
vgtIndexBase, indexOffset, maxSize);
return true;
}
bool GraphicsPipe::writeData(Queue &queue) {
@ -1033,7 +1033,12 @@ bool GraphicsPipe::setShReg(Queue &queue) {
std::memcpy(reinterpret_cast<std::uint32_t *>(&sh) + offset, data,
sizeof(std::uint32_t) * len);
// for (std::size_t i = 0; i < len; ++i) {
// std::fprintf(
// stderr, "writing to %s value %x\n",
// gnm::mmio::registerName(decltype(sh)::kMmioOffset + offset + i),
// data[i]);
// }
return true;
}
@ -1064,7 +1069,12 @@ bool GraphicsPipe::setUConfigReg(Queue &queue) {
std::memcpy(reinterpret_cast<std::uint32_t *>(&uConfig) + offset, data,
sizeof(std::uint32_t) * len);
// for (std::size_t i = 0; i < len; ++i) {
// std::fprintf(
// stderr, "writing to %s value %x\n",
// gnm::mmio::registerName(decltype(uConfig)::kMmioOffset + offset + i),
// data[i]);
// }
return true;
}
@ -1097,8 +1107,8 @@ bool GraphicsPipe::setContextReg(Queue &queue) {
sizeof(std::uint32_t) * len);
// for (std::size_t i = 0; i < len; ++i) {
// std::fprintf(stderr,
// "writing to %s value %x\n",
// std::fprintf(
// stderr, "writing to %s value %x\n",
// gnm::mmio::registerName(decltype(context)::kMmioOffset + offset + i),
// data[i]);
// }

View file

@ -138,7 +138,7 @@ static VkPrimitiveTopology toVkPrimitiveType(gnm::PrimitiveType type) {
void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
std::uint32_t vertexCount, std::uint32_t firstInstance,
std::uint32_t instanceCount, std::uint64_t indiciesAddress,
std::uint32_t indexCount) {
std::uint32_t indexOffset, std::uint32_t indexCount) {
if (pipe.context.cbColorControl.mode == gnm::CbMode::Disable) {
return;
}
@ -419,9 +419,9 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
indexCount = vertexCount;
}
auto indexBuffer = cacheTag.getIndexBuffer(indiciesAddress, indexCount,
pipe.uConfig.vgtPrimitiveType,
pipe.uConfig.vgtIndexType);
auto indexBuffer = cacheTag.getIndexBuffer(
indiciesAddress, indexOffset, indexCount, pipe.uConfig.vgtPrimitiveType,
pipe.uConfig.vgtIndexType);
auto stages = Cache::kGraphicsStages;
VkShaderEXT shaders[stages.size()]{};
@ -437,9 +437,9 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
vsPrimType = pipe.uConfig.vgtPrimitiveType.value;
}
vertexShader =
cacheTag.getVertexShader(gcn::Stage::VsVs, pipe.sh.spiShaderPgmVs,
pipe.context, vsPrimType, viewPorts);
vertexShader = cacheTag.getVertexShader(
gcn::Stage::VsVs, pipe.sh.spiShaderPgmVs, pipe.context,
indexBuffer.offset, vsPrimType, viewPorts);
}
auto pixelShader =

View file

@ -10,7 +10,7 @@ namespace amdgpu {
void draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
std::uint32_t vertexCount, std::uint32_t firstInstance,
std::uint32_t instanceCount, std::uint64_t indiciesAddress,
std::uint32_t indexCount);
std::uint32_t indexOffset, std::uint32_t indexCount);
void dispatch(Cache &cache, Scheduler &sched,
Registers::ComputeConfig &computeConfig,
std::uint32_t groupCountX, std::uint32_t groupCountY,

View file

@ -117,6 +117,7 @@ enum class ConfigType {
GsInstanceEn,
InstanceEn,
VsPrimType,
VsIndexOffset,
PsPrimType,
CsTgIdCompCnt,
VsInputVgprCount,

View file

@ -298,7 +298,9 @@ void cs_set_thread_id(u32vec3 localInvocationId, u32vec3 workgroupSize) {
const uint32_t kPrimTypeQuadList = 0x13;
const uint32_t kPrimTypeQuadStrip = 0x14;
uint32_t vs_get_index(uint32_t mode, uint32_t index) {
uint32_t vs_get_index(uint32_t mode, uint32_t index, uint32_t indexOffset) {
index += indexOffset;
switch (mode) {
case kPrimTypeQuadList: {
const uint32_t indicies[] = {0, 1, 2, 2, 3, 0};

View file

@ -1465,12 +1465,15 @@ static void createInitialValues(GcnConverter &converter,
auto primType = converter.createReadConfig(
stage, builder, info.create(gcn::ConfigType::VsPrimType, 0));
auto indexOffset = converter.createReadConfig(
stage, builder, info.create(gcn::ConfigType::VsIndexOffset, 0));
primType = converter.createLocalVariable(builder, loc, primType);
vertexIndex = converter.createLocalVariable(builder, loc, vertexIndex);
indexOffset = converter.createLocalVariable(builder, loc, indexOffset);
vertexIndex =
builder.createValue(loc, ir::amdgpu::VS_GET_INDEX,
{{context.getTypeUInt32(), primType, vertexIndex}});
vertexIndex = builder.createValue(
loc, ir::amdgpu::VS_GET_INDEX,
{{context.getTypeUInt32(), primType, vertexIndex, indexOffset}});
context.writeReg(loc, builder, gcn::RegId::Vgpr, 0, vertexIndex);
} else if (stage == gcn::Stage::Ps) {