diff --git a/rpcsx-gpu/Cache.cpp b/rpcsx-gpu/Cache.cpp index 2d0d99ffc..e5cdda1da 100644 --- a/rpcsx-gpu/Cache.cpp +++ b/rpcsx-gpu/Cache.cpp @@ -607,9 +607,9 @@ struct CachedImage : Cache::Entry { for (unsigned mipLevel = 0; mipLevel < image.getMipLevels(); ++mipLevel) { auto &regionInfo = info.getSubresourceInfo(mipLevel); tiler.tile(scheduler, info, acquiredTileMode, acquiredDfmt, - transferBuffer.getAddress() + linearOffset, linearSize - linearOffset, - tiledBuffer.deviceAddress, tiledSize, mipLevel, 0, - image.getArrayLayers()); + transferBuffer.getAddress() + linearOffset, + linearSize - linearOffset, tiledBuffer.deviceAddress, + tiledSize, mipLevel, 0, image.getArrayLayers()); linearOffset += regionInfo.linearSize * image.getArrayLayers(); } @@ -690,9 +690,15 @@ Cache::Shader Cache::Tag::getShader(const ShaderKey &key, std::optional converted; { + auto env = key.env; + env.supportsBarycentric = vk::context->supportsBarycentric; + env.supportsInt8 = vk::context->supportsInt8; + env.supportsInt64Atomics = vk::context->supportsInt64Atomics; + env.supportsNonSemanticInfo = vk::context->supportsNonSemanticInfo; + gcn::Context context; auto deserialized = gcn::deserialize( - context, key.env, mParent->mDevice->gcnSemantic, key.address, + context, env, mParent->mDevice->gcnSemantic, key.address, [vmId](std::uint64_t address) -> std::uint32_t { return *RemoteMemory{vmId}.getPointer(address); }); @@ -701,7 +707,7 @@ Cache::Shader Cache::Tag::getShader(const ShaderKey &key, converted = gcn::convertToSpv(context, deserialized, mParent->mDevice->gcnSemanticModuleInfo, - key.stage, key.env); + key.stage, env); if (!converted) { return {}; } @@ -1091,15 +1097,15 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) { VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; if (key.kind == ImageKind::Color) { usage |= VK_IMAGE_USAGE_SAMPLED_BIT; - bool isCompressed = - key.dfmt == gnm::kDataFormatBc1 || key.dfmt == gnm::kDataFormatBc2 || - key.dfmt 
== gnm::kDataFormatBc3 || key.dfmt == gnm::kDataFormatBc4 || - key.dfmt == gnm::kDataFormatBc5 || key.dfmt == gnm::kDataFormatBc6 || - key.dfmt == gnm::kDataFormatBc7 || key.dfmt == gnm::kDataFormatGB_GR || - key.dfmt == gnm::kDataFormatBG_RG; + bool isCompressed = + key.dfmt == gnm::kDataFormatBc1 || key.dfmt == gnm::kDataFormatBc2 || + key.dfmt == gnm::kDataFormatBc3 || key.dfmt == gnm::kDataFormatBc4 || + key.dfmt == gnm::kDataFormatBc5 || key.dfmt == gnm::kDataFormatBc6 || + key.dfmt == gnm::kDataFormatBc7 || key.dfmt == gnm::kDataFormatGB_GR || + key.dfmt == gnm::kDataFormatBG_RG; - if (!isCompressed) { - usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + if (!isCompressed) { + usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; } } else { usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; @@ -1201,8 +1207,8 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) { auto &info = surfaceInfo.getSubresourceInfo(mipLevel); tiler.detile(*mScheduler, surfaceInfo, key.tileMode, key.dfmt, - tiledBuffer.deviceAddress, surfaceInfo.totalSize, dstAddress, detiledSize, mipLevel, 0, - key.arrayLayerCount); + tiledBuffer.deviceAddress, surfaceInfo.totalSize, + dstAddress, detiledSize, mipLevel, 0, key.arrayLayerCount); detiledSize -= info.linearSize * key.arrayLayerCount; dstAddress += info.linearSize * key.arrayLayerCount; @@ -1420,9 +1426,6 @@ Cache::GraphicsTag::getShader(gcn::Stage stage, const SpiShaderPgm &pgm, gcn::Environment env{ .vgprCount = pgm.rsrc1.getVGprCount(), .sgprCount = pgm.rsrc1.getSGprCount(), - .supportsBarycentric = vk::context->supportsBarycentric, - .supportsInt8 = vk::context->supportsInt8, - .supportsInt64Atomics = vk::context->supportsInt64Atomics, .userSgprs = std::span(pgm.userData.data(), pgm.rsrc2.userSgpr), }; @@ -1572,9 +1575,6 @@ Cache::ComputeTag::getShader(const Registers::ComputeConfig &pgm) { .numThreadX = static_cast(pgm.numThreadX), .numThreadY = static_cast(pgm.numThreadY), .numThreadZ = static_cast(pgm.numThreadZ), - 
.supportsBarycentric = vk::context->supportsBarycentric, - .supportsInt8 = vk::context->supportsInt8, - .supportsInt64Atomics = vk::context->supportsInt64Atomics, .userSgprs = std::span(pgm.userData.data(), pgm.rsrc2.userSgpr), }; @@ -1610,23 +1610,28 @@ Cache::ComputeTag::getShader(const Registers::ComputeConfig &pgm) { std::uint32_t sgprInputCount = 0; if (pgm.rsrc2.tgIdXEn) { - sgprInput[sgprInputCount++] = static_cast(gcn::CsSGprInput::ThreadGroupIdX); + sgprInput[sgprInputCount++] = + static_cast(gcn::CsSGprInput::ThreadGroupIdX); } if (pgm.rsrc2.tgIdYEn) { - sgprInput[sgprInputCount++] = static_cast(gcn::CsSGprInput::ThreadGroupIdY); + sgprInput[sgprInputCount++] = + static_cast(gcn::CsSGprInput::ThreadGroupIdY); } if (pgm.rsrc2.tgIdZEn) { - sgprInput[sgprInputCount++] = static_cast(gcn::CsSGprInput::ThreadGroupIdZ); + sgprInput[sgprInputCount++] = + static_cast(gcn::CsSGprInput::ThreadGroupIdZ); } if (pgm.rsrc2.tgSizeEn) { - sgprInput[sgprInputCount++] = static_cast(gcn::CsSGprInput::ThreadGroupSize); + sgprInput[sgprInputCount++] = + static_cast(gcn::CsSGprInput::ThreadGroupSize); } if (pgm.rsrc2.scratchEn) { - sgprInput[sgprInputCount++] = static_cast(gcn::CsSGprInput::Scratch); + sgprInput[sgprInputCount++] = + static_cast(gcn::CsSGprInput::Scratch); } for (std::size_t index = 0; const auto &slot : configSlots) { diff --git a/rpcsx-gpu/lib/amdgpu-tiler/shaders/detiler2d.comp.glsl b/rpcsx-gpu/lib/amdgpu-tiler/shaders/detiler2d.comp.glsl index db2b450b6..d6eb6a44f 100644 --- a/rpcsx-gpu/lib/amdgpu-tiler/shaders/detiler2d.comp.glsl +++ b/rpcsx-gpu/lib/amdgpu-tiler/shaders/detiler2d.comp.glsl @@ -11,7 +11,10 @@ #extension GL_EXT_null_initializer : enable #extension GL_EXT_buffer_reference2 : enable #extension GL_EXT_buffer_reference_uvec2 : enable + +#ifdef DEBUG #extension GL_EXT_debug_printf : enable +#endif #include "tiler.glsl" @@ -54,6 +57,7 @@ void main() { uint32_t bpp = (config.bitsPerElement + 7) / 8; +#ifdef DEBUG if (config.srcAddress + 
tiledByteOffset + bpp > config.srcEndAddress) { debugPrintfEXT("detiler2d: out of src buffer %d x %d x %d", pos.x, pos.y, pos.z); return; @@ -63,6 +67,7 @@ void main() { debugPrintfEXT("detiler2d: out of dst buffer %d x %d x %d", pos.x, pos.y, pos.z); return; } +#endif switch (bpp) { case 1: diff --git a/rpcsx-gpu/lib/amdgpu-tiler/shaders/tiler1d.comp.glsl b/rpcsx-gpu/lib/amdgpu-tiler/shaders/tiler1d.comp.glsl index e0b383219..d23be7ba8 100644 --- a/rpcsx-gpu/lib/amdgpu-tiler/shaders/tiler1d.comp.glsl +++ b/rpcsx-gpu/lib/amdgpu-tiler/shaders/tiler1d.comp.glsl @@ -11,7 +11,10 @@ #extension GL_EXT_null_initializer : enable #extension GL_EXT_buffer_reference2 : enable #extension GL_EXT_buffer_reference_uvec2 : enable + +#ifdef DEBUG #extension GL_EXT_debug_printf : enable +#endif #include "tiler.glsl" @@ -47,6 +50,7 @@ void main() { uint32_t bpp = (config.bitsPerElement + 7) / 8; +#ifdef DEBUG if (config.srcAddress + linearByteOffset + bpp > config.srcEndAddress) { debugPrintfEXT("tiler1d: out of src buffer %d x %d x %d", pos.x, pos.y, pos.z); return; @@ -56,6 +60,7 @@ void main() { debugPrintfEXT("tiler1d: out of dst buffer %d x %d x %d", pos.x, pos.y, pos.z); return; } +#endif switch (bpp) { case 1: diff --git a/rpcsx-gpu/lib/amdgpu-tiler/shaders/tiler2d.comp.glsl b/rpcsx-gpu/lib/amdgpu-tiler/shaders/tiler2d.comp.glsl index 709150cfb..074ff0aed 100644 --- a/rpcsx-gpu/lib/amdgpu-tiler/shaders/tiler2d.comp.glsl +++ b/rpcsx-gpu/lib/amdgpu-tiler/shaders/tiler2d.comp.glsl @@ -11,7 +11,10 @@ #extension GL_EXT_null_initializer : enable #extension GL_EXT_buffer_reference2 : enable #extension GL_EXT_buffer_reference_uvec2 : enable + +#ifdef DEBUG #extension GL_EXT_debug_printf : enable +#endif #include "tiler.glsl" @@ -53,6 +56,7 @@ void main() { uint32_t bpp = (config.bitsPerElement + 7) / 8; +#ifdef DEBUG if (config.srcAddress + linearByteOffset + bpp > config.srcEndAddress) { debugPrintfEXT("tiler2d: out of src buffer %d x %d x %d", pos.x, pos.y, pos.z); return; @@ -62,6 
+66,7 @@ void main() { debugPrintfEXT("tiler2d: out of dst buffer %d x %d x %d", pos.x, pos.y, pos.z); return; } +#endif switch (bpp) { case 1: diff --git a/rpcsx-gpu/lib/gcn-shader/include/shader/gcn.hpp b/rpcsx-gpu/lib/gcn-shader/include/shader/gcn.hpp index 8cc395672..95b190ae7 100644 --- a/rpcsx-gpu/lib/gcn-shader/include/shader/gcn.hpp +++ b/rpcsx-gpu/lib/gcn-shader/include/shader/gcn.hpp @@ -119,6 +119,7 @@ struct Environment { bool supportsBarycentric = true; bool supportsInt8 = false; bool supportsInt64Atomics = false; + bool supportsNonSemanticInfo = false; std::span userSgprs; }; diff --git a/rpcsx-gpu/lib/gcn-shader/src/GcnConverter.cpp b/rpcsx-gpu/lib/gcn-shader/src/GcnConverter.cpp index 5f63a2018..c1c801d30 100644 --- a/rpcsx-gpu/lib/gcn-shader/src/GcnConverter.cpp +++ b/rpcsx-gpu/lib/gcn-shader/src/GcnConverter.cpp @@ -226,7 +226,6 @@ struct ResourcesBuilder { auto cloned = ir::clone(inst, resources.context, importer); if (inst == ir::spv::OpLoad) { - auto load = inst.staticCast(); auto def = memorySSA.getDef(inst, inst.getOperand(1).getAsValue()); auto resourceInst = unpackResourceDef(memorySSA, importer, def); @@ -444,7 +443,6 @@ ir::Value GcnConverter::getGlPosition(gcn::Builder &builder) { ir::spv::StorageClass::Output, gcnContext.getTypeVector(gcnContext.getTypeFloat32(), 4)); - auto index = gcnContext.simm32(0); return builder.createSpvAccessChain(gcnContext.getUnknownLocation(), float4OutPtrT, gcnContext.perVertex, {{gcnContext.simm32(0)}}); @@ -572,9 +570,7 @@ static void replaceVariableWithConstant(ir::Value variable, } } -static void expToSpv(GcnConverter &converter, gcn::Import &importer, - gcn::Stage stage, - const SemanticModuleInfo &semanticModuleInfo, +static void expToSpv(GcnConverter &converter, gcn::Stage stage, gcn::ShaderInfo &info, ir::Instruction inst) { enum Target : unsigned { ET_MRT0 = 0, @@ -832,10 +828,8 @@ static void instructionsToSpv(GcnConverter &converter, gcn::Import &importer, 
.createSpvExtInstImport(context.getUnknownLocation(), "GLSL.std.450"); auto boolT = context.getTypeBool(); auto f32T = context.getTypeFloat32(); - auto u32T = context.getTypeUInt32(); auto s32T = context.getTypeSInt32(); auto f32x3 = context.getTypeVector(f32T, 3); - auto f32x4 = context.getTypeVector(f32T, 4); auto s32PT = context.getTypePointer(ir::spv::StorageClass::Input, s32T); auto f32x3PT = context.getTypePointer(ir::spv::StorageClass::Input, f32x3); @@ -876,7 +870,7 @@ static void instructionsToSpv(GcnConverter &converter, gcn::Import &importer, } if (inst == ir::exp::EXP) { - expToSpv(converter, importer, stage, semanticModuleInfo, info, inst); + expToSpv(converter, stage, info, inst); inst.remove(); continue; } @@ -1696,8 +1690,11 @@ gcn::convertToSpv(Context &context, ir::Region body, extensions.createSpvExtension(context.getUnknownLocation(), "SPV_KHR_physical_storage_buffer"); - extensions.createSpvExtension(context.getUnknownLocation(), - "SPV_KHR_non_semantic_info"); + + if (env.supportsNonSemanticInfo) { + extensions.createSpvExtension(context.getUnknownLocation(), + "SPV_KHR_non_semantic_info"); + } auto merged = context.layout.merge(context); result.spv = spv::serialize(merged); diff --git a/rpcsx-gpu/lib/gcn-shader/src/GcnInstruction.cpp b/rpcsx-gpu/lib/gcn-shader/src/GcnInstruction.cpp index 3e945fc2f..0f2208626 100644 --- a/rpcsx-gpu/lib/gcn-shader/src/GcnInstruction.cpp +++ b/rpcsx-gpu/lib/gcn-shader/src/GcnInstruction.cpp @@ -93,7 +93,6 @@ static GcnOperand createSgprGcnOperand(std::uint64_t &address, unsigned id) { static void readVop2Inst(GcnInstruction &inst, std::uint64_t &address, const std::function &readMemory) { - constexpr int kMinInstSize = 1; constexpr auto src0Mask = genMask(0, 9); constexpr auto vsrc1Mask = genMask(getMaskEnd(src0Mask), 8); constexpr auto vdstMask = genMask(getMaskEnd(vsrc1Mask), 8); @@ -133,7 +132,6 @@ readVop2Inst(GcnInstruction &inst, std::uint64_t &address, static void readSop2Inst(GcnInstruction &inst, 
std::uint64_t &address, const std::function &readMemory) { - constexpr int kMinInstSize = 1; constexpr auto ssrc0Mask = genMask(0, 8); constexpr auto ssrc1Mask = genMask(getMaskEnd(ssrc0Mask), 8); constexpr auto sdstMask = genMask(getMaskEnd(ssrc1Mask), 7); @@ -156,7 +154,6 @@ readSop2Inst(GcnInstruction &inst, std::uint64_t &address, static void readSopkInst(GcnInstruction &inst, std::uint64_t &address, const std::function &readMemory) { - constexpr int kMinInstSize = 1; constexpr auto simmMask = genMask(0, 16); constexpr auto sdstMask = genMask(getMaskEnd(simmMask), 7); constexpr auto opMask = genMask(getMaskEnd(sdstMask), 5); @@ -180,7 +177,6 @@ readSopkInst(GcnInstruction &inst, std::uint64_t &address, static void readSmrdInst(GcnInstruction &inst, std::uint64_t &address, const std::function &readMemory) { - constexpr int kMinInstSize = 1; constexpr auto offsetMask = genMask(0, 8); constexpr auto immMask = genMask(getMaskEnd(offsetMask), 1); constexpr auto sbaseMask = genMask(getMaskEnd(immMask), 6); @@ -214,9 +210,9 @@ readSmrdInst(GcnInstruction &inst, std::uint64_t &address, if (op != ir::smrd::MEMTIME) { auto baseOperand = createSgprGcnOperand(address, sbase); - auto offsetOperand = - imm ? GcnOperand::createConstant(std::uint32_t(std::int8_t(offset << 2))) - : createSgprGcnOperand(address, offset).withR(); + auto offsetOperand = imm ? 
GcnOperand::createConstant( + std::uint32_t(std::int8_t(offset << 2))) + : createSgprGcnOperand(address, offset).withR(); if (isBuffer) { inst.addOperand(GcnOperand::createBuffer(baseOperand).withR()); @@ -235,13 +231,9 @@ readSmrdInst(GcnInstruction &inst, std::uint64_t &address, static void readVop3Inst(GcnInstruction &inst, std::uint64_t &address, const std::function &readMemory) { - constexpr int kMinInstSize = 2; constexpr auto vdstMask = genMask(0, 8); constexpr auto absMask = genMask(getMaskEnd(vdstMask), 3); - constexpr auto abs0Mask = genMask(getMaskEnd(vdstMask), 1); - constexpr auto abs1Mask = genMask(getMaskEnd(abs0Mask), 1); - constexpr auto abs2Mask = genMask(getMaskEnd(abs1Mask), 1); constexpr auto clmpMask = genMask(getMaskEnd(absMask), 1); constexpr auto sdstMask = genMask(getMaskEnd(vdstMask), 7); @@ -253,9 +245,6 @@ readVop3Inst(GcnInstruction &inst, std::uint64_t &address, constexpr auto src2Mask = genMask(getMaskEnd(src1Mask), 9); constexpr auto omodMask = genMask(getMaskEnd(src2Mask), 2); constexpr auto negMask = genMask(getMaskEnd(omodMask), 3); - constexpr auto neg0Mask = genMask(getMaskEnd(omodMask), 1); - constexpr auto neg1Mask = genMask(getMaskEnd(neg0Mask), 1); - constexpr auto neg2Mask = genMask(getMaskEnd(neg1Mask), 1); std::uint32_t words[2]; words[0] = readMemory(address); @@ -308,7 +297,8 @@ readVop3Inst(GcnInstruction &inst, std::uint64_t &address, op == ir::vop3::DIV_SCALE_F64; bool readsVcc = op == ir::vop3::DIV_FMAS_F32 || op == ir::vop3::DIV_FMAS_F64; - bool usesSrc2 = op >= ir::vop3::MAD_LEGACY_F32 && op <= ir::vop3::DIV_FIXUP_F64; + bool usesSrc2 = + op >= ir::vop3::MAD_LEGACY_F32 && op <= ir::vop3::DIV_FIXUP_F64; if (writesVcc) { inst.addOperand(GcnOperand::createVccLo().withRW()); @@ -366,7 +356,6 @@ readVop3Inst(GcnInstruction &inst, std::uint64_t &address, static void readMubufInst(GcnInstruction &inst, std::uint64_t &address, const std::function &readMemory) { - constexpr int kMinInstSize = 2; constexpr auto offsetMask 
= genMask(0, 12); constexpr auto offenMask = genMask(getMaskEnd(offsetMask), 1); constexpr auto idxenMask = genMask(getMaskEnd(offenMask), 1); @@ -465,8 +454,6 @@ readMubufInst(GcnInstruction &inst, std::uint64_t &address, static void readMtbufInst(GcnInstruction &inst, std::uint64_t &address, const std::function &readMemory) { - constexpr int kMinInstSize = 2; - constexpr auto offsetMask = genMask(0, 12); constexpr auto offenMask = genMask(getMaskEnd(offsetMask), 1); constexpr auto idxenMask = genMask(getMaskEnd(offenMask), 1); @@ -554,8 +541,6 @@ readMtbufInst(GcnInstruction &inst, std::uint64_t &address, static void readMimgInst(GcnInstruction &inst, std::uint64_t &address, const std::function &readMemory) { - constexpr int kMinInstSize = 2; - constexpr auto dmaskMask = genMask(8, 4); constexpr auto unrmMask = genMask(getMaskEnd(dmaskMask), 1); constexpr auto glcMask = genMask(getMaskEnd(unrmMask), 1); @@ -638,7 +623,6 @@ readMimgInst(GcnInstruction &inst, std::uint64_t &address, static void readDsInst(GcnInstruction &inst, std::uint64_t &address, const std::function &readMemory) { - constexpr int kMinInstSize = 2; constexpr auto offset0Mask = genMask(0, 8); constexpr auto offset1Mask = genMask(getMaskEnd(offset0Mask), 8); constexpr auto gdsMask = genMask(getMaskEnd(offset1Mask) + 1, 1); @@ -713,7 +697,6 @@ readDsInst(GcnInstruction &inst, std::uint64_t &address, static void readVintrpInst(GcnInstruction &inst, std::uint64_t &address, const std::function &readMemory) { - constexpr int kMinInstSize = 1; constexpr auto vsrcMask = genMask(0, 8); constexpr auto attrChanMask = genMask(getMaskEnd(vsrcMask), 2); constexpr auto attrMask = genMask(getMaskEnd(attrChanMask), 6); @@ -743,8 +726,6 @@ readVintrpInst(GcnInstruction &inst, std::uint64_t &address, static void readExpInst(GcnInstruction &inst, std::uint64_t &address, const std::function &readMemory) { - constexpr int kMinInstSize = 2; - constexpr auto enMask = genMask(0, 4); constexpr auto targetMask = 
genMask(getMaskEnd(enMask), 6); constexpr auto comprMask = genMask(getMaskEnd(targetMask), 1); @@ -797,8 +778,6 @@ readExpInst(GcnInstruction &inst, std::uint64_t &address, static void readVop1Inst(GcnInstruction &inst, std::uint64_t &address, const std::function &readMemory) { - constexpr int kMinInstSize = 1; - constexpr auto src0Mask = genMask(0, 9); constexpr auto opMask = genMask(getMaskEnd(src0Mask), 8); constexpr auto vdstMask = genMask(getMaskEnd(opMask), 8); @@ -818,7 +797,6 @@ readVop1Inst(GcnInstruction &inst, std::uint64_t &address, static void readVopcInst(GcnInstruction &inst, std::uint64_t &address, const std::function &readMemory) { - constexpr int kMinInstSize = 1; constexpr auto src0Mask = genMask(0, 9); constexpr auto vsrc1Mask = genMask(getMaskEnd(src0Mask), 8); constexpr auto opMask = genMask(getMaskEnd(vsrc1Mask), 8); @@ -840,8 +818,6 @@ readVopcInst(GcnInstruction &inst, std::uint64_t &address, static void readSop1Inst(GcnInstruction &inst, std::uint64_t &address, const std::function &readMemory) { - constexpr int kMinInstSize = 1; - constexpr auto ssrc0Mask = genMask(0, 8); constexpr auto opMask = genMask(getMaskEnd(ssrc0Mask), 8); constexpr auto sdstMask = genMask(getMaskEnd(opMask), 7); @@ -857,8 +833,7 @@ readSop1Inst(GcnInstruction &inst, std::uint64_t &address, inst.op = op; bool readsM0 = op == ir::sop1::MOVRELS_B32 || op == ir::sop1::MOVRELS_B64 || - op == ir::sop1::MOVRELD_B32 || op == ir::sop1::MOVRELD_B64 || - op == ir::sop1::ABS_I32; + op == ir::sop1::MOVRELD_B32 || op == ir::sop1::MOVRELD_B64; inst.addOperand(createSgprGcnOperand(address, sdst).withW()); inst.addOperand(createSgprGcnOperand(address, ssrc0).withR()); @@ -870,8 +845,6 @@ readSop1Inst(GcnInstruction &inst, std::uint64_t &address, static void readSopcInst(GcnInstruction &inst, std::uint64_t &address, const std::function &readMemory) { - constexpr int kMinInstSize = 1; - constexpr auto ssrc0Mask = genMask(0, 8); constexpr auto ssrc1Mask = 
genMask(getMaskEnd(ssrc0Mask), 8); constexpr auto opMask = genMask(getMaskEnd(ssrc1Mask), 7); @@ -892,8 +865,6 @@ readSopcInst(GcnInstruction &inst, std::uint64_t &address, static void readSoppInst(GcnInstruction &inst, std::uint64_t &address, const std::function &readMemory) { - static constexpr int kMinInstSize = 1; - static constexpr auto simmMask = genMask(0, 16); static constexpr auto opMask = genMask(getMaskEnd(simmMask), 7); @@ -1030,7 +1001,7 @@ void GcnInstruction::print(std::ostream &os) const { if (operandCount > 0) { os << ' '; - for (int i = 0; i < operandCount; ++i) { + for (std::size_t i = 0; i < operandCount; ++i) { if (i != 0) { os << ", "; } diff --git a/rpcsx-gpu/lib/gcn-shader/src/SpvConverter.cpp b/rpcsx-gpu/lib/gcn-shader/src/SpvConverter.cpp index b4e458e74..c77b3f342 100644 --- a/rpcsx-gpu/lib/gcn-shader/src/SpvConverter.cpp +++ b/rpcsx-gpu/lib/gcn-shader/src/SpvConverter.cpp @@ -570,7 +570,6 @@ ir::Value spv::Context::createOutput(ir::Location loc, int index) { Builder::createAppend(*this, layout.getOrCreateGlobals(*this)); auto annotations = Builder::createAppend(*this, layout.getOrCreateAnnotations(*this)); - auto debugs = Builder::createAppend(*this, layout.getOrCreateDebugs(*this)); auto variable = globals.createSpvVariable(loc, variableType, ir::spv::StorageClass::Output); @@ -598,7 +597,6 @@ ir::Value spv::Context::createInput(ir::Location loc, int index) { Builder::createAppend(*this, layout.getOrCreateGlobals(*this)); auto annotations = Builder::createAppend(*this, layout.getOrCreateAnnotations(*this)); - auto debugs = Builder::createAppend(*this, layout.getOrCreateDebugs(*this)); auto variable = globals.createSpvVariable(loc, variableType, ir::spv::StorageClass::Input); @@ -630,7 +628,6 @@ ir::Value spv::Context::createAttr(ir::Location loc, int attrId, bool perVertex, Builder::createAppend(*this, layout.getOrCreateGlobals(*this)); auto annotations = Builder::createAppend(*this, layout.getOrCreateAnnotations(*this)); - auto debugs 
= Builder::createAppend(*this, layout.getOrCreateDebugs(*this)); auto variable = globals.createSpvVariable(loc, variableType, ir::spv::StorageClass::Input); diff --git a/rpcsx-gpu/lib/gcn-shader/src/gcn.cpp b/rpcsx-gpu/lib/gcn-shader/src/gcn.cpp index 0fb854fbb..5642d1b04 100644 --- a/rpcsx-gpu/lib/gcn-shader/src/gcn.cpp +++ b/rpcsx-gpu/lib/gcn-shader/src/gcn.cpp @@ -252,7 +252,6 @@ gcn::Context::getOrCreateLabel(ir::Location loc, ir::Region body, std::uint64_t address) { auto it = instructions.lower_bound(address); - bool exists = false; if (it != instructions.end() && it->first == address) { if (it->second == ir::spv::OpLabel) { return {it->second.staticCast(), false}; @@ -408,7 +407,6 @@ ir::Value gcn::Context::getOrCreateRegisterVariable(gcn::RegId id) { auto pRegTxN = getTypePointer(storageClass, regT); auto globals = Builder::createAppend(*this, layout.getOrCreateGlobals(*this)); - auto debugs = Builder::createAppend(*this, layout.getOrCreateDebugs(*this)); entity = globals.createSpvVariable(location, pRegTxN, storageClass); setName(entity, getRegisterName(id)); @@ -489,7 +487,6 @@ ir::Value gcn::Context::readReg(ir::Location loc, Builder &builder, } int regCount = valWidth / regWidth; - auto sint32 = getTypeSInt32(); auto channelType = getTypeInt(regWidth, false); auto splittedType = regCount > 4 ? getTypeArray(channelType, imm32(regCount)) : getTypeVector(channelType, regCount); @@ -602,7 +599,6 @@ void gcn::Context::writeReg(ir::Location loc, Builder &builder, gcn::RegId id, int regCount = valWidth / regWidth; - auto sint32 = getTypeSInt32(); auto channelType = getTypeInt(regWidth, false); auto splittedType = regCount > 4 ? 
getTypeArray(channelType, imm32(regCount)) : getTypeVector(channelType, regCount); @@ -701,11 +697,9 @@ static ir::Value deserializeGcnRegion( const std::function &readMemory, std::vector &branchesToUnknown, std::unordered_set &processed) { - BinaryLayout &resultLayout = converter.layout; AddressLocationBuilder locBuilder{&converter}; ir::Value boolTV = converter.getTypeBool(); - ir::Value float64TV = converter.getTypeFloat64(); ir::Value float32TV = converter.getTypeFloat32(); ir::Value uint16TV = converter.getTypeUInt16(); ir::Value sint16TV = converter.getTypeSInt16(); @@ -800,7 +794,6 @@ static ir::Value deserializeGcnRegion( auto f32 = converter.getTypeFloat32(); auto attrChannelPtrType = converter.getTypePointer(ir::spv::StorageClass::Input, f32); - auto resultType = converter.getTypeArray(f32, converter.simm32(3)); auto attr = converter.createAttr(loc, op.attrId, environment.supportsBarycentric, @@ -947,8 +940,8 @@ static ir::Value deserializeGcnRegion( case GcnOperand::Kind::LdsDirect: return converter.writeReg(loc, builder, gcn::RegId::LdsDirect, 0, value); case GcnOperand::Kind::Vgpr: - return converter.writeReg(loc, builder, gcn::RegId::Vgpr, op.value, - value); + return converter.writeReg(loc, builder, gcn::RegId::Vgpr, op.value, value, + lane); case GcnOperand::Kind::Sgpr: return converter.writeReg(loc, builder, gcn::RegId::Sgpr, op.value, value); @@ -1281,7 +1274,7 @@ static ir::Value deserializeGcnRegion( auto inst = builder.createInstruction(loc, isaInst.kind, isaInst.op); auto paramBuilder = gcn::Builder::createInsertBefore(converter, inst); - for (std::size_t index = 0; auto &op : operands) { + for (auto &op : operands) { inst.addOperand(createOperandRead(loc, paramBuilder, uint32TV, op)); } @@ -1430,7 +1423,6 @@ static ir::Value deserializeGcnRegion( } static void canonicalizeRegisterVariableType(ir::Context &context, - const BinaryLayout &layout, gcn::RegId regId, ir::Value variable) { auto varPointerType = variable.getOperand(0).getAsValue(); 
@@ -1534,7 +1526,7 @@ void gcn::canonicalizeSemantic(ir::Context &context, auto &name = *entry.getOperand(1).getAsString(); if (auto regId = getRegIdByName(name)) { - canonicalizeRegisterVariableType(context, layout, *regId, node); + canonicalizeRegisterVariableType(context, *regId, node); } } } diff --git a/rpcsx-gpu/lib/vk/include/vk.hpp b/rpcsx-gpu/lib/vk/include/vk.hpp index b1ab2d52a..c9f0dd9e3 100644 --- a/rpcsx-gpu/lib/vk/include/vk.hpp +++ b/rpcsx-gpu/lib/vk/include/vk.hpp @@ -50,6 +50,7 @@ struct Context { bool supportsBarycentric = false; bool supportsInt8 = false; bool supportsInt64Atomics = false; + bool supportsNonSemanticInfo = false; Context() = default; Context(const Context &) = delete; diff --git a/rpcsx-gpu/lib/vk/src/vk.cpp b/rpcsx-gpu/lib/vk/src/vk.cpp index 03639cab1..4dfaf8087 100644 --- a/rpcsx-gpu/lib/vk/src/vk.cpp +++ b/rpcsx-gpu/lib/vk/src/vk.cpp @@ -481,6 +481,11 @@ void vk::Context::createDevice(VkSurfaceKHR surface, int gpuIndex, } for (auto ext : requiredExtensions) { + if (ext == + std::string_view(VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME)) { + supportsNonSemanticInfo = true; + } + deviceExtensions.push_back(ext); } diff --git a/rpcsx-gpu/main.cpp b/rpcsx-gpu/main.cpp index c072ebacb..20805c64b 100644 --- a/rpcsx-gpu/main.cpp +++ b/rpcsx-gpu/main.cpp @@ -337,9 +337,11 @@ int main(int argc, const char *argv[]) { VK_EXT_SHADER_OBJECT_EXTENSION_NAME, VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME, - VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME, }, - {VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME}); + { + VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME, + VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME, + }); auto getTotalMemorySize = [&](int memoryType) -> VkDeviceSize { auto deviceLocalMemoryType =