From 17a7717584d2bbe9bc42a2d23e4140c5e19cfe90 Mon Sep 17 00:00:00 2001 From: DH Date: Wed, 3 Dec 2025 03:36:12 +0300 Subject: [PATCH] shader: fix createRouteBlock & sort switch cases log invalid loops do not split construct and move block on selection construct creation --- .../shader/transform/transformations.hpp | 7 +- .../include/shader/transform/wrap.hpp | 2 +- rpcsx/gpu/lib/gcn-shader/src/transform.cpp | 2 - .../gcn-shader/src/transform/construct.cpp | 5 +- .../lib/gcn-shader/src/transform/merge.cpp | 14 +- .../lib/gcn-shader/src/transform/route.cpp | 135 +++++++++++------- .../src/transform/transformations.cpp | 13 +- .../gpu/lib/gcn-shader/src/transform/wrap.cpp | 20 ++- rx/include/rx/FunctionRef.hpp | 3 +- 9 files changed, 126 insertions(+), 75 deletions(-) diff --git a/rpcsx/gpu/lib/gcn-shader/include/shader/transform/transformations.hpp b/rpcsx/gpu/lib/gcn-shader/include/shader/transform/transformations.hpp index abcbf970f..264ca0e2e 100644 --- a/rpcsx/gpu/lib/gcn-shader/include/shader/transform/transformations.hpp +++ b/rpcsx/gpu/lib/gcn-shader/include/shader/transform/transformations.hpp @@ -3,10 +3,9 @@ #include "SpvConverter.hpp" #include "ir.hpp" - namespace shader::transform { -ir::Value transformToCanonicalRegion(spv::Context &context, +ir::Value transformToCanonicalRegion(spv::Context &context, ir::RegionLike region); void transformToCf(spv::Context &context, ir::RegionLike region); -void transformToFlat(spv::Context &context, ir::RegionLike region); -} \ No newline at end of file +void transformToFlat(spv::Context &context, ir::RegionLike region); +} // namespace shader::transform diff --git a/rpcsx/gpu/lib/gcn-shader/include/shader/transform/wrap.hpp b/rpcsx/gpu/lib/gcn-shader/include/shader/transform/wrap.hpp index 33d396bf1..f65bb8b17 100644 --- a/rpcsx/gpu/lib/gcn-shader/include/shader/transform/wrap.hpp +++ b/rpcsx/gpu/lib/gcn-shader/include/shader/transform/wrap.hpp @@ -5,4 +5,4 @@ namespace shader::transform { void wrapLoopConstructs(spv::Context &context, ir::RegionLike root); void wrapSelectionConstructs(spv::Context &context, ir::RegionLike root); -} \ No newline at end of file +} // namespace shader::transform diff --git a/rpcsx/gpu/lib/gcn-shader/src/transform.cpp b/rpcsx/gpu/lib/gcn-shader/src/transform.cpp index 772f8ea48..1e18b9fb8 100644 --- a/rpcsx/gpu/lib/gcn-shader/src/transform.cpp +++ b/rpcsx/gpu/lib/gcn-shader/src/transform.cpp @@ -11,8 +11,6 @@ using namespace shader::transform; using Builder = ir::Builder; - - void shader::structurizeCfg(spv::Context &context, ir::RegionLike region) { // std::cerr << "before transforms: "; // region.print(std::cerr, context.ns); diff --git a/rpcsx/gpu/lib/gcn-shader/src/transform/construct.cpp b/rpcsx/gpu/lib/gcn-shader/src/transform/construct.cpp index e160595a5..03e0a4ff9 100644 --- a/rpcsx/gpu/lib/gcn-shader/src/transform/construct.cpp +++ b/rpcsx/gpu/lib/gcn-shader/src/transform/construct.cpp @@ -72,7 +72,10 @@ shader::transform::createSelectionConstruct(spv::Context &context, std::unordered_set successors; if (auto construct = block.cast()) { - successors = {construct.getMerge()}; + auto merge = construct.getMerge(); + merge.erase(); + selectionConstruct.addChild(merge); + successors = getSuccessors(merge); } else { successors = getSuccessors(block); } diff --git a/rpcsx/gpu/lib/gcn-shader/src/transform/merge.cpp b/rpcsx/gpu/lib/gcn-shader/src/transform/merge.cpp index d0d1498af..dd4716a04 100644 --- a/rpcsx/gpu/lib/gcn-shader/src/transform/merge.cpp +++ b/rpcsx/gpu/lib/gcn-shader/src/transform/merge.cpp @@ -1,8 +1,9 @@ -#include "SpvConverter.hpp" #include "transform/merge.hpp" +#include "SpvConverter.hpp" #include "analyze.hpp" -#include "transform/replace.hpp" #include "dialect.hpp" +#include "rx/debug.hpp" +#include "transform/replace.hpp" #include using namespace shader; @@ -10,10 +11,9 @@ using namespace shader::transform; using Builder = ir::Builder; -ir::Block shader::transform::createMergeBlock(spv::Context &context, - ir::InsertionPoint insertPoint, - const std::unordered_set &preds, - ir::Block to) { +ir::Block shader::transform::createMergeBlock( + spv::Context &context, ir::InsertionPoint insertPoint, + const std::unordered_set &preds, ir::Block to) { rx::dieIf(preds.empty(), "createMergeBlock: unexpected edges count"); auto loc = to.getLocation(); @@ -74,4 +74,4 @@ ir::Block shader::transform::createMergeBlock(spv::Context &context, } return mergeBlock; -} \ No newline at end of file +} diff --git a/rpcsx/gpu/lib/gcn-shader/src/transform/route.cpp b/rpcsx/gpu/lib/gcn-shader/src/transform/route.cpp index fe2dce0c4..a9971858c 100644 --- a/rpcsx/gpu/lib/gcn-shader/src/transform/route.cpp +++ b/rpcsx/gpu/lib/gcn-shader/src/transform/route.cpp @@ -4,6 +4,7 @@ #include "analyze.hpp" #include "dialect.hpp" #include "ir/Block.hpp" +#include "rx/FunctionRef.hpp" #include "transform/merge.hpp" #include #include @@ -24,6 +25,7 @@ struct RouteBlockData { std::unordered_map> toPredecessors; std::unordered_map> toAllPredecessors; + std::unordered_set routePredecessors; std::unordered_set patchPredecessors; }; @@ -81,10 +83,9 @@ static void logPhiPredecessorsMismatch(spv::Context &context, ir::Block to) { static RouteBlockData analyzeEdges(spv::Context &context, const std::vector &edges) { RouteBlockData data; - std::unordered_set routePredecessors; for (auto edge : edges) { - if (!routePredecessors.insert(edge.from()).second) { + if (!data.routePredecessors.insert(edge.from()).second) { data.patchPredecessors.insert(edge.from()); } @@ -150,6 +151,32 @@ static std::unordered_map createRouteTerminator( successorToId.reserve(toPreds.size()); + auto hasBranchesTo = [](ir::Block from, ir::Block to) { + std::vector workList; + std::unordered_set visited; + + workList.push_back(from); + visited.insert(from); + + while (!workList.empty()) { + auto block = workList.back(); + workList.pop_back(); + + if (block == to) { + return true; + } + + for (auto succ : getSuccessors(block)) { + if (visited.insert(succ).second) { + workList.push_back(succ); + } + } + } + + visited.insert(from); + return false; + }; + for (std::uint32_t id = 0; auto &[succ, pred] : toPreds) { if (id) { routeSwitch.addOperand(id); @@ -157,6 +184,29 @@ static std::unordered_map createRouteTerminator( } successorToId[succ] = id++; } + + auto caseCount = routeSwitch.getOperandCount() / 2 - 1; + for (std::size_t i = 1; i < caseCount; ++i) { + auto caseValue0 = routeSwitch.getOperand(2 + i * 2); + auto caseTarget0 = routeSwitch.getOperand(2 + i * 2 + 1) + .getAsValue() + .staticCast(); + + for (std::size_t t = 0; t < i; ++t) { + auto caseValue1 = routeSwitch.getOperand(2 + t * 2); + auto caseTarget1 = routeSwitch.getOperand(2 + t * 2 + 1) + .getAsValue() + .staticCast(); + + if (hasBranchesTo(caseTarget0, caseTarget1)) { + routeSwitch.replaceOperand(2 + i * 2, caseValue1); + routeSwitch.replaceOperand(2 + i * 2 + 1, caseTarget1); + routeSwitch.replaceOperand(2 + t * 2, caseValue0); + routeSwitch.replaceOperand(2 + t * 2 + 1, caseTarget0); + break; + } + } + } } return successorToId; @@ -164,21 +214,20 @@ static std::unordered_map createRouteTerminator( // Get successor ID based on routing strategy static ir::Value getSuccessorIdValue( - spv::Context &context, ir::Block successor, - const std::unordered_map> &toPreds, + spv::Context &context, ir::Block successor, const RouteBlockData &data, const std::unordered_map &successorToId) { - if (toPreds.size() == 2) { - return context.getBool(successor == toPreds.begin()->first); + if (data.toPredecessors.size() == 2) { + return context.getBool(successor == data.toPredecessors.begin()->first); } return context.imm32(successorToId.at(successor)); } // Process single predecessor block that needs patching -static void patchPredecessorBlock( - spv::Context &context, ir::Block patchBlock, ir::Block route, - ir::Value routePhi, const RouteBlockData &data, - const std::unordered_map> &toPreds, - const std::function &getSuccessorId) { +static void +patchPredecessorBlock(spv::Context &context, ir::Block patchBlock, + ir::Block route, ir::Value routePhi, + const RouteBlockData &data, + rx::FunctionRef getSuccessorId) { auto predSuccessors = getAllSuccessors(patchBlock); auto terminator = getTerminator(patchBlock); @@ -249,6 +298,8 @@ static void patchPredecessorBlock( terminator.replaceOperand(1, route); } } else { + assert(terminator == ir::spv::OpBranchConditional); + if (routeSuccessors.contains(1)) { condValueToSucc[context.getTrue()] = terminator.getOperand(1).getAsValue().staticCast(); @@ -271,8 +322,7 @@ static void patchPredecessorBlock( selector = getSuccessorId(defaultSucc); } - auto selectorType = - toPreds.size() == 2 ? boolType : context.getTypeUInt32(); + auto selectorType = routePhi.getOperand(0).getAsValue(); for (auto &[value, to] : condValueToSucc) { if (!selector) { selector = getSuccessorId(to); @@ -357,8 +407,8 @@ static void updatePhiNodesForSinglePred(ir::Block to, ir::Block pred, static void updatePhiNodesPartial(spv::Context &context, ir::Block to, ir::Block route, const std::unordered_set &preds, - const std::vector &edges) { - for (auto phi : ir::range(ir::Block(to).getFirst())) { + RouteBlockData &data) { + for (auto phi : ir::range(to.getFirst())) { if (phi != ir::spv::OpPhi) { break; } @@ -381,16 +431,15 @@ static void updatePhiNodesPartial(spv::Context &context, ir::Block to, phi.addOperand(newPhi); phi.addOperand(route); - if (preds.size() != edges.size()) { + if (preds.size() != data.routePredecessors.size()) { // merge block has additional edges. add dummy nodes to phi, this // block not reachable from new blocks - auto dummyValue = phi.getOperand(1).getAsValue(); - - for (auto edge : edges) { - if (!preds.contains(edge.from())) { - phi.addOperand(dummyValue); - phi.addOperand(edge.from()); + for (auto pred : data.routePredecessors) { + if (!preds.contains(pred)) { + auto dummyValue = context.getUndef(phi.getOperand(0).getAsValue()); + newPhi.addOperand(dummyValue); + newPhi.addOperand(pred); } } } @@ -398,33 +447,25 @@ static void updatePhiNodesPartial(spv::Context &context, ir::Block to, } // Process all target blocks and update their phi nodes -static void processTargetBlocks( - spv::Context &context, ir::Block route, ir::Value routePhi, - const RouteBlockData &data, - const std::unordered_map> &toPreds, - const std::vector &edges, - const std::function &getSuccessorId) { +static void +processTargetBlocks(spv::Context &context, ir::Block route, ir::Value routePhi, + RouteBlockData &data, const std::vector &edges, + const std::function &getSuccessorId) { - for (auto &[to, preds] : toPreds) { - if (toPreds.size() > 1) { - auto successorId = getSuccessorId(to); - - for (auto from : preds) { - // branches already resolved - if (data.patchPredecessors.contains(from)) { - continue; - } - - routePhi.addOperand(successorId); - routePhi.addOperand(from); - } - } + for (auto &[to, preds] : data.toPredecessors) { + auto successorId = routePhi ? getSuccessorId(to) : ir::Value(); for (auto from : preds) { + // branches already resolved if (data.patchPredecessors.contains(from)) { continue; } + if (routePhi) { + routePhi.addOperand(successorId); + routePhi.addOperand(from); + } + replaceTerminatorTarget(getTerminator(from), to, route); } @@ -441,7 +482,7 @@ static void processTargetBlocks( } // partial predecessors replacement, update PHIs - updatePhiNodesPartial(context, to, route, preds, edges); + updatePhiNodesPartial(context, to, route, preds, data); } } @@ -472,19 +513,17 @@ ir::Block shader::transform::createRouteBlock(spv::Context &context, // Step 5: Create lambda for getting successor IDs auto getSuccessorId = [&](ir::Block successor) { - return getSuccessorIdValue(context, successor, data.toPredecessors, - successorToId); + return getSuccessorIdValue(context, successor, data, successorToId); }; // Step 6: Patch predecessor blocks that have multiple routes for (auto patchBlock : data.patchPredecessors) { patchPredecessorBlock(context, patchBlock, route, routePhi, data, - data.toPredecessors, getSuccessorId); + getSuccessorId); } // Step 7: Process target blocks and update phi nodes - processTargetBlocks(context, route, routePhi, data, data.toPredecessors, - edges, getSuccessorId); + processTargetBlocks(context, route, routePhi, data, edges, getSuccessorId); return route; } diff --git a/rpcsx/gpu/lib/gcn-shader/src/transform/transformations.cpp b/rpcsx/gpu/lib/gcn-shader/src/transform/transformations.cpp index 6f36682f3..62aa03b0c 100644 --- a/rpcsx/gpu/lib/gcn-shader/src/transform/transformations.cpp +++ b/rpcsx/gpu/lib/gcn-shader/src/transform/transformations.cpp @@ -1,6 +1,6 @@ +#include "transform/transformations.hpp" #include "SpvConverter.hpp" #include "analyze.hpp" -#include "transform/transformations.hpp" #include "dialect.hpp" #include @@ -14,7 +14,7 @@ using namespace shader::transform; using Builder = ir::Builder; ir::Value shader::transform::transformToCanonicalRegion(spv::Context &context, - ir::RegionLike region) { + ir::RegionLike region) { auto cfg = buildCFG(region.getFirst()); std::vector exitNodes; for (auto node : cfg.getPreorderNodes()) { @@ -136,7 +136,8 @@ ir::Value shader::transform::transformToCanonicalRegion(spv::Context &context, return newExitBlock; } -void shader::transform::transformToCf(spv::Context &context, ir::RegionLike region) { +void shader::transform::transformToCf(spv::Context &context, + ir::RegionLike region) { ir::Block currentBlock; for (auto inst : region.children()) { @@ -174,7 +175,8 @@ void shader::transform::transformToCf(spv::Context &context, ir::RegionLike regi } } -void shader::transform::transformToFlat(spv::Context &context, ir::RegionLike region) { +void shader::transform::transformToFlat(spv::Context &context, + ir::RegionLike region) { std::vector workList; workList.push_back(region.getFirst()); @@ -268,7 +270,7 @@ void shader::transform::transformToFlat(spv::Context &context, ir::RegionLike re } if (auto block = inst.cast()) { - std::cout << "processing " << context.ns.getNameOf(block) << "\n"; + // std::cout << "processing " << context.ns.getNameOf(block) << "\n"; unwrapBlock(block); block.erase(); continue; @@ -277,4 +279,3 @@ void shader::transform::transformToFlat(spv::Context &context, ir::RegionLike re insertPoint.eraseAndInsert(inst); } } - diff --git a/rpcsx/gpu/lib/gcn-shader/src/transform/wrap.cpp b/rpcsx/gpu/lib/gcn-shader/src/transform/wrap.cpp index cb61f4859..40bd72795 100644 --- a/rpcsx/gpu/lib/gcn-shader/src/transform/wrap.cpp +++ b/rpcsx/gpu/lib/gcn-shader/src/transform/wrap.cpp @@ -1,10 +1,12 @@ +#include "transform/wrap.hpp" #include "SpvConverter.hpp" +#include "dialect.hpp" +#include "rx/print.hpp" #include "transform/Edge.hpp" #include "transform/construct.hpp" #include "transform/merge.hpp" #include "transform/route.hpp" -#include "transform/wrap.hpp" -#include "dialect.hpp" +#include #include using namespace shader; @@ -53,7 +55,6 @@ calculateCycleEdges(const std::unordered_set &cycles) { return result; } - static ir::Instruction skipPhis(ir::Instruction inst) { while (inst && inst == ir::spv::OpPhi) { inst = inst.getNext(); @@ -140,7 +141,8 @@ findSCCs(ir::Range nodes) { return sccs; } -void shader::transform::wrapLoopConstructs(spv::Context &context, ir::RegionLike root) { +void shader::transform::wrapLoopConstructs(spv::Context &context, + ir::RegionLike root) { auto region = root.children(); auto sccs = findSCCs(region); @@ -281,12 +283,20 @@ void shader::transform::wrapLoopConstructs(spv::Context &context, ir::RegionLike phi.erase(); loopConstruct.prependChild(phi); } + } else { + rx::print(stderr, "infinity loop in the shader:"); + for (auto block : scc) { + rx::print(stderr, " {}", context.ns.getNameOf(block)); + } + rx::println(""); + root.print(std::cerr, context.ns); + rx::die("infinity loop"); } } } void shader::transform::wrapSelectionConstructs(spv::Context &context, - ir::RegionLike root) { + ir::RegionLike root) { std::vector> workList; workList.push_back(root.children()); std::unordered_set usedMergeBlocks; diff --git a/rx/include/rx/FunctionRef.hpp b/rx/include/rx/FunctionRef.hpp index 2f030f243..0894a65e2 100644 --- a/rx/include/rx/FunctionRef.hpp +++ b/rx/include/rx/FunctionRef.hpp @@ -20,7 +20,8 @@ public: : context( const_cast> *>(&object)), invoke(+[](void *context, ArgsT... args) -> RT { - return (*reinterpret_cast(context))(args...); + return (*reinterpret_cast *>(context))( + args...); }) {} template