rsx: Implement FP to CFG analyzer

This commit is contained in:
kd-11 2025-11-22 20:26:14 +03:00 committed by kd-11
parent 683baf46b2
commit f300832edb
11 changed files with 525 additions and 0 deletions

View file

@ -192,6 +192,7 @@ if(BUILD_RPCS3_TESTS)
tests/test_tuple.cpp
tests/test_simple_array.cpp
tests/test_address_range.cpp
tests/test_rsx_cfg.cpp
)
target_link_libraries(rpcs3_test

View file

@ -516,6 +516,7 @@ target_sources(rpcs3_emu PRIVATE
RSX/Overlays/overlay_video.cpp
RSX/Overlays/Shaders/shader_loading_dialog.cpp
RSX/Overlays/Shaders/shader_loading_dialog_native.cpp
RSX/Program/Assembler/FPToCFG.cpp
RSX/Program/CgBinaryProgram.cpp
RSX/Program/CgBinaryFragmentProgram.cpp
RSX/Program/CgBinaryVertexProgram.cpp

View file

@ -0,0 +1,39 @@
#pragma once
#include <util/asm.hpp>
#include "IR.h"
#include <list>
struct RSXFragmentProgram;
namespace rsx::assembler
{
// Control flow graph: owns every basic block of a program.
// Blocks live in a std::list, so the BasicBlock pointers handed out by push()
// stay valid while further blocks are appended.
struct FlowGraph
{
	std::list<BasicBlock> blocks;

	// Appends a new block whose id is 'pc' and returns a pointer to it.
	// If 'parent' is null, the most recently appended block (if any) is used as the parent.
	// When a parent exists, the new block is registered as its successor and the parent
	// is registered as the new block's predecessor.
	BasicBlock* push(BasicBlock* parent = nullptr, u32 pc = 0)
	{
		// Resolve the implicit parent first; after the append, back() would be the new block itself.
		BasicBlock* const origin = (parent || blocks.empty()) ? parent : &blocks.back();

		BasicBlock& created = blocks.emplace_back();
		created.id = pc;

		if (origin != nullptr)
		{
			origin->insert_succ(&created);
			created.insert_pred(origin);
		}

		return &created;
	}
};
FlowGraph deconstruct_fragment_program(const RSXFragmentProgram& prog);
}

View file

@ -0,0 +1,158 @@
#include "stdafx.h"
#pragma optimize("", off)
#include "CFG.h"
#include "Emu/RSX/Common/simple_array.hpp"
#include "Emu/RSX/Program/RSXFragmentProgram.h"
#include "Emu/RSX/Program/ProgramStateCache.h"
#include <util/asm.hpp>
#include <span>
using namespace program_hash_util;
namespace rsx::assembler
{
// Fixup of RSX's weird half-word shuffle for FP instructions.
// Swaps the two bytes inside every 16-bit half-word of the 128-bit instruction,
// converting the input stream into an LE u16 array.
//
// Only SSE2 intrinsics are used. The *_epi32 forms of AND/OR are AVX-512F/VL
// instructions and must not be used here, since they are unavailable on baseline x86-64.
inline v128 decode_instruction(const v128& raw_inst)
{
	const __m128i raw = static_cast<__m128i>(raw_inst);

	// Byte lanes that receive the left-shifted (low -> high) and right-shifted (high -> low) bytes
	const __m128i high_bytes = _mm_set1_epi32(static_cast<int>(0xff00ff00u));
	const __m128i low_bytes = _mm_set1_epi32(0x00ff00ff);

	const __m128i shifted_up = _mm_slli_epi32(raw, 8);
	const __m128i shifted_down = _mm_srli_epi32(raw, 8);

	const __m128i ret = _mm_or_si128(
		_mm_and_si128(high_bytes, shifted_up),
		_mm_and_si128(low_bytes, shifted_down)
	);

	return v128::loadu(&ret);
}
// Splits a fragment program into basic blocks and returns them as a FlowGraph.
//
// The program is walked one instruction at a time until an instruction with the 'end' bit is seen.
// Every non-NOP instruction is copied (decoded form) into the current block. IFE, LOOP and REP
// open a new block for the instruction that follows them and register blocks at their
// else/end targets; execution switches to such a block once the program counter reaches its id.
// Edges are only recorded from the block holding the branch instruction to the blocks it creates.
//
// prog:    Fragment program whose get_data() points at the raw instruction stream.
// returns: Graph whose blocks are sorted by id (the index of the first instruction of each block).
// throws:  via fmt::throw_exception when a CAL instruction is encountered.
FlowGraph deconstruct_fragment_program(const RSXFragmentProgram& prog)
{
	// For a flowgraph, we don't care at all about the actual contents, just flow control instructions.
	OPDEST dst{};
	SRC0 src0{};
	SRC1 src1{};
	SRC2 src2{};

	u32 pc = 0; // Program counter, counted in 128-bit instruction slots
	bool end = false;

	// Flow control data
	rsx::simple_array<BasicBlock*> end_blocks;
	rsx::simple_array<BasicBlock*> else_blocks;

	// Data block
	u32* data = static_cast<u32*>(prog.get_data());

	// Output
	FlowGraph graph{};
	BasicBlock* bb = graph.push();

	// Returns the block whose id matches, or nullptr if no such block exists yet
	auto find_block_for_pc = [&](u32 id) -> BasicBlock*
	{
		auto found = std::find_if(graph.blocks.begin(), graph.blocks.end(), FN(x.id == id));
		if (found != graph.blocks.end())
		{
			return &(*found);
		}
		return nullptr;
	};

	// Links parent to the block with the given id, creating the block only if it does not exist.
	// This lets several branches share one merge block.
	auto safe_insert_block = [&](BasicBlock* parent, u32 id) -> BasicBlock*
	{
		if (auto found = find_block_for_pc(id))
		{
			parent->insert_succ(found);
			found->insert_pred(parent);
			return found;
		}
		return graph.push(parent, id);
	};

	while (!end)
	{
		// Switch to a previously registered end/else block once we reach its first instruction
		BasicBlock** found = end_blocks.find_if(FN(x->id == pc));
		if (!found)
		{
			found = else_blocks.find_if(FN(x->id == pc));
		}

		if (found)
		{
			bb = *found;
		}

		const v128 raw_inst = v128::loadu(data, pc);
		v128 decoded = decode_instruction(raw_inst);

		dst.HEX = decoded._u32[0];
		src0.HEX = decoded._u32[1];
		src1.HEX = decoded._u32[2];
		src2.HEX = decoded._u32[3];

		const u32 opcode = dst.opcode | (src1.opcode_is_branch << 6);

		// Latch the end marker before the NOP early-out.
		// Otherwise a terminating NOP is skipped and we walk past the end of the program.
		end = !!dst.end;

		if (opcode == RSX_FP_OPCODE_NOP)
		{
			pc++;
			continue;
		}

		bb->instructions.push_back({});
		auto& ir_inst = bb->instructions.back();
		std::memcpy(ir_inst.bytecode, &decoded._u32[0], 16);

		switch (opcode)
		{
		case RSX_FP_OPCODE_CAL:
			// Unimplemented. Also unused by the RSX compiler
			fmt::throw_exception("Unimplemented FP CAL instruction.");
			break;
		case RSX_FP_OPCODE_RET:
			// Outside a subroutine, this doesn't mean much. The main block can conditionally return to stop execution early.
			// This will not alter flow control.
			break;
		case RSX_FP_OPCODE_IFE:
		{
			// Inserts if and else and end blocks
			auto parent = bb;
			bb = safe_insert_block(parent, pc + 1);
			if (src2.end_offset != src1.else_offset)
			{
				// An else target equal to the end target means there is no else branch
				else_blocks.push_back(safe_insert_block(parent, src1.else_offset >> 2));
			}
			end_blocks.push_back(safe_insert_block(parent, src2.end_offset >> 2));
			break;
		}
		case RSX_FP_OPCODE_LOOP:
		case RSX_FP_OPCODE_REP:
		{
			// Inserts for and end blocks
			auto parent = bb;
			bb = safe_insert_block(parent, pc + 1);
			end_blocks.push_back(safe_insert_block(parent, src2.end_offset >> 2));
			break;
		}
		default:
			if (fragment_program_utils::is_any_src_constant(decoded))
			{
				// The slot following this instruction is skipped as well; it is not decoded as an instruction
				pc++;
			}
		}

		pc++;
	}

	// Blocks were appended in discovery order; present them in program order
	graph.blocks.sort(FN(x.id < y.id));
	return graph;
}
}

View file

@ -0,0 +1,70 @@
#pragma once
#include <util/asm.hpp>
namespace rsx::assembler
{
struct BasicBlock;
// Identifies a single register.
struct Register
{
	int id{ 0 };       // Register index
	bool f16{ false }; // NOTE(review): presumably flags a half-precision register - confirm
};
// Reference to a register together with a per-component selection.
struct RegisterRef
{
	Register reg{};

	// Vector information
	// 'mask' overlays the four one-bit component flags below.
	// It defaults to 0 so that a default-initialized reference selects no component
	// instead of holding an indeterminate value.
	union
	{
		u32 mask = 0;

		struct
		{
			bool x : 1;
			bool y : 1;
			bool z : 1;
			bool w : 1;
		};
	};
};
// One instruction of the intermediate representation.
struct Instruction
{
	// Raw data. Every instruction is max 128 bits
	// Zero-filled by default, matching the other members which all have defined initial values.
	u32 bytecode[4]{};

	// Decoded
	u32 opcode = 0;
	std::vector<RegisterRef> srcs;
	std::vector<RegisterRef> dsts;
};
// Directed edge between two basic blocks.
struct FlowEdge
{
	BasicBlock* from{ nullptr }; // Block the edge starts at
	BasicBlock* to{ nullptr };   // Block the edge leads to
};
// A run of instructions together with its incoming and outgoing edges.
struct BasicBlock
{
	u32 id = 0;
	std::vector<Instruction> instructions;
	std::vector<FlowEdge> succ; // [0] = if/loop, [1] = else
	std::vector<FlowEdge> pred; // Back edge.

	// Records an outgoing edge: this -> b
	void insert_succ(BasicBlock* b)
	{
		succ.push_back(FlowEdge{ .from = this, .to = b });
	}

	// Records an incoming edge: b -> this
	// The edge keeps its real direction, so 'from' is the predecessor and 'to' is this block.
	void insert_pred(BasicBlock* b)
	{
		pred.push_back(FlowEdge{ .from = b, .to = this });
	}
};
}

View file

@ -3,6 +3,8 @@
#include "FragmentProgramRegister.h"
#include "RSXFragmentProgram.h"
#include "Assembler/CFG.h"
#include <sstream>
#include <unordered_map>

View file

@ -156,6 +156,7 @@
<ClCompile Include="Emu\RSX\Overlays\Shaders\shader_loading_dialog.cpp" />
<ClCompile Include="Emu\RSX\Overlays\Shaders\shader_loading_dialog_native.cpp" />
<ClCompile Include="Emu\RSX\Overlays\Trophies\overlay_trophy_list_dialog.cpp" />
<ClCompile Include="Emu\RSX\Program\Assembler\FPToCFG.cpp" />
<ClCompile Include="Emu\RSX\Program\FragmentProgramRegister.cpp" />
<ClCompile Include="Emu\RSX\Program\ProgramStateCache.cpp" />
<ClCompile Include="Emu\RSX\Program\program_util.cpp" />
@ -699,6 +700,8 @@
<ClInclude Include="Emu\RSX\Overlays\overlay_progress_bar.hpp" />
<ClInclude Include="Emu\RSX\Overlays\overlay_video.h" />
<ClInclude Include="Emu\RSX\Overlays\Trophies\overlay_trophy_list_dialog.h" />
<ClInclude Include="Emu\RSX\Program\Assembler\CFG.h" />
<ClInclude Include="Emu\RSX\Program\Assembler\IR.h" />
<ClInclude Include="Emu\RSX\Program\FragmentProgramRegister.h" />
<ClInclude Include="Emu\RSX\Program\GLSLTypes.h" />
<ClInclude Include="Emu\RSX\Program\ProgramStateCache.h" />

View file

@ -133,6 +133,9 @@
<Filter Include="Emu\GPU\RSX\Program\MSAA">
<UniqueIdentifier>{ce6d6b90-8313-4273-b46c-d92bd450c002}</UniqueIdentifier>
</Filter>
<Filter Include="Emu\GPU\RSX\Program\Assembler">
<UniqueIdentifier>{d99df916-8a99-428b-869a-9f14ac0ab411}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="Crypto\aes.cpp">
@ -1372,6 +1375,9 @@
<ClCompile Include="Emu\Io\evdev_gun_handler.cpp">
<Filter>Emu\Io</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\Program\Assembler\FPToCFG.cpp">
<Filter>Emu\GPU\RSX\Program\Assembler</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Crypto\aes.h">
@ -2764,6 +2770,12 @@
<ClInclude Include="util\pair.hpp">
<Filter>Utilities</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Program\Assembler\CFG.h">
<Filter>Emu\GPU\RSX\Program\Assembler</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Program\Assembler\IR.h">
<Filter>Emu\GPU\RSX\Program\Assembler</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl">

View file

@ -88,6 +88,7 @@
<ItemGroup>
<ClCompile Include="test.cpp" />
<ClCompile Include="test_fmt.cpp" />
<ClCompile Include="test_rsx_cfg.cpp" />
<ClCompile Include="test_simple_array.cpp" />
<ClCompile Include="test_address_range.cpp" />
<ClCompile Include="test_tuple.cpp" />

View file

@ -0,0 +1,218 @@
#pragma optimize("", off)
#include <gtest/gtest.h>
#include "Emu/RSX/Common/simple_array.hpp"
#include "Emu/RSX/Program/Assembler/CFG.h"
#include "Emu/RSX/Program/RSXFragmentProgram.h"
#include <util/v128.hpp>
namespace rsx::assembler
{
// Swaps the two bytes inside each 16-bit half of a dword (b0 b1 b2 b3 -> b1 b0 b3 b2).
// Lazy encode, but good enough for what we need here.
auto swap_bytes16 = [](u32 dword) -> u32
{
	// Plain shift/mask arithmetic; avoids reading an inactive union member to reinterpret the bytes.
	const u32 low_to_high = (dword & 0x00ff00ffu) << 8;
	const u32 high_to_low = (dword >> 8) & 0x00ff00ffu;
	return low_to_high | high_to_low;
};
// Instruction mocks because we don't have a working assembler (yet)
// Builds a raw (byte-shuffled) instruction that carries only an opcode and, optionally, the end marker.
// All source operand words are left as zero.
auto encode_instruction = [](u32 opcode, bool end = false) -> v128
{
	OPDEST header{};
	header.opcode = opcode;
	header.end = end ? 1 : 0;

	const u32 word0 = swap_bytes16(header.HEX);
	return v128::from32(word0, 0, 0, 0);
};
// Builds a raw (byte-shuffled) IFE instruction.
// end:   index of the first instruction after the conditional
// _else: index of the first instruction of the else branch; 0 means no else branch,
//        in which case the else target is set to the end target
auto create_if(u32 end, u32 _else = 0)
{
	const u32 else_target = (_else != 0) ? _else : end;

	OPDEST dst{};
	dst.opcode = RSX_FP_OPCODE_IFE;

	SRC1 src1{};
	src1.opcode_is_branch = 1;
	src1.else_offset = else_target << 2;

	SRC2 src2{};
	src2.end_offset = end << 2;

	const u32 word0 = swap_bytes16(dst.HEX);
	const u32 word2 = swap_bytes16(src1.HEX);
	const u32 word3 = swap_bytes16(src2.HEX);
	return v128::from32(word0, 0, word2, word3);
}
// A program without any branches must collapse into a single block holding every instruction.
TEST(CFG, FpToCFG_Basic)
{
	rsx::simple_array<v128> ucode = {
		encode_instruction(RSX_FP_OPCODE_ADD),
		encode_instruction(RSX_FP_OPCODE_MOV, true)
	};

	RSXFragmentProgram program{};
	program.data = ucode.data();

	const FlowGraph graph = deconstruct_fragment_program(program);

	EXPECT_EQ(graph.blocks.size(), 1);
	EXPECT_EQ(graph.blocks.front().instructions.size(), 2);
}
// A single IF without ELSE yields three blocks: head, branch body and merge.
TEST(CFG, FpToCFG_IF)
{
	rsx::simple_array<v128> ucode = {
		encode_instruction(RSX_FP_OPCODE_ADD),       // 0
		encode_instruction(RSX_FP_OPCODE_MOV),       // 1
		create_if(4),                                // 2 (BR, 4)
		encode_instruction(RSX_FP_OPCODE_ADD),       // 3
		encode_instruction(RSX_FP_OPCODE_MOV, true), // 4 (Merge block)
	};

	// { block id, instruction count }, in program order
	const std::pair<int, size_t> expected_block_data[3] = {
		{ 0, 3 }, // Head
		{ 3, 1 }, // Branch
		{ 4, 1 }, // Merge
	};

	RSXFragmentProgram program{};
	program.data = ucode.data();

	const FlowGraph graph = deconstruct_fragment_program(program);
	ASSERT_EQ(graph.blocks.size(), 3);

	size_t index = 0;
	for (const auto& block : graph.blocks)
	{
		const auto& want = expected_block_data[index++];
		EXPECT_EQ(block.id, want.first);
		EXPECT_EQ(block.instructions.size(), want.second);
	}
}
// Two nested IFs with distinct merge points yield five blocks.
TEST(CFG, FpToCFG_NestedIF)
{
	rsx::simple_array<v128> ucode = {
		encode_instruction(RSX_FP_OPCODE_ADD),      // 0
		encode_instruction(RSX_FP_OPCODE_MOV),      // 1
		create_if(8),                               // 2 (BR, 8)
		encode_instruction(RSX_FP_OPCODE_ADD),      // 3
		create_if(6),                               // 4 (BR, 6)
		encode_instruction(RSX_FP_OPCODE_MOV),      // 5
		encode_instruction(RSX_FP_OPCODE_MOV),      // 6 (merge block 1)
		encode_instruction(RSX_FP_OPCODE_ADD),      // 7
		encode_instruction(RSX_FP_OPCODE_MOV, true) // 8 (merge block 2)
	};

	// { block id, instruction count }, in program order
	const std::pair<int, size_t> expected_block_data[5] = {
		{ 0, 3 }, // Head
		{ 3, 2 }, // Branch 1
		{ 5, 1 }, // Branch 2
		{ 6, 2 }, // Merge 1
		{ 8, 1 }, // Merge 2
	};

	RSXFragmentProgram program{};
	program.data = ucode.data();

	const FlowGraph graph = deconstruct_fragment_program(program);
	ASSERT_EQ(graph.blocks.size(), 5);

	size_t index = 0;
	for (const auto& block : graph.blocks)
	{
		const auto& want = expected_block_data[index++];
		EXPECT_EQ(block.id, want.first);
		EXPECT_EQ(block.instructions.size(), want.second);
	}
}
// Two nested IFs that share the same merge point must reuse a single merge block.
TEST(CFG, FpToCFG_NestedIF_MultiplePred)
{
	rsx::simple_array<v128> ucode = {
		encode_instruction(RSX_FP_OPCODE_ADD),      // 0
		encode_instruction(RSX_FP_OPCODE_MOV),      // 1
		create_if(6),                               // 2 (BR, 6)
		encode_instruction(RSX_FP_OPCODE_ADD),      // 3
		create_if(6),                               // 4 (BR, 6)
		encode_instruction(RSX_FP_OPCODE_MOV),      // 5
		encode_instruction(RSX_FP_OPCODE_MOV),      // 6 (merge block)
		encode_instruction(RSX_FP_OPCODE_ADD),      // 7
		encode_instruction(RSX_FP_OPCODE_MOV, true) // 8
	};

	// { block id, instruction count }, in program order
	const std::pair<int, size_t> expected_block_data[4] = {
		{ 0, 3 }, // Head
		{ 3, 2 }, // Branch 1
		{ 5, 1 }, // Branch 2
		{ 6, 3 }, // Merge
	};

	RSXFragmentProgram program{};
	program.data = ucode.data();

	const FlowGraph graph = deconstruct_fragment_program(program);
	ASSERT_EQ(graph.blocks.size(), 4);

	size_t index = 0;
	for (const auto& block : graph.blocks)
	{
		const auto& want = expected_block_data[index++];
		EXPECT_EQ(block.id, want.first);
		EXPECT_EQ(block.instructions.size(), want.second);
	}
}
// An IF with an ELSE branch yields four blocks: head, positive branch, negative branch and merge.
TEST(CFG, FpToCFG_IF_ELSE)
{
	rsx::simple_array<v128> ucode = {
		encode_instruction(RSX_FP_OPCODE_ADD),       // 0
		encode_instruction(RSX_FP_OPCODE_MOV),       // 1
		create_if(6, 4),                             // 2 (BR, 6)
		encode_instruction(RSX_FP_OPCODE_ADD),       // 3
		encode_instruction(RSX_FP_OPCODE_MOV),       // 4 (Else)
		encode_instruction(RSX_FP_OPCODE_ADD),       // 5
		encode_instruction(RSX_FP_OPCODE_MOV, true), // 6 (Merge)
	};

	// { block id, instruction count }, in program order
	const std::pair<int, size_t> expected_block_data[4] = {
		{ 0, 3 }, // Head
		{ 3, 1 }, // Branch positive
		{ 4, 2 }, // Branch negative
		{ 6, 1 }, // Merge
	};

	RSXFragmentProgram program{};
	program.data = ucode.data();

	const FlowGraph graph = deconstruct_fragment_program(program);
	ASSERT_EQ(graph.blocks.size(), 4);

	size_t index = 0;
	for (const auto& block : graph.blocks)
	{
		const auto& want = expected_block_data[index++];
		EXPECT_EQ(block.id, want.first);
		EXPECT_EQ(block.instructions.size(), want.second);
	}
}
}

View file

@ -303,4 +303,24 @@ namespace rsx
EXPECT_EQ(data_ptr1 & 15, 0);
EXPECT_EQ(data_ptr2 & 127, 0);
}
// find() yields a pointer to the matching element, or nullptr when the value is absent.
TEST(SimpleArray, Find)
{
	const rsx::simple_array<u32> arr{
		0, 1, 2, 3, 4, 5, 6, 7, 8, 9
	};

	const auto present = arr.find(8);
	const auto absent = arr.find(99);

	EXPECT_EQ(*present, 8);
	EXPECT_EQ(absent, nullptr);
}
// find_if() yields a pointer to the first element satisfying the predicate, or nullptr when none does.
TEST(SimpleArray, FindIf)
{
	const rsx::simple_array<u32> arr{
		0, 1, 2, 3, 4, 5, 6, 7, 8, 9
	};

	const auto present = arr.find_if(FN(x == 8));
	const auto absent = arr.find_if(FN(x == 99));

	EXPECT_EQ(*present, 8);
	EXPECT_EQ(absent, nullptr);
}
}