mirror of
https://github.com/RPCSX/rpcsx.git
synced 2026-04-05 22:47:03 +00:00
vp: Improve vertex program analyser
- Add dead code elimination - Fix absolute branch target addresses to take the base address into account - Patch branch targets relative to the base address to improve hash matching - Bump the shader cache version - Enable the shader logging option to write out the vertex program binary, which is helpful when debugging problems.
This commit is contained in:
parent
bd915bfebd
commit
2ca935a26b
12 changed files with 427 additions and 172 deletions
|
|
@ -1,5 +1,8 @@
|
|||
#include "stdafx.h"
|
||||
#include "ProgramStateCache.h"
|
||||
#include "Emu/System.h"
|
||||
|
||||
#include <stack>
|
||||
|
||||
using namespace program_hash_util;
|
||||
|
||||
|
|
@ -12,54 +15,222 @@ size_t vertex_program_utils::get_vertex_program_ucode_hash(const RSXVertexProgra
|
|||
bool end = false;
|
||||
for (unsigned i = 0; i < program.data.size() / 4; i++)
|
||||
{
|
||||
const qword inst = instbuffer[instIndex];
|
||||
hash ^= inst.dword[0];
|
||||
hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40);
|
||||
hash ^= inst.dword[1];
|
||||
hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40);
|
||||
if (program.instruction_mask[i])
|
||||
{
|
||||
const qword inst = instbuffer[instIndex];
|
||||
hash ^= inst.dword[0];
|
||||
hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40);
|
||||
hash ^= inst.dword[1];
|
||||
hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40);
|
||||
}
|
||||
|
||||
instIndex++;
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
|
||||
vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vertex_program(const std::vector<u32>& data)
|
||||
vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vertex_program(const u32* data, u32 entry, RSXVertexProgram& dst_prog)
|
||||
{
|
||||
u32 ucode_size = 0;
|
||||
u32 current_instrution = 0;
|
||||
vertex_program_utils::vertex_program_metadata result;
|
||||
u32 last_instruction_address = 0;
|
||||
u32 first_instruction_address = entry;
|
||||
|
||||
std::stack<u32> call_stack;
|
||||
std::pair<u32, u32> instruction_range = { UINT32_MAX, 0 };
|
||||
std::bitset<512> instructions_to_patch;
|
||||
bool has_branch_instruction = false;
|
||||
|
||||
D3 d3;
|
||||
D2 d2;
|
||||
D1 d1;
|
||||
D0 d0;
|
||||
|
||||
for (; ucode_size < data.size(); ucode_size += 4)
|
||||
std::function<void(u32, bool)> walk_function = [&](u32 start, bool fast_exit)
|
||||
{
|
||||
d1.HEX = data[ucode_size + 1];
|
||||
d3.HEX = data[ucode_size + 3];
|
||||
u32 current_instrution = start;
|
||||
std::set<u32> conditional_targets;
|
||||
|
||||
switch (d1.sca_opcode)
|
||||
while (true)
|
||||
{
|
||||
case RSX_SCA_OPCODE_BRI:
|
||||
case RSX_SCA_OPCODE_BRB:
|
||||
case RSX_SCA_OPCODE_CAL:
|
||||
case RSX_SCA_OPCODE_CLI:
|
||||
case RSX_SCA_OPCODE_CLB:
|
||||
{
|
||||
d2.HEX = data[ucode_size + 2];
|
||||
verify(HERE), current_instrution < 512;
|
||||
|
||||
u32 jump_address = ((d2.iaddrh << 3) | d3.iaddrl) * 4;
|
||||
last_instruction_address = std::max(last_instruction_address, jump_address);
|
||||
break;
|
||||
}
|
||||
if (result.instruction_mask[current_instrution])
|
||||
{
|
||||
if (!fast_exit)
|
||||
{
|
||||
// This can be harmless if a dangling RET was encountered before
|
||||
LOG_ERROR(RSX, "vp_analyser: Possible infinite loop detected");
|
||||
current_instrution++;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Block walk, looking for earliest exit
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const qword* instruction = (const qword*)&data[current_instrution * 4];
|
||||
d1.HEX = instruction->word[1];
|
||||
d3.HEX = instruction->word[3];
|
||||
|
||||
// Touch current instruction
|
||||
result.instruction_mask[current_instrution] = 1;
|
||||
instruction_range.first = std::min(current_instrution, instruction_range.first);
|
||||
instruction_range.second = std::max(current_instrution, instruction_range.second);
|
||||
|
||||
bool static_jump = false;
|
||||
bool function_call = true;
|
||||
|
||||
switch (d1.sca_opcode)
|
||||
{
|
||||
case RSX_SCA_OPCODE_BRI:
|
||||
{
|
||||
d0.HEX = instruction->word[0];
|
||||
static_jump = (d0.cond == 0x7);
|
||||
// Fall through
|
||||
}
|
||||
case RSX_SCA_OPCODE_BRB:
|
||||
{
|
||||
function_call = false;
|
||||
// Fall through
|
||||
}
|
||||
case RSX_SCA_OPCODE_CAL:
|
||||
case RSX_SCA_OPCODE_CLI:
|
||||
case RSX_SCA_OPCODE_CLB:
|
||||
{
|
||||
// Need to patch the jump address to be consistent wherever the program is located
|
||||
instructions_to_patch[current_instrution] = true;
|
||||
has_branch_instruction = true;
|
||||
|
||||
d2.HEX = instruction->word[2];
|
||||
const u32 jump_address = ((d2.iaddrh << 3) | d3.iaddrl);
|
||||
|
||||
if (function_call)
|
||||
{
|
||||
call_stack.push(current_instrution + 1);
|
||||
current_instrution = jump_address;
|
||||
continue;
|
||||
}
|
||||
else if (static_jump)
|
||||
{
|
||||
// NOTE: This will skip potential jump target blocks between current->target
|
||||
current_instrution = jump_address;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Set possible end address and proceed as usual
|
||||
conditional_targets.emplace(jump_address);
|
||||
instruction_range.second = std::max(jump_address, instruction_range.second);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case RSX_SCA_OPCODE_RET:
|
||||
{
|
||||
if (call_stack.empty())
|
||||
{
|
||||
LOG_ERROR(RSX, "vp_analyser: RET found outside subroutine call");
|
||||
}
|
||||
else
|
||||
{
|
||||
current_instrution = call_stack.top();
|
||||
call_stack.pop();
|
||||
continue;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (d3.end && (fast_exit || current_instrution >= instruction_range.second) ||
|
||||
(current_instrution + 1) == 512)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
current_instrution++;
|
||||
}
|
||||
|
||||
if (d3.end && (ucode_size >= last_instruction_address))
|
||||
for (const u32 target : conditional_targets)
|
||||
{
|
||||
//Jumping over an end label is legal (verified)
|
||||
break;
|
||||
if (!result.instruction_mask[target])
|
||||
{
|
||||
walk_function(target, true);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if (g_cfg.video.log_programs)
|
||||
{
|
||||
fs::file dump(fs::get_config_dir() + "shaderlog/vp_analyser.bin", fs::rewrite);
|
||||
dump.write(&entry, 4);
|
||||
dump.write(data, 512 * 16);
|
||||
dump.close();
|
||||
}
|
||||
|
||||
walk_function(entry, false);
|
||||
|
||||
const u32 instruction_count = (instruction_range.second - instruction_range.first + 1);
|
||||
result.ucode_length = instruction_count * 16;
|
||||
|
||||
dst_prog.base_address = instruction_range.first;
|
||||
dst_prog.entry = entry;
|
||||
dst_prog.data.resize(instruction_count * 4);
|
||||
dst_prog.instruction_mask = (result.instruction_mask >> instruction_range.first);
|
||||
|
||||
if (!has_branch_instruction)
|
||||
{
|
||||
verify(HERE), instruction_range.first == entry;
|
||||
std::memcpy(dst_prog.data.data(), data + (instruction_range.first * 4), result.ucode_length);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (u32 i = instruction_range.first, count = 0; i <= instruction_range.second; ++i, ++count)
|
||||
{
|
||||
const qword* instruction = (const qword*)&data[i * 4];
|
||||
qword* dst = (qword*)&dst_prog.data[count * 4];
|
||||
|
||||
if (result.instruction_mask[i])
|
||||
{
|
||||
dst->dword[0] = instruction->dword[0];
|
||||
dst->dword[1] = instruction->dword[1];
|
||||
|
||||
if (instructions_to_patch[i])
|
||||
{
|
||||
d2.HEX = dst->word[2];
|
||||
d3.HEX = dst->word[3];
|
||||
|
||||
u32 address = ((d2.iaddrh << 3) | d3.iaddrl);
|
||||
address -= instruction_range.first;
|
||||
|
||||
d2.iaddrh = (address >> 3);
|
||||
d3.iaddrl = (address & 0x7);
|
||||
dst->word[2] = d2.HEX;
|
||||
dst->word[3] = d3.HEX;
|
||||
|
||||
dst_prog.jump_table.emplace(address);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
dst->dword[0] = 0ull;
|
||||
dst->dword[1] = 0ull;
|
||||
}
|
||||
}
|
||||
|
||||
// Verification
|
||||
for (const u32 target : dst_prog.jump_table)
|
||||
{
|
||||
if (!result.instruction_mask[target])
|
||||
{
|
||||
LOG_ERROR(RSX, "vp_analyser: Failed, branch target 0x%x was not resolved", target);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return{ ucode_size + 4 };
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t vertex_program_storage_hash::operator()(const RSXVertexProgram &program) const
|
||||
|
|
@ -75,6 +246,8 @@ bool vertex_program_compare::operator()(const RSXVertexProgram &binary1, const R
|
|||
return false;
|
||||
if (binary1.data.size() != binary2.data.size())
|
||||
return false;
|
||||
if (binary1.jump_table != binary2.jump_table)
|
||||
return false;
|
||||
if (!binary1.skip_vertex_input_check && !binary2.skip_vertex_input_check && binary1.rsx_vertex_inputs != binary2.rsx_vertex_inputs)
|
||||
return false;
|
||||
|
||||
|
|
@ -83,10 +256,22 @@ bool vertex_program_compare::operator()(const RSXVertexProgram &binary1, const R
|
|||
size_t instIndex = 0;
|
||||
for (unsigned i = 0; i < binary1.data.size() / 4; i++)
|
||||
{
|
||||
const qword& inst1 = instBuffer1[instIndex];
|
||||
const qword& inst2 = instBuffer2[instIndex];
|
||||
if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1])
|
||||
const auto active = binary1.instruction_mask[instIndex];
|
||||
if (active != binary2.instruction_mask[instIndex])
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (active)
|
||||
{
|
||||
const qword& inst1 = instBuffer1[instIndex];
|
||||
const qword& inst2 = instBuffer2[instIndex];
|
||||
if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1])
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
instIndex++;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue