[rsx/wip] Use transform_branch_bits register for branch ops. Not yet correct (part 1)

This commit is contained in:
kd-11 2017-01-12 14:15:49 +03:00 committed by Zangetsu38
parent 8aca7f4808
commit 5765b47bee
6 changed files with 63 additions and 31 deletions

View file

@ -216,7 +216,8 @@ std::string VertexProgramDecompiler::Format(const std::string& code)
return "if(" + cond + ") ";
}
},
{ "$cond", std::bind(std::mem_fn(&VertexProgramDecompiler::GetCond), this) }
{ "$cond", std::bind(std::mem_fn(&VertexProgramDecompiler::GetCond), this) },
{ "$ifbcond", std::bind(std::mem_fn(&VertexProgramDecompiler::GetOptionalBranchCond), this) }
};
return fmt::replace_all(code, repl_list);
@ -257,6 +258,22 @@ std::string VertexProgramDecompiler::GetCond()
return "any(" + compareFunction(cond_string_table[d0.cond], "cc" + std::to_string(d0.cond_reg_sel_1) + swizzle, getFloatTypeName(4) + "(0., 0., 0., 0.)" + swizzle) + ")";
}
std::string VertexProgramDecompiler::GetOptionalBranchCond()
{
u32 bit_index = d3.branch_index;
u32 lookup_field = 0;
if (bit_index > 31)
{
bit_index -= 32;
lookup_field = 1;
}
std::string lookup[] = {"transform_branch_bits_lo", "transform_branch_bits_hi"};
std::string cond = "(" + lookup[lookup_field] + " & (1 << " + std::to_string(bit_index) + ")) == 0";
return "if (" + cond + ")";
}
void VertexProgramDecompiler::AddCodeCond(const std::string& dst, const std::string& src)
{
enum
@ -664,17 +681,16 @@ std::string VertexProgramDecompiler::Decompile()
LOG_WARNING(RSX, "sca_opcode BRB, d0=0x%X, d1=0x%X, d2=0x%X, d3=0x%X", d0.HEX, d1.HEX, d2.HEX, d3.HEX);
AddCode(fmt::format("//BRB opcode, d0=0x%X, d1=0x%X, d2=0x%X, d3=0x%X", d0.HEX, d1.HEX, d2.HEX, d3.HEX));
// BRB is identifiable by having dst_tmp=0x3f, sca_dst_tmp=0x3f, cond=true, cond_test_enable = false, cond_test_update=false and bit 25 on D3 is set
// When a vector opcode is issued together with BRB, it seems to be ignored. Since cc update and test are disabled, it's possible that the compiler
// uses this as some kind of optimization to allow the same shader to execute differently based on some other state.
// Tested using saint seiya games as well as hellboy: the science of evil
if (d3.brb_cond_true && d1.vec_opcode == RSX_VEC_OPCODE_NOP)
{
u32 jump_position = find_jump_lvl(GetAddr());
AddCode(fmt::format("jump_position = %u;", jump_position));
AddCode("continue;");
AddCode("");
}
u32 jump_position = find_jump_lvl(GetAddr());
AddCode("$ifbcond //BRB");
AddCode("{");
m_cur_instr->open_scopes++;
AddCode(fmt::format("jump_position = %u;", jump_position));
AddCode("continue;");
m_cur_instr->close_scopes++;
AddCode("}");
AddCode("");
break;
}
@ -683,11 +699,8 @@ std::string VertexProgramDecompiler::Decompile()
LOG_WARNING(RSX, "sca_opcode CLB, d0=0x%X, d1=0x%X, d2=0x%X, d3=0x%X", d0.HEX, d1.HEX, d2.HEX, d3.HEX);
AddCode("//CLB");
if (d3.brb_cond_true && d1.vec_opcode == RSX_VEC_OPCODE_NOP)
{
AddCode("$f(); //CLB");
AddCode("");
}
AddCode("$ifbcond $f(); //CLB");
AddCode("");
break;
case RSX_SCA_OPCODE_PSH: break;

View file

@ -69,6 +69,7 @@ struct VertexProgramDecompiler
std::string GetFunc();
std::string GetTex();
std::string GetCond();
std::string GetOptionalBranchCond(); //Conditional branch expression modified externally at runtime
std::string AddAddrMask();
std::string AddAddrReg();
std::string AddAddrRegWithoutMask();

View file

@ -33,11 +33,13 @@ void GLFragmentDecompilerThread::insertHeader(std::stringstream & OS)
OS << "layout(std140, binding = 0) uniform ScaleOffsetBuffer\n";
OS << "{\n";
OS << " mat4 scaleOffsetMat;\n";
OS << " mat4 scaleOffsetMat;\n";
OS << " float fog_param0;\n";
OS << " float fog_param1;\n";
OS << " uint alpha_test;\n";
OS << " uint alpha_test;\n";
OS << " float alpha_ref;\n";
OS << " uint transform_branch_bits_lo;\n";
OS << " uint transform_branch_bits_hi;\n";
OS << "};\n";
}
@ -120,6 +122,8 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS)
}
}
int constants_count = 0;
OS << std::endl;
OS << "layout(std140, binding = 2) uniform FragmentConstantsBuffer" << std::endl;
OS << "{" << std::endl;
@ -133,11 +137,18 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS)
continue;
for (const ParamItem& PI : PT.items)
{
OS << " " << PT.type << " " << PI.name << ";" << std::endl;
constants_count++;
}
}
if (!constants_count)
{
// A dummy value otherwise it's invalid to create an empty uniform buffer
OS << " vec4 void_value;" << std::endl;
}
// A dummy value otherwise it's invalid to create an empty uniform buffer
OS << " vec4 void_value;" << std::endl;
OS << "};" << std::endl;
}

View file

@ -720,7 +720,6 @@ bool GLGSRender::load_program()
m_program = &m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, nullptr);
m_program->use();
LOG_ERROR(RSX, "Program id=%d, Transform program bits = %llx", m_program->id(), rsx::method_registers.transform_branch_bits());
if (old_program == m_program && !m_transform_constants_dirty)
{
@ -784,10 +783,15 @@ bool GLGSRender::load_program()
float alpha_ref = rsx::method_registers.alpha_ref() / 255.f;
f32 fog0 = rsx::method_registers.fog_params_0();
f32 fog1 = rsx::method_registers.fog_params_1();
u32 branch_bits_lo = (rsx::method_registers.transform_branch_bits() & 0xffffffff);
u32 branch_bits_hi = (rsx::method_registers.transform_branch_bits() >> 32) & 0xffffffff;
memcpy(buf + 16 * sizeof(float), &fog0, sizeof(float));
memcpy(buf + 17 * sizeof(float), &fog1, sizeof(float));
memcpy(buf + 18 * sizeof(float), &is_alpha_tested, sizeof(u32));
memcpy(buf + 19 * sizeof(float), &alpha_ref, sizeof(float));
memcpy(buf + 20 * sizeof(float), &branch_bits_lo, sizeof(u32));
memcpy(buf + 21 * sizeof(float), &branch_bits_hi, sizeof(u32));
// Vertex constants
mapping = m_uniform_ring_buffer->alloc_from_heap(8192, m_uniform_buffer_offset_align);

View file

@ -30,15 +30,18 @@ std::string GLVertexDecompilerThread::compareFunction(COMPARE f, const std::stri
// Streams the GLSL preamble for a vertex program into OS: the "#version 430"
// directive followed by the std140 ScaleOffsetBuffer uniform block bound at
// binding 0.
// NOTE(review): several statements below appear twice with slightly different
// spelling (the #version line, the block opening, alpha_test, the closing "};").
// This looks like pre-change and post-change lines of a diff shown together;
// confirm against the real file before relying on the exact emitted text.
void GLVertexDecompilerThread::insertHeader(std::stringstream &OS)
{
OS << "#version 430" << std::endl << std::endl;
OS << "layout(std140, binding = 0) uniform ScaleOffsetBuffer" << std::endl;
OS << "{" << std::endl;
OS << " mat4 scaleOffsetMat;" << std::endl;
OS << "#version 430" << std::endl;
OS << "layout(std140, binding = 0) uniform ScaleOffsetBuffer\n";
OS << "{\n";
OS << " mat4 scaleOffsetMat;\n";
// Fog parameters and alpha-test state are declared after the matrix.
OS << " float fog_param0;\n";
OS << " float fog_param1;\n";
OS << " uint alpha_test;\n";
OS << " uint alpha_test;\n";
OS << " float alpha_ref;\n";
OS << "};" << std::endl;
// Two uint members named as the low and high halves of transform_branch_bits.
OS << " uint transform_branch_bits_lo;\n";
OS << " uint transform_branch_bits_hi;\n";
OS << "};\n";
}
void GLVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::vector<ParamType>& inputs)
@ -96,6 +99,7 @@ void GLVertexDecompilerThread::insertConstants(std::stringstream & OS, const std
OS << "layout(std140, binding = 1) uniform VertexConstantsBuffer" << std::endl;
OS << "{" << std::endl;
OS << " vec4 vc[468];" << std::endl;
OS << "" << std::endl;
OS << "};" << std::endl << std::endl;
for (const ParamType &PT: constants)

View file

@ -121,7 +121,7 @@ union D2
struct
{
u32 : 8;
u32 tex_num : 2; /* Actual field may be 4 bits wide, but we only have 4 TIUs */
u32 tex_num : 2; // Actual field may be 4 bits wide, but we only have 4 TIUs
u32 : 22;
};
};
@ -150,8 +150,7 @@ union D3
struct
{
u32 : 24;
u32 brb_cond_true : 1;
u32 : 4;
u32 branch_index : 5; //Index into transform_program_branch_bits
u32 iaddrl : 3;
};
};