This commit is contained in:
kd-11 2025-12-05 23:06:44 +00:00 committed by GitHub
commit 574b78bee3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 254 additions and 12 deletions

View file

@ -2,8 +2,10 @@
#include "FragmentProgramDecompiler.h" #include "FragmentProgramDecompiler.h"
#include "ProgramStateCache.h" #include "ProgramStateCache.h"
#include "Emu/RSX/Common/simple_array.hpp"
#include <algorithm> #include <algorithm>
#include <unordered_set>
namespace rsx namespace rsx
{ {
@ -31,6 +33,191 @@ enum VectorLane : u8
W = 3, W = 3,
}; };
/**
 * Returns the set of source lanes that `opcode` consumes from its operand number `operand`.
 *
 * Bits 0..3 of the result stand for the x, y, z and w lanes respectively.
 * Bit 31 is a flag: when set, the instruction works per-component, so the caller is
 * expected to narrow the lane bits further using the destination write mask.
 * A result of 0 means no lanes are read (or the opcode/operand pair is not described here).
 */
u32 FragmentProgramDecompiler::get_src_vector_mask(u32 opcode, u32 operand) const
{
	// Single lanes
	constexpr u32 lane_x = 1u << 0;
	constexpr u32 lane_y = 1u << 1;
	constexpr u32 lane_z = 1u << 2;
	constexpr u32 lane_w = 1u << 3;

	// Common lane groups
	constexpr u32 lanes_xy = lane_x | lane_y;
	constexpr u32 lanes_xyz = lanes_xy | lane_z;
	constexpr u32 lanes_all = lanes_xyz | lane_w;

	// Flag bit telling the caller to intersect the lanes with the destination mask
	constexpr u32 follows_dst_mask = 1u << 31;
	constexpr u32 per_component = lanes_all | follows_dst_mask;

	// Picks the mask belonging to the requested operand. Operands beyond the table read nothing.
	const auto per_operand = [operand](const rsx::simple_array<u32>& table) -> u32
	{
		if (operand >= table.size())
		{
			return 0u;
		}

		return table[operand];
	};

	// Picks a coordinate mask according to the dimensionality of the sampled texture
	const auto texcoord_lanes = [this](u32 for_1d, u32 for_2d, u32 for_3d) -> u32
	{
		switch (m_prog.get_texture_dimension(dst.tex_num))
		{
		case rsx::texture_dimension_extended::texture_dimension_1d:
			return for_1d;
		case rsx::texture_dimension_extended::texture_dimension_2d:
			return for_2d;
		case rsx::texture_dimension_extended::texture_dimension_3d:
		case rsx::texture_dimension_extended::texture_dimension_cubemap:
			return for_3d;
		default:
			return 0u;
		}
	};

	switch (opcode)
	{
	// Component-wise operations. Only the lanes that are actually written get read.
	case RSX_FP_OPCODE_MOV:
	case RSX_FP_OPCODE_MUL:
	case RSX_FP_OPCODE_ADD:
	case RSX_FP_OPCODE_MAD:
	case RSX_FP_OPCODE_MIN:
	case RSX_FP_OPCODE_MAX:
	case RSX_FP_OPCODE_SLT:
	case RSX_FP_OPCODE_SGE:
	case RSX_FP_OPCODE_SLE:
	case RSX_FP_OPCODE_SGT:
	case RSX_FP_OPCODE_SNE:
	case RSX_FP_OPCODE_SEQ:
	case RSX_FP_OPCODE_FRC:
	case RSX_FP_OPCODE_FLR:
	case RSX_FP_OPCODE_DDX:
	case RSX_FP_OPCODE_DDY:
	case RSX_FP_OPCODE_LRP:
	case RSX_FP_OPCODE_STR:
	case RSX_FP_OPCODE_SFL:
		return per_component;

	// Full vector is read regardless of the destination mask
	case RSX_FP_OPCODE_DP4:
	case RSX_FP_OPCODE_PK4:
	case RSX_FP_OPCODE_LIT:
	case RSX_FP_OPCODE_PKB:
	case RSX_FP_OPCODE_PKG:
	case RSX_FP_OPCODE_REFL:
		return lanes_all;

	// First three lanes
	case RSX_FP_OPCODE_DP3:
	case RSX_FP_OPCODE_NRM:
		return lanes_xyz;

	// First two lanes
	case RSX_FP_OPCODE_PK2:
	case RSX_FP_OPCODE_PK16:
	case RSX_FP_OPCODE_DP2:
		return lanes_xy;

	// Scalar input in x
	case RSX_FP_OPCODE_UP4:
	case RSX_FP_OPCODE_RCP:
	case RSX_FP_OPCODE_RSQ:
	case RSX_FP_OPCODE_EX2:
	case RSX_FP_OPCODE_LG2:
	case RSX_FP_OPCODE_COS:
	case RSX_FP_OPCODE_SIN:
	case RSX_FP_OPCODE_UP2:
	case RSX_FP_OPCODE_UPB:
	case RSX_FP_OPCODE_UP16:
	case RSX_FP_OPCODE_UPG:
		return lane_x;

	// Texture fetches. Lane count follows the texture dimension.
	case RSX_FP_OPCODE_TEX:
	case RSX_FP_OPCODE_TXD:
		return texcoord_lanes(lane_x, lanes_xy, lanes_xyz);
	case RSX_FP_OPCODE_TXP:
		return texcoord_lanes(lanes_xy, lanes_xyz, lanes_all);

	// Opcodes whose operands each read a different set of lanes
	case RSX_FP_OPCODE_DST:
		return per_operand({ lane_y | lane_z, lane_y | lane_w });
	case RSX_FP_OPCODE_BEM:
		return per_operand({ lanes_xy, lanes_xy, lanes_all });
	case RSX_FP_OPCODE_TEXBEM:
	case RSX_FP_OPCODE_TXPBEM:
		return per_operand({ lanes_xy, lanes_xy, lanes_all }); // Coordinate generated from BEM operation
	case RSX_FP_OPCODE_DP2A:
		return per_operand({ lanes_xy, lanes_xy, lane_x });
	case RSX_FP_OPCODE_TXL:
	case RSX_FP_OPCODE_TXB:
		return per_operand({ lanes_xy, lane_x });
	case RSX_FP_OPCODE_DIV:
	case RSX_FP_OPCODE_DIVSQ:
		return per_operand({ lanes_all, lane_x });
	case RSX_FP_OPCODE_LIF:
		return per_operand({ lane_y | lane_w });

	// Nothing is read from the register file
	case RSX_FP_OPCODE_NOP:
	case RSX_FP_OPCODE_KIL:
		return 0;

	// Flow control. Special registers are provided for these outside the common file
	case RSX_FP_OPCODE_FENCT:
	case RSX_FP_OPCODE_FENCB:
	case RSX_FP_OPCODE_BRK:
	case RSX_FP_OPCODE_CAL:
	case RSX_FP_OPCODE_IFE:
	case RSX_FP_OPCODE_LOOP:
	case RSX_FP_OPCODE_REP:
	case RSX_FP_OPCODE_RET:
		return 0;

	// Opcodes that are not implemented by the decompiler
	case RSX_FP_OPCODE_POW:
		fmt::throw_exception("Unimplemented POW instruction."); // Unused ??
	case RSX_FP_OPCODE_BEMLUM:
		fmt::throw_exception("Unimplemented BEMLUM instruction"); // Unused
	case RSX_FP_OPCODE_TIMESWTEX:
		fmt::throw_exception("Unimplemented TIMESWTEX instruction"); // Unused

	default:
		break;
	}

	return 0;
}
/**
 * Checks whether the instruction currently being decoded (dst/src0/src1) is a delay slot,
 * i.e. a MOV that copies a full-precision temp register onto itself without altering it.
 *
 * All of the following must hold:
 *  - The opcode is MOV and it has a destination
 *  - Source and destination are the same temp register (same index AND same width)
 *  - No saturate, precision override, abs, neg or scale is applied
 *  - Every lane enabled in the write mask reads from the matching source lane
 *
 * @return true if the instruction leaves the register contents unchanged
 */
bool FragmentProgramDecompiler::is_delay_slot() const
{
	if (dst.opcode != RSX_FP_OPCODE_MOV ||            // These slots are always populated with MOV
		dst.no_dest ||                                // Must have a sink
		src0.reg_type != RSX_FP_REGISTER_TYPE_TEMP || // Must read from reg
		dst.dest_reg != src0.tmp_reg_index ||         // Must be a write-to-self
		dst.fp16 ||                                   // Always full lane. We need to collect more data on this but it won't matter
		src0.fp16 ||                                  // A half-width source with the same index is a different register, not a write-to-self
		dst.saturate ||                               // Precision modifier
		(dst.prec != RSX_FP_PRECISION_REAL &&
		 dst.prec != RSX_FP_PRECISION_UNKNOWN))       // Cannot have precision modifiers
	{
		return false;
	}

	// Any modifier that changes the value being moved makes this a real operation
	if (src0.abs || src0.neg || src1.scale)
	{
		return false;
	}

	// Only lanes that are written matter. Each of them must map onto itself.
	if (dst.mask_x && src0.swizzle_x != 0) return false;
	if (dst.mask_y && src0.swizzle_y != 1) return false;
	if (dst.mask_z && src0.swizzle_z != 2) return false;
	if (dst.mask_w && src0.swizzle_w != 3) return false;

	return true;
}
FragmentProgramDecompiler::FragmentProgramDecompiler(const RSXFragmentProgram &prog, u32& size) FragmentProgramDecompiler::FragmentProgramDecompiler(const RSXFragmentProgram &prog, u32& size)
: m_size(size) : m_size(size)
, m_prog(prog) , m_prog(prog)
@ -166,8 +353,11 @@ void FragmentProgramDecompiler::SetDst(std::string code, u32 flags)
} }
} }
if (!is_delay_slot())
{
temp_registers[reg_index].tag(dst.dest_reg, !!dst.fp16, dst.mask_x, dst.mask_y, dst.mask_z, dst.mask_w); temp_registers[reg_index].tag(dst.dest_reg, !!dst.fp16, dst.mask_x, dst.mask_y, dst.mask_z, dst.mask_w);
} }
}
void FragmentProgramDecompiler::AddFlowOp(const std::string& code) void FragmentProgramDecompiler::AddFlowOp(const std::string& code)
{ {
@ -501,22 +691,33 @@ void FragmentProgramDecompiler::AddCodeCond(const std::string& lhs, const std::s
AddCode(lhs + " = _select(" + lhs + ", " + src_prefix + rhs + ", " + cond + ");"); AddCode(lhs + " = _select(" + lhs + ", " + src_prefix + rhs + ", " + cond + ");");
} }
template<typename T> std::string FragmentProgramDecompiler::GetSRC(T src) template<typename T>
requires std::is_same_v<T, SRC0> || std::is_same_v<T, SRC1> || std::is_same_v<T, SRC2>
std::string FragmentProgramDecompiler::GetSRC(T src)
{ {
std::string ret; std::string ret;
u32 precision_modifier = 0; u32 precision_modifier = 0;
u32 operand_idx = umax;
if constexpr (std::is_same_v<T, SRC0>) if constexpr (std::is_same_v<T, SRC0>)
{ {
precision_modifier = src1.src0_prec_mod; precision_modifier = src1.src0_prec_mod;
operand_idx = 0;
} }
else if constexpr (std::is_same_v<T, SRC1>) else if constexpr (std::is_same_v<T, SRC1>)
{ {
precision_modifier = src1.src1_prec_mod; precision_modifier = src1.src1_prec_mod;
operand_idx = 1;
} }
else if constexpr (std::is_same_v<T, SRC2>) else if constexpr (std::is_same_v<T, SRC2>)
{ {
precision_modifier = src1.src2_prec_mod; precision_modifier = src1.src2_prec_mod;
operand_idx = 2;
}
else
{
// Unreachable unless we add another SRC type
fmt::throw_exception("Invalid SRC input");
} }
switch (src.reg_type) switch (src.reg_type)
@ -525,21 +726,45 @@ template<typename T> std::string FragmentProgramDecompiler::GetSRC(T src)
if (!src.fp16) if (!src.fp16)
{ {
if (dst.opcode == RSX_FP_OPCODE_UP16 || // We need to determine if any vector lanes need a gather op
dst.opcode == RSX_FP_OPCODE_UP2 || // In theory, splitting can also be required, but that is currently unsupported
dst.opcode == RSX_FP_OPCODE_UP4 || u32 src_lane_mask = is_delay_slot() ? 0u : get_src_vector_mask(dst.opcode, operand_idx);
dst.opcode == RSX_FP_OPCODE_UPB || std::unordered_set<u8> lanes_to_gather;
dst.opcode == RSX_FP_OPCODE_UPG)
const bool apply_dst_mask = src_lane_mask & (1u << 31);
src_lane_mask &= ~(1u << 31);
if (apply_dst_mask && !dst.no_dest)
{ {
if (!dst.mask_x) src_lane_mask &= ~(1u << 0);
if (!dst.mask_y) src_lane_mask &= ~(1u << 1);
if (!dst.mask_z) src_lane_mask &= ~(1u << 2);
if (!dst.mask_w) src_lane_mask &= ~(1u << 3);
}
if (src_lane_mask & (1u << 0)) lanes_to_gather.insert(src.swizzle_x);
if (src_lane_mask & (1u << 1)) lanes_to_gather.insert(src.swizzle_y);
if (src_lane_mask & (1u << 2)) lanes_to_gather.insert(src.swizzle_z);
if (src_lane_mask & (1u << 3)) lanes_to_gather.insert(src.swizzle_w);
auto& reg = temp_registers[src.tmp_reg_index]; auto& reg = temp_registers[src.tmp_reg_index];
if (reg.requires_gather(src.swizzle_x)) bool skip_reg_assign = false;
for (const auto& ch : lanes_to_gather)
{
if (reg.requires_gather(ch))
{ {
properties.has_gather_op = true; properties.has_gather_op = true;
AddReg(src.tmp_reg_index, src.fp16); AddReg(src.tmp_reg_index, src.fp16);
ret = getFloatTypeName(4) + reg.gather_r(); ret = getFloatTypeName(4) + reg.gather_r();
skip_reg_assign = true;
break; break;
} }
} }
if (skip_reg_assign)
{
break;
}
} }
else if (precision_modifier == RSX_FP_PRECISION_HALF) else if (precision_modifier == RSX_FP_PRECISION_HALF)
{ {

View file

@ -81,7 +81,11 @@ class FragmentProgramDecompiler
void AddCodeCond(const std::string& lhs, const std::string& rhs); void AddCodeCond(const std::string& lhs, const std::string& rhs);
std::string GetRawCond(); std::string GetRawCond();
std::string GetCond(); std::string GetCond();
template<typename T> std::string GetSRC(T src);
template<typename T>
requires std::is_same_v<T, SRC0> || std::is_same_v<T, SRC1> || std::is_same_v<T, SRC2>
std::string GetSRC(T src);
std::string BuildCode(); std::string BuildCode();
static u32 GetData(const u32 d) { return d << 16 | d >> 16; } static u32 GetData(const u32 d) { return d << 16 | d >> 16; }
@ -100,6 +104,17 @@ class FragmentProgramDecompiler
*/ */
bool handle_tex_srb(u32 opcode); bool handle_tex_srb(u32 opcode);
/**
 * Calculates the lane mask for a given input.
 * Given an opcode and the index of one of its source operands (0, 1 or 2), returns which lanes of that operand are read.
 * Bits 0-3 of the result correspond to the x, y, z and w lanes.
 * Bit 31 is a flag requesting that the caller also clears the lanes that are disabled in the destination write mask.
 * Returns 0 when no lanes are read, including when the operand index is outside the opcode's per-operand table.
 * This is a temporary workaround until the decompiler is rewritten with some IR to allow granular split/gather passes.
 */
u32 get_src_vector_mask(u32 opcode, u32 operand) const;
/**
 * Detects delay slots. These evaluate to a NOP so we don't actually need to emit them.
 * A delay slot is recognized as a MOV with a destination that reads a temp register and writes back to the
 * same register index at full precision, with no saturate/precision/abs/neg/scale modifiers, and where every
 * written lane reads from the matching source lane.
 */
bool is_delay_slot() const;
protected: protected:
const RSXFragmentProgram &m_prog; const RSXFragmentProgram &m_prog;
u32 m_ctrl = 0; u32 m_ctrl = 0;

View file

@ -251,7 +251,7 @@ namespace rsx
u8 fragment_texture::convolution_filter() const u8 fragment_texture::convolution_filter() const
{ {
return ((registers[NV4097_SET_TEXTURE_FILTER + (m_index * 8)] >> 13) & 0xf); return ((registers[NV4097_SET_TEXTURE_FILTER + (m_index * 8)] >> 13) & 0x7);
} }
u8 fragment_texture::argb_signed() const u8 fragment_texture::argb_signed() const

View file

@ -716,7 +716,7 @@ namespace vk
view_swizzle = source->native_component_map; view_swizzle = source->native_component_map;
} }
image->set_debug_name("Temp view"); image->set_debug_name(fmt::format("Temp view, fmt=0x%x", gcm_format));
image->set_native_component_layout(view_swizzle); image->set_native_component_layout(view_swizzle);
auto view = image->get_view(remap_vector); auto view = image->get_view(remap_vector);

View file

@ -954,6 +954,8 @@ namespace gcm
CELL_GCM_TEXTURE_LINEAR_LINEAR = 6, CELL_GCM_TEXTURE_LINEAR_LINEAR = 6,
CELL_GCM_TEXTURE_CONVOLUTION_MIN = 7, CELL_GCM_TEXTURE_CONVOLUTION_MIN = 7,
CELL_GCM_TEXTURE_CONVOLUTION_MAG = 4, CELL_GCM_TEXTURE_CONVOLUTION_MAG = 4,
// Convolution mode
CELL_GCM_TEXTURE_CONVOLUTION_QUINCUNX = 1, CELL_GCM_TEXTURE_CONVOLUTION_QUINCUNX = 1,
CELL_GCM_TEXTURE_CONVOLUTION_GAUSSIAN = 2, CELL_GCM_TEXTURE_CONVOLUTION_GAUSSIAN = 2,
CELL_GCM_TEXTURE_CONVOLUTION_QUINCUNX_ALT = 3, CELL_GCM_TEXTURE_CONVOLUTION_QUINCUNX_ALT = 3,