#include "stdafx.h"
#include "ProgramStateCache.h"
#include "Emu/system_config.h"

#include <stack>

#if defined(ARCH_X64)
#include "emmintrin.h"
#include "immintrin.h"
#endif

#ifdef ARCH_ARM64
#ifndef _MSC_VER
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#pragma GCC diagnostic ignored "-Wold-style-cast"
#endif
#include "Emu/CPU/sse2neon.h"
#ifndef _MSC_VER
#pragma GCC diagnostic pop
#endif
#endif

using namespace program_hash_util;

usz vertex_program_utils::get_vertex_program_ucode_hash(const RSXVertexProgram& program)
{
	// Checksum as hash with rotated data
	const void* instbuffer = program.data.data();
	u32 instIndex = 0;
	usz acc0 = 0;
	usz acc1 = 0;
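
	// Rotating each 64-bit half by an index-dependent amount before accumulating makes the checksum
	// order-sensitive, so identical instructions in different slots contribute different values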
	do
	{
		if (program.instruction_mask[instIndex])
		{
			const auto inst = v128::loadu(instbuffer, instIndex);
			usz tmp0 = std::rotr(inst._u64[0], instIndex * 2);
			acc0 += tmp0;
			usz tmp1 = std::rotr(inst._u64[1], (instIndex * 2) + 1);
			acc1 += tmp1;
		}

		instIndex++;
	} while (instIndex < (program.data.size() / 4));

	return acc0 + acc1;
}

vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vertex_program(const u32* data, u32 entry, RSXVertexProgram& dst_prog)
{
	vertex_program_utils::vertex_program_metadata result{};
	//u32 last_instruction_address = 0;
	//u32 first_instruction_address = entry;

	std::bitset<rsx::max_vertex_program_instructions> instructions_to_patch;
	std::pair<u32, u32> instruction_range{ umax, 0 };
	bool has_branch_instruction = false;
	std::stack<u32> call_stack;

	D3 d3;
	D2 d2;
	D1 d1;
	D0 d0;

	std::function<void(u32, bool)> walk_function = [&](u32 start, bool fast_exit)
	{
		u32 current_instruction = start;
		std::set<u32> conditional_targets;

		while (true)
		{
			ensure(current_instruction < rsx::max_vertex_program_instructions);

			if (result.instruction_mask[current_instruction])
			{
				if (!fast_exit)
				{
					// This can be harmless if a dangling RET was encountered before.
					// This can also be legal in case of BRB...BRI loops since BRIs are conditional. Might just be a loop with an exit condition.
					rsx_log.warning("vp_analyser: Possible infinite loop detected");
				}

				// There is never any reason to continue scanning after self-intersecting on the control-flow tree.
				break;
			}

			const auto instruction = v128::loadu(&data[current_instruction * 4]);
			d1.HEX = instruction._u32[1];
			d2.HEX = instruction._u32[2];
			d3.HEX = instruction._u32[3];

			// Touch current instruction
			result.instruction_mask[current_instruction] = true;
			instruction_range.first = std::min(current_instruction, instruction_range.first);
			instruction_range.second = std::max(current_instruction, instruction_range.second);

			// Check whether the current instruction references an input stream
			auto input_attribute_ref = [&]()
			{
				if (!d1.input_src)
				{
					// It is possible to reference ATTR0, but this is mandatory anyway. No need to explicitly test for it
					return;
				}

				const auto ref_mask = (1u << d1.input_src);
				if ((result.referenced_inputs_mask & ref_mask) == 0)
				{
					// Type is encoded in the first 2 bits of each block
					const auto src0 = d2.src0l & 0x3;
					const auto src1 = d2.src1 & 0x3;
					const auto src2 = d3.src2l & 0x3;

					if ((src0 == RSX_VP_REGISTER_TYPE_INPUT) ||
						(src1 == RSX_VP_REGISTER_TYPE_INPUT) ||
						(src2 == RSX_VP_REGISTER_TYPE_INPUT))
					{
						result.referenced_inputs_mask |= ref_mask;
					}
				}
			};

			auto branch_to = [&](const u32 target)
			{
				input_attribute_ref();
				current_instruction = target;
			};

			// Basic vec op analysis, must be done before flow analysis
			switch (d1.vec_opcode)
			{
			case RSX_VEC_OPCODE_NOP:
			{
				break;
			}
			case RSX_VEC_OPCODE_TXL:
			{
				result.referenced_textures_mask |= (1 << d2.tex_num);
				break;
			}
			default:
			{
				input_attribute_ref();
				break;
			}
			}

			bool static_jump = false;
			bool function_call = true;

			switch (d1.sca_opcode)
			{
			case RSX_SCA_OPCODE_NOP:
			{
				break;
			}
			case RSX_SCA_OPCODE_BRI:
			{
				d0.HEX = instruction._u32[0];
				static_jump = (d0.cond == 0x7);
				[[fallthrough]];
			}
			case RSX_SCA_OPCODE_BRB:
			{
				function_call = false;
				[[fallthrough]];
			}
			case RSX_SCA_OPCODE_CAL:
			case RSX_SCA_OPCODE_CLI:
			case RSX_SCA_OPCODE_CLB:
			{
				// Need to patch the jump address to be consistent wherever the program is located
				instructions_to_patch[current_instruction] = true;
				has_branch_instruction = true;
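
				// The 10-bit branch target is split across three instruction fields (iaddrh2:1, iaddrh:6, iaddrl:3)
				// and has to be reassembled before it can be followed or patched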
				d0.HEX = instruction._u32[0];
				const u32 jump_address = (d0.iaddrh2 << 9) | (d2.iaddrh << 3) | d3.iaddrl;

				if (function_call)
				{
					call_stack.push(current_instruction + 1);
					branch_to(jump_address);
					continue;
				}
				else if (static_jump)
				{
					// NOTE: This will skip potential jump target blocks between current->target
					branch_to(jump_address);
					continue;
				}
				else
				{
					// Set possible end address and proceed as usual
					conditional_targets.emplace(jump_address);
					instruction_range.second = std::max(jump_address, instruction_range.second);
				}
				break;
			}
			case RSX_SCA_OPCODE_RET:
			{
				if (call_stack.empty())
				{
					rsx_log.error("vp_analyser: RET found outside subroutine call");
				}
				else
				{
					branch_to(call_stack.top());
					call_stack.pop();
					continue;
				}
				break;
			}
			default:
			{
				input_attribute_ref();
				break;
			}
			}

			// Check exit conditions...
			if (d3.end)
			{
				// We have seen an end-of-instructions marker.
				// Multiple exits may exist, usually skipped over by branching. Do not exit on end unless there is no branching.
				if (!has_branch_instruction || fast_exit || current_instruction >= instruction_range.second)
				{
					// Conditions:
					// 1. No branching so far. This will always be the exit.
					// 2. Fast exit flag is set. This happens when walking through subroutines.
					// 3. We've gone beyond the known instruction range. In this scenario, this is the furthest end marker seen so far. It has to be reached by some earlier branch.
					break;
				}
			}
			else if ((current_instruction + 1) == rsx::max_vertex_program_instructions)
			{
				// No more instructions to read.
				break;
			}

			current_instruction++;
		}

		for (const u32 target : conditional_targets)
		{
			if (!result.instruction_mask[target])
			{
				walk_function(target, true);
			}
		}
	};

	if (g_cfg.video.debug_program_analyser)
	{
		fs::file dump(fs::get_cache_dir() + "shaderlog/vp_analyser.bin", fs::rewrite);
		dump.write(&entry, 4);
		dump.write(data, rsx::max_vertex_program_instructions * 16);
		dump.close();
	}

	walk_function(entry, false);

	const u32 instruction_count = (instruction_range.second - instruction_range.first + 1);
	result.ucode_length = instruction_count * 16;
	dst_prog.base_address = instruction_range.first;
	dst_prog.entry = entry;
	dst_prog.data.resize(instruction_count * 4);
	dst_prog.instruction_mask = (result.instruction_mask >> instruction_range.first);

	if (!has_branch_instruction)
	{
		ensure(instruction_range.first == entry);
		std::memcpy(dst_prog.data.data(), data + (instruction_range.first * 4), result.ucode_length);
	}
	else
	{
		for (u32 i = instruction_range.first, count = 0; i <= instruction_range.second; ++i, ++count)
		{
			const u32* instruction = &data[i * 4];
			u32* dst = &dst_prog.data[count * 4];

			if (result.instruction_mask[i])
			{
				v128::storeu(v128::loadu(instruction), dst);

				if (instructions_to_patch[i])
				{
					d0.HEX = dst[0];
					d2.HEX = dst[2];
					d3.HEX = dst[3];
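
					// Rebase the branch target so it is relative to the extracted program's first instruction,
					// then split it back into its three component fields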
					u32 address = (d0.iaddrh2 << 9) | (d2.iaddrh << 3) | d3.iaddrl;
					address -= instruction_range.first;

					d0.iaddrh2 = (address >> 9) & 0x1;
					d2.iaddrh = (address >> 3) & 0x3F;
					d3.iaddrl = (address & 0x7);

					dst[0] = d0.HEX;
					dst[2] = d2.HEX;
					dst[3] = d3.HEX;

					dst_prog.jump_table.emplace(address);
				}
			}
			else
			{
				v128::storeu({}, dst);
			}
		}

		// Typical ubershaders have the dispatch at the top with subroutines following. However,
		// some games have the dispatch block at the end and the subroutines above it.
		// We need to simulate a jump-to-entry in this situation.
		// Normally this condition is handled by the conditional_targets walk, but sometimes this doesn't work due to cyclic branches.
		if (instruction_range.first < dst_prog.entry)
		{
			// Is there a subroutine that jumps into the entry? If not, add to jump table
			const auto target = dst_prog.entry - instruction_range.first;
			dst_prog.jump_table.insert(target);
		}

		// Verification
		for (const u32 target : dst_prog.jump_table)
		{
			if (!dst_prog.instruction_mask[target])
			{
				rsx_log.error("vp_analyser: Failed, branch target 0x%x was not resolved", target);
			}
		}
	}

	result.referenced_inputs_mask |= 1u; // VPOS is always enabled, else no rendering can happen
	return result;
}

usz vertex_program_storage_hash::operator()(const RSXVertexProgram& program) const
{
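	// Combine the ucode checksum with the fixed-function state (control word, output mask, texture state)
	// that also distinguishes program variants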
	const usz ucode_hash = vertex_program_utils::get_vertex_program_ucode_hash(program);
	const u32 state_params[] =
	{
		program.ctrl,
		program.output_mask,
		program.texture_state.texture_dimensions,
		program.texture_state.multisampled_textures,
	};
	const usz metadata_hash = rpcs3::hash_array(state_params);
	return rpcs3::hash64(ucode_hash, metadata_hash);
}

bool vertex_program_compare::operator()(const RSXVertexProgram& binary1, const RSXVertexProgram& binary2) const
{
	if (binary1.output_mask != binary2.output_mask)
		return false;
	if (binary1.ctrl != binary2.ctrl)
		return false;
	if (binary1.texture_state != binary2.texture_state)
		return false;
	if (binary1.data.size() != binary2.data.size())
		return false;
	if (binary1.jump_table != binary2.jump_table)
		return false;

	const void* instBuffer1 = binary1.data.data();
	const void* instBuffer2 = binary2.data.data();
	usz instIndex = 0;

	for (unsigned i = 0; i < binary1.data.size() / 4; i++)
	{
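		// Only slots that were actually reached by the analyser are compared; unreached gaps were
		// zero-filled during extraction and carry no useful data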
		if (binary1.instruction_mask[instIndex])
		{
			const auto inst1 = v128::loadu(instBuffer1, instIndex);
			const auto inst2 = v128::loadu(instBuffer2, instIndex);

			if (inst1._u ^ inst2._u)
			{
				return false;
			}
		}

		instIndex++;
	}

	return true;
}
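
// Returns true if any of the instruction's three source operands selects the constant register file
// (type value 0x2 in the 2-bit type field of each source word). Such an instruction is followed by an
// extra 16-byte inline constant slot in the ucode stream.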
bool fragment_program_utils::is_any_src_constant(v128 sourceOperand)
{
	const u64 masked = sourceOperand._u64[1] & 0x30000000300;
	return (sourceOperand._u32[1] & 0x300) == 0x200 || (static_cast<u32>(masked) == 0x200 || static_cast<u32>(masked >> 32) == 0x200);
}
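
// Scans the ucode until an instruction with the end bit (bit 8 of the first word) is found.
// Each instruction occupies 16 bytes; instructions that reference an inline constant consume one extra
// 16-byte slot, which is included in the returned byte size.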
usz fragment_program_utils::get_fragment_program_ucode_size(const void* ptr)
{
	const auto instBuffer = ptr;
	usz instIndex = 0;
	while (true)
	{
		const v128 inst = v128::loadu(instBuffer, instIndex);
		bool end = (inst._u32[0] >> 8) & 0x1;

		if (is_any_src_constant(inst))
		{
			instIndex += 2;
			if (end)
				return instIndex * 4 * 4;

			continue;
		}

		instIndex++;
		if (end)
			return (instIndex) * 4 * 4;
	}
}

fragment_program_utils::fragment_program_metadata fragment_program_utils::analyse_fragment_program(const void* ptr)
{
	fragment_program_utils::fragment_program_metadata result{};
	result.program_start_offset = -1;
	const auto instBuffer = ptr;
	s32 index = 0;

	while (true)
	{
		const auto inst = v128::loadu(instBuffer, index);

		// Check for the opcode high bit, which indicates a branch instruction (opcodes 0x40...0x45)
		if (inst._u32[2] & (1 << 23))
		{
			// NOTE: Jump instructions are not yet proved to work outside of loops and if/else blocks
			// Otherwise we would need to follow the execution chain
			result.has_branch_instructions = true;
		}
		else
		{
			const u32 opcode = (inst._u32[0] >> 16) & 0x3F;
			if (opcode)
			{
				if (result.program_start_offset == umax)
				{
					result.program_start_offset = index * 16;
				}

				switch (opcode)
				{
				case RSX_FP_OPCODE_TEX:
				case RSX_FP_OPCODE_TEXBEM:
				case RSX_FP_OPCODE_TXP:
				case RSX_FP_OPCODE_TXPBEM:
				case RSX_FP_OPCODE_TXD:
				case RSX_FP_OPCODE_TXB:
				case RSX_FP_OPCODE_TXL:
				{
					// Bits 17-20 of word 1, swapped within u16 sections
					// Bits 16-23 are swapped into the upper 8 bits (24-31)
					const u32 tex_num = (inst._u32[0] >> 25) & 15;
					result.referenced_textures_mask |= (1 << tex_num);
					break;
				}
				case RSX_FP_OPCODE_PK4:
				case RSX_FP_OPCODE_UP4:
				case RSX_FP_OPCODE_PK2:
				case RSX_FP_OPCODE_UP2:
				case RSX_FP_OPCODE_PKB:
				case RSX_FP_OPCODE_UPB:
				case RSX_FP_OPCODE_PK16:
				case RSX_FP_OPCODE_UP16:
				case RSX_FP_OPCODE_PKG:
				case RSX_FP_OPCODE_UPG:
				{
					result.has_pack_instructions = true;
					break;
				}
				}
			}

			if (is_any_src_constant(inst))
			{
				// Instruction references a constant, skip one slot occupied by data
				index++;
				result.program_ucode_length += 16;
				result.program_constants_buffer_length += 16;
			}
		}
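
		// Leading NOP slots before the first real opcode are not counted towards the reported ucode length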
		if (result.program_start_offset != umax)
		{
			result.program_ucode_length += 16;
		}

		if ((inst._u32[0] >> 8) & 0x1)
		{
			if (result.program_start_offset == umax)
			{
				result.program_start_offset = index * 16;
				result.program_ucode_length = 16;
				result.is_nop_shader = true;
			}

			break;
		}

		index++;
	}

	return result;
}

usz fragment_program_utils::get_fragment_program_ucode_hash(const RSXFragmentProgram& program)
{
	// Checksum as hash with rotated data
	const void* instbuffer = program.get_data();
	u32 instIndex = 0;
	usz acc0 = 0;
	usz acc1 = 0;

	while (true)
	{
		const auto inst = v128::loadu(instbuffer, instIndex);
		usz tmp0 = std::rotr(inst._u64[0], instIndex * 2);
		acc0 += tmp0;
		usz tmp1 = std::rotr(inst._u64[1], (instIndex * 2) + 1);
		acc1 += tmp1;
		instIndex++;

		// Skip constants
		if (fragment_program_utils::is_any_src_constant(inst))
			instIndex++;

		bool end = (inst._u32[0] >> 8) & 0x1;
		if (end)
			return acc0 + acc1;
	}

	return 0;
}

usz fragment_program_storage_hash::operator()(const RSXFragmentProgram& program) const
{
	const usz ucode_hash = fragment_program_utils::get_fragment_program_ucode_hash(program);
	const u32 state_params[] =
	{
		program.ctrl,
		program.two_sided_lighting ? 1u : 0u,
		program.texture_state.texture_dimensions,
		program.texture_state.shadow_textures,
		program.texture_state.redirected_textures,
		program.texture_state.multisampled_textures,
		program.texcoord_control_mask,
		program.mrt_buffers_count
	};
	const usz metadata_hash = rpcs3::hash_array(state_params);
	return rpcs3::hash64(ucode_hash, metadata_hash);
}

bool fragment_program_compare::operator()(const RSXFragmentProgram& binary1, const RSXFragmentProgram& binary2) const
{
	if (binary1.ucode_length != binary2.ucode_length ||
		binary1.ctrl != binary2.ctrl ||
		binary1.texture_state != binary2.texture_state ||
		binary1.texcoord_control_mask != binary2.texcoord_control_mask ||
		binary1.two_sided_lighting != binary2.two_sided_lighting ||
		binary1.mrt_buffers_count != binary2.mrt_buffers_count)
	{
		return false;
	}

	const void* instBuffer1 = binary1.get_data();
	const void* instBuffer2 = binary2.get_data();
	usz instIndex = 0;

	while (true)
	{
		const auto inst1 = v128::loadu(instBuffer1, instIndex);
		const auto inst2 = v128::loadu(instBuffer2, instIndex);

		if (inst1._u ^ inst2._u)
		{
			return false;
		}

		instIndex++;

		// Skip constants
		if (fragment_program_utils::is_any_src_constant(inst1))
			instIndex++;

		const bool end = ((inst1._u32[0] >> 8) & 0x1);
		if (end)
		{
			return true;
		}
	}
}

namespace rsx
{
#if defined(ARCH_X64) || defined(ARCH_ARM64)
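	// Swaps the two bytes inside every 16-bit lane of each constant block (the same transform as the scalar
	// fallback variant) and streams the result out; when sanitize is set, NaN/Inf encodings are zeroed first.
	// On ARM64 the SSE2 intrinsics are supplied by the sse2neon translation header included above.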
	static inline void write_fragment_constants_to_buffer_sse2(const std::span<f32>& buffer, const RSXFragmentProgram& rsx_prog, const std::vector<usz>& offsets_cache, bool sanitize)
	{
		f32* dst = buffer.data();
		for (usz offset_in_fragment_program : offsets_cache)
		{
			char* data = static_cast<char*>(rsx_prog.get_data()) + offset_in_fragment_program;
			const __m128i vector = _mm_loadu_si128(reinterpret_cast<__m128i*>(data));
			const __m128i shuffled_vector = _mm_or_si128(_mm_slli_epi16(vector, 8), _mm_srli_epi16(vector, 8));

			if (sanitize)
			{
				// Convert NaNs and Infs to 0
				const auto masked = _mm_and_si128(shuffled_vector, _mm_set1_epi32(0x7fffffff));
				const auto valid = _mm_cmplt_epi32(masked, _mm_set1_epi32(0x7f800000));
				const auto result = _mm_and_si128(shuffled_vector, valid);
				_mm_stream_si128(utils::bless<__m128i>(dst), result);
			}
			else
			{
				_mm_stream_si128(utils::bless<__m128i>(dst), shuffled_vector);
			}

			dst += 4;
		}
	}
#else
	static inline void write_fragment_constants_to_buffer_fallback(const std::span<f32>& buffer, const RSXFragmentProgram& rsx_prog, const std::vector<usz>& offsets_cache, bool sanitize)
	{
		f32* dst = buffer.data();
		for (usz offset_in_fragment_program : offsets_cache)
		{
			char* data = static_cast<char*>(rsx_prog.get_data()) + offset_in_fragment_program;
			for (u32 i = 0; i < 4; i++)
			{
				const u32 value = reinterpret_cast<u32*>(data)[i];
				const u32 shuffled = ((value >> 8) & 0xff00ff) | ((value << 8) & 0xff00ff00);

				if (sanitize && (shuffled & 0x7fffffff) >= 0x7f800000)
				{
					dst[i] = 0.f;
				}
				else
				{
					dst[i] = std::bit_cast<f32>(shuffled);
				}
			}

			dst += 4;
		}
	}
#endif
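
	// Both code paths apply the same per-word transformation: the bytes within each 16-bit half are swapped
	// and, when sanitize is requested, non-finite values are written out as zero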
	void write_fragment_constants_to_buffer(const std::span<f32>& buffer, const RSXFragmentProgram& rsx_prog, const std::vector<usz>& offsets_cache, bool sanitize)
	{
#if defined(ARCH_X64) || defined(ARCH_ARM64)
		write_fragment_constants_to_buffer_sse2(buffer, rsx_prog, offsets_cache, sanitize);
#else
		write_fragment_constants_to_buffer_fallback(buffer, rsx_prog, offsets_cache, sanitize);
#endif
	}
}