#include "stdafx.h"
#include "SPURecompiler.h"
#include "Emu/System.h"
#include "Emu/system_config.h"
#include "Emu/system_progress.hpp"
#include "Emu/system_utils.hpp"
#include "Emu/cache_utils.hpp"
#include "Emu/IdManager.h"
#include "Crypto/sha1.h"
#include "Utilities/StrUtil.h"
#include "Utilities/JIT.h"
#include "util/init_mutex.hpp"
#include "util/shared_ptr.hpp"

#include "Emu/Cell/Modules/cellSync.h"
#include "SPUThread.h"
#include "SPUAnalyser.h"
#include "SPUInterpreter.h"
#include "SPUDisAsm.h"

#include <algorithm>
#include <optional>
#include <unordered_set>

#include "util/v128.hpp"
#include "util/simd.hpp"
#include "util/sysinfo.hpp"

const extern spu_decoder<spu_itype> g_spu_itype;
const extern spu_decoder<spu_iname> g_spu_iname;
const extern spu_decoder<spu_iflag> g_spu_iflag;

constexpr u32 s_reg_max = spu_recompiler_base::s_reg_max;

// Move 4 args for calling native function from a GHC calling convention function
#if defined(ARCH_X64)
static u8* move_args_ghc_to_native(u8* raw)
{
#ifdef _WIN32
	// mov rcx, r13
	// mov rdx, rbp
	// mov r8, r12
	// mov r9, rbx
	std::memcpy(raw, "\x4C\x89\xE9\x48\x89\xEA\x4D\x89\xE0\x49\x89\xD9", 12);
#else
	// mov rdi, r13
	// mov rsi, rbp
	// mov rdx, r12
	// mov rcx, rbx
	std::memcpy(raw, "\x4C\x89\xEF\x48\x89\xEE\x4C\x89\xE2\x48\x89\xD9", 12);
#endif

	return raw + 12;
}
#elif defined(ARCH_ARM64)
static void ghc_cpp_trampoline(u64 fn_target, native_asm& c, auto& args)
{
	using namespace asmjit;

	c.mov(args[0], a64::x19);
	c.mov(args[1], a64::x20);
	c.mov(args[2], a64::x21);
	c.mov(args[3], a64::x22);

	c.mov(a64::x15, Imm(fn_target));
	c.br(a64::x15);
}
#endif
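
// For reference, the GHC calling convention used by JIT'ed SPU code pins the four
// spu_function_t arguments in fixed registers, and the stubs above shuffle them
// back into the native C++ argument registers before calling into the runtime:
//
//             GHC (x64)  GHC (arm64)  Win64  SysV x64  AArch64
//   args[0]   r13        x19          rcx    rdi       x0
//   args[1]   rbp        x20          rdx    rsi       x1
//   args[2]   r12        x21          r8     rdx       x2
//   args[3]   rbx        x22          r9     rcx       x3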

DECLARE(spu_runtime::tr_dispatch) = []
{
#ifdef __APPLE__
	pthread_jit_write_protect_np(false);
#endif

#if defined(ARCH_X64)
	// Generate a special trampoline to spu_recompiler_base::dispatch with a pause instruction
	u8* const trptr = jit_runtime::alloc(32, 16);
	u8* raw = move_args_ghc_to_native(trptr);
	*raw++ = 0xf3; // pause
	*raw++ = 0x90;
	*raw++ = 0xff; // jmp [rip]
	*raw++ = 0x25;
	std::memset(raw, 0, 4);
	const u64 target = reinterpret_cast<u64>(&spu_recompiler_base::dispatch);
	std::memcpy(raw + 4, &target, 8);
	return reinterpret_cast<spu_function_t>(trptr);
#elif defined(ARCH_ARM64)
	auto trptr = build_function_asm<spu_function_t>("tr_dispatch",
		[](native_asm& c, auto& args)
		{
			c.yield();
			ghc_cpp_trampoline(reinterpret_cast<u64>(&spu_recompiler_base::dispatch), c, args);
		});
	return trptr;
#else
#error "Unimplemented"
#endif
}();

DECLARE(spu_runtime::tr_branch) = []
{
#if defined(ARCH_X64)
	// Generate a trampoline to spu_recompiler_base::branch
	u8* const trptr = jit_runtime::alloc(32, 16);
	u8* raw = move_args_ghc_to_native(trptr);
	*raw++ = 0xff; // jmp [rip]
	*raw++ = 0x25;
	std::memset(raw, 0, 4);
	const u64 target = reinterpret_cast<u64>(&spu_recompiler_base::branch);
	std::memcpy(raw + 4, &target, 8);
	return reinterpret_cast<spu_function_t>(trptr);
#elif defined(ARCH_ARM64)
	auto trptr = build_function_asm<spu_function_t>("tr_branch",
		[](native_asm& c, auto& args)
		{
			ghc_cpp_trampoline(reinterpret_cast<u64>(&spu_recompiler_base::branch), c, args);
		});
	return trptr;
#else
#error "Unimplemented"
#endif
}();

DECLARE(spu_runtime::tr_interpreter) = []
{
#if defined(ARCH_X64)
	u8* const trptr = jit_runtime::alloc(32, 16);
	u8* raw = move_args_ghc_to_native(trptr);
	*raw++ = 0xff; // jmp [rip]
	*raw++ = 0x25;
	std::memset(raw, 0, 4);
	const u64 target = reinterpret_cast<u64>(&spu_recompiler_base::old_interpreter);
	std::memcpy(raw + 4, &target, 8);
	return reinterpret_cast<spu_function_t>(trptr);
#elif defined(ARCH_ARM64)
	auto trptr = build_function_asm<spu_function_t>("tr_interpreter",
		[](native_asm& c, auto& args)
		{
			ghc_cpp_trampoline(reinterpret_cast<u64>(&spu_recompiler_base::old_interpreter), c, args);
		});
	return trptr;
#endif
}();

DECLARE(spu_runtime::g_dispatcher) = []
{
	// Allocate 2^20 positions in data area
	const auto ptr = reinterpret_cast<std::remove_const_t<decltype(spu_runtime::g_dispatcher)>>(jit_runtime::alloc(sizeof(*g_dispatcher), 64, false));

	for (auto& x : *ptr)
	{
		x.raw() = tr_dispatch;
	}

	return ptr;
}();
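
// Note on the dispatch scheme (implemented by tr_all below): the table is indexed
// by the upper 20 bits of the 32-bit instruction word at the current SPU PC (the
// word is loaded from LS and shifted right by 12), and every entry starts out
// pointing at tr_dispatch.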

DECLARE(spu_runtime::tr_all) = []
{
#if defined(ARCH_X64)
	u8* const trptr = jit_runtime::alloc(32, 16);
	u8* raw = trptr;
	// Load PC: mov eax, [r13 + spu_thread::pc]
	*raw++ = 0x41;
	*raw++ = 0x8b;
	*raw++ = 0x45;
	*raw++ = ::narrow<s8>(::offset32(&spu_thread::pc));
	// Get LS address starting from PC: lea rcx, [rbp + rax]
	*raw++ = 0x48;
	*raw++ = 0x8d;
	*raw++ = 0x4c;
	*raw++ = 0x05;
	*raw++ = 0x00;
	// mov eax, [rcx]
	*raw++ = 0x8b;
	*raw++ = 0x01;
	// shr eax, (32 - 20)
	*raw++ = 0xc1;
	*raw++ = 0xe8;
	*raw++ = 0x0c;
	// Load g_dispatcher to rdx
	*raw++ = 0x48;
	*raw++ = 0x8d;
	*raw++ = 0x15;
	const s32 r32 = ::narrow<s32>(reinterpret_cast<u64>(g_dispatcher) - reinterpret_cast<u64>(raw) - 4);
	std::memcpy(raw, &r32, 4);
	raw += 4;
	// Update block_hash (set zero): mov [r13 + spu_thread::m_block_hash], 0
	*raw++ = 0x49;
	*raw++ = 0xc7;
	*raw++ = 0x45;
	*raw++ = ::narrow<s8>(::offset32(&spu_thread::block_hash));
	*raw++ = 0x00;
	*raw++ = 0x00;
	*raw++ = 0x00;
	*raw++ = 0x00;
	// jmp [rdx + rax * 8]
	*raw++ = 0xff;
	*raw++ = 0x24;
	*raw++ = 0xc2;
	return reinterpret_cast<spu_function_t>(trptr);
#elif defined(ARCH_ARM64)
	auto trptr = build_function_asm<spu_function_t>("tr_all",
		[](native_asm& c, auto& args)
		{
			using namespace asmjit;

			// Args implicitly defined via registers
			UNUSED(args);

			// Inputs:
			// x19 = m_thread a.k.a args[0]
			// x20 = ls_base
			// x21 - x22 = args[2 - 3]
			//ensure(::offset32(&spu_thread::pc) <= 32760);
			//ensure(::offset32(&spu_thread::block_hash) <= 32760);

			// Load PC
			c.ldr(a64::w1, arm::Mem(a64::x19, ::offset32(&spu_thread::pc))); // REG_Base + offset(spu_thread::pc)
			// Compute LS address = REG_Sp + PC, store into x7 (used later)
			c.add(a64::x7, a64::x20, a64::x1);
			// Load 32b from LS address
			c.ldr(a64::w3, arm::Mem(a64::x7));
			// shr (32 - 20)
			c.lsr(a64::w3, a64::w3, Imm(32 - 20));
			// Load g_dispatcher
			c.mov(a64::x4, Imm(reinterpret_cast<u64>(g_dispatcher)));
			// Update block hash
			c.mov(a64::x5, Imm(0));
			c.str(a64::x5, arm::Mem(a64::x19, ::offset32(&spu_thread::block_hash))); // REG_Base + offset(spu_thread::block_hash)
			// Jump to [g_dispatcher + idx * 8]
			c.mov(a64::x6, Imm(8));
			c.mul(a64::x6, a64::x3, a64::x6);
			c.add(a64::x4, a64::x4, a64::x6);
			c.ldr(a64::x4, arm::Mem(a64::x4));
			c.br(a64::x4);

			// Unreachable guard
			c.brk(0x42);
		});
	return trptr;
#else
#error "Unimplemented"
#endif
}();
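
// g_gateway below is the entry from C++ into JIT'ed SPU code: it saves the native
// context, loads the GHC argument registers, and enters through tr_all, which
// performs the table dispatch described above.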

DECLARE(spu_runtime::g_gateway) = build_function_asm<spu_function_t>("spu_gateway", [](native_asm& c, auto& args)
{
	// Gateway for the SPU dispatcher: converts from the native to the GHC calling convention, and also saves the RSP value for spu_escape
	using namespace asmjit;

#if defined(ARCH_X64)
#ifdef _WIN32
	c.push(x86::r15);
	c.push(x86::r14);
	c.push(x86::r13);
	c.push(x86::r12);
	c.push(x86::rsi);
	c.push(x86::rdi);
	c.push(x86::rbp);
	c.push(x86::rbx);
	c.sub(x86::rsp, 0xa8);
	c.movaps(x86::oword_ptr(x86::rsp, 0x90), x86::xmm15);
	c.movaps(x86::oword_ptr(x86::rsp, 0x80), x86::xmm14);
	c.movaps(x86::oword_ptr(x86::rsp, 0x70), x86::xmm13);
	c.movaps(x86::oword_ptr(x86::rsp, 0x60), x86::xmm12);
	c.movaps(x86::oword_ptr(x86::rsp, 0x50), x86::xmm11);
	c.movaps(x86::oword_ptr(x86::rsp, 0x40), x86::xmm10);
	c.movaps(x86::oword_ptr(x86::rsp, 0x30), x86::xmm9);
	c.movaps(x86::oword_ptr(x86::rsp, 0x20), x86::xmm8);
	c.movaps(x86::oword_ptr(x86::rsp, 0x10), x86::xmm7);
	c.movaps(x86::oword_ptr(x86::rsp, 0), x86::xmm6);
#else
	c.push(x86::rbp);
	c.push(x86::r15);
	c.push(x86::r14);
	c.push(x86::r13);
	c.push(x86::r12);
	c.push(x86::rbx);
	c.push(x86::rax);
#endif

	// Save the native stack pointer for longjmp emulation
	c.mov(x86::qword_ptr(args[0], ::offset32(&spu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs)), x86::rsp);

	// Move 4 args (despite the spu_function_t def)
	c.mov(x86::r13, args[0]);
	c.mov(x86::rbp, args[1]);
	c.mov(x86::r12, args[2]);
	c.mov(x86::rbx, args[3]);

	if (utils::has_avx())
	{
		c.vzeroupper();
	}

	c.call(spu_runtime::tr_all);

	if (utils::has_avx())
	{
		c.vzeroupper();
	}

#ifdef _WIN32
	c.movaps(x86::xmm6, x86::oword_ptr(x86::rsp, 0));
	c.movaps(x86::xmm7, x86::oword_ptr(x86::rsp, 0x10));
	c.movaps(x86::xmm8, x86::oword_ptr(x86::rsp, 0x20));
	c.movaps(x86::xmm9, x86::oword_ptr(x86::rsp, 0x30));
	c.movaps(x86::xmm10, x86::oword_ptr(x86::rsp, 0x40));
	c.movaps(x86::xmm11, x86::oword_ptr(x86::rsp, 0x50));
	c.movaps(x86::xmm12, x86::oword_ptr(x86::rsp, 0x60));
	c.movaps(x86::xmm13, x86::oword_ptr(x86::rsp, 0x70));
	c.movaps(x86::xmm14, x86::oword_ptr(x86::rsp, 0x80));
	c.movaps(x86::xmm15, x86::oword_ptr(x86::rsp, 0x90));
	c.add(x86::rsp, 0xa8);
	c.pop(x86::rbx);
	c.pop(x86::rbp);
	c.pop(x86::rdi);
	c.pop(x86::rsi);
	c.pop(x86::r12);
	c.pop(x86::r13);
	c.pop(x86::r14);
	c.pop(x86::r15);
#else
	c.add(x86::rsp, +8);
	c.pop(x86::rbx);
	c.pop(x86::r12);
	c.pop(x86::r13);
	c.pop(x86::r14);
	c.pop(x86::r15);
	c.pop(x86::rbp);
#endif

	c.ret();
#elif defined(ARCH_ARM64)
	// Save non-volatile regs. We do this within the thread context instead of on the normal stack
	const u32 hv_regs_base = ::offset32(&spu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs);
	// NOTE: A64 gp-gp-imm add only takes immediates of up to 4095. Larger values can work, but need to be multiples of 2 for lowering to replace the instruction correctly.
	// Unfortunately asmjit fails silently on these patterns, which can generate incorrect code.
	c.mov(a64::x15, args[0]);
	c.mov(a64::x14, Imm(hv_regs_base));
	c.add(a64::x14, a64::x14, a64::x15); // Reg context offset

	// The return address of escape should jump to the restore block
	auto epilogue_addr = c.newLabel();
	c.adr(a64::x15, epilogue_addr);
	c.mov(a64::x16, a64::sp);
	c.stp(a64::x15, a64::x16, arm::Mem(a64::x14));
	c.stp(a64::x18, a64::x19, arm::Mem(a64::x14, 16));
	c.stp(a64::x20, a64::x21, arm::Mem(a64::x14, 32));
	c.stp(a64::x22, a64::x23, arm::Mem(a64::x14, 48));
	c.stp(a64::x24, a64::x25, arm::Mem(a64::x14, 64));
	c.stp(a64::x26, a64::x27, arm::Mem(a64::x14, 80));
	c.stp(a64::x28, a64::x29, arm::Mem(a64::x14, 96));
	c.str(a64::x30, arm::Mem(a64::x14, 112));

	// Move 4 args (despite the spu_function_t def)
	c.mov(a64::x19, args[0]);
	c.mov(a64::x20, args[1]);
	c.mov(a64::x21, args[2]);
	c.mov(a64::x22, args[3]);

	// Inject a stack frame for the scratchpad. Alternatively, use per-function frames, but that adds some overhead
	c.sub(a64::sp, a64::sp, Imm(8192));

	c.mov(a64::x0, Imm(reinterpret_cast<u64>(spu_runtime::tr_all)));
	c.blr(a64::x0);

	// This is the return point for the far ret. Never jump back into host code without coming through this exit
	c.bind(epilogue_addr);

	// Cleanup scratchpad (not needed, we'll reload sp shortly)
	// c.add(a64::sp, a64::sp, Imm(8192));

	// Restore thread context
	c.mov(a64::x14, Imm(hv_regs_base));
	c.add(a64::x14, a64::x14, a64::x19);
	c.ldr(a64::x16, arm::Mem(a64::x14, 8));
	c.ldp(a64::x18, a64::x19, arm::Mem(a64::x14, 16));
	c.ldp(a64::x20, a64::x21, arm::Mem(a64::x14, 32));
	c.ldp(a64::x22, a64::x23, arm::Mem(a64::x14, 48));
	c.ldp(a64::x24, a64::x25, arm::Mem(a64::x14, 64));
	c.ldp(a64::x26, a64::x27, arm::Mem(a64::x14, 80));
	c.ldp(a64::x28, a64::x29, arm::Mem(a64::x14, 96));
	c.ldr(a64::x30, arm::Mem(a64::x14, 112));

	// Return
	c.mov(a64::sp, a64::x16);
	c.ret(a64::x30);
#else
#error "Unimplemented"
#endif
});
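
// How the longjmp emulation works on x64: the gateway above stores its
// post-prologue RSP in spu_thread::hv_ctx before calling tr_all. spu_escape
// reloads that RSP, and "sub rsp, 8; ret" then lands exactly on the return
// address pushed by the gateway's call instruction, so control resumes at the
// gateway epilogue regardless of how deep the JIT'ed call chain was.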

DECLARE(spu_runtime::g_escape) = build_function_asm<void(*)(spu_thread*)>("spu_escape", [](native_asm& c, auto& args)
{
	using namespace asmjit;

#if defined(ARCH_X64)
	// Restore native stack pointer (longjmp emulation)
	c.mov(x86::rsp, x86::qword_ptr(args[0], ::offset32(&spu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs)));

	// Return to the return location
	c.sub(x86::rsp, 8);
	c.ret();
#elif defined(ARCH_ARM64)
	// Far ret, jumps to gateway epilogue
	const u32 reg_base = ::offset32(&spu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs);
	c.mov(a64::x19, args[0]);
	c.mov(a64::x15, Imm(reg_base));
	c.add(a64::x15, a64::x15, args[0]);
	c.ldr(a64::x30, arm::Mem(a64::x15));
	c.ret(a64::x30);
#else
#error "Unimplemented"
#endif
});

DECLARE(spu_runtime::g_tail_escape) = build_function_asm<void(*)(spu_thread*, spu_function_t, u8*)>("spu_tail_escape", [](native_asm& c, auto& args)
{
	using namespace asmjit;

#if defined(ARCH_X64)
	// Restore native stack pointer (longjmp emulation)
	c.mov(x86::rsp, x86::qword_ptr(args[0], ::offset32(&spu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs)));

	// Adjust the stack for the initial call instruction in the gateway
	c.sub(x86::rsp, 16);

	// Tail call, GHC CC (second arg)
	c.mov(x86::r13, args[0]);
	c.mov(x86::rbp, x86::qword_ptr(args[0], ::offset32(&spu_thread::ls)));
	c.mov(x86::r12, args[2]);
	c.xor_(x86::ebx, x86::ebx);
	c.mov(x86::qword_ptr(x86::rsp), args[1]);
	c.ret();
#elif defined(ARCH_ARM64)
	// HV pointer
	const u32 reg_base = ::offset32(&spu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs);

	// Tail call, GHC CC
	c.mov(a64::x19, args[0]); // REG_Base
	c.mov(a64::x15, Imm(::offset32(&spu_thread::ls))); // The SPU::ls offset cannot be correctly encoded for ldr as it is too large
	c.ldr(a64::x20, arm::Mem(a64::x19, a64::x15)); // REG_Sp
	c.mov(a64::x21, args[2]); // REG_Hp
	c.mov(a64::x22, a64::xzr); // REG_R1

	// Reset sp to patch leaks. Calls to tail escape may leave their stack "dirty" due to optimizations.
	c.mov(a64::x14, Imm(reg_base + 8));
	c.ldr(a64::x15, arm::Mem(a64::x19, a64::x14));
	c.mov(a64::sp, a64::x15);

	// Push context. This gateway can be returned to normally through a ret chain.
	// FIXME: Push the current PC and "this" as part of the pseudo-frame and return here directly.
	c.sub(a64::sp, a64::sp, Imm(16));
	c.str(args[0], arm::Mem(a64::sp));

	// Allocate the scratchpad. Not needed if using per-function frames, or if we just don't care about returning to C++ (jump to the gateway exit instead)
	c.sub(a64::sp, a64::sp, Imm(8192));

	// Make the far jump
	c.mov(a64::x15, args[1]);
	c.blr(a64::x15);

	// Clear the scratch allocation
	c.add(a64::sp, a64::sp, Imm(8192));

	// Restore context. The escape point expects the current thread pointer in x19
	c.ldr(a64::x19, arm::Mem(a64::sp));
	c.add(a64::sp, a64::sp, Imm(16));

	// <Optional> We could technically just emit a return here, but we may not want to until support is more mature.
	// Should we attempt a normal return after this point, we'd be going back to C++ code, which we really don't want.
	// We can't guarantee stack sanity for the C++ code and its cookies, because we've basically been treating the stack as a scratch playground since we entered the main gateway.
	// Instead, just fall back to the hypervisor here. It also makes debugging easier.
	c.mov(a64::x15, Imm(reg_base));
	c.ldr(a64::x30, arm::Mem(a64::x19, a64::x15));
	c.ret(a64::x30);
#else
#error "Unimplemented"
#endif
});

DECLARE(spu_runtime::g_interpreter_table) = {};

DECLARE(spu_runtime::g_interpreter) = nullptr;

spu_cache::spu_cache(const std::string& loc)
	: m_file(loc, fs::read + fs::write + fs::create + fs::append)
{
}

spu_cache::~spu_cache()
{
}

extern void utilize_spu_data_segment(u32 vaddr, const void* ls_data_vaddr, u32 size)
{
	if (vaddr % 4)
	{
		return;
	}

	size &= -4;

	if (!size || vaddr + size > SPU_LS_SIZE)
	{
		return;
	}

	if (!g_cfg.core.llvm_precompilation)
	{
		return;
	}

	g_fxo->need<spu_cache>();

	if (!g_fxo->get<spu_cache>().collect_funcs_to_precompile)
	{
		return;
	}

	std::basic_string<u32> data(size / 4, 0);
	std::memcpy(data.data(), ls_data_vaddr, size);

	spu_cache::precompile_data_t obj{vaddr, std::move(data)};

	obj.funcs = spu_thread::discover_functions(vaddr, { reinterpret_cast<const u8*>(ls_data_vaddr), size }, vaddr != 0, umax);

	if (obj.funcs.empty())
	{
		// Nothing to add
		return;
	}

	if (spu_log.notice)
	{
		std::string to_log;

		for (usz i = 0; i < obj.funcs.size(); i++)
		{
			if (i == 0 && obj.funcs.size() < 4)
			{
				// Skip newline in this case
				to_log += ' ';
			}
			else if (i % 4 == 0)
			{
				fmt::append(to_log, "\n[%02u] ", i / 4);
			}
			else
			{
				to_log += ", ";
			}

			fmt::append(to_log, "0x%05x", obj.funcs[i]);
		}

		spu_log.notice("Found SPU function(s) at:%s", to_log);
	}

	spu_log.success("Found %u SPU function(s)", obj.funcs.size());

	g_fxo->get<spu_cache>().precompile_funcs.push(std::move(obj));
}
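
// Note: the checksum routine below is the classic byte-wise CRC-16-CCITT update
// (polynomial 0x1021, initial value 0xffff), written out manually.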
// For SPU cache validity check
static u16 calculate_crc16(const uchar* data, usz length)
{
	u16 crc = umax;

	while (length--)
	{
		u8 x = (crc >> 8) ^ *data++;
		x ^= (x >> 4);
		crc = static_cast<u16>((crc << 8) ^ (x << 12) ^ (x << 5) ^ x);
	}

	return crc;
}
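
// On-disk layout of one cache record, as read by get() and written by add() below
// (all fields big-endian):
//   u16 crc    - CRC-16 of the program data, forced non-zero (0 in old-format files, which skips the check)
//   u16 size   - program size in 32-bit words
//   u32 addr   - entry point address in LS
//   u32 [size] - program data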

std::deque<spu_program> spu_cache::get()
{
	std::deque<spu_program> result;

	if (!m_file)
	{
		return result;
	}

	m_file.seek(0);

	// TODO: signal truncated or otherwise broken file
	while (true)
	{
		struct block_info_t
		{
			be_t<u16> crc;
			be_t<u16> size;
			be_t<u32> addr;
		} block_info{};

		if (!m_file.read(block_info))
		{
			break;
		}

		const u32 crc = block_info.crc;
		const u32 size = block_info.size;
		const u32 addr = block_info.addr;

		if (utils::add_saturate<u32>(addr, size * 4) > SPU_LS_SIZE)
		{
			break;
		}

		std::vector<u32> func;

		if (!m_file.read(func, size))
		{
			break;
		}

		if (!size || !func[0])
		{
			// Skip old format Giga entries
			continue;
		}

		// CRC check is optional to be compatible with old format
		if (crc && std::max<u32>(calculate_crc16(reinterpret_cast<const uchar*>(func.data()), size * 4), 1) != crc)
		{
			// Invalid, but continue anyway
			continue;
		}

		spu_program res;
		res.entry_point = addr;
		res.lower_bound = addr;
		res.data = std::move(func);
		result.emplace_front(std::move(res));
	}

	return result;
}

void spu_cache::add(const spu_program& func)
{
	if (!m_file)
	{
		return;
	}

	be_t<u32> size = ::size32(func.data);
	be_t<u32> addr = func.entry_point;

	// Add CRC (forced non-zero)
	size |= std::max<u32>(calculate_crc16(reinterpret_cast<const uchar*>(func.data.data()), size * 4), 1) << 16;

	const fs::iovec_clone gather[3]
	{
		{&size, sizeof(size)},
		{&addr, sizeof(addr)},
		{func.data.data(), func.data.size() * 4}
	};

	// Append data
	m_file.write_gather(gather, 3);
}

void spu_cache::initialize(bool build_existing_cache)
{
	spu_runtime::g_interpreter = spu_runtime::g_gateway;

	if (g_cfg.core.spu_decoder == spu_decoder_type::_static || g_cfg.core.spu_decoder == spu_decoder_type::dynamic)
	{
		for (auto& x : *spu_runtime::g_dispatcher)
		{
			x.raw() = spu_runtime::tr_interpreter;
		}
	}

	const std::string ppu_cache = rpcs3::cache::get_ppu_cache();

	if (ppu_cache.empty())
	{
		return;
	}

	// SPU cache file (version + block size type)
	const std::string loc = ppu_cache + "spu-" + fmt::to_lower(g_cfg.core.spu_block_size.to_string()) + "-v1-tane.dat";

	spu_cache cache(loc);

	if (!cache)
	{
		spu_log.error("Failed to initialize SPU cache at: %s", loc);
		return;
	}

	// Read cache
	auto func_list = cache.get();
	atomic_t<usz> fnext{};
	atomic_t<u8> fail_flag{0};

	auto data_list = g_fxo->get<spu_cache>().precompile_funcs.pop_all();
	g_fxo->get<spu_cache>().collect_funcs_to_precompile = false;

	usz total_precompile = 0;

	for (auto& sec : data_list)
	{
		total_precompile += sec.funcs.size();
	}

	const bool spu_precompilation_enabled = func_list.empty() && g_cfg.core.spu_cache && g_cfg.core.llvm_precompilation;

	if (spu_precompilation_enabled)
	{
		// What compiles in this case goes straight to disk
		g_fxo->get<spu_cache>() = std::move(cache);
	}
	else if (!build_existing_cache)
	{
		return;
	}
	else
	{
		total_precompile = 0;
		data_list = {};
	}

	atomic_t<usz> data_indexer = 0;

	if (g_cfg.core.spu_decoder == spu_decoder_type::dynamic || g_cfg.core.spu_decoder == spu_decoder_type::llvm)
	{
		if (auto compiler = spu_recompiler_base::make_llvm_recompiler(11))
		{
			compiler->init();

			if (compiler->compile({}) && spu_runtime::g_interpreter)
			{
				spu_log.success("SPU Runtime: Built the interpreter.");

				if (g_cfg.core.spu_decoder != spu_decoder_type::llvm)
				{
					return;
				}
			}
			else
			{
				spu_log.fatal("SPU Runtime: Failed to build the interpreter.");
			}
		}
	}

	u32 worker_count = 0;

	std::optional<scoped_progress_dialog> progr;

	u32 total_funcs = 0;

	if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit || g_cfg.core.spu_decoder == spu_decoder_type::llvm)
	{
		const usz add_count = func_list.size() + total_precompile;

		if (add_count)
		{
			total_funcs = build_existing_cache ? ::narrow<u32>(add_count) : 0;
		}

		worker_count = std::min<u32>(rpcs3::utils::get_max_threads(), ::narrow<u32>(add_count));
	}

	atomic_t<u32> pending_progress = 0;
	atomic_t<bool> showing_progress = false;

	if (!g_progr_ptotal)
	{
		g_progr_ptotal += total_funcs;
		showing_progress.release(true);
		progr.emplace("Building SPU cache...");
	}
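
	// Spawn the worker pool: each worker below pulls indices from the shared
	// atomic counters (fnext for cached programs, then data_indexer for the
	// precompiled sections), so the work distributes dynamically across threads.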
	named_thread_group workers("SPU Worker", worker_count, [&]() -> uint
	{
#ifdef __APPLE__
		pthread_jit_write_protect_np(false);
#endif
		// Set low priority
		thread_ctrl::scoped_priority low_prio(-1);

		// Initialize compiler instances for parallel compilation
		std::unique_ptr<spu_recompiler_base> compiler;

		if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit)
		{
			compiler = spu_recompiler_base::make_asmjit_recompiler();
		}
		else if (g_cfg.core.spu_decoder == spu_decoder_type::llvm)
		{
			compiler = spu_recompiler_base::make_llvm_recompiler();
		}

		compiler->init();

		// Counter for error reporting
		u32 logged_error = 0;

		// How much every thread compiled
		uint result = 0;

		// Fake LS
		std::vector<be_t<u32>> ls(0x10000);

		usz func_i = fnext++;

		// Ensure some actions are performed on a single thread
		const bool is_first_thread = func_i == 0;

		// Build functions
		for (; func_i < func_list.size(); func_i = fnext++, (showing_progress ? g_progr_pdone : pending_progress) += build_existing_cache ? 1 : 0)
		{
			const spu_program& func = std::as_const(func_list)[func_i];

			if (Emu.IsStopped() || fail_flag)
			{
				continue;
			}

			// Get data start
			const u32 start = func.lower_bound;
			const u32 size0 = ::size32(func.data);

			be_t<u64> hash_start;
			{
				sha1_context ctx;
				u8 output[20];

				sha1_starts(&ctx);
				sha1_update(&ctx, reinterpret_cast<const u8*>(func.data.data()), func.data.size() * 4);
				sha1_finish(&ctx, output);
				std::memcpy(&hash_start, output, sizeof(hash_start));
			}

			// Check hash against allowed bounds
			const bool inverse_bounds = g_cfg.core.spu_llvm_lower_bound > g_cfg.core.spu_llvm_upper_bound;

			if ((!inverse_bounds && (hash_start < g_cfg.core.spu_llvm_lower_bound || hash_start > g_cfg.core.spu_llvm_upper_bound)) ||
				(inverse_bounds && (hash_start < g_cfg.core.spu_llvm_lower_bound && hash_start > g_cfg.core.spu_llvm_upper_bound)))
			{
				spu_log.error("[Debug] Skipped function %s", fmt::base57(hash_start));
				result++;
				continue;
			}

			// Initialize LS with function data only
			for (u32 i = 0, pos = start; i < size0; i++, pos += 4)
			{
				ls[pos / 4] = std::bit_cast<be_t<u32>>(func.data[i]);
			}

			// Call analyser
			spu_program func2 = compiler->analyse(ls.data(), func.entry_point);

			if (func2 != func)
			{
				spu_log.error("[0x%05x] SPU Analyser failed, %u vs %u", func2.entry_point, func2.data.size(), size0);

				if (logged_error < 2)
				{
					std::string log;
					compiler->dump(func, log);
					spu_log.notice("[0x%05x] Function: %s", func.entry_point, log);
					logged_error++;
				}
			}
			else if (!compiler->compile(std::move(func2)))
			{
				// Likely, out of JIT memory. Signal to prevent further building.
				fail_flag |= 1;
				continue;
			}

			// Clear fake LS
			std::memset(ls.data() + start / 4, 0, 4 * (size0 - 1));

			result++;

			if (is_first_thread && !showing_progress)
			{
				if (!g_progr.load() && !g_progr_ptotal && !g_progr_ftotal)
				{
					showing_progress = true;
					g_progr_pdone += pending_progress.exchange(0);
					g_progr_ptotal += total_funcs;
					progr.emplace("Building SPU cache...");
				}
			}
			else if (showing_progress && pending_progress)
			{
				// Cover missing progress due to a race
				g_progr_pdone += pending_progress.exchange(0);
			}
		}

		u32 last_sec_idx = umax;

		for (func_i = data_indexer++;; func_i = data_indexer++, (showing_progress ? g_progr_pdone : pending_progress) += build_existing_cache ? 1 : 0)
		{
			usz passed_count = 0;
			u32 func_addr = 0;
			u32 next_func = 0;
			u32 sec_addr = umax;
			u32 sec_idx = 0;
			std::basic_string_view<u32> inst_data;

			// Try to get the data this index points to
			for (auto& sec : data_list)
			{
				if (func_i < passed_count + sec.funcs.size())
				{
					const usz func_idx = func_i - passed_count;
					sec_addr = sec.vaddr;
					func_addr = ::at32(sec.funcs, func_idx);
					inst_data = sec.inst_data;
					next_func = sec.funcs.size() == func_idx + 1 ? ::narrow<u32>(sec_addr + inst_data.size() * 4) : sec.funcs[func_idx + 1];
					break;
				}

				passed_count += sec.funcs.size();
				sec_idx++;
			}

			if (sec_addr == umax)
			{
				// End of compilation for thread
				break;
			}

			if (Emu.IsStopped() || fail_flag)
			{
				continue;
			}

			if (last_sec_idx != sec_idx)
			{
				if (last_sec_idx != umax)
				{
					// Clear fake LS of previous section
					auto& sec = data_list[last_sec_idx];
					std::memset(ls.data() + sec.vaddr / 4, 0, sec.inst_data.size() * 4);
				}

				// Initialize LS with the entire section data
				for (u32 i = 0, pos = sec_addr; i < inst_data.size(); i++, pos += 4)
				{
					ls[pos / 4] = std::bit_cast<be_t<u32>>(inst_data[i]);
				}

				last_sec_idx = sec_idx;
			}

			u32 block_addr = func_addr;

			std::map<u32, std::basic_string<u32>> targets;

			// Call analyser
			spu_program func2 = compiler->analyse(ls.data(), block_addr, &targets);

			while (!func2.data.empty())
			{
				const u32 last_inst = std::bit_cast<be_t<u32>>(func2.data.back());
				const u32 prog_size = ::size32(func2.data);

				if (!compiler->compile(std::move(func2)))
				{
					// Likely, out of JIT memory. Signal to prevent further building.
					fail_flag |= 1;
					break;
				}

				result++;

				const u32 start_new = block_addr + prog_size * 4;

				if (start_new >= next_func || (start_new == next_func - 4 && ls[start_new / 4] == 0x200000u))
				{
					// Completed
					break;
				}

				if (auto type = g_spu_itype.decode(last_inst);
					type == spu_itype::BRSL || type == spu_itype::BRASL || type == spu_itype::BISL || type == spu_itype::SYNC)
				{
					if (ls[start_new / 4] && g_spu_itype.decode(ls[start_new / 4]) != spu_itype::UNK)
					{
						spu_log.notice("Precompiling fallthrough to 0x%05x", start_new);
						func2 = compiler->analyse(ls.data(), start_new, &targets);
						block_addr = start_new;
						continue;
					}
				}

				if (targets.empty())
				{
					break;
				}

				const auto upper = targets.upper_bound(func_addr);

				if (upper == targets.begin())
				{
					break;
				}

				u32 new_entry = umax;

				// Find the lowest target in the space in-between
				for (auto it = std::prev(upper); it != targets.end() && it->first < start_new && new_entry > start_new; it++)
				{
					for (u32 target : it->second)
					{
						if (target >= start_new && target < next_func)
						{
							if (target < new_entry)
							{
								new_entry = target;

								if (new_entry == start_new)
								{
									// Cannot go lower
									break;
								}
							}
						}
					}
				}

				if (new_entry != umax && !spu_thread::is_exec_code(new_entry, {reinterpret_cast<const u8*>(ls.data()), SPU_LS_SIZE}, 0, true))
				{
					new_entry = umax;
				}

				if (new_entry == umax)
				{
					new_entry = start_new;

					while (new_entry < next_func && (ls[new_entry / 4] < 0x3fffc || !spu_thread::is_exec_code(new_entry, {reinterpret_cast<const u8*>(ls.data()), SPU_LS_SIZE}, 0, true)))
					{
						new_entry += 4;
					}

					if (new_entry >= next_func || (new_entry == next_func - 4 && ls[new_entry / 4] == 0x200000u))
					{
						// Completed
						break;
					}
				}

				spu_log.notice("Precompiling filler space at 0x%05x (next=0x%05x)", new_entry, next_func);
				func2 = compiler->analyse(ls.data(), new_entry, &targets);
				block_addr = new_entry;
			}

			if (is_first_thread && !showing_progress)
			{
				if (!g_progr.load() && !g_progr_ptotal && !g_progr_ftotal)
				{
					showing_progress = true;
					g_progr_pdone += pending_progress.exchange(0);
					g_progr_ptotal += total_funcs;
					progr.emplace("Building SPU cache...");
				}
			}
			else if (showing_progress && pending_progress)
			{
				// Cover missing progress due to a race
				g_progr_pdone += pending_progress.exchange(0);
			}
		}

		if (showing_progress && pending_progress)
		{
			// Cover missing progress due to a race
			g_progr_pdone += pending_progress.exchange(0);
		}

		return result;
	});

	u32 built_total = 0;

	// Join (implicitly) and print individual results
	for (u32 i = 0; i < workers.size(); i++)
	{
		spu_log.notice("SPU Runtime: Worker %u built %u programs.", i + 1, workers[i]);
		built_total += workers[i];
	}

	spu_log.notice("SPU Runtime: Workers built %u programs.", built_total);

	if (Emu.IsStopped())
	{
		spu_log.error("SPU Runtime: Cache building aborted.");
		return;
	}

	if (fail_flag)
	{
		spu_log.fatal("SPU Runtime: Cache building failed (out of memory).");
		return;
	}

	if ((g_cfg.core.spu_decoder == spu_decoder_type::asmjit || g_cfg.core.spu_decoder == spu_decoder_type::llvm) && !func_list.empty())
	{
		spu_log.success("SPU Runtime: Built %u functions.", func_list.size());

		if (g_cfg.core.spu_debug)
		{
			std::string dump;
			dump.reserve(10'000'000);

			std::map<std::basic_string_view<u8>, spu_program*> sorted;

			for (auto&& f : func_list)
			{
				// Interpret as a byte string
				std::basic_string_view<u8> data = {reinterpret_cast<u8*>(f.data.data()), f.data.size() * sizeof(u32)};
				sorted[data] = &f;
			}

			std::unordered_set<u32> depth_n;

			u32 n_max = 0;

			for (auto&& [bytes, f] : sorted)
			{
				{
					sha1_context ctx;
					u8 output[20];

					sha1_starts(&ctx);
					sha1_update(&ctx, bytes.data(), bytes.size());
					sha1_finish(&ctx, output);
					fmt::append(dump, "\n\t[%s] ", fmt::base57(output));
				}

				u32 depth_m = 0;

				for (auto&& [data, f2] : sorted)
				{
					u32 depth = 0;

					if (f2 == f)
					{
						continue;
					}

					for (u32 i = 0; i < bytes.size(); i++)
					{
						if (i < data.size() && data[i] == bytes[i])
						{
							depth++;
						}
						else
						{
							break;
						}
					}

					depth_n.emplace(depth);
					depth_m = std::max(depth, depth_m);
				}

				fmt::append(dump, "c=%06d,d=%06d ", depth_n.size(), depth_m);

				bool sk = false;

				for (u32 i = 0; i < std::min<usz>(bytes.size(), std::max<usz>(256, depth_m)); i++)
				{
					if (depth_m == i)
					{
						dump += '|';
						sk = true;
					}

					fmt::append(dump, "%02x", bytes[i]);

					if (i % 4 == 3)
					{
						if (sk)
						{
							sk = false;
						}
						else
						{
							dump += ' ';
						}

						dump += ' ';
					}
				}

				fmt::append(dump, "\n\t%49s", "");

				for (u32 i = 0; i < std::min<usz>(f->data.size(), std::max<usz>(64, utils::aligned_div<u32>(depth_m, 4))); i++)
				{
					fmt::append(dump, "%-10s", g_spu_iname.decode(std::bit_cast<be_t<u32>>(f->data[i])));
				}

				n_max = std::max(n_max, ::size32(depth_n));
				depth_n.clear();
			}

			spu_log.notice("SPU Cache Dump (max_c=%d): %s", n_max, dump);
		}
	}

	// Initialize global cache instance
	if (g_cfg.core.spu_cache && cache)
	{
		g_fxo->get<spu_cache>() = std::move(cache);
	}
}

bool spu_program::operator==(const spu_program& rhs) const noexcept
{
	// TODO
	return entry_point - lower_bound == rhs.entry_point - rhs.lower_bound && data == rhs.data;
}

bool spu_program::operator<(const spu_program& rhs) const noexcept
{
	const u32 lhs_offs = (entry_point - lower_bound) / 4;
	const u32 rhs_offs = (rhs.entry_point - rhs.lower_bound) / 4;

	// Select range for comparison
	std::basic_string_view<u32> lhs_data(data.data() + lhs_offs, data.size() - lhs_offs);
	std::basic_string_view<u32> rhs_data(rhs.data.data() + rhs_offs, rhs.data.size() - rhs_offs);
	const auto cmp0 = lhs_data.compare(rhs_data);

	if (cmp0 < 0)
		return true;
	else if (cmp0 > 0)
		return false;

	// Compare from address 0 to the point before the entry point (TODO: undesirable)
	lhs_data = {data.data(), lhs_offs};
	rhs_data = {rhs.data.data(), rhs_offs};
	const auto cmp1 = lhs_data.compare(rhs_data);

	if (cmp1 < 0)
		return true;
	else if (cmp1 > 0)
		return false;

	// TODO
	return lhs_offs < rhs_offs;
}

spu_runtime::spu_runtime()
{
	// Clear LLVM output
	m_cache_path = rpcs3::cache::get_ppu_cache();

	if (m_cache_path.empty())
	{
		return;
	}

	if (g_cfg.core.spu_debug && g_cfg.core.spu_decoder != spu_decoder_type::dynamic && g_cfg.core.spu_decoder != spu_decoder_type::_static)
	{
		if (!fs::create_dir(m_cache_path + "llvm/"))
		{
			fs::remove_all(m_cache_path + "llvm/", false);
		}

		fs::file(m_cache_path + "spu.log", fs::rewrite);
		fs::file(m_cache_path + "spu-ir.log", fs::rewrite);
	}
}

spu_item* spu_runtime::add_empty(spu_program&& data)
{
	if (data.data.empty())
	{
		return nullptr;
	}

	// Store previous item if already added
	spu_item* prev = nullptr;

	// Try to add item that doesn't exist yet
	const auto ret = m_stuff[data.data[0] >> 12].push_if([&](spu_item& _new, spu_item& _old)
	{
		if (_new.data == _old.data)
		{
			prev = &_old;
			return false;
		}

		return true;
	}, std::move(data));

	if (ret)
	{
		return ret;
	}

	return prev;
}
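
// Note: m_stuff is bucketed by (first instruction word >> 12), the same
// upper-20-bit index that tr_all uses for g_dispatcher, so rebuilding a
// dispatcher only needs to consider programs whose first word shares those bits.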

spu_function_t spu_runtime::rebuild_ubertrampoline(u32 id_inst)
{
	// Prepare sorted list
	static thread_local std::vector<std::pair<std::basic_string_view<u32>, spu_function_t>> m_flat_list;

	// Remember top position
	auto stuff_it = ::at32(m_stuff, id_inst >> 12).begin();
	auto stuff_end = ::at32(m_stuff, id_inst >> 12).end();
	{
		if (stuff_it->trampoline)
		{
			return stuff_it->trampoline;
		}

		m_flat_list.clear();

		for (auto it = stuff_it; it != stuff_end; ++it)
		{
			if (const auto ptr = it->compiled.load())
			{
				std::basic_string_view<u32> range{it->data.data.data(), it->data.data.size()};
				range.remove_prefix((it->data.entry_point - it->data.lower_bound) / 4);
				m_flat_list.emplace_back(range, ptr);
			}
			else
			{
				// Pull oneself deeper (TODO)
				++stuff_it;
			}
		}
	}

	std::sort(m_flat_list.begin(), m_flat_list.end(), FN(x.first < y.first));

	struct work
	{
		u32 size;
		u16 from;
		u16 level;
		u8* rel32;
		decltype(m_flat_list)::iterator beg;
		decltype(m_flat_list)::iterator end;
	};

	// Scratch vector
	static thread_local std::vector<work> workload;
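
	// What follows is a divide-and-conquer construction: the sorted program list
	// is repeatedly split by the instruction word at the current "level" (word
	// offset from the entry point), and a compare-and-branch is emitted at each
	// split, so the finished trampoline binary-searches its way to the compiled
	// function whose contents match the code at the lookup address. Ranges that
	// cannot be distinguished fall back to tr_dispatch.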

	// Generate a dispatcher (übertrampoline)
	const auto beg = m_flat_list.begin();
	const auto _end = m_flat_list.end();
	const u32 size0 = ::size32(m_flat_list);

	auto result = beg->second;

	if (size0 != 1)
	{
#if defined(ARCH_ARM64)
		// Allocate some writable executable memory
		u8* const wxptr = jit_runtime::alloc(size0 * 128 + 16, 16);

		if (!wxptr)
		{
			return nullptr;
		}

		// Raw assembly pointer
		u8* raw = wxptr;

		auto make_jump = [&](asmjit::arm::CondCode op, auto target)
		{
			// 36 bytes
			// Fallback to dispatch if no target
			const u64 taddr = target ? reinterpret_cast<u64>(target) : reinterpret_cast<u64>(tr_dispatch);

			// ldr x9, #16 -> ldr x9, taddr
			*raw++ = 0x89;
			*raw++ = 0x00;
			*raw++ = 0x00;
			*raw++ = 0x58;

			if (op == asmjit::arm::CondCode::kAlways)
			{
				// br x9
				*raw++ = 0x20;
				*raw++ = 0x01;
				*raw++ = 0x1F;
				*raw++ = 0xD6;

				// nop
				*raw++ = 0x1F;
				*raw++ = 0x20;
				*raw++ = 0x03;
				*raw++ = 0xD5;

				// nop
				*raw++ = 0x1F;
				*raw++ = 0x20;
				*raw++ = 0x03;
				*raw++ = 0xD5;
			}
			else
			{
				// b.COND #8 -> b.COND do_branch
				switch (op)
				{
				case asmjit::arm::CondCode::kUnsignedLT:
					*raw++ = 0x43;
					break;
				case asmjit::arm::CondCode::kUnsignedGT:
					*raw++ = 0x48;
					break;
				default:
					asm("brk 0x42");
				}

				*raw++ = 0x00;
				*raw++ = 0x00;
				*raw++ = 0x54;

				// b #16 -> b cont
				*raw++ = 0x04;
				*raw++ = 0x00;
				*raw++ = 0x00;
				*raw++ = 0x14;

				// do_branch: br x9
				*raw++ = 0x20;
				*raw++ = 0x01;
				*raw++ = 0x1f;
				*raw++ = 0xD6;
			}

			// taddr
			std::memcpy(raw, &taddr, 8);
			raw += 8;

			// cont: next instruction
		};
#elif defined(ARCH_X64)
		// Allocate some writable executable memory
		u8* const wxptr = jit_runtime::alloc(size0 * 22 + 14, 16);

		if (!wxptr)
		{
			return nullptr;
		}

		// Raw assembly pointer
		u8* raw = wxptr;

		// Write jump instruction with rel32 immediate
		auto make_jump = [&](u8 op, auto target)
		{
			ensure(raw + 8 <= wxptr + size0 * 22 + 16);

			// Fallback to dispatch if no target
			const u64 taddr = target ? reinterpret_cast<u64>(target) : reinterpret_cast<u64>(tr_dispatch);

			// Compute the distance
			const s64 rel = taddr - reinterpret_cast<u64>(raw) - (op != 0xe9 ? 6 : 5);

			ensure(rel >= s32{smin} && rel <= s32{smax});

			if (op != 0xe9)
			{
				// First jcc byte
				*raw++ = 0x0f;
				ensure((op >> 4) == 0x8);
			}

			*raw++ = op;

			const s32 r32 = static_cast<s32>(rel);
			std::memcpy(raw, &r32, 4);
			raw += 4;
		};
#endif

		workload.clear();
		workload.reserve(size0);
		workload.emplace_back();
		workload.back().size = size0;
		workload.back().level = 0;
		workload.back().from = -1;
		workload.back().rel32 = nullptr;
		workload.back().beg = beg;
		workload.back().end = _end;

		// LS address starting from PC is already loaded into rcx (see spu_runtime::tr_all)
for ( usz i = 0 ; i < workload . size ( ) ; i + + )
2019-01-21 19:04:32 +01:00
{
// Get copy of the workload info
2019-02-27 21:09:09 +01:00
auto w = workload [ i ] ;
2019-01-21 19:04:32 +01:00
// Split range in two parts
auto it = w . beg ;
auto it2 = w . beg ;
u32 size1 = w . size / 2 ;
u32 size2 = w . size - size1 ;
std : : advance ( it2 , w . size / 2 ) ;
2021-05-22 09:35:15 +02:00
while ( ensure ( w . level < umax ) )
2019-01-21 19:04:32 +01:00
{
it = it2 ;
size1 = w . size - size2 ;
if ( w . level > = w . beg - > first . size ( ) )
{
// Cannot split: smallest function is a prefix of bigger ones (TODO)
break ;
}
2022-09-19 14:57:51 +02:00
const u32 x1 = : : at32 ( w . beg - > first , w . level ) ;
2019-01-21 19:04:32 +01:00
if ( ! x1 )
{
// Cannot split: some functions contain holes at this level
w . level + + ;
2019-05-14 17:55:10 +02:00
// Resort subrange starting from the new level
std : : stable_sort ( w . beg , w . end , [ & ] ( const auto & a , const auto & b )
{
std : : basic_string_view < u32 > lhs = a . first ;
std : : basic_string_view < u32 > rhs = b . first ;
lhs . remove_prefix ( w . level ) ;
rhs . remove_prefix ( w . level ) ;
return lhs < rhs ;
} ) ;
2019-01-21 19:04:32 +01:00
continue ;
}
// Adjust ranges (forward)
2022-09-19 14:57:51 +02:00
while ( it ! = w . end & & x1 = = : : at32 ( it - > first , w . level ) )
2019-01-21 19:04:32 +01:00
{
it + + ;
size1 + + ;
}
if ( it = = w . end )
{
// Cannot split: words are identical within the range at this level
w . level + + ;
}
else
{
size2 = w . size - size1 ;
break ;
}
}

			if (w.rel32)
			{
#if defined(ARCH_X64)
				// Patch rel32 linking it to the current location if necessary
				const s32 r32 = ::narrow<s32>(raw - w.rel32);
				std::memcpy(w.rel32 - 4, &r32, 4);
#elif defined(ARCH_ARM64)
				// Rewrite jump address
				{
					u64 raw64 = reinterpret_cast<u64>(raw);
					memcpy(w.rel32 - 8, &raw64, 8);
				}
#else
#error "Unimplemented"
#endif
			}

			if (w.level >= w.beg->first.size() || w.level >= it->first.size())
			{
				// If functions cannot be compared, assume smallest function
				spu_log.error("Trampoline simplified at ??? (level=%u)", w.level);
#if defined(ARCH_X64)
				make_jump(0xe9, w.beg->second); // jmp rel32
#elif defined(ARCH_ARM64)
				u64 branch_target = reinterpret_cast<u64>(w.beg->second);
				make_jump(asmjit::arm::CondCode::kAlways, branch_target);
#else
#error "Unimplemented"
#endif
				continue;
			}

			// Value for comparison
			const u32 x = ::at32(it->first, w.level);

			// Adjust ranges (backward)
			while (it != m_flat_list.begin())
			{
				it--;

				if (w.level >= it->first.size())
				{
					it = m_flat_list.end();
					break;
				}

				if (::at32(it->first, w.level) != x)
				{
					it++;
					break;
				}

				ensure(it != w.beg);

				size1--;
				size2++;
			}

			if (it == m_flat_list.end())
			{
				spu_log.error("Trampoline simplified (II) at ??? (level=%u)", w.level);
#if defined(ARCH_X64)
				make_jump(0xe9, w.beg->second); // jmp rel32
#elif defined(ARCH_ARM64)
				u64 branch_target = reinterpret_cast<u64>(w.beg->second);
				make_jump(asmjit::arm::CondCode::kAlways, branch_target);
#else
#error "Unimplemented"
#endif
				continue;
			}

			// Emit 32-bit comparison
#if defined(ARCH_X64)
			ensure(raw + 12 <= wxptr + size0 * 22 + 16); // "Asm overflow"
#elif defined(ARCH_ARM64)
			ensure(raw + (4 * 4) <= wxptr + size0 * 128 + 16);
#else
#error "Unimplemented"
#endif

			if (w.from != w.level)
			{
				// If necessary (level has advanced), emit load: mov eax, [rcx + addr]
				const u32 cmp_lsa = w.level * 4u;
#if defined(ARCH_X64)
				if (cmp_lsa < 0x80)
				{
					*raw++ = 0x8b;
					*raw++ = 0x41;
					*raw++ = ::narrow<s8>(cmp_lsa);
				}
				else
				{
					*raw++ = 0x8b;
					*raw++ = 0x81;
					std::memcpy(raw, &cmp_lsa, 4);
					raw += 4;
				}
#elif defined(ARCH_ARM64)
				// ldr w9, #8
				*raw++ = 0x49;
				*raw++ = 0x00;
				*raw++ = 0x00;
				*raw++ = 0x18;

				// b #8
				*raw++ = 0x02;
				*raw++ = 0x00;
				*raw++ = 0x00;
				*raw++ = 0x14;

				// cmp_lsa
				std::memcpy(raw, &cmp_lsa, 4);
				raw += 4;

				// ldr w1, [x7, x9]
				*raw++ = 0xE1;
				*raw++ = 0x68;
				*raw++ = 0x69;
				*raw++ = 0xB8;
#else
#error "Unimplemented"
#endif
			}

			// Emit comparison: cmp eax, imm32
#if defined(ARCH_X64)
			*raw++ = 0x3d;
			std::memcpy(raw, &x, 4);
			raw += 4;
#elif defined(ARCH_ARM64)
			// ldr w9, #8
			*raw++ = 0x49;
			*raw++ = 0x00;
			*raw++ = 0x00;
			*raw++ = 0x18;

			// b #8
			*raw++ = 0x02;
			*raw++ = 0x00;
			*raw++ = 0x00;
			*raw++ = 0x14;

			// x
			std::memcpy(raw, &x, 4);
			raw += 4;

			// cmp w1, w9
			*raw++ = 0x3f;
			*raw++ = 0x00;
			*raw++ = 0x09;
			*raw++ = 0x6B;
#else
#error "Unimplemented"
#endif

			// Low subrange target
			if (size1 == 1)
			{
#if defined(ARCH_X64)
				make_jump(0x82, w.beg->second); // jb rel32
#elif defined(ARCH_ARM64)
				u64 branch_target = reinterpret_cast<u64>(w.beg->second);
				make_jump(asmjit::arm::CondCode::kUnsignedLT, branch_target);
#else
#error "Unimplemented"
#endif
			}
			else
			{
#if defined(ARCH_X64)
				make_jump(0x82, raw); // jb rel32 (stub)
#elif defined(ARCH_ARM64)
				make_jump(asmjit::arm::CondCode::kUnsignedLT, raw);
#else
#error "Unimplemented"
#endif
				auto& to = workload.emplace_back(w);
				to.end = it;
				to.size = size1;
				to.rel32 = raw;
				to.from = w.level;
			}

			// Second subrange target
			if (size2 == 1)
			{
#if defined(ARCH_X64)
				make_jump(0xe9, it->second); // jmp rel32
#elif defined(ARCH_ARM64)
				u64 branch_target = reinterpret_cast<u64>(it->second);
				make_jump(asmjit::arm::CondCode::kAlways, branch_target);
#else
#error "Unimplemented"
#endif
			}
			else
			{
				it2 = it;

				// Select additional midrange for equality comparison
				while (it2 != w.end && ::at32(it2->first, w.level) == x)
				{
					size2--;
					it2++;
				}

				if (it2 != w.end)
				{
					// High subrange target
					if (size2 == 1)
					{
#if defined(ARCH_X64)
						make_jump(0x87, it2->second); // ja rel32
#elif defined(ARCH_ARM64)
						u64 branch_target = reinterpret_cast<u64>(it2->second);
						make_jump(asmjit::arm::CondCode::kUnsignedGT, branch_target);
#else
#error "Unimplemented"
#endif
					}
					else
					{
#if defined(ARCH_X64)
						make_jump(0x87, raw); // ja rel32 (stub)
#elif defined(ARCH_ARM64)
						make_jump(asmjit::arm::CondCode::kUnsignedGT, raw);
#else
#error "Unimplemented"
#endif
						auto& to = workload.emplace_back(w);
						to.beg = it2;
						to.size = size2;
						to.rel32 = raw;
						to.from = w.level;
					}

					const u32 size3 = w.size - size1 - size2;

					if (size3 == 1)
					{
#if defined(ARCH_X64)
						make_jump(0xe9, it->second); // jmp rel32
#elif defined(ARCH_ARM64)
						u64 branch_target = reinterpret_cast<u64>(it->second);
						make_jump(asmjit::arm::CondCode::kAlways, branch_target);
#else
#error "Unimplemented"
#endif
					}
					else
					{
#if defined(ARCH_X64)
						make_jump(0xe9, raw); // jmp rel32 (stub)
#elif defined(ARCH_ARM64)
						make_jump(asmjit::arm::CondCode::kAlways, raw);
#else
#error "Unimplemented"
#endif
						auto& to = workload.emplace_back(w);
						to.beg = it;
						to.end = it2;
						to.size = size3;
						to.rel32 = raw;
						to.from = w.level;
					}
				}
				else
				{
#if defined(ARCH_X64)
					make_jump(0xe9, raw); // jmp rel32 (stub)
#elif defined(ARCH_ARM64)
					make_jump(asmjit::arm::CondCode::kAlways, raw);
#else
#error "Unimplemented"
#endif
					auto& to = workload.emplace_back(w);
					to.beg = it;
					to.size = w.size - size1;
					to.rel32 = raw;
					to.from = w.level;
				}
			}
		}

		workload.clear();

		result = reinterpret_cast<spu_function_t>(reinterpret_cast<u64>(wxptr));

		std::string fname;
		fmt::append(fname, "__ub%u", m_flat_list.size());
		jit_announce(wxptr, raw - wxptr, fname);
	}

	if (auto _old = stuff_it->trampoline.compare_and_swap(nullptr, result))
	{
		return _old;
	}

	// Install ubertrampoline
	auto& insert_to = ::at32(*spu_runtime::g_dispatcher, id_inst >> 12);

	auto _old = insert_to.load();

	do
	{
		// Make sure we are replacing an older ubertrampoline but not a newer one
		if (_old != tr_dispatch)
		{
			bool ok = false;

			for (auto it = stuff_it; it != stuff_end; ++it)
			{
				if (it->trampoline == _old)
				{
					ok = true;
					break;
				}
			}

			if (!ok)
			{
				return result;
			}
		}
	}
	while (!insert_to.compare_exchange(_old, result));

	return result;
}
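
// Find a compiled function for the code at the given LS address by comparing
// the current LS contents against each candidate's recorded instruction data.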
spu_function_t spu_runtime::find(const u32* ls, u32 addr) const
{
	const u32 index = ls[addr / 4] >> 12;

	for (const auto& item : ::at32(m_stuff, index))
	{
		if (const auto ptr = item.compiled.load())
		{
			std::basic_string_view<u32> range{item.data.data.data(), item.data.data.size()};
			range.remove_prefix((item.data.entry_point - item.data.lower_bound) / 4);

			if (addr / 4 + range.size() > 0x10000)
			{
				continue;
			}

			if (range.compare(0, range.size(), ls + addr / 4, range.size()) == 0)
			{
				return ptr;
			}
		}
	}

	return nullptr;
}
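
// Build a small patchpoint stub which records its own address and jumps to
// spu_recompiler_base::branch; the 16-bit `data` payload is stored behind the
// jump so the branch handler can read it back later.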
spu_function_t spu_runtime::make_branch_patchpoint(u16 data) const
{
#if defined(ARCH_X64)
	u8* const raw = jit_runtime::alloc(16, 16);

	if (!raw)
	{
		return nullptr;
	}

	// Save address of the following jmp (GHC CC 3rd argument)
	raw[0] = 0x4c; // lea r12, [rip+1]
	raw[1] = 0x8d;
	raw[2] = 0x25;
	raw[3] = 0x01;
	raw[4] = 0x00;
	raw[5] = 0x00;
	raw[6] = 0x00;

	raw[7] = 0x90; // nop

	// Jump to spu_recompiler_base::branch
	raw[8] = 0xe9;

	// Compute the distance
	const s64 rel = reinterpret_cast<u64>(tr_branch) - reinterpret_cast<u64>(raw + 8) - 5;
	std::memcpy(raw + 9, &rel, 4);
	raw[13] = 0xcc;
	raw[14] = data >> 8;
	raw[15] = data & 0xff;
	return reinterpret_cast<spu_function_t>(raw);
#elif defined(ARCH_ARM64)
#if defined(__APPLE__)
	pthread_jit_write_protect_np(false);
#endif
	u8* const patch_fn = ensure(jit_runtime::alloc(36, 16));
	u8* raw = patch_fn;

	// adr x21, #16
	*raw++ = 0x95;
	*raw++ = 0x00;
	*raw++ = 0x00;
	*raw++ = 0x10;

	// nop x3
	for (int i = 0; i < 3; i++)
	{
		*raw++ = 0x1F;
		*raw++ = 0x20;
		*raw++ = 0x03;
		*raw++ = 0xD5;
	}

	// ldr x9, #8
	*raw++ = 0x49;
	*raw++ = 0x00;
	*raw++ = 0x00;
	*raw++ = 0x58;

	// br x9
	*raw++ = 0x20;
	*raw++ = 0x01;
	*raw++ = 0x1F;
	*raw++ = 0xD6;

	u64 branch_target = reinterpret_cast<u64>(tr_branch);
	std::memcpy(raw, &branch_target, 8);
	raw += 8;

	*raw++ = static_cast<u8>(data >> 8);
	*raw++ = static_cast<u8>(data & 0xff);

#if defined(__APPLE__)
	pthread_jit_write_protect_np(true);
#endif

	// Flush all cache lines after potentially writing executable code
	asm("ISB");
	asm("DSB ISH");
	return reinterpret_cast<spu_function_t>(patch_fn);
#else
#error "Unimplemented"
#endif
}

spu_recompiler_base::spu_recompiler_base()
{
}

spu_recompiler_base::~spu_recompiler_base()
{
}
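
// Dispatcher fallback: compiles (if necessary) the code at the current PC and
// tail-calls the result; also resets a patched patchpoint back to the
// dispatcher when code verification failed.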
void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip)
{
	// If code verification failed from a patched patchpoint, clear it with a dispatcher jump
	if (rip)
	{
#if defined(ARCH_X64)
		const s64 rel = reinterpret_cast<u64>(spu_runtime::tr_all) - reinterpret_cast<u64>(rip - 8) - 5;

		union
		{
			u8 bytes[8];
			u64 result;
		};

		bytes[0] = 0xe9; // jmp rel32
		std::memcpy(bytes + 1, &rel, 4);
		bytes[5] = 0x66; // lnop (2 bytes)
		bytes[6] = 0x90;
		bytes[7] = 0x90;

		atomic_storage<u64>::release(*reinterpret_cast<u64*>(rip - 8), result);
#elif defined(ARCH_ARM64)
		union
		{
			u8 bytes[16];
			u128 result;
		};

		// ldr x9, #8
		bytes[0] = 0x49;
		bytes[1] = 0x00;
		bytes[2] = 0x00;
		bytes[3] = 0x58;

		// br x9
		bytes[4] = 0x20;
		bytes[5] = 0x01;
		bytes[6] = 0x1F;
		bytes[7] = 0xD6;

		const u64 target = reinterpret_cast<u64>(spu_runtime::tr_all);
		std::memcpy(bytes + 8, &target, 8);
#if defined(__APPLE__)
		pthread_jit_write_protect_np(false);
#endif
		atomic_storage<u128>::release(*reinterpret_cast<u128*>(rip), result);
#if defined(__APPLE__)
		pthread_jit_write_protect_np(true);
#endif
		// Flush all cache lines after potentially writing executable code
		asm("ISB");
		asm("DSB ISH");
#else
#error "Unimplemented"
#endif
	}

	// Second attempt (recover from the recursion after repeated unsuccessful trampoline call)
	if (spu.block_counter != spu.block_recover && &dispatch != ::at32(*spu_runtime::g_dispatcher, spu._ref<nse_t<u32>>(spu.pc) >> 12))
	{
		spu.block_recover = spu.block_counter;
		return;
	}

	spu.jit->init();

	// Compile
	if (spu._ref<u32>(spu.pc) == 0u)
	{
		spu_runtime::g_escape(&spu);
		return;
	}

	const auto func = spu.jit->compile(spu.jit->analyse(spu._ptr<u32>(0), spu.pc));

	if (!func)
	{
		spu_log.fatal("[0x%05x] Compilation failed.", spu.pc);
		return;
	}

	// Diagnostic
	if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
	{
		const v128 _info = spu.stack_mirror[(spu.gpr[1]._u32[3] & 0x3fff0) >> 4];

		if (_info._u64[0] + 1)
		{
			spu_log.trace("Called from 0x%x", _info._u32[2] - 4);
		}
	}

#if defined(__APPLE__)
	pthread_jit_write_protect_np(true);
#endif

#if defined(ARCH_ARM64)
	// Flush all cache lines after potentially writing executable code
	asm("ISB");
	asm("DSB ISH");
#endif

	spu_runtime::g_tail_escape(&spu, func, nullptr);
}
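
// Branch patchpoint handler: finds the compiled target function and patches
// the callsite in place so that subsequent branches jump to it directly.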
void spu_recompiler_base::branch(spu_thread& spu, void*, u8* rip)
{
#if defined(ARCH_X64)
	if (const u32 ls_off = ((rip[6] << 8) | rip[7]) * 4)
#elif defined(ARCH_ARM64)
	if (const u32 ls_off = ((rip[16] << 8) | rip[17]) * 4) // See branch_patchpoint `data`
#else
#error "Unimplemented"
#endif
	{
		spu_log.todo("Special branch patchpoint hit.\nPlease report to the developer (0x%05x).", ls_off);
	}

	// Find function
	const auto func = spu.jit->get_runtime().find(static_cast<u32*>(spu._ptr<void>(0)), spu.pc);

	if (!func)
	{
		return;
	}

#if defined(ARCH_X64)
	// Overwrite jump to this function with jump to the compiled function
	const s64 rel = reinterpret_cast<u64>(func) - reinterpret_cast<u64>(rip) - 5;

	union
	{
		u8 bytes[8];
		u64 result;
	};

	if (rel >= s32{smin} && rel <= s32{smax})
	{
		const s64 rel8 = (rel + 5) - 2;

		if (rel8 >= s8{smin} && rel8 <= s8{smax})
		{
			bytes[0] = 0xeb; // jmp rel8
			bytes[1] = static_cast<s8>(rel8);
			std::memset(bytes + 2, 0xcc, 4);
		}
		else
		{
			bytes[0] = 0xe9; // jmp rel32
			std::memcpy(bytes + 1, &rel, 4);
			bytes[5] = 0xcc;
		}

		bytes[6] = rip[6];
		bytes[7] = rip[7];
	}
	else
	{
		fmt::throw_exception("Impossible far jump: %p -> %p", rip, func);
	}

	atomic_storage<u64>::release(*reinterpret_cast<u64*>(rip), result);
#elif defined(ARCH_ARM64)
	union
	{
		u8 bytes[16];
		u128 result;
	};

	// ldr x9, #8
	bytes[0] = 0x49;
	bytes[1] = 0x00;
	bytes[2] = 0x00;
	bytes[3] = 0x58;

	// br x9
	bytes[4] = 0x20;
	bytes[5] = 0x01;
	bytes[6] = 0x1F;
	bytes[7] = 0xD6;

	const u64 target = reinterpret_cast<u64>(func);
	std::memcpy(bytes + 8, &target, 8);
#if defined(__APPLE__)
	pthread_jit_write_protect_np(false);
#endif
	atomic_storage<u128>::release(*reinterpret_cast<u128*>(rip), result);
#if defined(__APPLE__)
	pthread_jit_write_protect_np(true);
#endif
	// Flush all cache lines after potentially writing executable code
	asm("ISB");
	asm("DSB ISH");
#else
#error "Unimplemented"
#endif

	spu_runtime::g_tail_escape(&spu, func, rip);
}
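
// Interpreter fallback: executes instructions one at a time using the static
// interpreter table until check_state() requests an exit.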
void spu_recompiler_base::old_interpreter(spu_thread& spu, void* ls, u8* /*rip*/)
{
	if (g_cfg.core.spu_decoder != spu_decoder_type::_static)
	{
		fmt::throw_exception("Invalid SPU decoder");
	}

	// Select opcode table
	const auto& table = g_fxo->get<spu_interpreter_rt>();

	// LS pointer
	const auto base = static_cast<const u8*>(ls);

	while (true)
	{
		if (spu.state) [[unlikely]]
		{
			if (spu.check_state())
				break;
		}

		const u32 op = *reinterpret_cast<const be_t<u32>*>(base + spu.pc);

		if (table.decode(op)(spu, {op}))
			spu.pc += 4;
	}
}
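
// Heuristic function discovery: scan LS for call instructions (BRSL/BRASL)
// and plain BR branches that look like tail calls, validate the candidate
// targets, and return the sorted list of probable function entry points.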
std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u8> ls, bool is_known_addr, u32 /*entry*/)
{
	std::vector<u32> calls;
	std::vector<u32> branches;
	calls.reserve(100);

	// Discover functions
	// Use the most simple method: search for instructions that call them,
	// and then filter out invalid cases
	// TODO: Does not detect jumptables or fixed-addr indirect calls
	const v128 brasl_mask = is_known_addr ? v128::from32p(0x62u << 23) : v128::from32p(umax);
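
	// Note: rol32(~0xffff, 7) == 0xFF80007F keeps the 9-bit opcode and 7-bit RT
	// fields of each instruction while clearing the 16-bit immediate, so four
	// instruction words can be matched at once against opcode patterns with RT = 0.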
	for (u32 i = utils::align<u32>(base_addr, 0x10); i < std::min<u32>(base_addr + ::size32(ls), 0x3FFF0); i += 0x10)
	{
		// Search for BRSL LR and BRASL LR or BR
		// TODO: BISL
		const v128 inst = read_from_ptr<be_t<v128>>(ls.data(), i - base_addr);
		const v128 cleared_i16 = gv_and32(inst, v128::from32p(utils::rol32(~0xffff, 7)));
		const v128 eq_brsl = gv_eq32(cleared_i16, v128::from32p(0x66u << 23));
		const v128 eq_brasl = gv_eq32(cleared_i16, brasl_mask);
		const v128 eq_br = gv_eq32(cleared_i16, v128::from32p(0x64u << 23));
		const v128 result = eq_brsl | eq_brasl;

		if (!gv_testz(result))
		{
			for (u32 j = 0; j < 4; j++)
			{
				if (result.u32r[j])
				{
					calls.push_back(i + j * 4);
				}
			}
		}

		if (!gv_testz(eq_br))
		{
			for (u32 j = 0; j < 4; j++)
			{
				if (eq_br.u32r[j])
				{
					branches.push_back(i + j * 4);
				}
			}
		}
	}

	calls.erase(std::remove_if(calls.begin(), calls.end(), [&](u32 caller)
	{
		// Check the validity of both the callee code and the following caller code
		return !is_exec_code(caller, ls, base_addr, true) || !is_exec_code(caller + 4, ls, base_addr, true);
	}), calls.end());

	branches.erase(std::remove_if(branches.begin(), branches.end(), [&](u32 caller)
	{
		// Check the validity of the callee code
		return !is_exec_code(caller, ls, base_addr, true);
	}), branches.end());
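
	// Turn the surviving call sites into callee entry addresses, skipping
	// self-calls and duplicates, then apply extra per-candidate heuristics.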
	std::vector<u32> addrs;

	for (u32 addr : calls)
	{
		const spu_opcode_t op{read_from_ptr<be_t<u32>>(ls, addr - base_addr)};

		const u32 func = op_branch_targets(addr, op)[0];

		if (func == umax || addr + 4 == func || func == addr || std::count(addrs.begin(), addrs.end(), func))
		{
			continue;
		}

		if (std::count(calls.begin(), calls.end(), func))
		{
			// Cannot call another call instruction (link is overwritten)
			continue;
		}

		addrs.push_back(func);

		// Detect an "arguments passing" block, possibly queue another function
		for (u32 next = func, it = 10; it && next >= base_addr && next < std::min<u32>(base_addr + ::size32(ls), 0x3FFF0); it--, next += 4)
		{
			const spu_opcode_t test_op{read_from_ptr<be_t<u32>>(ls, next - base_addr)};
			const auto type = g_spu_itype.decode(test_op.opcode);

			if (type & spu_itype::branch && type != spu_itype::BR)
			{
				break;
			}

			if (type == spu_itype::UNK || !test_op.opcode)
			{
				break;
			}

			if (type != spu_itype::BR)
			{
				continue;
			}
			const u32 target = op_branch_targets(next, test_op)[0];

			if (target == umax || addr + 4 == target || target == addr || std::count(addrs.begin(), addrs.end(), target))
			{
				break;
			}

			// Detect backwards branch to the block in examination
			if (target >= func && target <= next)
			{
				break;
			}

			if (!is_exec_code(target, ls, base_addr, true))
			{
				break;
			}

			addrs.push_back(target);
			break;
		}
	}

	for (u32 addr : branches)
	{
		const spu_opcode_t op{read_from_ptr<be_t<u32>>(ls, addr - base_addr)};

		const u32 func = op_branch_targets(addr, op)[0];

		if (func == umax || addr + 4 == func || func == addr || !addr)
		{
			continue;
		}

		// Search for AI R1, -x in the called code
		// Reasoning: AI R1, -x means stack frame creation, so this is likely a function
		for (u32 next = func, it = 10; it && next >= base_addr && next < std::min<u32>(base_addr + ::size32(ls), 0x3FFF0); it--, next += 4)
		{
			const spu_opcode_t test_op{read_from_ptr<be_t<u32>>(ls, next - base_addr)};
			const auto type = g_spu_itype.decode(test_op.opcode);

			if (type & spu_itype::branch)
			{
				break;
			}

			if (type == spu_itype::UNK || !test_op.opcode)
			{
				break;
			}

			bool is_func = false;

			if (type == spu_itype::AI && test_op.rt == 1u && test_op.ra == 1u)
			{
				if (test_op.si10 >= 0)
				{
					break;
				}

				is_func = true;
			}

			if (!is_func)
			{
				continue;
			}

			addr = SPU_LS_SIZE + 4; // Terminate the next condition, no further checks needed

			if (std::count(addrs.begin(), addrs.end(), func))
			{
				break;
			}

			addrs.push_back(func);
			break;
		}

		// Search for AI R1, +x or OR R3/4, Rx, 0 before the branch
		// Reasoning: AI R1, +x means stack pointer restoration, so a branch after that is likely a tail call
		// R3 and R4 are common function arguments because they are the first two
		for (u32 back = addr - 4, it = 10; it && back >= base_addr && back < std::min<u32>(base_addr + ::size32(ls), 0x3FFF0); it--, back -= 4)
		{
			const spu_opcode_t test_op{read_from_ptr<be_t<u32>>(ls, back - base_addr)};
			const auto type = g_spu_itype.decode(test_op.opcode);

			if (type & spu_itype::branch)
			{
				break;
			}

			bool is_tail = false;

			if (type == spu_itype::AI && test_op.rt == 1u && test_op.ra == 1u)
			{
				if (test_op.si10 <= 0)
				{
					break;
				}

				is_tail = true;
			}
			else if (!(type & spu_itype::zregmod))
			{
				const u32 op_rt = type & spu_itype::_quadrop ? +test_op.rt4 : +test_op.rt;

				if (op_rt >= 80u && (type != spu_itype::LQD || test_op.ra != 1u))
				{
					// Modifying non-volatile registers, not a call (and not context restoration)
					break;
				}

				//is_tail = op_rt == 3u || op_rt == 4u;
			}

			if (!is_tail)
			{
				continue;
			}

			if (std::count(addrs.begin(), addrs.end(), func))
			{
				break;
			}

			addrs.push_back(func);
			break;
		}
	}

	std::sort(addrs.begin(), addrs.end());
	return addrs;
}
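
// reg_state_t implementation: a lightweight abstract register value used by
// the analyser. A value is either a known constant, a partially known bit
// pattern (known_ones/known_zeroes), or an opaque value identified by a tag
// and its origin address.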
using reg_state_t = spu_recompiler_base::reg_state_t;
using vf = spu_recompiler_base::vf;

bool reg_state_t::is_const() const
{
	return !!(flag & vf::is_const);
}

bool reg_state_t::compare_tags(const reg_state_t& rhs) const
{
	// Compare by tag, address of instruction origin
	return tag == rhs.tag && origin == rhs.origin && is_instruction == rhs.is_instruction;
}

bool reg_state_t::operator&(vf to_test) const
{
	return this->flag.all_of(to_test);
}

bool reg_state_t::is_less_than(u32 imm) const
{
	if (flag & vf::is_const && value < imm)
	{
		return true;
	}

	if (~known_zeroes < imm)
	{
		// The highest number possible within the mask's limit is less than imm
		return true;
	}

	return false;
}

bool reg_state_t::operator==(const reg_state_t& r) const
{
	if ((flag ^ r.flag) - (vf::is_null + vf::is_mask))
	{
		return false;
	}

	return (flag & vf::is_const ? value == r.value : (compare_tags(r) && known_ones == r.known_ones && known_zeroes == r.known_zeroes));
}

bool reg_state_t::operator==(u32 imm) const
{
	return flag == vf::is_const && value == imm;
}

// Compare equality but try to ignore changes in unmasked bits
bool reg_state_t::compare_with_mask_indifference(const reg_state_t& r, u32 mask_bits) const
{
	if (!mask_bits)
	{
		return true;
	}

	if ((r.flag & flag) & vf::is_const)
	{
		// Simplified path for consts
		if (((value ^ r.value) & mask_bits) == 0)
		{
			return true;
		}

		return false;
	}

	const bool is_equal = *this == r;

	if (is_equal)
	{
		return true;
	}

	const auto _this = this->downgrade();
	const auto _r = r.downgrade();

	const bool is_mask_equal = (_this.compare_tags(_r) && _this.flag == _r.flag && !((_this.known_ones ^ _r.known_ones) & mask_bits) && !((_this.known_zeroes ^ _r.known_zeroes) & mask_bits));

	return is_mask_equal;
}

bool reg_state_t::compare_with_mask_indifference(u32 imm, u32 mask_bits) const
{
	if (!mask_bits)
	{
		return true;
	}

	if (flag & vf::is_const)
	{
		if (((value ^ imm) & mask_bits) == 0)
		{
			return true;
		}
	}

	return false;
}

// Ensure inequality but try to ignore changes in unmasked bits
bool reg_state_t::unequal_with_mask_indifference(const reg_state_t& r, u32 mask_bits) const
{
	if (!mask_bits)
	{
		return true;
	}

	if ((r.flag & flag) & vf::is_const)
	{
		// Simplified path for consts
		if ((value ^ r.value) & mask_bits)
		{
			return true;
		}

		return false;
	}

	const bool is_equal = *this == r;

	if (is_equal)
	{
		return false;
	}

	// Full path
	const auto _this = this->downgrade();
	const auto _r = r.downgrade();

	const bool is_base_value_equal = (_this.compare_tags(_r) && _this.flag == _r.flag);

	if (!is_base_value_equal)
	{
		// Cannot ascertain inequality if the value origin is different
		return false;
	}

	// Find at least one bit that is known to be X state at value 'r', and known to be X^1 state at the object's value
	return (((_this.known_ones ^ _r.known_ones) & mask_bits) & ((_this.known_zeroes ^ _r.known_zeroes) & mask_bits)) != 0;
}

reg_state_t reg_state_t::downgrade() const
{
	if (flag & vf::is_const)
	{
		return reg_state_t{vf::is_mask, 0, umax, this->value, ~this->value, this->origin};
	}

	if (!(flag - vf::is_null))
	{
		return reg_state_t{vf::is_mask, 0, this->tag, 0, 0, this->origin};
	}

	return *this;
}
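
// Merge two register states arriving from different control flow paths:
// identical states pass through unchanged, const/mask states degrade to their
// common known bits, and anything else becomes a fresh unknown value
// originating at current_pc.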
reg_state_t reg_state_t::merge(const reg_state_t& rhs, u32 current_pc) const
{
	if (rhs == *this)
	{
		// Perfect state: no conflicts
		return rhs;
	}

	if ((rhs.flag + flag).all_of(vf::is_const + vf::is_mask))
	{
		// Try to downgrade to a known-bits type value
		const reg_state_t _rhs = rhs.downgrade();
		const reg_state_t _this = this->downgrade();

		if ((_rhs.flag & _this.flag) & vf::is_mask)
		{
			// Now it is possible to merge the two values
			reg_state_t res{vf::is_mask, 0, 0, _rhs.known_ones & _this.known_ones, _rhs.known_zeroes & _this.known_zeroes};

			if (res.known_zeroes | res.known_ones)
			{
				// Success (create new value tag)
				res.tag = reg_state_t::alloc_tag();
				res.origin = current_pc;
				res.is_instruction = false;
				return res;
			}
		}
	}

	return make_unknown(current_pc);
}

reg_state_t reg_state_t::build_on_top_of(const reg_state_t& rhs) const
{
	if (flag & vf::is_null)
	{
		// Value unmodified
		return rhs;
	}

	if (rhs == *this)
	{
		// Perfect state: no conflicts
		return rhs;
	}

	return *this;
}

u32 reg_state_t::get_known_zeroes() const
{
	if (flag & vf::is_const)
	{
		return ~value;
	}

	return known_zeroes;
}

u32 reg_state_t::get_known_ones() const
{
	if (flag & vf::is_const)
	{
		return value;
	}

	return known_ones;
}

reg_state_t reg_state_t::from_value(u32 value) noexcept
{
	reg_state_t v{};
	v.value = value;
	v.flag = vf::is_const;
	return v;
}

u32 reg_state_t::alloc_tag(bool reset) noexcept
{
	static thread_local u32 g_tls_tag = 0;

	if (reset)
	{
		g_tls_tag = 0;
		return 0;
	}

	return ++g_tls_tag;
}

void reg_state_t::invalidate_if_created(u32 current_pc)
{
	if (!is_const() && origin == current_pc)
	{
		tag = reg_state_t::alloc_tag();
	}
}

// Converge 2 register states to the same flow in execution
template <usz N>
static void merge(std::array<reg_state_t, N>& result, const std::array<reg_state_t, N>& lhs, const std::array<reg_state_t, N>& rhs, u32 current_pc)
{
	usz index = umax;

	for (reg_state_t& state : result)
	{
		index++;
		state = lhs[index].merge(rhs[index], current_pc);
	}
}

// Override RHS state with the newer LHS state
template <usz N>
static void build_on_top_of(std::array<reg_state_t, N>& result, const std::array<reg_state_t, N>& lhs, const std::array<reg_state_t, N>& rhs)
{
	usz index = umax;

	for (reg_state_t& state : result)
	{
		index++;
		state = lhs[index].build_on_top_of(rhs[index]);
	}
}

struct block_reg_info
{
	u32 pc = SPU_LS_SIZE; // Address

	std::array<reg_state_t, s_reg_max> local_state{};
	bool has_true_state = false;
	std::array<reg_state_t, s_reg_max> start_reg_state{};
	std::array<reg_state_t, s_reg_max> end_reg_state{};
	std::array<reg_state_t, s_reg_max> addend_reg_state{};
	std::array<reg_state_t, s_reg_max> walkby_state{}; // State made by merging predecessor states and iterating over instructions for the final instruction walk

	usz next_nodes_count = 0;

	struct node_t
	{
		u32 prev_pc = umax;
	};

	std::vector<node_t> prev_nodes;

	static std::unique_ptr<block_reg_info> create(u32 pc) noexcept
	{
		auto ptr = new block_reg_info{pc, reg_state_t::make_unknown<s_reg_max>(pc)};

		for (reg_state_t& f : ptr->local_state)
		{
			f.flag += vf::is_null;
		}

		ptr->start_reg_state = ptr->local_state;
		return std::unique_ptr<block_reg_info>(ptr);
	}

	// Evaluate registers state
	std::array<reg_state_t, s_reg_max>& evaluate_start_state(const std::map<u32, std::unique_ptr<block_reg_info>>& map, bool extensive_evaluation);

	// Creates a new node if not found and links it back to the preceding (parent) node,
	// in a manner in which no duplicate paths are formed
	static void create_node(u32 pc_rhs, u32 parent_pc, std::map<u32, std::unique_ptr<block_reg_info>>& map)
	{
		//ensure(parent_node != pc_rhs);
		ensure(map[parent_pc]);

		if (!map[pc_rhs])
		{
			map[pc_rhs] = create(pc_rhs);
		}

		node_t prev_node{parent_pc};
		map[parent_pc]->next_nodes_count++;
		map[pc_rhs]->prev_nodes.emplace_back(prev_node);
	}
};
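
// Static analysis entry point: walks the instructions reachable from
// entry_point, discovering basic blocks, branch targets and function entries
// while doing weak constant propagation to resolve indirect branches, and
// returns the program data for compilation.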
spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, std::map<u32, std::basic_string<u32>>* out_target_list)
{
	// Result: addr + raw instruction data
	spu_program result;
	result.data.reserve(10000);
	result.entry_point = entry_point;
	result.lower_bound = entry_point;

	// Initialize block entries
	m_block_info.reset();
	m_block_info.set(entry_point / 4);
	m_entry_info.reset();
	m_entry_info.set(entry_point / 4);
	m_ret_info.reset();

	// Simple block entry workload list
	workload.clear();
	workload.push_back(entry_point);

	std::memset(m_regmod.data(), 0xff, sizeof(m_regmod));
	m_use_ra.reset();
	m_use_rb.reset();
	m_use_rc.reset();
	m_targets.clear();
	m_preds.clear();
	m_preds[entry_point];
	m_bbs.clear();
	m_chunks.clear();
	m_funcs.clear();
	m_inst_attrs.clear();
	m_patterns.clear();

	// SYNC instruction found
	bool sync = false;

	u32 hbr_loc = 0;
	u32 hbr_tg = -1;

	// Result bounds
	u32 lsa = entry_point;
	u32 limit = 0x40000;

	if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
	{
	}

	// Weak constant propagation context (for guessing branch targets)
	std::array<bs_t<vf>, 128> vflags{};

	// Associated constant values for 32-bit preferred slot
	std::array<u32, 128> values;
for ( u32 wi = 0 , wa = workload [ 0 ] ; wi < workload . size ( ) ; )
2018-04-30 18:44:01 +02:00
{
const auto next_block = [ & ]
2018-04-09 16:45:37 +02:00
{
2018-04-30 18:44:01 +02:00
// Reset value information
vflags . fill ( { } ) ;
2018-06-10 14:46:01 +02:00
sync = false ;
hbr_loc = 0 ;
hbr_tg = - 1 ;
2018-04-30 18:44:01 +02:00
wi + + ;
2018-06-10 14:46:01 +02:00
if ( wi < workload . size ( ) )
{
wa = workload [ wi ] ;
}
2018-04-30 18:44:01 +02:00
} ;
2018-06-10 14:46:01 +02:00
const u32 pos = wa ;
2018-05-13 19:34:11 +02:00
2018-04-30 18:44:01 +02:00
const auto add_block = [ & ] ( u32 target )
{
2018-06-10 14:46:01 +02:00
// Validate new target (TODO)
2019-01-13 14:26:36 +01:00
if ( target > = lsa & & target < limit )
2018-04-30 18:44:01 +02:00
{
// Check for redundancy
2018-05-10 18:38:07 +02:00
if ( ! m_block_info [ target / 4 ] )
2018-04-30 18:44:01 +02:00
{
2018-05-10 18:38:07 +02:00
m_block_info [ target / 4 ] = true ;
2018-06-10 14:46:01 +02:00
workload . push_back ( target ) ;
2018-04-30 18:44:01 +02:00
}
2018-05-13 19:34:11 +02:00
2018-06-10 14:46:01 +02:00
// Add predecessor
2019-05-05 15:28:41 +02:00
if ( m_preds [ target ] . find_first_of ( pos ) + 1 = = 0 )
2018-05-13 19:34:11 +02:00
{
2018-06-10 14:46:01 +02:00
m_preds [ target ] . push_back ( pos ) ;
2018-05-13 19:34:11 +02:00
}
2018-04-30 18:44:01 +02:00
}
} ;
2017-02-13 14:12:24 +01:00
2018-06-10 14:46:01 +02:00
if ( pos < lsa | | pos > = limit )
{
// Don't analyse if already beyond the limit
next_block ( ) ;
continue ;
}
2018-05-04 23:01:27 +02:00
const u32 data = ls [ pos / 4 ] ;
2018-04-30 18:44:01 +02:00
const auto op = spu_opcode_t { data } ;
2018-04-09 16:45:37 +02:00
2018-06-10 14:46:01 +02:00
wa + = 4 ;
2018-04-09 16:45:37 +02:00
2018-05-10 18:38:07 +02:00
m_targets . erase ( pos ) ;
2019-04-28 02:36:17 +02:00
// Fill register access info
2021-12-30 17:39:18 +01:00
if ( auto iflags = g_spu_iflag . decode ( data ) )
2019-04-28 02:36:17 +02:00
{
2020-02-18 15:09:38 +01:00
if ( + iflags & + spu_iflag : : use_ra )
2024-04-12 14:35:28 +02:00
m_use_ra . set ( pos / 4 ) ;
2020-02-18 15:09:38 +01:00
if ( + iflags & + spu_iflag : : use_rb )
2024-04-12 14:35:28 +02:00
m_use_rb . set ( pos / 4 ) ;
2020-02-18 15:09:38 +01:00
if ( + iflags & + spu_iflag : : use_rc )
2024-04-12 14:35:28 +02:00
m_use_rc . set ( pos / 4 ) ;
2019-04-28 02:36:17 +02:00
}
2018-04-30 18:44:01 +02:00

		// Analyse instruction
		switch (const auto type = g_spu_itype.decode(data))
		{
		case spu_itype::UNK:
		case spu_itype::DFCEQ:
		case spu_itype::DFCMEQ:
		case spu_itype::DFCGT:
		case spu_itype::DFCMGT:
		case spu_itype::DFTSV:
		{
			next_block();
			continue;
		}
		case spu_itype::SYNC:
		case spu_itype::STOP:
		case spu_itype::STOPD:
		{
			if (data == 0)
			{
				// Stop before null data
				next_block();
				continue;
			}

			if (g_cfg.core.spu_block_size == spu_block_size_type::safe)
			{
				// Stop on special instructions (TODO)
				m_targets[pos];
				next_block();
				break;
			}

			if (type == spu_itype::SYNC)
			{
				// Remember
				sync = true;
			}

			break;
		}
		case spu_itype::IRET:
		{
			if (op.d && op.e)
			{
				spu_log.error("[0x%x] Invalid interrupt flags (DE)", pos);
			}

			m_targets[pos];
			next_block();
			break;
		}
		case spu_itype::BI:
		case spu_itype::BISL:
		case spu_itype::BISLED:
		case spu_itype::BIZ:
		case spu_itype::BINZ:
		case spu_itype::BIHZ:
		case spu_itype::BIHNZ:
		{
			if (op.d && op.e)
			{
				spu_log.error("[0x%x] Invalid interrupt flags (DE)", pos);
			}

			const auto af = vflags[op.ra];
			const auto av = values[op.ra];
			const bool sl = type == spu_itype::BISL || type == spu_itype::BISLED;

			if (sl)
			{
				m_regmod[pos / 4] = op.rt;
				vflags[op.rt] = +vf::is_const;
				values[op.rt] = pos + 4;
			}

			if (af & vf::is_const)
			{
				const u32 target = spu_branch_target(av);

				spu_log.warning("[0x%x] At 0x%x: indirect branch to 0x%x%s", entry_point, pos, target, op.d ? " (D)" : op.e ? " (E)" : "");

				if (type == spu_itype::BI && target == pos + 4 && op.d)
				{
					// Disable interrupts idiom
					break;
				}

				m_targets[pos].push_back(target);

				if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
				{
					if (sync)
					{
						spu_log.notice("[0x%x] At 0x%x: ignoring %scall to 0x%x (SYNC)", entry_point, pos, sl ? "" : "tail ", target);

						if (target > entry_point)
						{
							limit = std::min<u32>(limit, target);
						}
					}
					else
					{
						m_entry_info[target / 4] = true;
						add_block(target);
					}
				}
				else if (target > entry_point)
				{
					limit = std::min<u32>(limit, target);
				}

				if (sl && g_cfg.core.spu_block_size != spu_block_size_type::safe)
				{
					m_ret_info[pos / 4 + 1] = true;
					m_entry_info[pos / 4 + 1] = true;
					m_targets[pos].push_back(pos + 4);
					add_block(pos + 4);
				}
			}
			else if (type == spu_itype::BI && g_cfg.core.spu_block_size != spu_block_size_type::safe && !op.d && !op.e && !sync)
			{
				// Analyse jump table (TODO)
				std::basic_string<u32> jt_abs;
				std::basic_string<u32> jt_rel;
				const u32 start = pos + 4;
				u64 dabs = 0;
				u64 drel = 0;

				for (u32 i = start; i < limit; i += 4)
				{
					const u32 target = ls[i / 4];

					if (target == 0 || target % 4)
					{
						// Address cannot be misaligned: abort
						break;
					}

					if (target >= lsa && target < 0x40000)
					{
						// Possible jump table entry (absolute)
						jt_abs.push_back(target);
					}

					if (target + start >= lsa && target + start < 0x40000)
					{
						// Possible jump table entry (relative)
						jt_rel.push_back(target + start);
					}

					if (std::max(jt_abs.size(), jt_rel.size()) * 4 + start <= i)
					{
						// Neither type of jump table completes
						jt_abs.clear();
						jt_rel.clear();
						break;
					}
				}

				// Choose position after the jt as an anchor and compute the average distance
				for (u32 target : jt_abs)
				{
					dabs += std::abs(static_cast<s32>(target - start - jt_abs.size() * 4));
				}

				for (u32 target : jt_rel)
				{
					drel += std::abs(static_cast<s32>(target - start - jt_rel.size() * 4));
				}

				// Add detected jump table blocks
				if (jt_abs.size() >= 3 || jt_rel.size() >= 3)
				{
					if (jt_abs.size() == jt_rel.size())
					{
						if (dabs < drel)
						{
							jt_rel.clear();
						}

						if (dabs > drel)
						{
							jt_abs.clear();
						}

						ensure(jt_abs.size() != jt_rel.size());
					}

					if (jt_abs.size() >= jt_rel.size())
					{
						const u32 new_size = (start - lsa) / 4 + ::size32(jt_abs);

						if (result.data.size() < new_size)
						{
							result.data.resize(new_size);
						}

						for (u32 i = 0; i < jt_abs.size(); i++)
						{
							add_block(jt_abs[i]);
							result.data[(start - lsa) / 4 + i] = std::bit_cast<u32, be_t<u32>>(jt_abs[i]);
							m_targets[start + i * 4];
						}

						m_targets.emplace(pos, std::move(jt_abs));
					}

					if (jt_rel.size() >= jt_abs.size())
					{
						const u32 new_size = (start - lsa) / 4 + ::size32(jt_rel);

						if (result.data.size() < new_size)
						{
							result.data.resize(new_size);
						}

						for (u32 i = 0; i < jt_rel.size(); i++)
						{
							add_block(jt_rel[i]);
							result.data[(start - lsa) / 4 + i] = std::bit_cast<u32, be_t<u32>>(jt_rel[i] - start);
							m_targets[start + i * 4];
						}

						m_targets.emplace(pos, std::move(jt_rel));
					}
				}
				else if (start + 12 * 4 < limit &&
					ls[start / 4 + 0] == 0x1ce00408u &&
					ls[start / 4 + 1] == 0x24000389u &&
					ls[start / 4 + 2] == 0x24004809u &&
					ls[start / 4 + 3] == 0x24008809u &&
					ls[start / 4 + 4] == 0x2400c809u &&
					ls[start / 4 + 5] == 0x24010809u &&
					ls[start / 4 + 6] == 0x24014809u &&
					ls[start / 4 + 7] == 0x24018809u &&
					ls[start / 4 + 8] == 0x1c200807u &&
					ls[start / 4 + 9] == 0x2401c809u)
				{
					spu_log.warning("[0x%x] Pattern 1 detected (hbr=0x%x:0x%x)", pos, hbr_loc, hbr_tg);

					// Add 8 targets (TODO)
					for (u32 addr = start + 4; addr < start + 36; addr += 4)
					{
						m_targets[pos].push_back(addr);
						add_block(addr);
					}
				}
				else if (hbr_loc > start && hbr_loc < limit && hbr_tg == start)
				{
					spu_log.warning("[0x%x] No patterns detected (hbr=0x%x:0x%x)", pos, hbr_loc, hbr_tg);
				}
			}
			else if (type == spu_itype::BI && sync)
			{
				spu_log.notice("[0x%x] At 0x%x: ignoring indirect branch (SYNC)", entry_point, pos);
			}

			if (type == spu_itype::BI || sl)
			{
				if (type == spu_itype::BI || g_cfg.core.spu_block_size == spu_block_size_type::safe)
				{
					m_targets[pos];
				}
				else
				{
					m_ret_info[pos / 4 + 1] = true;
					m_entry_info[pos / 4 + 1] = true;
					m_targets[pos].push_back(pos + 4);
					add_block(pos + 4);
				}
			}
			else
			{
				m_targets[pos].push_back(pos + 4);
				add_block(pos + 4);
			}

			next_block();
			break;
		}
		case spu_itype::BRSL:
		case spu_itype::BRASL:
		{
			const u32 target = spu_branch_target(type == spu_itype::BRASL ? 0 : pos, op.i16);

			m_regmod[pos / 4] = op.rt;
			vflags[op.rt] = +vf::is_const;
			values[op.rt] = pos + 4;

			if (type == spu_itype::BRSL && target == pos + 4)
			{
				// Get next instruction address idiom
				break;
			}

			m_targets[pos].push_back(target);

			if (g_cfg.core.spu_block_size != spu_block_size_type::safe)
			{
				m_ret_info[pos / 4 + 1] = true;
				m_entry_info[pos / 4 + 1] = true;
				m_targets[pos].push_back(pos + 4);
				add_block(pos + 4);
			}

			if (g_cfg.core.spu_block_size == spu_block_size_type::giga && !sync)
			{
				m_entry_info[target / 4] = true;
				add_block(target);
			}
			else
			{
				if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
				{
					spu_log.notice("[0x%x] At 0x%x: ignoring fixed call to 0x%x (SYNC)", entry_point, pos, target);
				}

				if (target > entry_point)
				{
					limit = std::min<u32>(limit, target);
				}
			}

			next_block();
			break;
		}
		case spu_itype::BRA:
		{
			const u32 target = spu_branch_target(0, op.i16);

			if (g_cfg.core.spu_block_size == spu_block_size_type::giga && !sync)
			{
				m_entry_info[target / 4] = true;
			}
			else
			{
				m_targets[pos].push_back(target);

				if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
				{
					spu_log.notice("[0x%x] At 0x%x: ignoring fixed tail call to 0x%x (SYNC)", entry_point, pos, target);
				}
			}

			add_block(target);
			next_block();
			break;
		}
		case spu_itype::BR:
		case spu_itype::BRZ:
		case spu_itype::BRNZ:
		case spu_itype::BRHZ:
		case spu_itype::BRHNZ:
		{
			const u32 target = spu_branch_target(pos, op.i16);

			if (target == pos + 4)
			{
				// Nop
				break;
			}

			m_targets[pos].push_back(target);
			add_block(target);

			if (type != spu_itype::BR)
			{
				m_targets[pos].push_back(pos + 4);
				add_block(pos + 4);
			}

			next_block();
			break;
		}
		case spu_itype::DSYNC:
		case spu_itype::HEQ:
		case spu_itype::HEQI:
		case spu_itype::HGT:
		case spu_itype::HGTI:
		case spu_itype::HLGT:
		case spu_itype::HLGTI:
		case spu_itype::LNOP:
		case spu_itype::NOP:
		case spu_itype::MTSPR:
		case spu_itype::FSCRWR:
		case spu_itype::STQA:
		case spu_itype::STQD:
		case spu_itype::STQR:
		case spu_itype::STQX:
		{
			// Do nothing
			break;
		}
		case spu_itype::WRCH:
		{
			switch (op.ra)
			{
			case MFC_EAL:
			{
				m_regmod[pos / 4] = s_reg_mfc_eal;
				break;
			}
			case MFC_LSA:
			{
				m_regmod[pos / 4] = s_reg_mfc_lsa;
				break;
			}
			case MFC_TagID:
			{
				m_regmod[pos / 4] = s_reg_mfc_tag;
				break;
			}
			case MFC_Size:
			{
				m_regmod[pos / 4] = s_reg_mfc_size;
				break;
			}
			default: break;
			}

			break;
		}
		case spu_itype::LQA:
		case spu_itype::LQD:
		case spu_itype::LQR:
		case spu_itype::LQX:
		{
			// Unconst
			m_regmod[pos / 4] = op.rt;
			vflags[op.rt] = {};
			break;
		}
		case spu_itype::HBR:
		{
			hbr_loc = spu_branch_target(pos, op.roh << 7 | op.rt);
			hbr_tg = vflags[op.ra] & vf::is_const && !op.c ? values[op.ra] & 0x3fffc : -1;
			break;
		}
		case spu_itype::HBRA:
		{
			hbr_loc = spu_branch_target(pos, op.r0h << 7 | op.rt);
			hbr_tg = spu_branch_target(0x0, op.i16);
			break;
		}
		case spu_itype::HBRR:
		{
			hbr_loc = spu_branch_target(pos, op.r0h << 7 | op.rt);
			hbr_tg = spu_branch_target(pos, op.i16);
			break;
		}
		case spu_itype::IL:
		{
			m_regmod[pos / 4] = op.rt;
			vflags[op.rt] = +vf::is_const;
			values[op.rt] = op.si16;
			break;
		}
		case spu_itype::ILA:
		{
			m_regmod[pos / 4] = op.rt;
			vflags[op.rt] = +vf::is_const;
			values[op.rt] = op.i18;
			break;
		}
		case spu_itype::ILH:
		{
			m_regmod[pos / 4] = op.rt;
			vflags[op.rt] = +vf::is_const;
			values[op.rt] = op.i16 << 16 | op.i16;
			break;
		}
		case spu_itype::ILHU:
		{
			m_regmod[pos / 4] = op.rt;
			vflags[op.rt] = +vf::is_const;
			values[op.rt] = op.i16 << 16;
			break;
		}
		case spu_itype::IOHL:
		{
			m_regmod[pos / 4] = op.rt;
			values[op.rt] = values[op.rt] | op.i16;
			break;
		}
		case spu_itype::ORI:
		{
			m_regmod[pos / 4] = op.rt;
			vflags[op.rt] = vflags[op.ra] & vf::is_const;
			values[op.rt] = values[op.ra] | op.si10;
			break;
		}
		case spu_itype::OR:
		{
			m_regmod[pos / 4] = op.rt;
			vflags[op.rt] = vflags[op.ra] & vflags[op.rb] & vf::is_const;
			values[op.rt] = values[op.ra] | values[op.rb];
			break;
		}
		case spu_itype::ANDI:
		{
			m_regmod[pos / 4] = op.rt;
			vflags[op.rt] = vflags[op.ra] & vf::is_const;
			values[op.rt] = values[op.ra] & op.si10;
			break;
		}
		case spu_itype::AND:
		{
			m_regmod[pos / 4] = op.rt;
			vflags[op.rt] = vflags[op.ra] & vflags[op.rb] & vf::is_const;
			values[op.rt] = values[op.ra] & values[op.rb];
			break;
		}
		case spu_itype::AI:
		{
			m_regmod[pos / 4] = op.rt;
			vflags[op.rt] = vflags[op.ra] & vf::is_const;
			values[op.rt] = values[op.ra] + op.si10;
			break;
		}
		case spu_itype::A:
		{
			m_regmod[pos / 4] = op.rt;
			vflags[op.rt] = vflags[op.ra] & vflags[op.rb] & vf::is_const;
			values[op.rt] = values[op.ra] + values[op.rb];
			break;
		}
		case spu_itype::SFI:
		{
			m_regmod[pos / 4] = op.rt;
			vflags[op.rt] = vflags[op.ra] & vf::is_const;
			values[op.rt] = op.si10 - values[op.ra];
			break;
		}
		case spu_itype::SF:
		{
			m_regmod[pos / 4] = op.rt;
			vflags[op.rt] = vflags[op.ra] & vflags[op.rb] & vf::is_const;
			values[op.rt] = values[op.rb] - values[op.ra];
			break;
		}
		case spu_itype::ROTMI:
		{
			m_regmod[pos / 4] = op.rt;

			if ((0 - op.i7) & 0x20)
			{
				vflags[op.rt] = +vf::is_const;
				values[op.rt] = 0;
				break;
			}

			vflags[op.rt] = vflags[op.ra] & vf::is_const;
			values[op.rt] = values[op.ra] >> ((0 - op.i7) & 0x1f);
			break;
		}
		case spu_itype::SHLI:
		{
			m_regmod[pos / 4] = op.rt;

			if (op.i7 & 0x20)
			{
				vflags[op.rt] = +vf::is_const;
				values[op.rt] = 0;
				break;
			}

			vflags[op.rt] = vflags[op.ra] & vf::is_const;
			values[op.rt] = values[op.ra] << (op.i7 & 0x1f);
			break;
		}
		default:
		{
			// Unconst
			const u32 op_rt = type & spu_itype::_quadrop ? +op.rt4 : +op.rt;
			m_regmod[pos / 4] = op_rt;
			vflags[op_rt] = {};
			break;
		}
		}
		// Insert raw instruction value
		const u32 new_size = (pos - lsa) / 4;

		if (result.data.size() <= new_size)
		{
			if (result.data.size() < new_size)
			{
				result.data.resize(new_size);
			}

			result.data.emplace_back(std::bit_cast<u32, be_t<u32>>(data));
		}
		else if (u32& raw_val = result.data[new_size])
		{
			ensure(raw_val == std::bit_cast<u32, be_t<u32>>(data));
		}
		else
		{
			raw_val = std::bit_cast<u32, be_t<u32>>(data);
		}
	}
	while (lsa > 0 || limit < 0x40000)
	{
		const u32 initial_size = ::size32(result.data);

		// Check unreachable blocks
		limit = std::min<u32>(limit, lsa + initial_size * 4);

		for (auto& pair : m_preds)
		{
			bool reachable = false;

			if (pair.first >= limit)
			{
				continue;
			}

			// All (direct and indirect) predecessors to check
			std::basic_string<u32> workload;

			// Bit array used to deduplicate workload list
			workload.push_back(pair.first);
			m_bits[pair.first / 4] = true;

			for (usz i = 0; !reachable && i < workload.size(); i++)
			{
				for (u32 j = workload[i];; j -= 4)
				{
					// Go backward from an address until the entry point is reached
					if (j == entry_point)
					{
						reachable = true;
						break;
					}

					const auto found = m_preds.find(j);

					bool had_fallthrough = false;

					if (found != m_preds.end())
					{
						for (u32 new_pred : found->second)
						{
							// Check whether the predecessor is previous instruction
							if (new_pred == j - 4)
							{
								had_fallthrough = true;
								continue;
							}

							// Check whether in range and not already added
							if (new_pred >= lsa && new_pred < limit && !m_bits[new_pred / 4])
							{
								workload.push_back(new_pred);
								m_bits[new_pred / 4] = true;
							}
						}
					}

					// Check for possible fallthrough predecessor
					if (!had_fallthrough)
					{
						if (::at32(result.data, (j - lsa) / 4 - 1) == 0 || m_targets.count(j - 4))
						{
							break;
						}
					}

					if (i == 0)
					{
						// TODO
					}
				}
			}

			for (u32 pred : workload)
			{
				m_bits[pred / 4] = false;
			}

			if (!reachable && pair.first < limit)
			{
				limit = pair.first;
			}
		}

		result.data.resize((limit - lsa) / 4);

		// Check holes in safe mode (TODO)
		u32 valid_size = 0;

		for (u32 i = 0; i < result.data.size(); i++)
		{
			if (result.data[i] == 0)
			{
				const u32 pos  = lsa + i * 4;
				const u32 data = ls[pos / 4];

				// Allow only NOP or LNOP instructions in holes
				if (data == 0x200000 || (data & 0xffffff80) == 0x40200000)
				{
					continue;
				}

				if (g_cfg.core.spu_block_size != spu_block_size_type::giga)
				{
					result.data.resize(valid_size);
					break;
				}
			}
			else
			{
				valid_size = i + 1;
			}
		}

		// Even if NOP or LNOP, should be removed at the end
		result.data.resize(valid_size);

		// Repeat if blocks were removed
		if (result.data.size() == initial_size)
		{
			break;
		}
	}
	limit = std::min<u32>(limit, lsa + ::size32(result.data) * 4);

	m_inst_attrs.resize(result.data.size());

	// Cleanup block info
	for (u32 i = 0; i < workload.size(); i++)
	{
		const u32 addr = workload[i];

		if (addr < lsa || addr >= limit || !result.data[(addr - lsa) / 4])
		{
			m_block_info[addr / 4] = false;
			m_entry_info[addr / 4] = false;
			m_ret_info[addr / 4] = false;
			m_preds.erase(addr);
		}
	}
	// Complete m_preds and associated m_targets for adjacent blocks
	for (auto it = m_preds.begin(); it != m_preds.end();)
	{
		if (it->first < lsa || it->first >= limit)
		{
			it = m_preds.erase(it);
			continue;
		}

		// Erase impossible predecessors
		const auto new_end = std::remove_if(it->second.begin(), it->second.end(), [&](u32 addr)
		{
			return addr < lsa || addr >= limit;
		});

		it->second.erase(new_end, it->second.end());

		// Don't add fallthrough target if all predecessors are removed
		if (it->second.empty() && !m_entry_info[it->first / 4])
		{
			// If not an entry point, remove the block completely
			m_block_info[it->first / 4] = false;
			it = m_preds.erase(it);
			continue;
		}

		// Previous instruction address
		const u32 prev = (it->first - 4) & 0x3fffc;

		// TODO: check the correctness
		if (m_targets.count(prev) == 0 && prev >= lsa && prev < limit && result.data[(prev - lsa) / 4])
		{
			// Add target and the predecessor
			m_targets[prev].push_back(it->first);
			it->second.push_back(prev);
		}

		it++;
	}
	if (out_target_list)
	{
		out_target_list->insert(m_targets.begin(), m_targets.end());
	}

	// Remove unnecessary target lists
	for (auto it = m_targets.begin(); it != m_targets.end();)
	{
		if (it->first < lsa || it->first >= limit)
		{
			it = m_targets.erase(it);
			continue;
		}

		it++;
	}
	// Fill holes which contain only NOP and LNOP instructions (TODO: compile)
	for (u32 i = 0, nnop = 0, vsize = 0; i <= result.data.size(); i++)
	{
		if (i >= result.data.size() || result.data[i])
		{
			if (nnop && nnop == i - vsize)
			{
				// Write only complete NOP sequence
				for (u32 j = vsize; j < i; j++)
				{
					result.data[j] = std::bit_cast<u32, be_t<u32>>(ls[lsa / 4 + j]);
				}
			}

			nnop = 0;
			vsize = i + 1;
		}
		else
		{
			const u32 pos = lsa + i * 4;
			const u32 data = ls[pos / 4];

			if (data == 0x200000 || (data & 0xffffff80) == 0x40200000)
			{
				nnop++;
			}
		}
	}
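	// Note (added commentary): the pass below summarizes each basic block for later
	// passes: which registers it reads before writing (reg_use), which it writes
	// (reg_mod), and which are stored to the stack before any use (reg_save_dom).
	// For example, a prologue like "stqd $80, -0x10($sp)" seen before any read of $80
	// marks reg_save_dom[80], a hint that the save/restore pair may be elidable.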
	// Fill block info
	for (auto& pred : m_preds)
	{
		auto& block = m_bbs[pred.first];

		// Copy predecessors (wrong at this point, needs a fixup later)
		block.preds = pred.second;
		// Fill register usage info
		for (u32 ia = pred.first; ia < limit; ia += 4)
		{
			block.size++;

			// Decode instruction
			const spu_opcode_t op{std::bit_cast<be_t<u32>>(result.data[(ia - lsa) / 4])};

			const auto type = g_spu_itype.decode(op.opcode);

			u8 reg_save = 255;

			if (type == spu_itype::STQD && op.ra == s_reg_sp && !block.reg_mod[op.rt] && !block.reg_use[op.rt])
			{
				// Register saved onto the stack before use
				block.reg_save_dom[op.rt] = true;

				reg_save = op.rt;
			}

			for (auto _use : std::initializer_list<std::pair<u32, bool>>{{op.ra, m_use_ra.test(ia / 4)}
				, {op.rb, m_use_rb.test(ia / 4)}, {op.rc, m_use_rc.test(ia / 4)}})
			{
				if (_use.second)
				{
					const u32 reg = _use.first;

					// Register reg use only if it happens before reg mod
					if (!block.reg_mod[reg])
					{
						block.reg_use.set(reg);

						if (reg_save != reg && block.reg_save_dom[reg])
						{
							// Register is still used after saving; probably not eligible for optimization
							block.reg_save_dom[reg] = false;
						}
					}
				}
			}

			if (type == spu_itype::WRCH && op.ra == MFC_Cmd)
			{
				// Expand MFC_Cmd reg use
				for (u8 reg : {s_reg_mfc_lsa, s_reg_mfc_tag, s_reg_mfc_size})
				{
					if (!block.reg_mod[reg])
						block.reg_use.set(reg);
				}
			}

			// Register reg modification
			if (u8 reg = m_regmod[ia / 4]; reg < s_reg_max)
			{
				block.reg_mod.set(reg);
				block.reg_mod_xf.set(reg, type & spu_itype::xfloat);

				if (type == spu_itype::SELB && (block.reg_mod_xf[op.ra] || block.reg_mod_xf[op.rb]))
					block.reg_mod_xf.set(reg);

				// Possible post-dominating register load
				if (type == spu_itype::LQD && op.ra == s_reg_sp)
					block.reg_load_mod[reg] = ia + 1;
				else
					block.reg_load_mod[reg] = 0;
			}

			// Find targets (also means end of the block)
			const auto tfound = m_targets.find(ia);

			if (tfound != m_targets.end())
			{
				// Copy targets
				block.targets = tfound->second;

				// Assume that the call reads and modifies all volatile registers (TODO)
				bool is_call = false;
				bool is_tail = false;

				switch (type)
				{
				case spu_itype::BRSL:
					is_call = spu_branch_target(ia, op.i16) != ia + 4;
					break;
				case spu_itype::BRASL:
					is_call = spu_branch_target(0, op.i16) != ia + 4;
					break;
				case spu_itype::BISL:
				case spu_itype::BISLED:
					is_call = true;
					break;
				default:
					break;
				}

				if (is_call)
				{
					for (u32 i = 0; i < s_reg_max; ++i)
					{
						if (i == s_reg_lr || (i >= 2 && i < s_reg_80) || i > s_reg_127)
						{
							if (!block.reg_mod[i])
								block.reg_use.set(i);

							if (!is_tail)
							{
								block.reg_mod.set(i);
								block.reg_mod_xf[i] = false;
							}
						}
					}
				}

				break;
			}
		}
	}
	// Fixup block predecessors to point to basic blocks, not last instructions
	for (auto& bb : m_bbs)
	{
		const u32 addr = bb.first;

		for (u32& pred : bb.second.preds)
		{
			pred = std::prev(m_bbs.upper_bound(pred))->first;
		}

		if (m_entry_info[addr / 4] && g_cfg.core.spu_block_size == spu_block_size_type::giga)
		{
			// Register empty chunk
			m_chunks.push_back(addr);

			// Register function if necessary
			if (!m_ret_info[addr / 4])
			{
				m_funcs[addr];
			}
		}
	}

	// Ensure there is a function at the lowest address
	if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
	{
		if (auto emp = m_funcs.try_emplace(m_bbs.begin()->first); emp.second)
		{
			const u32 addr = emp.first->first;
			spu_log.error("[0x%05x] Fixed first function at 0x%05x", entry_point, addr);
			m_entry_info[addr / 4] = true;
			m_ret_info[addr / 4] = false;
		}
	}
	// Split functions
	while (g_cfg.core.spu_block_size == spu_block_size_type::giga)
	{
		bool need_repeat = false;

		u32 start = 0;
		u32 limit = 0x40000;

		// Walk block list in ascending order
		for (auto& block : m_bbs)
		{
			const u32 addr = block.first;

			if (m_entry_info[addr / 4] && !m_ret_info[addr / 4])
			{
				const auto upper = m_funcs.upper_bound(addr);
				start = addr;
				limit = upper == m_funcs.end() ? 0x40000 : upper->first;
			}

			// Find targets that exceed [start; limit) range and make new functions from them
			for (u32 target : block.second.targets)
			{
				const auto tfound = m_bbs.find(target);

				if (tfound == m_bbs.end())
				{
					continue;
				}

				if (target < start || target >= limit)
				{
					if (!m_entry_info[target / 4] || m_ret_info[target / 4])
					{
						// Create new function entry (likely a tail call)
						m_entry_info[target / 4] = true;

						m_ret_info[target / 4] = false;

						m_funcs.try_emplace(target);

						if (target < limit)
						{
							need_repeat = true;
						}
					}
				}
			}

			block.second.func = start;
		}

		if (!need_repeat)
		{
			break;
		}
	}

	if (!m_bbs.count(entry_point))
	{
		// Invalid code
		spu_log.error("[0x%x] Invalid code", entry_point);
		return {};
	}
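	// Note (added commentary): chunk propagation below is a fixed-point flood fill.
	// Every block inherits the chunk of its sole entry; when two different chunks
	// reach the same block, that block is promoted to an artificial entry point
	// (marked reversible via m_ret_info) and the walk restarts with chunks reset.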
	// Fill entry map
	while (true)
	{
		workload.clear();
		workload.push_back(entry_point);
		ensure(m_bbs.count(entry_point));

		std::basic_string<u32> new_entries;

		for (u32 wi = 0; wi < workload.size(); wi++)
		{
			const u32 addr = workload[wi];
			auto& block = ::at32(m_bbs, addr);
			const u32 _new = block.chunk;

			if (!m_entry_info[addr / 4])
			{
				// Check block predecessors
				for (u32 pred : block.preds)
				{
					const u32 _old = ::at32(m_bbs, pred).chunk;

					if (_old < 0x40000 && _old != _new)
					{
						// If block has multiple 'entry' points, it becomes an entry point itself
						new_entries.push_back(addr);
					}
				}
			}

			// Update chunk address
			block.chunk = m_entry_info[addr / 4] ? addr : _new;

			// Process block targets
			for (u32 target : block.targets)
			{
				const auto tfound = m_bbs.find(target);

				if (tfound == m_bbs.end())
				{
					continue;
				}

				auto& tb = tfound->second;

				const u32 value = m_entry_info[target / 4] ? target : block.chunk;

				if (u32& tval = tb.chunk; tval < 0x40000)
				{
					// TODO: fix condition
					if (tval != value && !m_entry_info[target / 4])
					{
						new_entries.push_back(target);
					}
				}
				else
				{
					tval = value;
					workload.emplace_back(target);
				}
			}
		}

		if (new_entries.empty())
		{
			break;
		}

		for (u32 entry : new_entries)
		{
			m_entry_info[entry / 4] = true;

			// Acknowledge artificial (reversible) chunk entry point
			m_ret_info[entry / 4] = true;
		}

		for (auto& bb : m_bbs)
		{
			// Reset chunk info
			bb.second.chunk = 0x40000;
		}
	}
	workload.clear();
	workload.push_back(entry_point);

	// Fill workload adding targets
	for (u32 wi = 0; wi < workload.size(); wi++)
	{
		const u32 addr = workload[wi];
		auto& block = ::at32(m_bbs, addr);
		block.analysed = true;

		for (u32 target : block.targets)
		{
			const auto tfound = m_bbs.find(target);

			if (tfound == m_bbs.end())
			{
				continue;
			}

			auto& tb = tfound->second;

			if (!tb.analysed)
			{
				workload.push_back(target);
				tb.analysed = true;
			}

			// Limited xfloat hint propagation (possibly TODO)
			if (tb.chunk == block.chunk)
			{
				tb.reg_maybe_xf &= block.reg_mod_xf;
			}
			else
			{
				tb.reg_maybe_xf.reset();
			}
		}

		block.reg_origin.fill(0x80000000);
		block.reg_origin_abs.fill(0x80000000);
	}
	// Fill register origin info
	while (true)
	{
		bool must_repeat = false;

		for (u32 wi = 0; wi < workload.size(); wi++)
		{
			const u32 addr = workload[wi];
			auto& block = ::at32(m_bbs, addr);

			// Initialize entry point with default value: unknown origin (requires load)
			if (m_entry_info[addr / 4])
			{
				for (u32 i = 0; i < s_reg_max; i++)
				{
					if (block.reg_origin[i] == 0x80000000)
						block.reg_origin[i] = 0x40000;
				}
			}

			if (g_cfg.core.spu_block_size == spu_block_size_type::giga && m_entry_info[addr / 4] && !m_ret_info[addr / 4])
			{
				for (u32 i = 0; i < s_reg_max; i++)
				{
					if (block.reg_origin_abs[i] == 0x80000000)
						block.reg_origin_abs[i] = 0x40000;
					else if (block.reg_origin_abs[i] + 1 == 0)
						block.reg_origin_abs[i] = -2;
				}
			}

			for (u32 target : block.targets)
			{
				const auto tfound = m_bbs.find(target);

				if (tfound == m_bbs.end())
				{
					continue;
				}

				auto& tb = tfound->second;

				for (u32 i = 0; i < s_reg_max; i++)
				{
					if (tb.chunk == block.chunk && tb.reg_origin[i] + 1)
					{
						const u32 expected = block.reg_mod[i] ? addr : block.reg_origin[i];

						if (tb.reg_origin[i] == 0x80000000)
						{
							tb.reg_origin[i] = expected;
						}
						else if (tb.reg_origin[i] != expected)
						{
							// Set -1 if multiple origins merged (requires PHI node)
							tb.reg_origin[i] = -1;

							must_repeat |= !tb.targets.empty();
						}
					}

					if (g_cfg.core.spu_block_size == spu_block_size_type::giga && tb.func == block.func && tb.reg_origin_abs[i] + 2)
					{
						const u32 expected = block.reg_mod[i] ? addr : block.reg_origin_abs[i];

						if (tb.reg_origin_abs[i] == 0x80000000)
						{
							tb.reg_origin_abs[i] = expected;
						}
						else if (tb.reg_origin_abs[i] != expected)
						{
							if (tb.reg_origin_abs[i] == 0x40000 || expected + 2 == 0 || expected == 0x40000)
							{
								// Set -2: sticky value indicating possible external reg origin (0x40000)
								tb.reg_origin_abs[i] = -2;

								must_repeat |= !tb.targets.empty();
							}
							else if (tb.reg_origin_abs[i] + 1)
							{
								tb.reg_origin_abs[i] = -1;

								must_repeat |= !tb.targets.empty();
							}
						}
					}
				}
			}
		}

		if (!must_repeat)
		{
			break;
		}

		for (u32 wi = 0; wi < workload.size(); wi++)
		{
			const u32 addr = workload[wi];
			auto& block = ::at32(m_bbs, addr);

			// Reset values for the next attempt (keep negative values)
			for (u32 i = 0; i < s_reg_max; i++)
			{
				if (block.reg_origin[i] <= 0x40000)
					block.reg_origin[i] = 0x80000000;

				if (block.reg_origin_abs[i] <= 0x40000)
					block.reg_origin_abs[i] = 0x80000000;
			}
		}
	}
	// Fill more block info
	for (u32 wi = 0; wi < workload.size(); wi++)
	{
		if (g_cfg.core.spu_block_size != spu_block_size_type::giga)
		{
			break;
		}

		const u32 addr = workload[wi];
		auto& bb = ::at32(m_bbs, addr);
		auto& func = ::at32(m_funcs, bb.func);

		// Update function size
		func.size = std::max<u16>(func.size, bb.size + (addr - bb.func) / 4);

		// Copy constants according to reg origin info
		for (u32 i = 0; i < s_reg_max; i++)
		{
			const u32 orig = bb.reg_origin_abs[i];

			if (orig < 0x40000)
			{
				auto& src = ::at32(m_bbs, orig);
				bb.reg_const[i] = src.reg_const[i];
				bb.reg_val32[i] = src.reg_val32[i];
			}

			if (!bb.reg_save_dom[i] && bb.reg_use[i] && (orig == 0x40000 || orig + 2 == 0))
			{
				// Destroy offset if external reg value is used
				func.reg_save_off[i] = -1;
			}
		}

		if (u32 orig = bb.reg_origin_abs[s_reg_sp]; orig < 0x40000)
		{
			auto& prologue = ::at32(m_bbs, orig);

			// Copy stack offset (from the assumed prologue)
			bb.stack_sub = prologue.stack_sub;
		}
		else if (orig > 0x40000)
		{
			// Unpredictable stack
			bb.stack_sub = 0x80000000;
		}

		spu_opcode_t op{};

		auto last_inst = spu_itype::UNK;

		for (u32 ia = addr; ia < addr + bb.size * 4; ia += 4)
		{
			// Decode instruction again
			op.opcode = std::bit_cast<be_t<u32>>(result.data[(ia - lsa) / 4]);
			last_inst = g_spu_itype.decode(op.opcode);
			// Propagate some constants
			switch (last_inst)
			{
			case spu_itype::IL:
			{
				bb.reg_const[op.rt] = true;
				bb.reg_val32[op.rt] = op.si16;
				break;
			}
			case spu_itype::ILA:
			{
				bb.reg_const[op.rt] = true;
				bb.reg_val32[op.rt] = op.i18;
				break;
			}
			case spu_itype::ILHU:
			{
				bb.reg_const[op.rt] = true;
				bb.reg_val32[op.rt] = op.i16 << 16;
				break;
			}
			case spu_itype::ILH:
			{
				bb.reg_const[op.rt] = true;
				bb.reg_val32[op.rt] = op.i16 << 16 | op.i16;
				break;
			}
			case spu_itype::IOHL:
			{
				bb.reg_val32[op.rt] = bb.reg_val32[op.rt] | op.i16;
				break;
			}
			case spu_itype::ORI:
			{
				bb.reg_const[op.rt] = bb.reg_const[op.ra];
				bb.reg_val32[op.rt] = bb.reg_val32[op.ra] | op.si10;
				break;
			}
			case spu_itype::OR:
			{
				bb.reg_const[op.rt] = bb.reg_const[op.ra] && bb.reg_const[op.rb];
				bb.reg_val32[op.rt] = bb.reg_val32[op.ra] | bb.reg_val32[op.rb];
				break;
			}
			case spu_itype::AI:
			{
				bb.reg_const[op.rt] = bb.reg_const[op.ra];
				bb.reg_val32[op.rt] = bb.reg_val32[op.ra] + op.si10;
				break;
			}
			case spu_itype::A:
			{
				bb.reg_const[op.rt] = bb.reg_const[op.ra] && bb.reg_const[op.rb];
				bb.reg_val32[op.rt] = bb.reg_val32[op.ra] + bb.reg_val32[op.rb];
				break;
			}
			case spu_itype::SFI:
			{
				bb.reg_const[op.rt] = bb.reg_const[op.ra];
				bb.reg_val32[op.rt] = op.si10 - bb.reg_val32[op.ra];
				break;
			}
			case spu_itype::SF:
			{
				bb.reg_const[op.rt] = bb.reg_const[op.ra] && bb.reg_const[op.rb];
				bb.reg_val32[op.rt] = bb.reg_val32[op.rb] - bb.reg_val32[op.ra];
				break;
			}
			case spu_itype::STQD:
			{
				if (op.ra == s_reg_sp && bb.stack_sub != 0x80000000 && bb.reg_save_dom[op.rt])
				{
					const u32 offset = 0x80000000 + op.si10 * 16 - bb.stack_sub;

					if (func.reg_save_off[op.rt] == 0)
					{
						// Store reg save offset
						func.reg_save_off[op.rt] = offset;
					}
					else if (func.reg_save_off[op.rt] != offset)
					{
						// Conflict of different offsets
						func.reg_save_off[op.rt] = -1;
					}
				}

				break;
			}
			case spu_itype::LQD:
			{
				if (op.ra == s_reg_sp && bb.stack_sub != 0x80000000 && bb.reg_load_mod[op.rt] == ia + 1)
				{
					// Adjust reg load offset
					bb.reg_load_mod[op.rt] = 0x80000000 + op.si10 * 16 - bb.stack_sub;
				}

				// Clear const
				bb.reg_const[op.rt] = false;
				break;
			}
			default:
			{
				// Clear const if reg is modified here
				if (u8 reg = m_regmod[ia / 4]; reg < s_reg_max)
					bb.reg_const[reg] = false;
				break;
			}
			}
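			// Note (added commentary): stack_sub accumulates how far $SP has been
			// lowered since function entry, with 0x80000000 meaning "unpredictable".
			// For example, a typical prologue "ai $sp, $sp, -0x50" takes the AI case
			// below and yields stack_sub == 0x50; the matching "ai $sp, $sp, 0x50"
			// in the epilogue brings it back to zero before a valid return.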
			// $SP is modified
			if (m_regmod[ia / 4] == s_reg_sp)
			{
				if (bb.reg_const[s_reg_sp])
				{
					// Making $SP a constant is a funny thing too.
					bb.stack_sub = 0x80000000;
				}

				if (bb.stack_sub != 0x80000000)
				{
					switch (last_inst)
					{
					case spu_itype::AI:
					{
						if (op.ra == s_reg_sp)
							bb.stack_sub -= op.si10;
						else
							bb.stack_sub = 0x80000000;
						break;
					}
					case spu_itype::A:
					{
						if (op.ra == s_reg_sp && bb.reg_const[op.rb])
							bb.stack_sub -= bb.reg_val32[op.rb];
						else if (op.rb == s_reg_sp && bb.reg_const[op.ra])
							bb.stack_sub -= bb.reg_val32[op.ra];
						else
							bb.stack_sub = 0x80000000;
						break;
					}
					case spu_itype::SF:
					{
						if (op.rb == s_reg_sp && bb.reg_const[op.ra])
							bb.stack_sub += bb.reg_val32[op.ra];
						else
							bb.stack_sub = 0x80000000;
						break;
					}
					default:
					{
						bb.stack_sub = 0x80000000;
						break;
					}
					}
				}

				// Check for funny values.
				if (bb.stack_sub >= 0x40000 || bb.stack_sub % 16)
				{
					bb.stack_sub = 0x80000000;
				}
			}
		}
		// Analyse terminator instruction
		const u32 tia = addr + bb.size * 4 - 4;

		switch (last_inst)
		{
		case spu_itype::BR:
		case spu_itype::BRNZ:
		case spu_itype::BRZ:
		case spu_itype::BRHNZ:
		case spu_itype::BRHZ:
		case spu_itype::BRSL:
		{
			const u32 target = spu_branch_target(tia, op.i16);

			if (target == tia + 4)
			{
				bb.terminator = term_type::fallthrough;
			}
			else if (last_inst != spu_itype::BRSL)
			{
				// No-op terminator or simple branch instruction
				bb.terminator = term_type::br;

				if (target == bb.func)
				{
					// Recursive tail call
					bb.terminator = term_type::ret;
				}
			}
			else if (op.rt == s_reg_lr)
			{
				bb.terminator = term_type::call;
			}
			else
			{
				bb.terminator = term_type::interrupt_call;
			}

			break;
		}
		case spu_itype::BRA:
		case spu_itype::BRASL:
		{
			bb.terminator = term_type::indirect_call;
			break;
		}
		case spu_itype::BI:
		{
			if (op.d || op.e || bb.targets.size() == 1)
			{
				bb.terminator = term_type::interrupt_call;
			}
			else if (bb.targets.size() > 1)
			{
				// Jump table
				bb.terminator = term_type::br;
			}
			else if (op.ra == s_reg_lr)
			{
				// Return (TODO)
				bb.terminator = term_type::ret;
			}
			else
			{
				// Indirect tail call (TODO)
				bb.terminator = term_type::interrupt_call;
			}

			break;
		}
		case spu_itype::BISLED:
		case spu_itype::IRET:
		{
			bb.terminator = term_type::interrupt_call;
			break;
		}
		case spu_itype::BISL:
		case spu_itype::BIZ:
		case spu_itype::BINZ:
		case spu_itype::BIHZ:
		case spu_itype::BIHNZ:
		{
			if (op.d || op.e || bb.targets.size() != 1)
			{
				bb.terminator = term_type::interrupt_call;
			}
			else if (last_inst != spu_itype::BISL && bb.targets[0] == tia + 4 && op.ra == s_reg_lr)
			{
				// Conditional return (TODO)
				bb.terminator = term_type::ret;
			}
			else if (last_inst == spu_itype::BISL)
			{
				// Indirect call
				bb.terminator = term_type::indirect_call;
			}
			else
			{
				// TODO
				bb.terminator = term_type::interrupt_call;
			}

			break;
		}
		default:
		{
			// Normal instruction
			bb.terminator = term_type::fallthrough;
			break;
		}
		}
	}
	// Check function blocks, verify and print some reasons
	for (auto& f : m_funcs)
	{
		if (g_cfg.core.spu_block_size != spu_block_size_type::giga)
		{
			break;
		}

		bool is_ok = true;

		u32 used_stack = 0;

		for (auto it = m_bbs.lower_bound(f.first); it != m_bbs.end() && it->second.func == f.first; ++it)
		{
			auto& bb = it->second;
			auto& func = ::at32(m_funcs, bb.func);
			const u32 addr = it->first;
			const u32 flim = bb.func + func.size * 4;

			used_stack |= bb.stack_sub;

			if (is_ok && bb.terminator >= term_type::indirect_call)
			{
				is_ok = false;
			}

			if (is_ok && bb.terminator == term_type::ret)
			{
				// Check $LR (alternative return registers are currently not supported)
				if (u32 lr_orig = bb.reg_mod[s_reg_lr] ? addr : bb.reg_origin_abs[s_reg_lr]; lr_orig < 0x40000)
				{
					auto& src = ::at32(m_bbs, lr_orig);

					if (src.reg_load_mod[s_reg_lr] != func.reg_save_off[s_reg_lr])
					{
						spu_log.error("Function 0x%05x: [0x%05x] $LR mismatch (src=0x%x; 0x%x vs 0x%x)", f.first, addr, lr_orig, src.reg_load_mod[0], func.reg_save_off[0]);
						is_ok = false;
					}
					else if (src.reg_load_mod[s_reg_lr] == 0)
					{
						spu_log.error("Function 0x%05x: [0x%05x] $LR modified (src=0x%x)", f.first, addr, lr_orig);
						is_ok = false;
					}
				}
				else if (lr_orig > 0x40000)
				{
					spu_log.todo("Function 0x%05x: [0x%05x] $LR unpredictable (src=0x%x)", f.first, addr, lr_orig);
					is_ok = false;
				}

				// Check $80..$127 (should be restored or unmodified)
				for (u32 i = s_reg_80; is_ok && i <= s_reg_127; i++)
				{
					if (u32 orig = bb.reg_mod[i] ? addr : bb.reg_origin_abs[i]; orig < 0x40000)
					{
						auto& src = ::at32(m_bbs, orig);

						if (src.reg_load_mod[i] != func.reg_save_off[i])
						{
							spu_log.error("Function 0x%05x: [0x%05x] $%u mismatch (src=0x%x; 0x%x vs 0x%x)", f.first, addr, i, orig, src.reg_load_mod[i], func.reg_save_off[i]);
							is_ok = false;
						}
					}
					else if (orig > 0x40000)
					{
						spu_log.todo("Function 0x%05x: [0x%05x] $%u unpredictable (src=0x%x)", f.first, addr, i, orig);
						is_ok = false;
					}

					if (func.reg_save_off[i] + 1 == 0)
					{
						spu_log.error("Function 0x%05x: [0x%05x] $%u used incorrectly", f.first, addr, i);
						is_ok = false;
					}
				}

				// Check $SP (should be restored or unmodified)
				if (bb.stack_sub != 0 && bb.stack_sub != 0x80000000)
				{
					spu_log.error("Function 0x%05x: [0x%05x] return with stack frame 0x%x", f.first, addr, bb.stack_sub);
					is_ok = false;
				}
			}

			if (is_ok && bb.terminator == term_type::call)
			{
				// Check call instruction (TODO)
				if (bb.stack_sub == 0)
				{
					// Call without a stack frame
					spu_log.error("Function 0x%05x: [0x%05x] frameless call", f.first, addr);
					is_ok = false;
				}
			}

			if (is_ok && bb.terminator == term_type::fallthrough)
			{
				// Can't just fall out of the function
				if (bb.targets.size() != 1 || bb.targets[0] >= flim)
				{
					spu_log.error("Function 0x%05x: [0x%05x] bad fallthrough to 0x%x", f.first, addr, bb.targets[0]);
					is_ok = false;
				}
			}

			if (is_ok && bb.stack_sub == 0x80000000)
			{
				spu_log.error("Function 0x%05x: [0x%05x] bad stack frame", f.first, addr);
				is_ok = false;
			}

			// Fill external function targets (calls, possibly tail calls)
			for (u32 target : bb.targets)
			{
				if (target < bb.func || target >= flim || (bb.terminator == term_type::call && target == bb.func))
				{
					if (func.calls.find_first_of(target) + 1 == 0)
					{
						func.calls.push_back(target);
					}
				}
			}
		}

		if (is_ok && used_stack && f.first == entry_point)
		{
			spu_log.error("Function 0x%05x: considered possible chunk", f.first);
			is_ok = false;
		}

		// if (is_ok && f.first > 0x1d240 && f.first < 0x1e000)
		// {
		// 	spu_log.error("Function 0x%05x: manually disabled", f.first);
		// 	is_ok = false;
		// }

		f.second.good = is_ok;
	}
	// Check function call graph
	while (g_cfg.core.spu_block_size == spu_block_size_type::giga)
	{
		bool need_repeat = false;

		for (auto& f : m_funcs)
		{
			if (!f.second.good)
			{
				continue;
			}

			for (u32 call : f.second.calls)
			{
				const auto ffound = std::as_const(m_funcs).find(call);

				if (ffound == m_funcs.cend() || ffound->second.good == false)
				{
					need_repeat = true;

					if (f.second.good)
					{
						spu_log.error("Function 0x%05x: calls bad function (0x%05x)", f.first, call);
						f.second.good = false;
					}
				}
			}
		}

		if (!need_repeat)
		{
			break;
		}
	}
	auto sort_breaking_reasons = [](const std::array<atomic_t<u64>, 128>& breaking_reason)
	{
		std::vector<std::pair<u32, u64>> map;

		for (u32 i = 0; i < static_cast<u32>(breaking_reason.size()); i++)
		{
			if (u64 v = breaking_reason[i])
			{
				map.emplace_back(i, v);
			}
		}

		std::stable_sort(map.begin(), map.end(), FN(x.second > y.second));
		return map;
	};
	struct putllc16_statistics_t
	{
		atomic_t<u64> all = 0;
		atomic_t<u64> single = 0;
		atomic_t<u64> nowrite = 0;
		std::array<atomic_t<u64>, 128> breaking_reason{};
	};

	struct rchcnt_statistics_t
	{
		atomic_t<u64> all = 0;
		atomic_t<u64> single = 0;
		std::array<atomic_t<u64>, 128> breaking_reason{};
	};

	// Pattern structures
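	// Note (added commentary): atomic16_t tracks the canonical SPU reservation loop so
	// that PUTLLC on a 128-byte line can be narrowed to a 16-byte compare-and-swap when
	// only a single qword is touched. The shape being matched looks roughly like this
	// (register names are placeholders, not from the original source):
	//
	//   .again:
	//     wrch  $MFC_LSA, $lsa        ; first/last LSA write (lsa_pc / lsa_last_pc)
	//     wrch  $MFC_Cmd, GETLLAR     ; get_pc
	//     rdch  $r, $MFC_RdAtomicStat ; get_rdatomic
	//     lqd/stqd ...                ; tracked via ls/ls_offs/ls_reg/ls_abs
	//     wrch  $MFC_Cmd, PUTLLC      ; put_pc
	//     rdch  $r, $MFC_RdAtomicStat
	//     brnz  $r, .again
	//
	// Any store outside the tracked qword sets ls_invalid and cancels the optimization.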
	struct atomic16_t
	{
		bool active = false; // GETLLAR happened
		u32 lsa_pc = SPU_LS_SIZE; // PC of first LSA write
		u32 lsa_last_pc = SPU_LS_SIZE; // PC of last LSA write
		u32 get_pc = SPU_LS_SIZE; // PC of GETLLAR
		u32 put_pc = SPU_LS_SIZE; // PC of PUTLLC
		reg_state_t ls{}; // state of LS load/store address register
		reg_state_t ls_offs = reg_state_t::from_value(0); // Added value to ls
		reg_state_t lsa{}; // state of LSA register on GETLLAR
		reg_state_t ls_reg[8]{}; // stores/loads using register bundles with offset
		reg_state_t ls_abs[8]{}; // stores/loads using absolute address
		u32 reg = s_reg_max; // Source of address register of LS load/store
		u32 reg2 = s_reg_max; // Source 2 of address register of LS load/store (STQX/LQX)
		//u32 ls_offs[8]{}; // LS offset from register (0 if const)
		bool ls_pc_rel = false; // For STQR/LQR
		bool ls_access = false; // LS accessed
		bool ls_write = false; // LS written
		bool ls_invalid = false; // From this point and on, any store will cancel the optimization
		bool select_16_or_0_at_runtime = false;
		bool put_active = false; // PUTLLC happened
		bool get_rdatomic = false; // True if MFC_RdAtomicStat was read after GETLLAR
		u32 mem_count = 0;

		// Return old state for error reporting
		atomic16_t discard()
		{
			const u32 pc = lsa_pc;
			const u32 last_pc = lsa_last_pc;

			const atomic16_t old = *this;
			*this = atomic16_t{};

			// Keep some members
			lsa_pc = pc;
			lsa_last_pc = last_pc;
			return old;
		}

		// Conditional breakage (break if a full 128-byte reservation is needed)
		atomic16_t set_invalid_ls(bool write)
		{
			ls_invalid = true;
			ls_write |= write;

			if (write)
			{
				return discard();
			}

			return atomic16_t{};
		}
	};
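	// Note (added commentary): rchcnt_loop_t matches busy-wait loops that poll a
	// channel count, so the recompiler can treat them specially instead of spinning.
	// A typical instance (register names are placeholders) looks like:
	//
	//   .wait:
	//     rchcnt $r, $ch      ; read_pc, tracked in ch_state
	//     ceqi   $t, $r, 1    ; optional ch_product comparison
	//     brz    $t, .wait    ; branch_pc -> branch_target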
	struct rchcnt_loop_t
	{
		bool active = false; // RDCH/RCHCNT happened
		bool failed = false; // needed to distinguish the start of the pattern from a failed pattern (they begin and end at the same address)
		bool conditioned = false; // needed to distinguish the start of the pattern from a failed pattern (they begin and end at the same address)
		u32 channel = 128;
		u32 read_pc = SPU_LS_SIZE; // PC of RDCH or RCHCNT (that encloses the loop)
		reg_state_t ch_state{+vf::is_null}; // Channel stat, example: RCHCNT ch_state, MFC_Cmd
		reg_state_t ch_product{+vf::is_null}; // Optional comparison state for channel state, example: CEQI ch_product, ch_state, 1
		bool product_test_negate = false; // Compare the opposite way, such as: CEQI ch_product, ch_state, 0 which turns 0 to -1 and 1 to 0
		std::basic_string<u32> origins;
		u32 branch_pc = SPU_LS_SIZE; // Where the loop branch is located
		u32 branch_target = SPU_LS_SIZE; // The target of the loop branch

		// Return old state for error reporting
		rchcnt_loop_t discard()
		{
			const rchcnt_loop_t old = *this;
			*this = rchcnt_loop_t{};
			return old;
		}
	};
	// Reset tags
	reg_state_t::alloc_tag(true);

	std::map<u32, std::unique_ptr<block_reg_info>> infos;
	infos.emplace(entry_point, block_reg_info::create(entry_point));

	struct block_reg_state_iterator
	{
		u32 pc{};
		usz parent_iterator_index = umax;
		usz parent_target_index = 0;
		usz iterator_id = 0;
		usz temp_child_index = umax;
		usz temp_list_index = umax;

		// PUTLLC16 optimization analysis tracker
		atomic16_t atomic16{};

		// RDCH/RCHCNT loop analysis tracker
		rchcnt_loop_t rchcnt_loop{};

		block_reg_state_iterator(u32 _pc, usz _parent_iterator_index = umax, usz _parent_target_index = 0) noexcept
			: pc(_pc)
			, parent_iterator_index(_parent_iterator_index)
			, parent_target_index(_parent_target_index)
		{
		}
	};

	std::vector<block_reg_state_iterator> reg_state_it;

	std::map<u32, atomic16_t> atomic16_all; // RdAtomicStat location -> atomic loop optimization state
	std::map<u32, rchcnt_loop_t> rchcnt_loop_all; // RDCH/RCHCNT location -> channel read loop optimization state
	std::map<u32, bool> getllar_starts; // True for failed loops
	std::map<u32, bool> run_on_block;
	std::map<u32, bool> logged_block;

	std::array<reg_state_t, s_reg_max>* true_state_walkby = nullptr;

	atomic16_t dummy16{};
	rchcnt_loop_t dummy_loop{};

	bool likely_putllc_loop = false;
	bool had_putllc_evaluation = false;
	for (u32 i = 0, count = 0; i < result.data.size(); i++)
	{
		const u32 inst = std::bit_cast<be_t<u32>>(result.data[i]);

		if (spu_opcode_t{inst}.ra == MFC_RdAtomicStat && g_spu_itype.decode(inst) == spu_itype::RDCH)
		{
			count++;

			if (count == 2)
			{
				likely_putllc_loop = true;
				break;
			}
		}
	}

	usz target_count = 0;

	for (auto& [pc, loc] : m_targets)
	{
		target_count += loc.size();
	}

	const bool should_search_patterns = target_count < 300u;
	// Treat start of function as an unknown value with tag (because it is)
	const reg_state_t start_program_count = reg_state_t::make_unknown(entry_point - 1);

	// Initialize
	reg_state_it.emplace_back(entry_point);
	run_on_block[entry_point / 4] = true;
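	// Note (added commentary): with SPU_LS_SIZE == 0x40000 (256 KiB of local storage)
	// the masks below evaluate to 0x3ff80, 0x3fff0, 0x3fffc and 0x3ffff respectively:
	// wrap-around masks that also align addresses down to 128, 16, 4 or 1 bytes.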
	enum spu_addr_mask_t : u32
	{
		SPU_LS_MASK_128 = (SPU_LS_SIZE - 1) & -128,
		SPU_LS_MASK_16 = (SPU_LS_SIZE - 1) & -16,
		SPU_LS_MASK_4 = (SPU_LS_SIZE - 1) & -4,
		SPU_LS_MASK_1 = (SPU_LS_SIZE - 1),
	};

	u32 iterator_id_alloc = 0;
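	// Note (added commentary): the walk below runs in two passes over the same blocks,
	// selected by wf: pass 0 (is_form_block) only builds per-block register state in
	// 'infos'; pass 1 (is_pattern_match) evaluates the combined start states and runs
	// the PUTLLC16/channel-loop pattern matchers on top of that information.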
	for (u32 wf = 0, wi = 0, wa = entry_point, bpc = wa; wf <= 1;)
	{
		const bool is_form_block = wf == 0;
		const bool is_pattern_match = wf == 1;

		dummy16.active = false;
		dummy_loop.active = false;

		if (!is_form_block && wa == bpc)
		{
			if (wi == 0)
			{
				for (auto& [addr, block] : infos)
				{
					// Evaluate state for all blocks
					block->evaluate_start_state(infos, should_search_patterns);
				}
			}

			if (!should_search_patterns)
			{
				break;
			}

			if (!infos[bpc])
			{
				std::string out = fmt::format("Blocks:");

				for (auto& [pc, _] : infos)
				{
					if (!_) continue;
					fmt::append(out, " [0x%x]", pc);
				}

				out += '\n';

				for (auto& [pc, bb] : m_bbs)
				{
					if (!m_block_info[pc / 4])
					{
						continue;
					}

					out += fmt::format("\nTargets 0x%x:", pc);

					for (auto addr : bb.targets)
					{
						fmt::append(out, " [0x%x]", addr);
					}
				}

				spu_log.fatal("%s", out);
			}

			true_state_walkby = &ensure(infos[bpc])->evaluate_start_state(infos, should_search_patterns);

			for (reg_state_t& f : *true_state_walkby)
			{
				if (f.flag & vf::is_null)
				{
					// Evaluate locally
					f.flag -= vf::is_null;
				}
			}
		}

		auto& vregs = is_form_block ? infos[bpc]->local_state : *true_state_walkby;
		const auto atomic16 = is_pattern_match ? &::at32(reg_state_it, wi).atomic16 : &dummy16;
		const auto rchcnt_loop = is_pattern_match ? &::at32(reg_state_it, wi).rchcnt_loop : &dummy_loop;

		const u32 pos = wa;

		wa += 4;
const auto break_putllc16 = [ & ] ( u32 cause , atomic16_t previous )
{
if ( previous . active & & likely_putllc_loop & & getllar_starts . contains ( previous . lsa_pc ) )
{
const bool is_first = ! std : : exchange ( getllar_starts [ previous . lsa_pc ] , true ) ;
2024-03-06 16:28:07 +01:00
2024-04-26 19:39:19 +02:00
if ( ! is_first )
2024-03-06 16:28:07 +01:00
{
2024-04-26 19:39:19 +02:00
return ;
2024-03-06 16:28:07 +01:00
}
2024-04-26 19:39:19 +02:00
2024-05-08 13:30:12 +02:00
had_putllc_evaluation = true ;
2024-04-26 19:39:19 +02:00
g_fxo - > get < putllc16_statistics_t > ( ) . breaking_reason [ cause ] + + ;
if ( ! spu_log . notice )
2024-03-06 16:28:07 +01:00
{
2024-04-26 19:39:19 +02:00
return ;
2024-03-06 16:28:07 +01:00
}
2024-04-26 19:39:19 +02:00
2024-05-09 09:48:52 +02:00
std : : string break_error = fmt : : format ( " PUTLLC pattern breakage [%x mem=%d lsa_const=%d cause=%u] (lsa_pc=0x%x) " , pos , previous . mem_count , u32 { ! previous . ls_offs . is_const ( ) } * 2 + previous . lsa . is_const ( ) , cause , previous . lsa_pc ) ;
2024-04-26 19:39:19 +02:00
2024-07-03 09:34:38 +02:00
const auto values = sort_breakig_reasons ( g_fxo - > get < putllc16_statistics_t > ( ) . breaking_reason ) ;
2024-04-26 19:39:19 +02:00
std : : string tracing = " Top Breaking Reasons: " ;
usz i = 0 ;
usz fail_count = 0 ;
bool switched_to_minimal = false ;
for ( auto it = values . begin ( ) ; it ! = values . end ( ) ; i + + , it + + )
2024-03-06 16:28:07 +01:00
{
2024-04-26 19:39:19 +02:00
fail_count + = it - > second ;
if ( i > = 12 )
2024-03-06 16:28:07 +01:00
{
2024-04-26 19:39:19 +02:00
continue ;
}
if ( i < 8 & & it - > second > 1 )
{
fmt : : append ( tracing , " [cause=%u, n=%d] " , it - > first , it - > second ) ;
2024-03-06 16:28:07 +01:00
}
else
{
2024-04-26 19:39:19 +02:00
if ( ! std : : exchange ( switched_to_minimal , true ) )
{
fmt : : append ( tracing , " ; More: " ) ;
}
fmt : : append ( tracing , " %u " , it - > first ) ;
2024-03-06 16:28:07 +01:00
}
}
2024-04-26 19:39:19 +02:00
fmt : : append ( tracing , " of %d failures " , fail_count ) ;
spu_log . notice ( " %s \n %s " , break_error , tracing ) ;
2024-07-03 09:34:38 +02:00
}
} ;
const auto break_channel_pattern = [ & ] ( u32 cause , rchcnt_loop_t previous )
{
if ( previous . active & & rchcnt_loop_all . contains ( previous . read_pc ) )
{
const bool is_first = ! std : : exchange ( rchcnt_loop_all [ previous . read_pc ] . failed , true ) ;
if ( ! is_first )
{
return ;
}
g_fxo - > get < rchcnt_statistics_t > ( ) . breaking_reason [ cause ] + + ;
if ( ! spu_log . notice )
{
return ;
}
std : : string break_error = fmt : : format ( " Channel pattern breakage [%x cause=%u] (read_pc=0x%x) " , pos , cause , previous . read_pc ) ;
const auto values = sort_breakig_reasons ( g_fxo - > get < rchcnt_statistics_t > ( ) . breaking_reason ) ;
std : : string tracing = " Top Breaking Reasons: " ;
2024-04-26 19:39:19 +02:00
2024-07-03 09:34:38 +02:00
usz i = 0 ;
usz fail_count = 0 ;
bool switched_to_minimal = false ;
for ( auto it = values . begin ( ) ; it ! = values . end ( ) ; i + + , it + + )
2024-03-06 16:28:07 +01:00
{
2024-07-03 09:34:38 +02:00
fail_count + = it - > second ;
2024-04-26 19:39:19 +02:00
2024-07-03 09:34:38 +02:00
if ( i > = 12 )
{
continue ;
}
if ( i < 8 & & it - > second > 1 )
{
fmt : : append ( tracing , " [cause=%u, n=%d] " , it - > first , it - > second ) ;
}
else
{
if ( ! std : : exchange ( switched_to_minimal , true ) )
{
fmt : : append ( tracing , " ; More: " ) ;
}
fmt : : append ( tracing , " %u " , it - > first ) ;
}
2024-03-06 16:28:07 +01:00
}
2024-07-03 09:34:38 +02:00
fmt : : append ( tracing , " of %d failures " , fail_count ) ;
spu_log . notice ( " %s \n %s " , break_error , tracing ) ;
2024-04-26 19:39:19 +02:00
}
} ;

	const auto break_all_patterns = [&](u32 cause)
	{
		break_putllc16(cause, atomic16->discard());
		break_channel_pattern(cause, rchcnt_loop->discard());
	};

	const auto calculate_absolute_ls_difference = [](u32 addr1, u32 addr2)
	{
		addr1 &= SPU_LS_MASK_1;
		addr2 &= SPU_LS_MASK_1;

		const u32 abs_diff = (addr1 >= addr2 ? addr1 - addr2 : addr2 - addr1);

		// Because memory is wrapping around, take the gap that is smaller
		return abs_diff >= SPU_LS_SIZE / 2 ? SPU_LS_SIZE - abs_diff : abs_diff;
	};
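
	// Worked example (SPU_LS_SIZE = 0x40000, SPU_LS_MASK_1 = 0x3FFFF):
	// addr1 = 0x3FFF0, addr2 = 0x00010 -> abs_diff = 0x3FFE0, which is >= 0x20000,
	// so the wrapped distance 0x40000 - 0x3FFE0 = 0x20 is returned instead.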

	bool called_next = false;

	u32 data{};

	const auto next_block = [&]()
	{
		if (called_next)
		{
			// Guard against multiple calls to next_block()
			return;
		}

		called_next = true;

		if (wf == 0)
		{
			wi++;

			auto& block = infos[bpc];

			if (pos == entry_point || (g_cfg.core.spu_block_size != spu_block_size_type::safe && (m_ret_info[bpc / 4] || m_entry_info[bpc / 4])))
			{
				// Do not allow value passthrough
				for (reg_state_t& f : block->start_reg_state)
				{
					f.flag -= vf::is_null;
				}

				for (reg_state_t& f : block->local_state)
				{
					f.flag -= vf::is_null;
				}

				// Block has an external origin, discard all previous information
				block->end_reg_state = block->local_state;
				block->has_true_state = true;
			}

			block->addend_reg_state = block->local_state;
		}
		else
		{
			std::vector<usz> to_pop;

			usz stackframe_it = wi;
			u32 stackframe_pc = SPU_LS_SIZE;
			usz entry_index = umax;

			auto get_block_targets = [&](u32 pc) -> std::basic_string_view<u32>
			{
				if (m_block_info[pc / 4] && m_bbs.count(pc))
				{
					return m_bbs.at(pc).targets;
				}

				return {};
			};
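
			// Note: reg_state_it behaves like an explicit call stack here; each entry tracks
			// which of its block's targets to visit next via parent_target_index, and
			// exhausted entries are popped so the traversal resumes at their parent.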
			u32 target_pc = SPU_LS_SIZE;
			bool insert_entry = false;
			bool is_code_backdoor = false;

			while (true)
			{
				const auto state_it = &reg_state_it[stackframe_it];

				stackframe_pc = state_it->pc;
				entry_index = state_it->parent_target_index;

				const auto targets = get_block_targets(stackframe_pc);
				const usz target_size = targets.size();

				while (entry_index < target_size && (targets[entry_index] < lsa || targets[entry_index] >= limit || !m_block_info[targets[entry_index] / 4]))
				{
					state_it->parent_target_index++;
					entry_index = state_it->parent_target_index;
				}

				if (entry_index == target_size)
				{
					const usz parent_index = state_it->parent_iterator_index;

					to_pop.emplace_back(stackframe_it);

					if (parent_index != umax)
					{
						stackframe_it = parent_index;
					}
					else
					{
						// Final
						wi = 0;
						break;
					}
				}
				else
				{
					target_pc = ::at32(targets, entry_index);

					usz occurence_count = 0;
					std::array<usz, 16> duplicate_positions;

					// Virtual concept (there is really no such thing as a loop connector at the compiled-code level)
					// But it helps to simplify this process
					bool is_loop_connector = false;
					bool is_too_extensive = false;
					bool is_skipable = false;

					// Hack to avoid extensive analysis of all possible code paths:
					// Allow a limited number of occurences of the upper-most block
					// Because loop "connectors" are usually backward in direction
					// The proper solution would be to add a precursory function analysis stage which identifies all loop "connectors" and allows duplicates based on it
					for (usz i = stackframe_it, count = 0;; count++)
					{
						auto& entry = ::at32(reg_state_it, i);
						const u32 entry_pc = entry.pc;

						if (count == (state_it->atomic16.active ? 25 : 12))
						{
							if (state_it->atomic16.active && !std::exchange(logged_block[target_pc / 4], true))
							{
								spu_log.notice("SPU block analysis is too extensive at 0x%x", entry_pc);
							}

							is_too_extensive = true;
							break;
						}

						if (entry_pc == target_pc)
						{
							duplicate_positions[occurence_count++] = i;

							if (occurence_count == duplicate_positions.size())
							{
								is_loop_connector = true;
								break;
							}
						}

						const usz parent_idx = entry.parent_iterator_index;

						if (parent_idx == umax)
						{
							break;
						}

						ensure(i != parent_idx);

						// Fill info for later
						auto& parent = ::at32(reg_state_it, parent_idx);
						parent.temp_child_index = i;
						parent.temp_list_index = count;
						i = parent_idx;
					}

					// Scan the code for "code flow" repetitions (entire sequences of blocks equal to each other)
					// If found, this is 100% a loop; should it start a third time, ignore it
					if (occurence_count >= 2)
					{
						for (usz it_begin = 0; !is_skipable && it_begin < occurence_count - 1; it_begin++)
						{
							const usz block_start = duplicate_positions[it_begin + 1];

							for (usz it_tail = 0; it_tail < it_begin + 1; it_tail++)
							{
								const usz block_tail = duplicate_positions[it_begin - it_tail];

								// Check if the distance is precisely two times from the end
								if (reg_state_it.size() - block_start != utils::rol64(reg_state_it.size() - block_tail, 1))
								{
									continue;
								}

								bool is_equal = true;

								for (usz j = 1; j < reg_state_it.size() - block_tail; j++)
								{
									if (reg_state_it[block_start + j].pc != reg_state_it[block_tail + j].pc)
									{
										is_equal = false;
										break;
									}
								}

								if (is_equal)
								{
									is_skipable = true;
									break;
								}
							}
						}
					}

					if (is_skipable)
					{
						if (!std::exchange(logged_block[target_pc / 4], true))
						{
							spu_log.notice("SPU block is a loop at [0x%05x -> 0x%05x]", state_it->pc, target_pc);
						}

						state_it->parent_target_index++;
						continue;
					}

					if (is_loop_connector && !std::exchange(logged_block[target_pc / 4], true))
					{
						spu_log.notice("SPU block analysis is too repetitive at [0x%05x -> 0x%05x]", state_it->pc, target_pc);
					}

					insert_entry = true;

					// Test if the code is an opening to external code (the start of the function is always respected because it is already assumed to have no origin)
					is_code_backdoor = m_ret_info[target_pc / 4] || (m_entry_info[target_pc / 4] && target_pc != entry_point);

					if (run_on_block[target_pc / 4])
					{
						insert_entry = false;
					}
					else if (is_code_backdoor || is_too_extensive || is_loop_connector)
					{
						if (reg_state_it[stackframe_it].atomic16.active)
						{
							break_putllc16(40, reg_state_it[stackframe_it].atomic16.discard());
						}

						if (reg_state_it[stackframe_it].rchcnt_loop.active)
						{
							break_channel_pattern(40, reg_state_it[stackframe_it].rchcnt_loop.discard());
						}

						// Allow the block to run only once, to avoid unnecessary iterations
						run_on_block[target_pc / 4] = true;
					}

					state_it->parent_target_index++;

					if (!insert_entry)
					{
						continue;
					}

					break;
				}
			}
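
			// previous_pc: address of the last instruction of the block we came from
			// (block start + size in words * 4, minus one instruction)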
			const u32 previous_pc = m_bbs.at(reg_state_it[stackframe_it].pc).size * 4 + reg_state_it[stackframe_it].pc - 4;

			bool may_return = previous_pc + 4 != entry_point + result.data.size() * 4 && (m_ret_info[(previous_pc / 4) + 1] || m_entry_info[previous_pc / 4]);

			if (!may_return)
			{
				const u32 branch_target = op_branch_targets(previous_pc, spu_opcode_t{data})[0];

				if (branch_target == umax || branch_target >= entry_point + result.data.size() * 4 || branch_target < entry_point)
				{
					may_return = true;
				}
			}

			if (wi != stackframe_it || may_return || !insert_entry)
			{
				// Possible function end
				if (rchcnt_loop->active)
				{
					// Does not post-dominate the channel reads
					auto& pair = rchcnt_loop_all[rchcnt_loop->read_pc];
					pair.failed = true;
					pair.active = false;
				}
			}

			// Backup analyser information
			const auto atomic16_info = reg_state_it[stackframe_it].atomic16;
			const auto rchcnt_loop_info = reg_state_it[stackframe_it].rchcnt_loop;

			// Cleaning from the back is possible because it does not affect old indices
			// Technically should always do a full cleanup at the moment
			// TODO: Proper cleanup while keeping old indices valid
			for (usz it : to_pop)
			{
				if (it == reg_state_it.size() - 1)
				{
					reg_state_it.pop_back();
				}
				else
				{
					// Should not be reachable at the moment
					//ensure(false);
					spu_log.error("Failed to clean block analysis steps at block_id %d", reg_state_it[it].iterator_id);
				}
			}

			if (insert_entry)
			{
				const usz target_size = get_block_targets(stackframe_pc).size();

				spu_log.trace("Emplacing: block_id=%d, pc=0x%x, target_it=%d/%d, new_pc=0x%x (has_it=%d)", reg_state_it[stackframe_it].iterator_id, stackframe_pc, entry_index + 1, target_size, target_pc, atomic16_info.active);

				auto& next = reg_state_it.emplace_back(target_pc, stackframe_it, 0);

				if (!is_code_backdoor)
				{
					// Restore analyser information (if not an entry)
					next.atomic16 = atomic16_info;

					if (previous_pc != rchcnt_loop_info.branch_pc || target_pc == rchcnt_loop_info.branch_target)
					{
						next.rchcnt_loop = rchcnt_loop_info;
					}
				}
				else
				{
					if (atomic16_info.active)
					{
						break_putllc16(39, atomic16_info);
					}

					if (rchcnt_loop_info.active)
					{
						// Does not post-dominate the channel read
						auto& pair = rchcnt_loop_all[rchcnt_loop_info.read_pc];
						pair.failed = true;
						pair.active = false;
					}
				}

				next.iterator_id = iterator_id_alloc++;

				wi = static_cast<u32>(stackframe_it + 1);
				ensure(stackframe_it + 1 == reg_state_it.size() - 1);
			}
		}

		if (wi >= reg_state_it.size())
		{
			wf++;
			wi = 0;
			run_on_block.clear();

			if (wf == 1)
			{
				reg_state_it.clear();

				if (!infos.empty())
				{
					reg_state_it.emplace_back(::at32(infos, entry_point)->pc).iterator_id = iterator_id_alloc++;
				}
			}
		}

		if (wi < reg_state_it.size())
		{
			wa = ::at32(reg_state_it, wi).pc;
			bpc = wa;
		}
	};

	const auto get_reg = [&](u32 reg) -> const reg_state_t&
	{
		return vregs[reg];
	};

	const auto move_reg = [&](u32 dst, u32 src)
	{
		if (dst == src || vregs[src] == vregs[dst])
		{
			return;
		}

		vregs[dst] = vregs[src];

		// Register storage has changed
		vregs[dst].flag -= vf::is_null;
	};

	const auto set_const_value = [&](u32 reg, u32 value)
	{
		vregs[reg] = reg_state_t::from_value(value);
	};

	const auto inherit_const_value = [&](u32 reg, const reg_state_t& ra, const reg_state_t& rb, u32 value, u32 pos)
	{
		if (ra.origin != rb.origin)
		{
			pos = reg_state_it[wi].pc;
		}
		else
		{
			pos = ra.origin;
		}

		const bs_t<vf> flag = (ra.flag & rb.flag) - vf::is_null;

		vregs[reg] = reg_state_t{flag, value, flag & vf::is_const ? u32{umax} : reg_state_t::alloc_tag(), 0, 0, pos};
	};
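
	// Note: the result is constant only if both sources are constant (flag intersection);
	// its origin collapses to the common origin when the sources agree, otherwise to the
	// current block's pc, so origin-based invalidation (see invalidate_if_created) stays conservative.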

	const auto inherit_const_mask_value = [&](u32 reg, reg_state_t state, u32 mask_ones, u32 mask_zeroes)
	{
		if ((mask_ones | mask_zeroes) == 0)
		{
			state.flag -= vf::is_null;
			vregs[reg] = state;
			return;
		}

		if (state.flag & vf::is_const)
		{
			vregs[reg] = reg_state_t::from_value((state.value | mask_ones) & ~mask_zeroes);
			return;
		}

		const u32 ones = (state.known_ones | mask_ones) & ~mask_zeroes;
		const u32 zeroes = (state.known_zeroes | mask_zeroes) & ~mask_ones;

		if ((ones ^ zeroes) == umax)
		{
			// Special case: create a constant from full masks
			vregs[reg] = reg_state_t::from_value(ones);
			return;
		}

		ensure(state.tag != umax);
		vregs[reg] = reg_state_t{vf::is_mask, 0, state.tag, ones, zeroes, state.origin};
	};
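
	// Worked example: if a value has known_ones = 0x0000FFFF and known_zeroes = 0, and a caller
	// adds mask_zeroes = 0xFFFF0000, then ones = 0x0000FFFF and zeroes = 0xFFFF0000; since
	// ones ^ zeroes == 0xFFFFFFFF every bit is known and the state folds to the constant 0x0000FFFF.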

	const auto unconst = [&](u32 reg, u32 pc)
	{
		vregs[reg] = reg_state_t::make_unknown(pc, pos);
	};

	const auto add_block = [&](u32 target)
	{
		if (!is_form_block)
		{
			return;
		}

		// Validate new target (TODO)
		if (target >= lsa && target < limit)
		{
			if (!infos[target])
			{
				infos[target] = block_reg_info::create(target);
			}

			block_reg_info::create_node(target, bpc, infos);

			if (!run_on_block[target / 4])
			{
				reg_state_it.emplace_back(target).iterator_id = iterator_id_alloc++;
				run_on_block[target / 4] = true;
			}
		}
	};
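
	// Targets outside [lsa, limit) belong to code that is not part of this compilation
	// unit, so they are neither recorded as nodes nor queued for another pass.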

	if (pos < lsa || pos >= limit)
	{
		// Don't analyse if already beyond the limit
		next_block();
		continue;
	}

	if (bpc != pos && m_preds.count(pos))
	{
		// End of block reached
		next_block();
		continue;
	}

	if (g_cfg.core.spu_block_size != spu_block_size_type::safe && (m_ret_info[pos / 4] || m_entry_info[pos / 4] || pos == entry_point))
	{
		ensure(bpc == pos);

		// Block has an external origin, discard all previous information
		// TODO: Make the optimizations conditional at runtime instead
		if (!is_form_block)
		{
			// Call for external code
			break_all_patterns(25);
		}
	}

	if (atomic16->active)
	{
		for (auto state : {&atomic16->lsa, &atomic16->ls, &atomic16->ls_offs})
		{
			state->invalidate_if_created(pos);
		}
	}

	if (rchcnt_loop->active)
	{
		if (rchcnt_loop->origins.find_first_of(pos) != umax)
		{
			rchcnt_loop->failed = true;
			rchcnt_loop->active = false;
		}
	}

	data = std::bit_cast<be_t<u32>>(::at32(result.data, (pos - lsa) / 4));
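
	// The program image is stored big-endian; going through be_t<u32> yields the
	// instruction word in host byte order before decoding.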

	const auto op = spu_opcode_t{data};
	const auto type = g_spu_itype.decode(data);

	// For debugging
	if (false && likely_putllc_loop && is_pattern_match)
	{
		SPUDisAsm dis_asm(cpu_disasm_mode::dump, reinterpret_cast<const u8*>(result.data.data()), result.lower_bound);
		dis_asm.disasm(pos);

		std::string consts;

		for (auto _use : std::initializer_list<std::pair<u32, bool>>{{op.ra, m_use_ra.test(pos / 4)},
			{op.rb, m_use_rb.test(pos / 4)}, {op.rc, m_use_rc.test(pos / 4)}})
		{
			if (!_use.second)
			{
				continue;
			}

			if (!consts.empty())
			{
				consts += ',';
			}

			const u32 reg_file = _use.first;
			const auto& reg = get_reg(reg_file);

			if (reg.is_const())
			{
				fmt::append(consts, " r%d=0x%x", reg_file, reg.value);
			}
			else
			{
				if (u32 mask = reg.known_zeroes | reg.known_ones)
				{
					fmt::append(consts, " r%d=#%d-&|0x%x", reg_file, reg.tag, mask);
				}
				else
				{
					fmt::append(consts, " r%d=#%d", reg_file, reg.tag);
				}
			}
		}

		if (!consts.empty())
		{
			consts = " {" + consts + " }";
		}

		if (dis_asm.last_opcode.ends_with('\n'))
		{
			dis_asm.last_opcode.pop_back();
		}

		spu_log.always()("[SPU=0%x, it=%d] %s%s [%d]", pos, reg_state_it[wi].iterator_id, dis_asm.last_opcode, consts, atomic16->active);
	}

	// Analyse instruction
	switch (type)
	{
	case spu_itype::UNK:
	case spu_itype::DFCEQ:
	case spu_itype::DFCMEQ:
	case spu_itype::DFCGT:
	case spu_itype::DFCMGT:
	case spu_itype::DFTSV:
	{
		// Stop before invalid instructions (TODO)
		next_block();
		continue;
	}
	case spu_itype::SYNC:
	case spu_itype::STOP:
	case spu_itype::STOPD:
	{
		if (data == 0)
		{
			// Stop before null data
			next_block();
			continue;
		}

		if (g_cfg.core.spu_block_size == spu_block_size_type::safe)
		{
			// Stop on special instructions (TODO)
			next_block();
			break;
		}

		if (type == spu_itype::SYNC)
		{
			// Remember
			sync = true;
		}

		break;
	}
	case spu_itype::IRET:
	case spu_itype::BI:
	case spu_itype::BISL:
	case spu_itype::BISLED:
	case spu_itype::BIZ:
	case spu_itype::BINZ:
	case spu_itype::BIHZ:
	case spu_itype::BIHNZ:
	{
		if (op.e || op.d)
		{
			break_all_patterns(27);
		}

		break;
	}
	case spu_itype::BRSL:
	case spu_itype::BRASL:
	{
		break;
	}
	case spu_itype::BRA:
	{
		break;
	}
	case spu_itype::BRZ:
	case spu_itype::BRNZ:
	{
		const u32 next_pc = spu_branch_target(pos, 1);
		const u32 target = spu_branch_target(pos, op.i16);

		if (rchcnt_loop->active)
		{
			const reg_state_t& rt = vregs[op.rt];

			if (rt.is_instruction && (rchcnt_loop->ch_state.origin == rt.origin || rchcnt_loop->ch_product.origin == rt.origin))
			{
				if (rchcnt_loop->conditioned)
				{
					// Let's not make it complicated, have a single branch determining the condition
					break_channel_pattern(54, rchcnt_loop->discard());
					break;
				}

				rchcnt_loop->conditioned = true;
				rchcnt_loop->branch_pc = pos;
				rchcnt_loop->branch_target = rchcnt_loop->product_test_negate != (type == spu_itype::BRZ) ? target : next_pc;
				break;
			}
		}

		break;
	}
	case spu_itype::BR:
	case spu_itype::BRHZ:
	case spu_itype::BRHNZ:
	{
		break;
	}
	case spu_itype::DSYNC:
	case spu_itype::HEQ:
	case spu_itype::HEQI:
	case spu_itype::HGT:
	case spu_itype::HGTI:
	case spu_itype::HLGT:
	case spu_itype::HLGTI:
	case spu_itype::LNOP:
	case spu_itype::NOP:
	case spu_itype::MTSPR:
	case spu_itype::FSCRWR:
	{
		// Do nothing
		break;
	}
	case spu_itype::WRCH:
	{
		break_channel_pattern(56, rchcnt_loop->discard());

		switch (op.ra)
		{
		case MFC_EAL:
		{
			move_reg(s_reg_mfc_eal, op.rt);
			break;
		}
		case MFC_LSA:
		{
			auto rt = get_reg(op.rt);
			inherit_const_mask_value(s_reg_mfc_lsa, rt, 0, ~SPU_LS_MASK_1);

			if (is_pattern_match)
			{
				atomic16->lsa_last_pc = pos;
			}

			break;
		}
		case MFC_TagID:
		{
			break;
		}
		case MFC_Size:
		{
			break;
		}
		case MFC_Cmd:
		{
			const auto [af, av, atagg, _3, _5, apc, ainst] = get_reg(op.rt);

			if (!is_pattern_match)
			{
				//
			}
			else if (af & vf::is_const)
			{
				switch (av)
				{
				case MFC_GETLLAR_CMD:
				{
					// Get LSA and apply mask for GETLLAR
					// TODO: Simplify this to be a value returning function
					auto old_lsa = get_reg(s_reg_mfc_lsa);
					inherit_const_mask_value(s_reg_mfc_lsa, old_lsa, 0, ~SPU_LS_MASK_128);

					// Restore LSA
					auto lsa = get_reg(s_reg_mfc_lsa);
					vregs[s_reg_mfc_lsa] = old_lsa;

					const u32 lsa_pc = atomic16->lsa_last_pc == SPU_LS_SIZE ? bpc : atomic16->lsa_last_pc;

					if (atomic16->active)
					{
						if (atomic16->lsa_pc != lsa_pc || atomic16->get_pc != pos || atomic16->lsa != lsa)
						{
							break_putllc16(30, atomic16->discard());
						}
					}

					// If LSA write has not happened, use block start
					atomic16->lsa_pc = lsa_pc;
					atomic16->get_pc = pos;
					atomic16->active = true;
					atomic16->lsa = lsa;

					if (likely_putllc_loop)
					{
						// Register loop entry
						if (getllar_starts.emplace(atomic16->lsa_pc, false).second)
						{
							g_fxo->get<putllc16_statistics_t>().all++;
							spu_log.notice("[0x%05x] GETLLAR pattern entry point", pos);
						}
					}

					break;
				}
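				// The GETLLAR/PUTLLC pair tracked here is the core of the "putllc16" pattern:
				// a reservation loop that only ever touches up to 16 bytes of the 128-byte
				// reservation line, which allows committing the store as a narrow atomic
				// update (or selecting between 16 and 0 bytes at runtime) instead of a
				// full-line compare-and-swap.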
				case MFC_PUTLLC_CMD:
				{
					if (atomic16->active)
					{
						const auto _lsa = get_reg(s_reg_mfc_lsa);

						// Search for the value of the LS address store/load in the latest register file
						if (atomic16->ls_access && atomic16->ls_write && !atomic16->ls_pc_rel && !atomic16->ls.is_const())
						{
							usz reg_it = umax;
							u32 regs[2]{s_reg_max, s_reg_max};

							for (auto val : {&atomic16->ls, &atomic16->ls_offs})
							{
								reg_it++;

								if (val->is_const())
								{
									regs[reg_it] = 0;
									continue;
								}

								if (vregs[s_reg_mfc_lsa].compare_with_mask_indifference(*val, SPU_LS_MASK_16))
								{
									regs[reg_it] = s_reg_mfc_lsa;
									continue;
								}

								for (u32 i = 0; i <= s_reg_127; i++)
								{
									const auto& _reg = vregs[i];

									if (_reg.compare_with_mask_indifference(*val, SPU_LS_MASK_16))
									{
										regs[reg_it] = i;
										break;
									}
								}
							}

							if (regs[0] == s_reg_max || regs[1] == s_reg_max)
							{
								break_putllc16(3, atomic16->discard());
								break;
							}

							atomic16->reg = regs[0];

							if (!atomic16->ls_offs.is_const())
							{
								atomic16->reg2 = regs[1];
							}
						}

						if (atomic16->ls_access && atomic16->ls_write && !atomic16->lsa.compare_with_mask_indifference(_lsa, SPU_LS_MASK_128))
						{
							// The latest LSA value mismatches the one written before GETLLAR

							if (atomic16->lsa.flag != _lsa.flag)
							{
								break_putllc16(1, atomic16->discard());
							}
							else
							{
								break_putllc16(2, atomic16->discard());
							}

							break;
						}

						if (atomic16->ls_access && atomic16->ls_write)
						{
							atomic16->select_16_or_0_at_runtime = false;

							bool ok = false;

							if (atomic16->ls_pc_rel || !atomic16->ls_offs.is_const())
							{
								//
							}
							else if (atomic16->lsa.is_const())
							{
								if (atomic16->ls.is_const())
								{
									if (atomic16->ls_offs.value != 0)
									{
										// Rebase the constant so we can get rid of ls_offs
										atomic16->ls.value = spu_ls_target(atomic16->ls_offs.value + atomic16->ls.value);
										atomic16->ls_offs = reg_state_t::from_value(0);
									}

									if (atomic16->ls.compare_with_mask_indifference(atomic16->lsa, SPU_LS_MASK_128))
									{
										ok = true;
									}
								}
								else if (atomic16->ls_offs.compare_with_mask_indifference(atomic16->lsa, SPU_LS_MASK_128) && atomic16->ls.is_less_than(128 - (atomic16->ls_offs.value & 127)))
								{
									// Relative memory access with an offset of less than 128 bytes
									// Common around SPU utilities which have less strict restrictions about memory alignment
									ok = true;
								}
							}
							else if (atomic16->lsa.compare_with_mask_indifference(atomic16->ls, SPU_LS_MASK_128) && atomic16->ls_offs == 0)
							{
								// Unknown value with a known offset of less than 128 bytes
								ok = true;
							}

							if (!ok)
							{
								// This is quite common... let's try to select between putllc16 and putllc0 at runtime!
								// break_putllc16(100);
								// atomic16->discard();
								// break;
								atomic16->select_16_or_0_at_runtime = true;
							}
						}

						if (!atomic16->get_rdatomic)
						{
							// MFC_RdAtomicStat must have been read, otherwise GETLLAR may not be executed (according to HW tests)
							break_putllc16(21, atomic16->discard());
						}

						atomic16->put_pc = pos;
						atomic16->put_active = true;
					}

					break;
				}
				default:
				{
					break_putllc16(4, atomic16->discard());
					break;
				}
				}
			}
			else
			{
				break_putllc16(5, atomic16->discard());
			}

			if (!atomic16->active)
			{
				// Propagate failure
				for (auto& atm : atomic16_all)
				{
					if (atm.second.active && atm.second.put_pc == pos)
					{
						break_putllc16(31, atm.second.discard());
					}
				}
			}

			break;
		}
		case MFC_EAH:
		case SPU_WrDec:
		case SPU_WrSRR0:
		case SPU_WrEventAck:
		case SPU_Set_Bkmk_Tag:
		case SPU_PM_Start_Ev:
		case SPU_PM_Stop_Ev:
		case MFC_WrTagMask:
		//case MFC_WrTagUpdate: // Technically correct to ignore but risky
			break;
		default:
		{
			break_all_patterns(6);
			break;
		}
		}

		break;
	}
	case spu_itype::RCHCNT:
	case spu_itype::RDCH:
	{
		const bool is_read = type == spu_itype::RDCH;
		bool invalidate = true;

		const auto it = rchcnt_loop_all.find(pos);

		if (it != rchcnt_loop_all.end())
		{
			if (rchcnt_loop->failed || !rchcnt_loop->conditioned || rchcnt_loop->read_pc != pos)
			{
				// Propagate failure
				it->second.failed = true;
				it->second.active = false;
				it->second.conditioned = false;
			}
			else
			{
				it->second.active = false;
			}

			rchcnt_loop->active = false;
		}

		if (rchcnt_loop->active)
		{
			if (rchcnt_loop->read_pc != pos)
			{
				break_channel_pattern(53, rchcnt_loop->discard());
			}
		}
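
		// Note: the rchcnt_loop pattern models a busy-wait of the form
		//   do { n = RCHCNT(ch); } while (condition on n);
		// a channel-count read whose result feeds the conditional branch back to the read.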

		switch (op.ra)
		{
		case MFC_RdAtomicStat:
		{
			if (!is_read)
			{
				break;
			}

			if (atomic16->active)
			{
				if (atomic16->put_active)
				{
					if (getllar_starts.contains(atomic16->lsa_pc) && getllar_starts[atomic16->lsa_pc])
					{
						break_putllc16(24, atomic16->discard());
						break;
					}

					const auto it = atomic16_all.find(pos);

					if (it == atomic16_all.end())
					{
						// Fresh new pattern detected in a single code path
						atomic16_all.emplace(pos, *atomic16);
					}
					else if (it->second.active)
					{
						// Merge pattern attributes between different code paths, may cause detection of failures
						atomic16_t& existing = it->second;

						auto compare_tag_and_reg = [](std::pair<const reg_state_t*, u32> a, std::pair<const reg_state_t*, u32> b)
						{
							if (b.first->is_const() && a.first->is_const())
							{
								return a.first->compare_with_mask_indifference(*b.first, SPU_LS_MASK_1);
							}

							// Compare register source
							return a.second == b.second;
						};

						if (existing.lsa_pc != atomic16->lsa_pc || existing.put_pc != atomic16->put_pc || !existing.lsa.compare_with_mask_indifference(atomic16->lsa, SPU_LS_MASK_128))
						{
							// Registered twice
							break_putllc16(22, atomic16->discard());
							break_putllc16(22, existing.discard());
						}

						if (existing.active && existing.ls_access && atomic16->ls_access && (!compare_tag_and_reg({&existing.ls, existing.reg}, {&atomic16->ls, atomic16->reg}) || existing.ls_offs != atomic16->ls_offs || existing.reg2 != atomic16->reg2))
						{
							// Conflicting loads with stores in more than one code path
							break_putllc16(27, atomic16->set_invalid_ls(existing.ls_write || atomic16->ls_write));

							if (!atomic16->active)
							{
								existing.active = false;
							}
						}

						if (existing.active && (existing.ls_write || atomic16->ls_write) && (existing.ls_invalid || atomic16->ls_invalid))
						{
							// Conflicting loads with stores in more than one code path
							break_putllc16(33, atomic16->discard());
							existing.active = false;
							existing.ls_invalid = true;
						}

						if (existing.active && !existing.ls_access && atomic16->ls_access)
						{
							// Propagate LS access
							existing.ls = atomic16->ls;
							existing.reg = atomic16->reg;
							existing.reg2 = atomic16->reg2;
							existing.ls_offs = atomic16->ls_offs;
						}

						existing.ls_write |= atomic16->ls_write;
						existing.ls_invalid |= atomic16->ls_invalid;
						existing.ls_access |= atomic16->ls_access;
						existing.mem_count = std::max<u32>(existing.mem_count, atomic16->mem_count);
						existing.select_16_or_0_at_runtime |= atomic16->select_16_or_0_at_runtime;
					}

					atomic16->discard();
				}
				else if (!atomic16->get_rdatomic)
				{
					atomic16->get_rdatomic = true;

					// Go above and beyond and also set the constant for it
					set_const_value(op.rt, MFC_GETLLAR_SUCCESS);
					invalidate = false;
				}
			}

			break;
		}
		// Let's be safe here and not allow multi-threaded communications
		case SPU_WrOutMbox:
		case SPU_WrOutIntrMbox:
		case SPU_RdSigNotify1:
		case SPU_RdSigNotify2:
		case SPU_RdInMbox:
		//case SPU_RdEventStat:
		{
			if (is_read)
			{
				break_putllc16(28, atomic16->discard());
			}
			else
			{
				break_putllc16(29, atomic16->discard());
			}

			if (!is_pattern_match || is_read)
			{
				//
			}
			else if (!rchcnt_loop->active && it == rchcnt_loop_all.end())
			{
				rchcnt_loop->read_pc = pos;
				rchcnt_loop->channel = op.ra;
				rchcnt_loop->active = true;

				unconst(op.rt, pos);
				rchcnt_loop->ch_state = vregs[op.rt];
				invalidate = false;
			}
			else if (rchcnt_loop->active)
			{
				// Success
				rchcnt_loop->active = false;

				if (it == rchcnt_loop_all.end())
				{
					rchcnt_loop_all.emplace(pos, *rchcnt_loop);
				}
			}

			break;
		}
		default:
		{
			break;
		}
		}

		if (invalidate)
		{
			unconst(op.rt, pos);
		}

		break;
	}
	case spu_itype::STQR:
	case spu_itype::LQR:
	{
		const bool is_store = type == spu_itype::STQR;

		if (atomic16->active)
		{
			atomic16->mem_count++;

			// Do not clear lower 16 bytes addressing because the program can move on a 4-byte basis
			const u32 offs = spu_branch_target(pos - result.lower_bound, op.si16);

			if (atomic16->lsa.is_const() && [&]()
			{
				bool hack = false;

				if (offs % 16 == 0 && (pos - result.lower_bound + op.si16 * 4) == offs)
				{
					const u32 reservation_bound = (atomic16->lsa.value | 127);
					const u32 min_offs = offs;

					// Hack: assume there is no overflow in the relative instruction offset
					// Thus, use instruction position + offset as a lower bound for reservation access
					if (min_offs > reservation_bound)
					{
						spu_log.success("STQR/LQR Atomic Loop Hack: abs_pos=0x%x, abs=0x%x, i16*4=0x%x, ls_bound=0x%x", offs, pos + op.si16 * 4, op.si16 * 4, reservation_bound);
						hack = true;
					}
				}

				return hack;
			}())
			{
				// Ignore memory access in this case
			}
			else if (atomic16->ls_invalid && is_store)
			{
				break_putllc16(35, atomic16->set_invalid_ls(is_store));
			}
			else if (atomic16->ls_access && atomic16->ls != start_program_count)
			{
				break_putllc16(7, atomic16->set_invalid_ls(is_store));
			}
			else if (atomic16->ls_access && offs != atomic16->ls_offs)
			{
				if (atomic16->ls_offs.compare_with_mask_indifference(offs, SPU_LS_MASK_1))
				{
					atomic16->ls_write |= is_store;
				}
				else
				{
					// Sad
					break_putllc16(8, atomic16->set_invalid_ls(is_store));
				}
			}
			else
			{
				atomic16->ls = start_program_count;
				atomic16->ls_offs = reg_state_t::from_value(offs);
				atomic16->ls_pc_rel = true;
				atomic16->ls_write |= is_store;
				atomic16->ls_access = true;
			}

			// atomic16->ls_reg[offs % 128 / 16] = start_program_count;
			// atomic16->ls_offs[offs % 128 / 16] = offs;
		}

		if (is_store)
		{
			break;
		}

		// Unconst
		unconst(op.rt, pos);
		break;
	}
	case spu_itype::STQX:
	case spu_itype::LQX:
	{
		const bool is_store = type == spu_itype::STQX;

		if (atomic16->active)
		{
			atomic16->mem_count++;

			auto ra = get_reg(op.ra);
			ra.value &= SPU_LS_MASK_1;
			auto rb = get_reg(op.rb);
			rb.value &= SPU_LS_MASK_1;

			const u32 const_flags = u32{ra.is_const()} + u32{rb.is_const()};

			switch (const_flags)
			{
			case 2:
			{
				auto add_res = ra;
				add_res.value += rb.value;
				add_res.value &= SPU_LS_MASK_16;
				add_res.tag = umax;

				if (atomic16->lsa.unequal_with_mask_indifference(add_res, SPU_LS_MASK_128))
				{
					// Unrelated, ignore
				}
				else if (atomic16->ls_invalid && is_store)
				{
					break_putllc16(20, atomic16->set_invalid_ls(is_store));
				}
				else if (atomic16->ls_access && add_res != atomic16->ls)
				{
					if (atomic16->ls.unequal_with_mask_indifference(add_res, SPU_LS_MASK_128) && atomic16->ls_offs == 0)
					{
						// Ok
					}
					else if (atomic16->ls_pc_rel)
					{
						break_putllc16(8, atomic16->set_invalid_ls(is_store));
					}
					else
					{
						// Sad
						break_putllc16(9, atomic16->set_invalid_ls(is_store));
					}
				}
				else
				{
					atomic16->ls = reg_state_t::from_value(add_res.value);
					atomic16->ls_offs = reg_state_t::from_value(0);
					atomic16->ls_pc_rel = false;
					atomic16->ls_write |= is_store;
					atomic16->ls_access = true;
				}

				break;
			}
			case 1:
			{
				const auto& state = ra.is_const() ? rb : ra;
				const auto& _lsa = atomic16->lsa;
				const u32 offs = (ra.is_const() ? ra.value : rb.value) & SPU_LS_MASK_1;
				const u32 abs_diff = calculate_absolute_ls_difference(offs, 0);

				if ((_lsa.unequal_with_mask_indifference(state, SPU_LS_MASK_128) && offs == 0) ||
					(_lsa.compare_with_mask_indifference(state, SPU_LS_MASK_1) && abs_diff >= 128u) ||
					(_lsa.compare_with_mask_indifference(state, SPU_LS_MASK_128) && abs_diff >= 256u))
				{
					// We already know it's an unrelated load/store
					// The reason for the SPU_LS_SIZE - 128 check is that in case LSA is not aligned, it detects the possible wraparound
				}
				else if (atomic16->ls_invalid && is_store)
				{
					break_putllc16(23, atomic16->set_invalid_ls(is_store));
				}
				else if (atomic16->ls_access && atomic16->ls != state)
				{
					if (atomic16->ls.unequal_with_mask_indifference(state, SPU_LS_MASK_128) && offs == 0)
					{
						// Ok
					}
					else if (atomic16->ls_pc_rel)
					{
						break_putllc16(36, atomic16->set_invalid_ls(is_store));
					}
					else
					{
						// Sad
						break_putllc16(11, atomic16->set_invalid_ls(is_store));
					}
				}
				else if (atomic16->ls_access)
				{
					ensure(!atomic16->ls.is_const());

					if (atomic16->ls_offs.compare_with_mask_indifference(offs, SPU_LS_MASK_1))
					{
						// Ok
						atomic16->ls_write |= is_store;
					}
					else if (atomic16->ls_offs.is_const() && atomic16->ls_offs.value / 16 == offs / 16 && state.get_known_zeroes() % 16 >= std::max<u32>(offs % 16, atomic16->ls_offs.value % 16))
					{
						// For a special case observed in games (the offset cannot cause the address to roll over into the next 16 bytes)
						atomic16->ls_write |= is_store;
					}
					else
					{
						break_putllc16(12, atomic16->set_invalid_ls(is_store));
					}
				}
				else
				{
					atomic16->ls = state;
					atomic16->ls_offs = reg_state_t::from_value(offs);
					atomic16->ls_pc_rel = false;
					atomic16->ls_write |= is_store;
					atomic16->ls_access = true;
				}

				break;
			}
			case 0:
			{
				const bool is_ra_first = atomic16->ls_access ? ra == atomic16->ls : op.ra <= op.rb;

				const auto& state1 = is_ra_first ? ra : rb;
				const auto& state2 = is_ra_first ? rb : ra;

				if (atomic16->ls_access && (atomic16->ls != state1 || atomic16->ls_offs != state2))
				{
					if (atomic16->ls_pc_rel)
					{
						break_putllc16(32, atomic16->set_invalid_ls(is_store));
					}
					else
					{
						// Sad
						break_putllc16(13, atomic16->set_invalid_ls(is_store));
					}
				}
				else
				{
					atomic16->ls = state1;
					atomic16->ls_offs = state2;
					atomic16->ls_pc_rel = false;
					atomic16->ls_write |= is_store;
					atomic16->ls_access = true;
				}

				break;
			}
			default: fmt::throw_exception("Unreachable!");
			}
		}

		if (is_store)
		{
			break;
		}

		// Unconst
		unconst(op.rt, pos);
		break;
	}
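
	// Illustrative numbers for the known-zeroes special case above, assuming
	// get_known_zeroes() returns the mask of bits proven to be zero:
	// ls_offs = 0x108 (const) and offs = 0x100 share the slot 0x108/16 == 0x100/16 == 0x10;
	// if the variable operand's low 4 bits are proven zero, mask % 16 == 15 >= max(0, 8),
	// so the access cannot roll over into the next 16-byte slot and the store is accepted.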
	case spu_itype::STQA:
	case spu_itype::LQA:
	{
		const bool is_store = type == spu_itype::STQA;

		if (atomic16->active)
		{
			atomic16->mem_count++;

			const reg_state_t ca = reg_state_t::from_value(spu_ls_target(0, op.i16));

			if (atomic16->lsa.unequal_with_mask_indifference(ca, SPU_LS_MASK_128))
			{
				// We already know it's an unrelated load/store
			}
			else if (atomic16->ls_invalid && is_store)
			{
				break_putllc16(37, atomic16->set_invalid_ls(is_store));
			}
			else if (atomic16->ls_access && ca != atomic16->ls)
			{
				if (atomic16->ls.unequal_with_mask_indifference(ca, SPU_LS_MASK_128) && atomic16->ls_offs == 0)
				{
					// Ok
				}
				else if (atomic16->ls_pc_rel)
				{
					break_putllc16(14, atomic16->set_invalid_ls(is_store));
				}
				else
				{
					// Sad
					break_putllc16(15, atomic16->set_invalid_ls(is_store));
				}
			}
			else
			{
				atomic16->ls = ca;
				atomic16->ls_offs = reg_state_t::from_value(0);
				atomic16->ls_pc_rel = false;
				atomic16->ls_write |= is_store;
				atomic16->ls_access = true;
			}
		}

		if (is_store)
		{
			break;
		}

		// Unconst
		unconst(op.rt, pos);
		break;
	}
	case spu_itype::STQD:
	case spu_itype::LQD:
	{
		const bool is_store = type == spu_itype::STQD;

		if (atomic16->active)
		{
			atomic16->mem_count++;

			auto ra = get_reg(op.ra);
			const auto& _lsa = atomic16->lsa;

			ra.value = ra.is_const() ? spu_ls_target(ra.value, op.si10 * 4) : 0;
			const u32 offs = ra.is_const() ? 0 : spu_ls_target(0, op.si10 * 4);
			const u32 abs_diff = calculate_absolute_ls_difference(offs, 0);

			if ((_lsa.unequal_with_mask_indifference(ra, SPU_LS_MASK_128) && offs == 0) ||
				(_lsa.compare_with_mask_indifference(ra, SPU_LS_MASK_1) && abs_diff >= 128u) ||
				(_lsa.compare_with_mask_indifference(ra, SPU_LS_MASK_128) && abs_diff >= 256u))
			{
				// We already know it's an unrelated load/store
				// The reason for the SPU_LS_SIZE - 128 check is that in case LSA is not aligned, it detects the possible wraparound
			}
			else if (atomic16->ls_invalid && is_store)
			{
				break_putllc16(34, atomic16->set_invalid_ls(is_store));
			}
			else if (atomic16->ls_access && atomic16->ls != ra)
			{
				if (atomic16->ls.unequal_with_mask_indifference(ra, SPU_LS_MASK_128) && (offs == 0 && atomic16->ls_offs == 0))
				{
					// Ok
				}
				else if (atomic16->ls_pc_rel)
				{
					break_putllc16(16, atomic16->set_invalid_ls(is_store));
				}
				else
				{
					// Sad
					break_putllc16(17, atomic16->set_invalid_ls(is_store));
				}
			}
			else if (atomic16->ls_access)
			{
				if (atomic16->ls_offs.compare_with_mask_indifference(offs, SPU_LS_MASK_1))
				{
					atomic16->ls_write |= is_store;
				}
				else if (atomic16->ls_offs.is_const() && atomic16->ls_offs.value / 16 == offs / 16 && ra.get_known_zeroes() % 16 >= std::max<u32>(offs % 16, atomic16->ls_offs.value % 16))
				{
					// For a special case observed in games (the offset cannot cause the address to roll over into the next 16 bytes)
					atomic16->ls_write |= is_store;
				}
				else
				{
					break_putllc16(18, atomic16->set_invalid_ls(is_store));
				}
			}
			else
			{
				atomic16->ls = ra;
				atomic16->ls_offs = reg_state_t::from_value(offs);
				atomic16->ls_pc_rel = false;
				atomic16->ls_write |= is_store;
				atomic16->ls_access = true;
			}
		}

		if (type == spu_itype::STQD)
		{
			break;
		}

		// Unconst
		unconst(op.rt, pos);
		break;
	}
	case spu_itype::HBR:
	{
		hbr_loc = spu_branch_target(pos, op.roh << 7 | op.rt);
		const auto [af, av, at, ao, az, apc, ainst] = get_reg(op.ra);
		hbr_tg = af & vf::is_const && !op.c ? av & 0x3fffc : -1;
		break;
	}
	case spu_itype::HBRA:
	{
		hbr_loc = spu_branch_target(pos, op.r0h << 7 | op.rt);
		hbr_tg = spu_branch_target(0x0, op.i16);
		break;
	}
	case spu_itype::HBRR:
	{
		hbr_loc = spu_branch_target(pos, op.r0h << 7 | op.rt);
		hbr_tg = spu_branch_target(pos, op.i16);
		break;
	}
2019-05-05 15:28:41 +02:00
2024-04-26 19:39:19 +02:00
case spu_itype : : IL :
{
set_const_value ( op . rt , op . si16 ) ;
2019-05-05 15:28:41 +02:00
break ;
}
2024-04-26 19:39:19 +02:00
case spu_itype : : ILA :
2019-05-16 01:41:31 +02:00
{
2024-04-26 19:39:19 +02:00
set_const_value ( op . rt , op . i18 ) ;
2019-05-16 01:41:31 +02:00
break ;
}
2024-04-26 19:39:19 +02:00
case spu_itype : : ILH :
2019-05-05 15:28:41 +02:00
{
2024-04-26 19:39:19 +02:00
set_const_value ( op . rt , op . i16 < < 16 | op . i16 ) ;
break ;
}
case spu_itype : : ILHU :
{
set_const_value ( op . rt , op . i16 < < 16 ) ;
break ;
}
case spu_itype : : IOHL :
{
const auto rt = get_reg ( op . rt ) ;
inherit_const_mask_value ( op . rt , rt , op . i16 , 0 ) ;
break ;
}
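// Illustration (the standard SPU idiom for materializing a 32-bit constant):
//   ILHU rt, 0x1234  ->  rt = 0x12340000
//   IOHL rt, 0x5678  ->  rt = 0x12345678 (ORs the immediate into the low halfword)
// Tracking IOHL via inherit_const_mask_value() keeps rt known-const across the pair.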
case spu_itype::ORI:
{
	if (!op.si10)
	{
		move_reg(op.rt, op.ra);
		break;
	}

	const auto ra = get_reg(op.ra);
	inherit_const_mask_value(op.rt, ra, op.si10, 0);
	break;
}
case spu_itype::OR:
{
	if (op.ra == op.rb)
	{
		move_reg(op.rt, op.ra);
		break;
	}

	const auto ra = get_reg(op.ra);
	const auto rb = get_reg(op.rb);
	const auto [af, av, at, ao, az, apc, ainst] = ra;
	const auto [bf, bv, bt, bo, bz, bpc, binst] = rb;

	inherit_const_value(op.rt, ra, rb, av | bv, pos);
	break;
}
case spu_itype::XORI:
{
	if (!op.si10)
	{
		move_reg(op.rt, op.ra);
		break;
	}

	const auto ra = get_reg(op.ra);
	const auto [af, av, at, ao, az, apc, ainst] = ra;

	inherit_const_value(op.rt, ra, ra, av ^ op.si10, pos);
	break;
}
case spu_itype::XOR:
{
	if (op.ra == op.rb)
	{
		set_const_value(op.rt, 0);
		break;
	}

	const auto ra = get_reg(op.ra);
	const auto rb = get_reg(op.rb);
	const auto [af, av, at, ao, az, apc, ainst] = ra;
	const auto [bf, bv, bt, bo, bz, bpc, binst] = rb;

	inherit_const_value(op.rt, ra, rb, bv ^ av, pos);
	break;
}
case spu_itype::NOR:
{
	const auto ra = get_reg(op.ra);
	const auto rb = get_reg(op.rb);
	const auto [af, av, at, ao, az, apc, ainst] = ra;
	const auto [bf, bv, bt, bo, bz, bpc, binst] = rb;

	inherit_const_value(op.rt, ra, rb, ~(bv | av), pos);
	break;
}
case spu_itype::ANDI:
{
	const auto ra = get_reg(op.ra);
	inherit_const_mask_value(op.rt, ra, 0, ~op.si10);
	break;
}
case spu_itype::AND:
{
	if (op.ra == op.rb)
	{
		move_reg(op.rt, op.ra);
		break;
	}

	const auto ra = get_reg(op.ra);
	const auto rb = get_reg(op.rb);
	const auto [af, av, at, ao, az, apc, ainst] = ra;
	const auto [bf, bv, bt, bo, bz, bpc, binst] = rb;

	inherit_const_value(op.rt, ra, rb, bv & av, pos);
	break;
}
case spu_itype::AI:
{
	if (!op.si10)
	{
		move_reg(op.rt, op.ra);
		break;
	}

	const auto ra = get_reg(op.ra);
	const auto [af, av, at, ao, az, apc, ainst] = ra;

	inherit_const_value(op.rt, ra, ra, av + op.si10, pos);

	if (u32 mask = ra.get_known_zeroes() & ~op.si10; mask & 1)
	{
		// A trailing run of bits that are zero in both addends stays zero in the sum, which comes in handy later
		inherit_const_mask_value(op.rt, vregs[op.rt], 0, (1u << std::countr_one(mask)) - 1);
	}

	break;
}
case spu_itype::A:
{
	const auto ra = get_reg(op.ra);
	const auto rb = get_reg(op.rb);
	const auto [af, av, at, ao, az, apc, ainst] = ra;
	const auto [bf, bv, bt, bo, bz, bpc, binst] = rb;

	inherit_const_value(op.rt, ra, rb, bv + av, pos);

	if (u32 mask = ra.get_known_zeroes() & rb.get_known_zeroes(); mask & 1)
	{
		// A trailing run of bits that are zero in both addends stays zero in the sum, which comes in handy later
		inherit_const_mask_value(op.rt, vregs[op.rt], 0, (1u << std::countr_one(mask)) - 1);
	}

	break;
}
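// Worked example for the trailing-zero trick above: if both operands are known to
// be 16-byte aligned, the known-zeroes mask ends in 0b1111, std::countr_one(mask)
// is 4, and the sum is forced to keep its low 4 bits zero (e.g. 0x10 + 0x30 == 0x40):
// no carry can be generated below bit 4 when both addends are zero there.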
case spu_itype::SFI:
{
	const auto ra = get_reg(op.ra);
	const auto [af, av, at, ao, az, apc, ainst] = ra;

	inherit_const_value(op.rt, ra, ra, op.si10 - av, pos);
	break;
}
case spu_itype::SF:
{
	const auto ra = get_reg(op.ra);
	const auto rb = get_reg(op.rb);

	const auto [af, av, at, ao, az, apc, ainst] = ra;
	const auto [bf, bv, bt, bo, bz, bpc, binst] = rb;

	inherit_const_value(op.rt, ra, rb, bv - av, pos);

	if (u32 mask = ra.get_known_zeroes() & rb.get_known_zeroes(); mask & 1)
	{
		// A trailing run of bits that are zero in both operands stays zero in the difference, which comes in handy later
		inherit_const_mask_value(op.rt, vregs[op.rt], 0, (1u << std::countr_one(mask)) - 1);
	}

	break;
}
case spu_itype::FSMBI:
{
	const u32 mask = (op.i16 >> 12);

	const u32 value = (mask & 1 ? 0xff : 0) |
		(mask & 2 ? 0xff00 : 0) |
		(mask & 4 ? 0xff0000 : 0) |
		(mask & 8 ? 0xff000000u : 0);

	set_const_value(op.rt, value);
	break;
}
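// Example of the expansion above: op.i16 = 0x3000 gives mask = 0x3, so the two low
// bytes are filled and value = 0x0000ffff. Only the top 4 immediate bits matter
// here, each one selecting a whole byte of the preferred word.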
case spu_itype::ROTMI:
{
	if ((0 - op.i7) & 0x20)
	{
		set_const_value(op.rt, 0);
		break;
	}

	if (!op.i7)
	{
		move_reg(op.rt, op.ra);
		break;
	}

	const auto ra = get_reg(op.ra);
	const auto [af, av, at, ao, az, apc, ainst] = ra;

	inherit_const_value(op.rt, ra, ra, av >> ((0 - op.i7) & 0x1f), pos);
	break;
}
case spu_itype::SHLI:
{
	if (op.i7 & 0x20)
	{
		set_const_value(op.rt, 0);
		break;
	}

	if (!op.i7)
	{
		move_reg(op.rt, op.ra);
		break;
	}

	const auto ra = get_reg(op.ra);
	const auto [af, av, at, ao, az, apc, ainst] = ra;

	inherit_const_value(op.rt, ra, ra, av << (op.i7 & 0x1f), pos);
	break;
}
case spu_itype::SELB:
{
	const auto ra = get_reg(op.ra);
	const auto rb = get_reg(op.rb);

	// Ignore RC, perform a value merge which also respects bitwise information
	vregs[op.rt4] = ra.merge(rb, pos);
	break;
}
case spu_itype::CEQI:
{
	const auto ra = get_reg(op.ra);
	const auto [af, av, at, ao, az, apc, ainst] = ra;

	inherit_const_value(op.rt, ra, ra, av == op.si10 + 0u, pos);

	if (rchcnt_loop->active)
	{
		if (ra.is_instruction && ra.origin == rchcnt_loop->ch_state.origin)
		{
			if (op.si10 != 0 && op.si10 != 1)
			{
				break_channel_pattern(55, rchcnt_loop->discard());
				break;
			}

			rchcnt_loop->ch_product = vregs[op.rt];
			rchcnt_loop->product_test_negate = op.si10 == 1;
		}
	}

	break;
}
case spu_itype::SHLQBYI:
{
	if (op.i7 & 0x10)
	{
		set_const_value(op.rt, 0);
		break;
	}

	if (!op.i7)
	{
		move_reg(op.rt, op.ra);
		break;
	}

	[[fallthrough]];
}
default:
{
	// Make an unknown value
	if (!(type & spu_itype::zregmod))
	{
		const u32 op_rt = type & spu_itype::_quadrop ? +op.rt4 : +op.rt;

		u32 ra = s_reg_max, rb = s_reg_max, rc = s_reg_max;

		if (m_use_ra.test(pos / 4))
		{
			ra = op.ra;
		}

		if (m_use_rb.test(pos / 4))
		{
			rb = op.rb;
		}

		if (type & spu_itype::_quadrop && m_use_rc.test(pos / 4))
		{
			rc = op.rc;
		}

		u32 reg_pos = SPU_LS_SIZE;

		for (u32 reg : {ra, rb, rc})
		{
			if (reg != s_reg_max)
			{
				if (reg_pos == SPU_LS_SIZE)
				{
					// Adopt the origin of the first used source register
					reg_pos = vregs[reg].origin;
				}
				else if (reg_pos != vregs[reg].origin)
				{
					const u32 block_start = reg_state_it[wi].pc;

					// if (vregs[reg].origin >= block_start && vregs[reg].origin <= pos)
					// {
					// 	reg_pos = std::max<u32>(vregs[reg].origin, reg_pos);
					// }

					reg_pos = block_start;
					break;
				}
			}
		}

		unconst(op_rt, reg_pos == SPU_LS_SIZE ? pos : reg_pos);

		if (rchcnt_loop->active)
		{
			if (rchcnt_loop->origins.find_first_of(vregs[op_rt].origin) == umax)
			{
				rchcnt_loop->origins.push_back(vregs[op_rt].origin);
			}
		}
	}

	break;
}
}

if (m_targets.count(pos))
{
	for (u32 next_target : ::at32(m_targets, pos))
	{
		add_block(next_target);
	}

	next_block();
}
}
std::string func_hash;

if (!result.data.empty())
{
	sha1_context ctx;
	u8 output[20]{};

	sha1_starts(&ctx);
	sha1_update(&ctx, reinterpret_cast<const u8*>(result.data.data()), result.data.size() * 4);
	sha1_finish(&ctx, output);
	fmt::append(func_hash, "%s", fmt::base57(output));
}
for (const auto& [pc_commited, pattern] : atomic16_all)
{
	if (!pattern.active)
	{
		continue;
	}

	if (getllar_starts.contains(pattern.lsa_pc) && getllar_starts[pattern.lsa_pc])
	{
		continue;
	}

	auto& stats = g_fxo->get<putllc16_statistics_t>();
	had_putllc_evaluation = true;

	if (!pattern.ls_write)
	{
		spu_log.success("PUTLLC0 Pattern Detected! (put_pc=0x%x, %s) (putllc0=%d, putllc16+0=%d, all=%d)", pattern.put_pc, func_hash, ++stats.nowrite, ++stats.single, +stats.all);
		add_pattern(false, inst_attr::putllc0, pattern.put_pc - lsa);
		continue;
	}

	union putllc16_info
	{
		u32 data;
		bf_t<u32, 30, 2> type;
		bf_t<u32, 29, 1> runtime16_select;
		bf_t<u32, 28, 1> no_notify;
		bf_t<u32, 18, 8> reg;
		bf_t<u32, 0, 18> off18;
		bf_t<u32, 0, 8> reg2;
	} value{};

	enum : u32
	{
		v_const = 0,
		v_relative = 1,
		v_reg_offs = 2,
		v_reg2 = 3,
	};
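	// How the 32-bit pattern descriptor is laid out (reading the union above):
	// bits 30..31 hold one of the four v_* variants, bit 29 the runtime 16-or-0
	// selection flag, bit 28 the no-notify flag; the low bits hold either an
	// 18-bit LS offset (v_const/v_relative/v_reg_offs, 18 bits cover the 256 KiB
	// LS) or a second register index in the low 8 bits (v_reg2), with the base
	// register index in bits 18..25.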
	for (auto it = infos.lower_bound(utils::sub_saturate<u32>(pattern.put_pc, 512)); it != infos.end() && it->first < pattern.put_pc + 512; it++)
	{
		for (auto& state : it->second->end_reg_state)
		{
			if (state.is_const() && (state.value & -0x20) == (CELL_SYNC_ERROR_ALIGN & -0x20))
			{
				// Do not notify if it is a cellSync function
				value.no_notify = 1;
				spu_log.success("Detected cellSync function at 0x%x, disabling reservation notification.", pattern.put_pc);
				break;
			}
		}

		if (value.no_notify)
		{
			break;
		}
	}

	value.runtime16_select = pattern.select_16_or_0_at_runtime;
	value.reg = s_reg_max;

	if (pattern.ls.is_const())
	{
		ensure(pattern.reg == s_reg_max && pattern.reg2 == s_reg_max && pattern.ls_offs.is_const(), "Unexpected register usage");
		value.type = v_const;
		value.off18 = pattern.ls.value & SPU_LS_MASK_1;
	}
	else if (pattern.ls == start_program_count)
	{
		ensure(pattern.ls_offs.is_const(), "Unexpected register2 usage");
		value.type = v_relative;
		value.off18 = pattern.ls_offs.value & SPU_LS_MASK_1;
	}
	else if (pattern.ls_offs.is_const())
	{
		ensure(pattern.reg != s_reg_max, "Register usage not found");
		value.type = v_reg_offs;
		value.reg = pattern.reg;
		value.off18 = pattern.ls_offs.value;
	}
	else
	{
		ensure(pattern.reg != s_reg_max, "Register usage not found");
		ensure(pattern.reg2 != s_reg_max, "Register2 usage not found");
		value.type = v_reg2;
		value.reg = pattern.reg;
		value.reg2 = pattern.reg2;
	}

	if (g_cfg.core.spu_accurate_reservations)
	{
		// Skip the optimization: it is a hack in itself, as it turns out, and conflicts with accurate reservations
		continue;
	}

	add_pattern(false, inst_attr::putllc16, pattern.put_pc - result.entry_point, value.data);

	spu_log.success("PUTLLC16 Pattern Detected! (mem_count=%d, put_pc=0x%x, pc_rel=%d, offset=0x%x, const=%u, two_regs=%d, reg=%u, runtime=%d, 0x%x-%s) (putllc0=%d, putllc16+0=%d, all=%d)"
		, pattern.mem_count, pattern.put_pc, value.type == v_relative, value.off18, value.type == v_const, value.type == v_reg2, value.reg, value.runtime16_select, entry_point, func_hash, +stats.nowrite, ++stats.single, +stats.all);
}
for (const auto& [read_pc, pattern] : rchcnt_loop_all)
{
	if (pattern.failed || pattern.read_pc == SPU_LS_SIZE)
	{
		continue;
	}

	if (pattern.active)
	{
		spu_log.error("Channel loop error! (get_pc=0x%x, 0x%x-%s)", read_pc, entry_point, func_hash);
		continue;
	}

	if (inst_attr attr = m_inst_attrs[(read_pc - entry_point) / 4]; attr == inst_attr::none)
	{
		add_pattern(false, inst_attr::rchcnt_loop, read_pc - result.entry_point);

		spu_log.error("Channel Loop Pattern Detected! Report to developers! (read_pc=0x%x, branch_pc=0x%x, branch_target=0x%x, 0x%x-%s)", read_pc, pattern.branch_pc, pattern.branch_target, entry_point, func_hash);
	}
}

if (likely_putllc_loop && !had_putllc_evaluation)
{
	spu_log.notice("Likely missed PUTLLC16 patterns. (entry=0x%x)", entry_point);
}

if (result.data.empty())
{
	// Blocks starting at 0x0 or at an invalid instruction won't be compiled; they may need a special interpreter fallback
}

return result;
}
void spu_recompiler_base::dump(const spu_program& result, std::string& out)
{
	SPUDisAsm dis_asm(cpu_disasm_mode::dump, reinterpret_cast<const u8*>(result.data.data()), result.lower_bound);

	std::string hash;

	if (!result.data.empty())
	{
		sha1_context ctx;
		u8 output[20];

		sha1_starts(&ctx);
		sha1_update(&ctx, reinterpret_cast<const u8*>(result.data.data()), result.data.size() * 4);
		sha1_finish(&ctx, output);
		fmt::append(hash, "%s", fmt::base57(output));
	}
	else
	{
		hash = "N/A";
	}

	fmt::append(out, "========== SPU BLOCK 0x%05x (size %u, %s) ==========\n\n", result.entry_point, result.data.size(), hash);

	for (auto& bb : m_bbs)
	{
		for (u32 pos = bb.first, end = bb.first + bb.second.size * 4; pos < end; pos += 4)
		{
			dis_asm.disasm(pos);

			if (!dis_asm.last_opcode.ends_with('\n'))
			{
				dis_asm.last_opcode += '\n';
			}

			fmt::append(out, ">%s", dis_asm.last_opcode);
		}

		out += '\n';

		if (m_block_info[bb.first / 4])
		{
			fmt::append(out, "A: [0x%05x] %s\n", bb.first, m_entry_info[bb.first / 4] ? (m_ret_info[bb.first / 4] ? "Chunk" : "Entry") : "Block");
			fmt::append(out, "\tF: 0x%05x\n", bb.second.func);

			for (u32 pred : bb.second.preds)
			{
				fmt::append(out, "\t<- 0x%05x\n", pred);
			}

			for (u32 target : bb.second.targets)
			{
				fmt::append(out, "\t-> 0x%05x%s\n", target, m_bbs.count(target) ? "" : " (null)");
			}
		}
		else
		{
			fmt::append(out, "A: [0x%05x] ?\n", bb.first);
		}

		out += '\n';
	}

	for (auto& f : m_funcs)
	{
		fmt::append(out, "F: [0x%05x]%s\n", f.first, f.second.good ? " (good)" : " (bad)");
		fmt::append(out, "\tN: 0x%05x\n", f.second.size * 4 + f.first);

		for (u32 call : f.second.calls)
		{
			fmt::append(out, "\t>> 0x%05x%s\n", call, m_funcs.count(call) ? "" : " (null)");
		}
	}

	out += '\n';
}
struct spu_llvm_worker
{
	lf_queue<std::pair<u64, const spu_program*>> registered;

	void operator()()
	{
		// SPU LLVM Recompiler instance
		std::unique_ptr<spu_recompiler_base> compiler;

		// Fake LS
		std::vector<be_t<u32>> ls;

		bool set_relax_flag = false;

		for (auto slice = registered.pop_all();; [&]
		{
			if (slice)
			{
				slice.pop_front();
			}

			if (slice || thread_ctrl::state() == thread_state::aborting)
			{
				return;
			}

			if (set_relax_flag)
			{
				spu_thread::g_spu_work_count--;
				set_relax_flag = false;
			}

			thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&registered)[1], 0);
			slice = registered.pop_all();
		}())
		{
			auto* prog = slice.get();

			if (thread_ctrl::state() == thread_state::aborting)
			{
				break;
			}

			if (!prog)
			{
				continue;
			}

			if (!prog->second)
			{
				break;
			}

			if (!compiler)
			{
				// Postponed initialization
				compiler = spu_recompiler_base::make_llvm_recompiler();
				compiler->init();

				ls.resize(SPU_LS_SIZE / sizeof(be_t<u32>));
			}

			if (!set_relax_flag)
			{
				spu_thread::g_spu_work_count++;
				set_relax_flag = true;
			}

			const auto& func = *prog->second;

			// Get data start
			const u32 start = func.lower_bound;
			const u32 size0 = ::size32(func.data);

			// Initialize LS with function data only
			for (u32 i = 0, pos = start; i < size0; i++, pos += 4)
			{
				ls[pos / 4] = std::bit_cast<be_t<u32>>(func.data[i]);
			}

			// Call analyser
			spu_program func2 = compiler->analyse(ls.data(), func.entry_point);

			if (func2 != func)
			{
				spu_log.error("[0x%05x] SPU Analyser failed, %u vs %u", func2.entry_point, func2.data.size(), size0);
			}
			else if (const auto target = compiler->compile(std::move(func2)))
			{
				// Redirect the old function (TODO: patch in multiple places)
				const s64 rel = reinterpret_cast<u64>(target) - prog->first - 5;
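				// The 8-byte patch below is "jmp rel32" (0xE9 plus a 4-byte displacement,
				// measured from the end of the 5-byte jump, hence the -5 above) padded with
				// three NOPs; it is published with one atomic 64-bit release store so a
				// concurrent executor never observes a half-written instruction.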
				union
				{
					u8 bytes[8];
					u64 result;
				};

				bytes[0] = 0xe9; // jmp rel32
				std::memcpy(bytes + 1, &rel, 4);
				bytes[5] = 0x90;
				bytes[6] = 0x90;
				bytes[7] = 0x90;

				atomic_storage<u64>::release(*reinterpret_cast<u64*>(prog->first), result);
			}
			else
			{
				spu_log.fatal("[0x%05x] Compilation failed.", func.entry_point);
				break;
			}

			// Clear fake LS
			std::memset(ls.data() + start / 4, 0, 4 * (size0 - 1));
		}

		if (set_relax_flag)
		{
			spu_thread::g_spu_work_count--;
			set_relax_flag = false;
		}
	}
};
// SPU LLVM recompiler thread context
struct spu_llvm
{
	// Workload
	lf_queue<std::pair<const u64, spu_item*>> registered;

	atomic_ptr<named_thread_group<spu_llvm_worker>> m_workers;

	spu_llvm()
	{
		// Dependency
		g_fxo->init<spu_cache>();
	}

	void operator()()
	{
		if (g_cfg.core.spu_decoder != spu_decoder_type::llvm)
		{
			return;
		}

		while (!registered && thread_ctrl::state() != thread_state::aborting)
		{
			// Wait for the first SPU block before launching any thread
			thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&registered)[1], 0);
		}

		if (thread_ctrl::state() == thread_state::aborting)
		{
			return;
		}

		// To compile (hash -> item)
		std::unordered_multimap<u64, spu_item*, value_hash<u64>> enqueued;

		// Mini-profiler (hash -> number of occurrences)
		std::unordered_map<u64, atomic_t<u64>, value_hash<u64>> samples;

		// For synchronization with the profiler thread
		stx::init_mutex prof_mutex;

		named_thread profiler("SPU LLVM Profiler"sv, [&]()
		{
			while (thread_ctrl::state() != thread_state::aborting)
			{
				{
					// Lock if enabled
					const auto lock = prof_mutex.access();

					if (!lock)
					{
						// Wait when the profiler is disabled
						prof_mutex.wait_for_initialized();
						continue;
					}

					// Collect profiling samples
					idm::select<named_thread<spu_thread>>([&](u32 /*id*/, spu_thread& spu)
					{
						const u64 name = atomic_storage<u64>::load(spu.block_hash);

						if (auto state = +spu.state; !::is_paused(state) && !::is_stopped(state) && cpu_flag::wait - state)
						{
							const auto found = std::as_const(samples).find(name);

							if (found != std::as_const(samples).end())
							{
								const_cast<atomic_t<u64>&>(found->second)++;
							}
						}
					});
				}

				// Sleep for a short period if enabled
				thread_ctrl::wait_for(20, false);
			}
		});

		u32 worker_count = 1;

		if (uint hc = utils::get_thread_count(); hc >= 12)
		{
			worker_count = hc - 12 + 3;
		}
		else if (hc >= 6)
		{
			worker_count = 2;
		}
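		// Example sizing: 16 hardware threads give 16 - 12 + 3 = 7 workers,
		// 8 threads give 2, and anything below 6 keeps the single default worker.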
		u32 worker_index = 0;
		u32 notify_compile_count = 0;
		u32 compile_pending = 0;
		std::vector<u8> notify_compile(worker_count);

		m_workers = make_single<named_thread_group<spu_llvm_worker>>("SPUW.", worker_count);
		auto workers_ptr = m_workers.load();
		auto& workers = *workers_ptr;

		while (thread_ctrl::state() != thread_state::aborting)
		{
			for (const auto& pair : registered.pop_all())
			{
				enqueued.emplace(pair);

				// Interrupt and kick the profiler thread
				const auto lock = prof_mutex.init_always([&]{});

				// Register new blocks to collect samples
				samples.emplace(pair.first, 0);
			}

			if (enqueued.empty())
			{
				// Send pending notifications
				if (notify_compile_count)
				{
					for (usz i = 0; i < worker_count; i++)
					{
						if (notify_compile[i])
						{
							(workers.begin() + i)->registered.notify();
						}
					}
				}

				// Interrupt the profiler thread and put it to sleep
				static_cast<void>(prof_mutex.reset());
				thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&registered)[1], 0);
				std::fill(notify_compile.begin(), notify_compile.end(), 0); // Reset notification flags
				notify_compile_count = 0;
				compile_pending = 0;
				continue;
			}

			// Find the most used enqueued item
			u64 sample_max = 0;
			auto found_it = enqueued.begin();

			for (auto it = enqueued.begin(), end = enqueued.end(); it != end; ++it)
			{
				const u64 cur = ::at32(std::as_const(samples), it->first);

				if (cur > sample_max)
				{
					sample_max = cur;
					found_it = it;
				}
			}

			// Start compiling
			const spu_program& func = found_it->second->data;

			// Old function pointer (pre-recompiled)
			const spu_function_t _old = found_it->second->compiled;

			// Remove the item from the queue
			enqueued.erase(found_it);

			// Prefer using an inactive thread
			for (usz i = 0; i < worker_count && !!(workers.begin() + (worker_index % worker_count))->registered; i++)
			{
				worker_index++;
			}

			// Push the workload
			const bool notify = (workers.begin() + (worker_index % worker_count))->registered.template push<false>(reinterpret_cast<u64>(_old), &func);

			if (notify && !notify_compile[worker_index % worker_count])
			{
				notify_compile[worker_index % worker_count] = 1;
				notify_compile_count++;
			}

			compile_pending++;

			// Notify all workers before the queue runs out if there is considerable excess.
			// With many workers this acts soon; with only a few workers it postpones
			// notifications until somewhat more workload has accumulated.
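			// Worked example (assuming utils::aligned_div rounds up): with 2 workers the
			// threshold is min(7, ceil(2*2/3) + 2) = 4 pending jobs; from about 8 workers
			// upwards it saturates at the cap of 7.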
			if (notify_compile_count && std::min<u32>(7, utils::aligned_div<u32>(worker_count * 2, 3) + 2) <= compile_pending)
			{
				for (usz i = 0; i < worker_count; i++)
				{
					if (notify_compile[i])
					{
						(workers.begin() + i)->registered.notify();
					}
				}

				std::fill(notify_compile.begin(), notify_compile.end(), 0); // Reset notification flags
				notify_compile_count = 0;
				compile_pending = 0;
			}

			worker_index++;
		}

		static_cast<void>(prof_mutex.init_always([&]{ samples.clear(); }));

		m_workers.reset();

		for (u32 i = 0; i < worker_count; i++)
		{
			(workers.begin() + i)->operator=(thread_state::aborting);
		}
	}

	spu_llvm& operator=(thread_state)
	{
		if (const auto workers = m_workers.load())
		{
			for (u32 i = 0; i < workers->size(); i++)
			{
				(workers->begin() + i)->operator=(thread_state::aborting);
			}
		}

		return *this;
	}

	static constexpr auto thread_name = "SPU LLVM"sv;
};
using spu_llvm_thread = named_thread<spu_llvm>;

struct spu_fast : public spu_recompiler_base
{
	virtual void init() override
	{
		if (!m_spurt)
		{
			m_spurt = &g_fxo->get<spu_runtime>();
		}
	}

	virtual spu_function_t compile(spu_program&& _func) override
	{
#ifndef ARCH_X64
		fmt::throw_exception("Fast LLVM recompiler is unimplemented for architectures other than X86-64");
#endif
		const auto add_loc = m_spurt->add_empty(std::move(_func));

		if (!add_loc)
		{
			return nullptr;
		}

		if (add_loc->compiled)
		{
			return add_loc->compiled;
		}

		const spu_program& func = add_loc->data;

		if (g_cfg.core.spu_debug && !add_loc->logged.exchange(1))
		{
			std::string log;
			this->dump(func, log);
			fs::write_file(m_spurt->get_cache_path() + "spu.log", fs::create + fs::write + fs::append, log);
		}

		// Allocate an executable area of the necessary size
		const auto result = jit_runtime::alloc(22 + 1 + 9 + ::size32(func.data) * (16 + 16) + 36 + 47, 16);
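		// Size estimate, derived from the emission below: 22 bytes of patchable
		// prologue (8-byte long NOP + 10-byte mov rax,imm64 + 4-byte store), 1 byte
		// reserved for the optional trap, 9 bytes to load the PC and LS base, then
		// 16 bytes of verification plus a fixed 16-byte slot per SPU instruction,
		// 36 bytes of secondary prologue and 47 bytes of dispatcher/epilogue.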
		if (!result)
		{
			return nullptr;
		}

		m_pos = func.lower_bound;
		m_size = ::size32(func.data) * 4;

		{
			sha1_context ctx;
			u8 output[20];

			sha1_starts(&ctx);
			sha1_update(&ctx, reinterpret_cast<const u8*>(func.data.data()), func.data.size() * 4);
			sha1_finish(&ctx, output);

			be_t<u64> hash_start;
			std::memcpy(&hash_start, output, sizeof(hash_start));
			m_hash_start = hash_start;
		}

		u8* raw = result;

		// 8-byte instruction for patching (long NOP)
		*raw++ = 0x0f;
		*raw++ = 0x1f;
		*raw++ = 0x84;
		*raw++ = 0;
		*raw++ = 0;
		*raw++ = 0;
		*raw++ = 0;
		*raw++ = 0;

		// mov rax, m_hash_start
		*raw++ = 0x48;
		*raw++ = 0xb8;
		std::memcpy(raw, &m_hash_start, sizeof(m_hash_start));
		raw += 8;

		// Update block_hash: mov [r13 + spu_thread::m_block_hash], rax
		*raw++ = 0x49;
		*raw++ = 0x89;
		*raw++ = 0x45;
		*raw++ = ::narrow<s8>(::offset32(&spu_thread::block_hash));

		// Load PC: mov eax, [r13 + spu_thread::pc]
		*raw++ = 0x41;
		*raw++ = 0x8b;
		*raw++ = 0x45;
		*raw++ = ::narrow<s8>(::offset32(&spu_thread::pc));

		// Get LS address starting from PC: lea rcx, [rbp + rax]
		*raw++ = 0x48;
		*raw++ = 0x8d;
		*raw++ = 0x4c;
		*raw++ = 0x05;
		*raw++ = 0x00;

		// Verification (slow)
		for (u32 i = 0; i < func.data.size(); i++)
		{
			if (!func.data[i])
			{
				continue;
			}

			// cmp dword ptr [rcx + off], opc
			*raw++ = 0x81;
			*raw++ = 0xb9;
			const u32 off = i * 4;
			const u32 opc = func.data[i];
			std::memcpy(raw + 0, &off, 4);
			std::memcpy(raw + 4, &opc, 4);
			raw += 8;

			// jne tr_dispatch
			const s64 rel = reinterpret_cast<u64>(spu_runtime::tr_dispatch) - reinterpret_cast<u64>(raw) - 6;
			*raw++ = 0x0f;
			*raw++ = 0x85;
			std::memcpy(raw + 0, &rel, 4);
			raw += 4;
		}
		// trap
		//*raw++ = 0xcc;

		// Secondary prologue: sub rsp,0x28
		*raw++ = 0x48;
		*raw++ = 0x83;
		*raw++ = 0xec;
		*raw++ = 0x28;

		// Fix args: xchg r13,rbp
		*raw++ = 0x49;
		*raw++ = 0x87;
		*raw++ = 0xed;

		// mov r12d, eax
		*raw++ = 0x41;
		*raw++ = 0x89;
		*raw++ = 0xc4;

		// mov esi, 0x7f0
		*raw++ = 0xbe;
		*raw++ = 0xf0;
		*raw++ = 0x07;
		*raw++ = 0x00;
		*raw++ = 0x00;

		// lea rdi, [rbp + spu_thread::gpr]
		*raw++ = 0x48;
		*raw++ = 0x8d;
		*raw++ = 0x7d;
		*raw++ = ::narrow<s8>(::offset32(&spu_thread::gpr));

		// Save base pc: mov [rbp + spu_thread::base_pc], eax
		*raw++ = 0x89;
		*raw++ = 0x45;
		*raw++ = ::narrow<s8>(::offset32(&spu_thread::base_pc));

		// inc block_counter
		*raw++ = 0x48;
		*raw++ = 0xff;
		*raw++ = 0x85;
		const u32 blc_off = ::offset32(&spu_thread::block_counter);
		std::memcpy(raw, &blc_off, 4);
		raw += 4;

		// lea r14, [local epilogue]
		*raw++ = 0x4c;
		*raw++ = 0x8d;
		*raw++ = 0x35;
		const u32 epi_off = ::size32(func.data) * 16;
		std::memcpy(raw, &epi_off, 4);
		raw += 4;

		// Instructions (each instruction occupies a fixed number of bytes)
		for (u32 i = 0; i < func.data.size(); i++)
		{
			const u32 pos = m_pos + i * 4;

			if (!func.data[i])
			{
				// Save pc: mov [rbp + spu_thread::pc], r12d
				*raw++ = 0x44;
				*raw++ = 0x89;
				*raw++ = 0x65;
				*raw++ = ::narrow<s8>(::offset32(&spu_thread::pc));

				// Epilogue: add rsp,0x28
				*raw++ = 0x48;
				*raw++ = 0x83;
				*raw++ = 0xc4;
				*raw++ = 0x28;

				// ret (TODO)
				*raw++ = 0xc3;
				std::memset(raw, 0xcc, 16 - 9);
				raw += 16 - 9;
				continue;
			}

			// Fix endianness
			const spu_opcode_t op{std::bit_cast<be_t<u32>>(func.data[i])};

			switch (auto type = g_spu_itype.decode(op.opcode))
			{
			case spu_itype::BRZ:
			case spu_itype::BRHZ:
			case spu_itype::BRNZ:
			case spu_itype::BRHNZ:
			{
				const u32 target = spu_branch_target(pos, op.i16);

				if (0 && target >= m_pos && target < m_pos + m_size)
				{
					*raw++ = type == spu_itype::BRHZ || type == spu_itype::BRHNZ ? 0x66 : 0x90;
					*raw++ = 0x83;
					*raw++ = 0xbd;
					const u32 off = ::offset32(&spu_thread::gpr, op.rt) + 12;
					std::memcpy(raw, &off, 4);
					raw += 4;
					*raw++ = 0x00;
					*raw++ = 0x0f;
					*raw++ = type == spu_itype::BRZ || type == spu_itype::BRHZ ? 0x84 : 0x85;
					const u32 dif = (target - (pos + 4)) / 4 * 16 + 2;
					std::memcpy(raw, &dif, 4);
					raw += 4;
					*raw++ = 0x66;
					*raw++ = 0x90;
					break;
				}

				[[fallthrough]];
			}
			default:
			{
				// Ballast: mov r15d, pos
				*raw++ = 0x41;
				*raw++ = 0xbf;
				std::memcpy(raw, &pos, 4);
				raw += 4;

				// mov ebx, opc
				*raw++ = 0xbb;
				std::memcpy(raw, &op, 4);
				raw += 4;

				// call spu_* (specially built interpreter function)
				const s64 rel = spu_runtime::g_interpreter_table[static_cast<usz>(type)] - reinterpret_cast<u64>(raw) - 5;
				*raw++ = 0xe8;
				std::memcpy(raw, &rel, 4);
				raw += 4;
				break;
			}
			}
		}
		// Local dispatcher/epilogue: fix stack after a branch instruction, then dispatch or return

		// add rsp, 8
		*raw++ = 0x48;
		*raw++ = 0x83;
		*raw++ = 0xc4;
		*raw++ = 0x08;

		// and rsp, -16
		*raw++ = 0x48;
		*raw++ = 0x83;
		*raw++ = 0xe4;
		*raw++ = 0xf0;

		// lea rax, [r12 - size]
		*raw++ = 0x49;
		*raw++ = 0x8d;
		*raw++ = 0x84;
		*raw++ = 0x24;
		const u32 msz = 0u - m_size;
		std::memcpy(raw, &msz, 4);
		raw += 4;

		// sub eax, [rbp + spu_thread::base_pc]
		*raw++ = 0x2b;
		*raw++ = 0x45;
		*raw++ = ::narrow<s8>(::offset32(&spu_thread::base_pc));

		// cmp eax, (0 - size)
		*raw++ = 0x3d;
		std::memcpy(raw, &msz, 4);
		raw += 4;

		// jb epilogue
		*raw++ = 0x72;
		*raw++ = +12;

		// movsxd rax, eax
		*raw++ = 0x48;
		*raw++ = 0x63;
		*raw++ = 0xc0;

		// shl rax, 2
		*raw++ = 0x48;
		*raw++ = 0xc1;
		*raw++ = 0xe0;
		*raw++ = 0x02;

		// add rax, r14
		*raw++ = 0x4c;
		*raw++ = 0x01;
		*raw++ = 0xf0;

		// jmp rax
		*raw++ = 0xff;
		*raw++ = 0xe0;

		// Save pc: mov [rbp + spu_thread::pc], r12d
		*raw++ = 0x44;
		*raw++ = 0x89;
		*raw++ = 0x65;
		*raw++ = ::narrow<s8>(::offset32(&spu_thread::pc));

		// Epilogue: add rsp,0x28 ; ret
		*raw++ = 0x48;
		*raw++ = 0x83;
		*raw++ = 0xc4;
		*raw++ = 0x28;
		*raw++ = 0xc3;

		const auto fn = reinterpret_cast<spu_function_t>(result);

		// Install the pointer carefully
		const bool added = !add_loc->compiled && add_loc->compiled.compare_and_swap_test(nullptr, fn);

		// Check hash against allowed bounds
		const bool inverse_bounds = g_cfg.core.spu_llvm_lower_bound > g_cfg.core.spu_llvm_upper_bound;

		if ((!inverse_bounds && (m_hash_start < g_cfg.core.spu_llvm_lower_bound || m_hash_start > g_cfg.core.spu_llvm_upper_bound)) ||
			(inverse_bounds && (m_hash_start < g_cfg.core.spu_llvm_lower_bound && m_hash_start > g_cfg.core.spu_llvm_upper_bound)))
		{
			spu_log.error("[Debug] Skipped function %s", fmt::base57(be_t<u64>{m_hash_start}));
		}
		else if (added)
		{
			// Send work to the LLVM compiler thread
			g_fxo->get<spu_llvm_thread>().registered.push(m_hash_start, add_loc);
		}

		// Rebuild the trampoline if necessary
		if (!m_spurt->rebuild_ubertrampoline(func.data[0]))
		{
			return nullptr;
		}

		if (added)
		{
			add_loc->compiled.notify_all();
		}

		return fn;
	}
};

std::unique_ptr<spu_recompiler_base> spu_recompiler_base::make_fast_llvm_recompiler()
{
	return std::make_unique<spu_fast>();
}
std::array<reg_state_t, s_reg_max>& block_reg_info::evaluate_start_state(const std::map<u32, std::unique_ptr<block_reg_info>>& map, bool extensive_evaluation)
{
	if (!has_true_state)
	{
		std::array<reg_state_t, s_reg_max> temp;
		std::basic_string<u32> been_there;

		struct iterator_info
		{
			u32 block_pc = SPU_LS_SIZE;

			struct state_t
			{
				u32 block_pc = SPU_LS_SIZE;
				std::array<reg_state_t, s_reg_max> reg_state;
				bool disconnected = false;
				bool state_written = false;
			};

			std::vector<state_t> state_prev;
			usz completed = 0;
			usz parent_iterator_index = umax;
			usz parent_state_index = umax;
		};
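		// What follows is an iterative depth-first walk over predecessor blocks with
		// info_queue as an explicit stack: each entry snapshots the register state
		// accumulated along one path, loop back-edges are marked disconnected and
		// ignored (re-merging a loop state any number of times cannot change the
		// merge result), and a block's end state becomes final ("true") only once
		// all of its reachable predecessors have been resolved.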
		std::vector<iterator_info> info_queue;

		iterator_info first_entry{pc, {}, 0, umax, umax};
		info_queue.emplace_back(std::move(first_entry));

		// info_queue may grow
		for (usz qi = 0; qi < info_queue.size();)
		{
			const auto it = std::addressof(info_queue[qi]);
			ensure(qi == info_queue.size() - 1);

			auto& cur_node = ::at32(map, it->block_pc);
			ensure(it->parent_iterator_index == qi - 1);

			if (cur_node->has_true_state)
			{
				// Already evaluated at some earlier point
				if (qi != 0)
				{
					ensure(!been_there.empty());
					been_there.pop_back();
					info_queue.pop_back();
					qi--;
					continue;
				}
				else
				{
					break;
				}
			}

			if (it->state_prev.empty())
			{
				// Build the list here to avoid code duplication
				const usz real_size = cur_node->prev_nodes.size();

				if (real_size)
				{
					it->state_prev.resize(real_size);

					for (usz i = 0; i < real_size; i++)
					{
						it->state_prev[i].block_pc = cur_node->prev_nodes[i].prev_pc;
					}
				}
			}

			const usz next_entry_idx = it->completed;

			if (next_entry_idx == it->state_prev.size())
			{
				// Result merge from all predecessors

				// Flag to mark the state as resolved
				bool is_all_resolved = true;
				bool has_past_state = false;

				for (usz bi = 0; bi < it->state_prev.size(); bi++)
				{
					if (it->state_prev[bi].disconnected)
					{
						is_all_resolved = false;
						continue;
					}

					has_past_state = true;

					const u32 node_pc = it->state_prev[bi].block_pc;
					const auto& node = ::at32(map, node_pc);

					// Check if the node is resolved
					if (!node->has_true_state)
					{
						// Assume this block cannot be resolved at the moment
						is_all_resolved = false;
						break;
					}
				}

				if (qi == 0)
				{
					// TODO: The first block is always resolved here, but this logic can be improved to detect more cases of opportunistic resolving
					is_all_resolved = true;
				}

				auto& res_state = is_all_resolved ? cur_node->start_reg_state : temp;

				for (usz bi = 0; bi < it->state_prev.size(); bi++)
				{
					if (it->state_prev[bi].disconnected)
					{
						// Loop state: merging it even a million times would not change the result, so ignore it
						continue;
					}

					std::array<reg_state_t, s_reg_max>* arg_state{};
					const auto& node = ::at32(map, it->state_prev[bi].block_pc);

					if (node->has_true_state)
					{
						// The state is resolved, use the entry's state
						arg_state = std::addressof(node->end_reg_state);
					}
					else
					{
						// Use the accumulated state from one path of code history
						arg_state = std::addressof(it->state_prev[bi].reg_state);
						ensure(it->state_prev[bi].state_written);
					}

					if (bi == 0)
					{
						res_state = *arg_state;
					}
					else
					{
						merge(res_state, res_state, *arg_state, it->block_pc);
					}
				}

				std::array<reg_state_t, s_reg_max>* result_storage{};

				if (is_all_resolved)
				{
					// Complete state of this block
					result_storage = std::addressof(cur_node->end_reg_state);
					cur_node->has_true_state = true;
				}
				else
				{
					// Patch the incomplete state into the saved state entry of the parent block
					ensure(it->parent_iterator_index != qi);
					ensure(it->parent_iterator_index != umax);

					auto& state_vec = ::at32(info_queue, it->parent_iterator_index).state_prev;
					auto& state = ::at32(state_vec, it->parent_state_index);

					ensure(state.block_pc == it->block_pc);

					result_storage = std::addressof(state.reg_state);
					ensure(!state.state_written);
					state.state_written = true;
				}

				// Stack the newer state on top of the old one (if it exists)
				if (has_past_state)
				{
					build_on_top_of(*result_storage, cur_node->addend_reg_state, res_state);
				}
				else
				{
					*result_storage = cur_node->addend_reg_state;
				}

				if (qi != 0)
				{
					ensure(!been_there.empty());
					been_there.pop_back();
					info_queue.pop_back();
					qi--;
				}
				else
				{
					ensure(cur_node->has_true_state);
					break;
				}
			}
			else
			{
				const u32 prev_pc = cur_node->prev_nodes[it->completed++].prev_pc;
				const auto& prev_node = ::at32(map, prev_pc);

				// Queue for resolving if needed
				if (!prev_node->has_true_state)
				{
					// TODO: The true maximum occurrence count needs to depend on the number of branch-outs passed through
					// Currently allow 2 for short-term code and 1 for long-term code
					const bool loop_terminator_detected = std::count(been_there.begin(), been_there.end(), prev_pc) >= (qi < 20 ? 2u : 1u);
					const bool avoid_extensive_analysis = qi >= (extensive_evaluation ? 22 : 16);

					if (!loop_terminator_detected && !avoid_extensive_analysis)
					{
						info_queue.emplace_back(iterator_info{prev_pc, {}, 0, qi, it->completed - 1});
						been_there.push_back(prev_pc);
						qi++;
					}
					else
					{
						auto& state = ::at32(it->state_prev, it->completed - 1);

						// Loop state: merging it even a million times would not change the result,
						// much like multiplying by zero a million times. This holds at least for now,
						// since any register difference is treated as an unknown state change.
						// So ignore it.
						ensure(state.block_pc == prev_pc);
						ensure(!state.disconnected);
						state.disconnected = true;

						// Repeat
						// qi += 0;
					}
				}
				else
				{
					// Repeat
					// qi += 0;
				}
			}
		}

		ensure(has_true_state);
	}

	walkby_state = start_reg_state;
	return walkby_state;
}
void spu_recompiler_base::add_pattern(bool fill_all, inst_attr attr, u32 start, u32 end)
{
	if (end == umax)
	{
		end = start;
	}

	m_patterns[start] = pattern_info{utils::address_range::start_end(start, end)};

	for (u32 i = start; i <= (fill_all ? end : start); i += 4)
	{
		m_inst_attrs[i / 4] = attr;
	}
}

extern std::string format_spu_func_info(u32 addr, cpu_thread* spu)
{
	spu_thread* _spu = static_cast<spu_thread*>(spu);

	std::unique_ptr<spu_recompiler_base> compiler = spu_recompiler_base::make_asmjit_recompiler();
	compiler->init();

	auto func = compiler->analyse(reinterpret_cast<const be_t<u32>*>(_spu->ls), addr);

	std::string info;
	{
		sha1_context ctx;
		u8 output[20];

		sha1_starts(&ctx);
		sha1_update(&ctx, reinterpret_cast<const u8*>(func.data.data()), func.data.size() * 4);
		sha1_finish(&ctx, output);
		fmt::append(info, "size=%d, end=0x%x, hash=%s", func.data.size(), addr + func.data.size() * 4, fmt::base57(output));
	}

	return info;
}