#include "stdafx.h"
#include "rx/cpu/cell/ppu/Decoder.hpp"
#include "util/JIT.h"
#include "util/StrUtil.h"
#include "util/serialization.hpp"
#include "Crypto/sha1.h"
#include "Crypto/unself.h"
#include "Loader/ELF.h"
#include "Loader/mself.hpp"
#include "Emu/localized_string.h"
#include "Emu/perf_meter.hpp"
#include "Emu/Memory/vm_reservation.h"
#include "Emu/Memory/vm_locking.h"
#include "Emu/RSX/Core/RSXReservationLock.hpp"
#include "Emu/VFS.h"
#include "Emu/system_progress.hpp"
#include "Emu/system_utils.hpp"
#include "Emu/System.h"
#include "PPUThread.h"
#include "PPUInterpreter.h"
#include "PPUAnalyser.h"
#include "PPUModule.h"
#include "PPUDisAsm.h"
#include "SPURecompiler.h"
#include "timers.hpp"
#include "lv2/sys_sync.h"
#include "lv2/sys_prx.h"
#include "lv2/sys_overlay.h"
#include "lv2/sys_process.h"
#include "lv2/sys_spu.h"

#include <cstddef>
#include <rx/format.hpp>
#include <format>

#ifdef LLVM_AVAILABLE
#ifdef _MSC_VER
#pragma warning(push, 0)
#else
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wmissing-noreturn"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#endif

#include <llvm/IR/Verifier.h>
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
#include <llvm/Analysis/CGSCCPassManager.h>
#include <llvm/Analysis/LoopAnalysisManager.h>
#include <llvm/Passes/PassBuilder.h>
#include <llvm/Transforms/Scalar/EarlyCSE.h>

#ifdef _MSC_VER
#pragma warning(pop)
#else
#pragma GCC diagnostic pop
#endif

#include "PPUTranslator.h"
#endif

#include <cfenv>
#include <cctype>
#include <span>
#include <optional>
#include <charconv>

#include "util/asm.hpp"
#include "util/vm.hpp"
#include "util/v128.hpp"
#include "util/simd.hpp"
#include "util/sysinfo.hpp"

#include "util/sema.h"

#ifdef __APPLE__
#include <libkern/OSCacheControl.h>
#endif

extern atomic_t<u64> g_watchdog_hold_ctr;

// Should be of the same type
using spu_rdata_t = decltype(ppu_thread::rdata);

extern void mov_rdata(spu_rdata_t& _dst, const spu_rdata_t& _src);
extern void mov_rdata_nt(spu_rdata_t& _dst, const spu_rdata_t& _src);
extern bool cmp_rdata(const spu_rdata_t& _lhs, const spu_rdata_t& _rhs);

// Verify AVX availability for TSX transactions
static const bool s_tsx_avx = utils::has_avx();

template <>
void fmt_class_string<ppu_join_status>::format(std::string& out, u64 arg)
{
	format_enum(out, arg, [](ppu_join_status js)
	{
		switch (js)
		{
		case ppu_join_status::joinable: return "none";
		case ppu_join_status::detached: return "detached";
		case ppu_join_status::zombie: return "zombie";
		case ppu_join_status::exited: return "exited";
		case ppu_join_status::max: break;
		}

		return unknown;
	});
}

template <>
void fmt_class_string<ppu_thread_status>::format(std::string& out, u64 arg)
{
	format_enum(out, arg, [](ppu_thread_status s)
	{
		switch (s)
		{
		case PPU_THREAD_STATUS_IDLE: return "IDLE";
		case PPU_THREAD_STATUS_RUNNABLE: return "RUN";
		case PPU_THREAD_STATUS_ONPROC: return "ONPROC";
		case PPU_THREAD_STATUS_SLEEP: return "SLEEP";
		case PPU_THREAD_STATUS_STOP: return "STOP";
		case PPU_THREAD_STATUS_ZOMBIE: return "Zombie";
		case PPU_THREAD_STATUS_DELETED: return "Deleted";
		case PPU_THREAD_STATUS_UNKNOWN: break;
		}

		return unknown;
	});
}

template <>
void fmt_class_string<typename ppu_thread::call_history_t>::format(std::string& out, u64 arg)
{
	const auto& history = get_object(arg);

	PPUDisAsm dis_asm(cpu_disasm_mode::normal, vm::g_sudo_addr);

	for (u64 count = 0, idx = history.index - 1; idx != umax && count < history.data.size(); count++, idx--)
	{
		const u32 pc = history.data[idx % history.data.size()];
		dis_asm.disasm(pc);
		fmt::append(out, "\n(%u) 0x%08x: %s", count, pc, dis_asm.last_opcode);
	}
}

template <>
void fmt_class_string<typename ppu_thread::syscall_history_t>::format(std::string& out, u64 arg)
{
	const auto& history = get_object(arg);

	for (u64 count = 0, idx = history.index - 1; idx != umax && count < history.data.size(); count++, idx--)
	{
		const auto& entry = history.data[idx % history.data.size()];
		fmt::append(out, "\n(%u) 0x%08x: %s, 0x%x, r3=0x%x, r4=0x%x, r5=0x%x, r6=0x%x", count, entry.cia, entry.func_name, entry.error, entry.args[0], entry.args[1], entry.args[2], entry.args[3]);
	}
}

extern const ppu_decoder<ppu_itype> g_ppu_itype{};
extern const ppu_decoder<ppu_iname> g_ppu_iname{};

template <>
bool serialize<ppu_thread::cr_bits>(utils::serial& ar, typename ppu_thread::cr_bits& o)
{
	if (ar.is_writing())
	{
		ar(o.pack());
	}
	else
	{
		o.unpack(ar);
	}

	return true;
}

class concurent_memory_limit
{
	u32 m_total = 0;
	atomic_t<u32> m_free = 0;

	static constexpr auto k_block_size = 1024 * 8;

public:
	class [[nodiscard]] user
	{
		concurent_memory_limit* m_limit = nullptr;
		u32 m_used = 0;

	public:
		user(concurent_memory_limit* limit, u32 used) : m_limit(limit), m_used(used) {}
		user() = default;

		user(user&& other)
		{
			*this = std::move(other);
		}

		~user()
		{
			if (m_used != 0)
			{
				m_limit->release(m_used);
			}
		}

		user& operator=(user&& other)
		{
			std::swap(other.m_limit, m_limit);
			std::swap(other.m_used, m_used);
			return *this;
		}

		explicit operator bool() const
		{
			return m_limit != nullptr;
		}
	};

	concurent_memory_limit(u64 total)
		: m_total(u32(std::min<u64>(total / k_block_size, std::numeric_limits<u32>::max()))), m_free(m_total) {}

	user acquire(u64 amount)
	{
		amount = utils::aligned_div<u64>(amount, k_block_size);

		u32 allocated = 0;

		while (!m_free.fetch_op([&, this](u32& value)
		{
			if (value >= amount || value == m_total)
			{
				// Allow at least one allocation, make 0 the "memory unavailable" sign value for atomic waiting efficiency
				const u32 new_val = static_cast<u32>(utils::sub_saturate<u64>(value, amount));
				allocated = value - new_val;
				value = new_val;
				return true;
			}

			// Resort to waiting
			allocated = 0;
			return Emu.IsStopped();
		}).second)
		{
			// Wait until not 0
			m_free.wait(0);
		}

		if (Emu.IsStopped())
		{
			return {};
		}

		return user(this, allocated);
	}

	std::size_t free_memory() const
	{
		return m_free.load() * k_block_size;
	}

	std::uint64_t total_memory() const
	{
		return m_total * k_block_size;
	}

private:
	void release(u32 amount)
	{
		if (!m_free.fetch_add(amount))
		{
			m_free.notify_all();
		}
	}
};
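
// Illustrative usage sketch (the names 'limit', 'file_size' and 'compile_module' are
// hypothetical): the limiter hands out RAII tokens in 8 KiB blocks and blocks the caller
// on m_free until memory becomes available or the emulator stops.
//
//   concurent_memory_limit limit(2ull << 30);   // ~2 GiB budget
//   if (auto token = limit.acquire(file_size))  // waits while the budget is exhausted
//   {
//       compile_module(...);                    // token releases its share on destruction
//   }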

extern void ppu_initialize();
extern void ppu_finalize(const ppu_module<lv2_obj>& info, bool force_mem_release = false);
extern bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only = false, u64 file_size = 0);
extern bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_size, concurent_memory_limit& memory_limit);
static void ppu_initialize2(class jit_compiler& jit, const ppu_module<lv2_obj>& module_part, const std::string& cache_path, const std::string& obj_name);
extern bool ppu_load_exec(const ppu_exec_object&, bool virtual_load, const std::string&, utils::serial* = nullptr);
extern std::pair<shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_exec_object&, bool virtual_load, const std::string& path, s64 file_offset, utils::serial* = nullptr);
extern void ppu_unload_prx(const lv2_prx&);
extern shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object&, bool virtual_load, const std::string&, s64 file_offset, utils::serial* = nullptr);
extern void ppu_execute_syscall(ppu_thread& ppu, u64 code);
static void ppu_break(ppu_thread&, ppu_opcode_t, be_t<u32>*, ppu_intrp_func*);

extern void do_cell_atomic_128_store(u32 addr, const void* to_write);

const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway", [](native_asm& c, auto& args)
{
	// Gateway for PPU, converts from native to GHC calling convention, also saves RSP value for escape
	using namespace asmjit;

#if defined(ARCH_X64)
#ifdef _WIN32
	c.push(x86::r15);
	c.push(x86::r14);
	c.push(x86::r13);
	c.push(x86::r12);
	c.push(x86::rsi);
	c.push(x86::rdi);
	c.push(x86::rbp);
	c.push(x86::rbx);
	c.sub(x86::rsp, 0xa8);
	c.movaps(x86::oword_ptr(x86::rsp, 0x90), x86::xmm15);
	c.movaps(x86::oword_ptr(x86::rsp, 0x80), x86::xmm14);
	c.movaps(x86::oword_ptr(x86::rsp, 0x70), x86::xmm13);
	c.movaps(x86::oword_ptr(x86::rsp, 0x60), x86::xmm12);
	c.movaps(x86::oword_ptr(x86::rsp, 0x50), x86::xmm11);
	c.movaps(x86::oword_ptr(x86::rsp, 0x40), x86::xmm10);
	c.movaps(x86::oword_ptr(x86::rsp, 0x30), x86::xmm9);
	c.movaps(x86::oword_ptr(x86::rsp, 0x20), x86::xmm8);
	c.movaps(x86::oword_ptr(x86::rsp, 0x10), x86::xmm7);
	c.movaps(x86::oword_ptr(x86::rsp, 0), x86::xmm6);
#else
	c.push(x86::rbp);
	c.push(x86::r15);
	c.push(x86::r14);
	c.push(x86::r13);
	c.push(x86::r12);
	c.push(x86::rbx);
	c.push(x86::rax);
#endif

	// Save native stack pointer for longjmp emulation
	c.mov(x86::qword_ptr(args[0], OFFSET_OF(ppu_thread, hv_ctx.regs)), x86::rsp);

	// Initialize args
	c.mov(x86::r13, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_exec_addr)));
	c.mov(x86::rbp, args[0]);
	c.mov(x86::edx, x86::dword_ptr(x86::rbp, OFFSET_OF(ppu_thread, cia))); // Load PC

	c.mov(x86::rax, x86::qword_ptr(x86::r13, x86::edx, 1, 0)); // Load call target
	c.mov(x86::rdx, x86::rax);
	c.shl(x86::rax, 16);
	c.shr(x86::rax, 16);
	c.shr(x86::rdx, 48);
	c.shl(x86::edx, 13);
	c.mov(x86::r12d, x86::edx); // Load relocation base

	c.mov(x86::rbx, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_base_addr)));
	c.mov(x86::r14, x86::qword_ptr(x86::rbp, OFFSET_OF(ppu_thread, gpr[0]))); // Load some registers
	c.mov(x86::rsi, x86::qword_ptr(x86::rbp, OFFSET_OF(ppu_thread, gpr[1])));
	c.mov(x86::rdi, x86::qword_ptr(x86::rbp, OFFSET_OF(ppu_thread, gpr[2])));

	if (utils::has_avx())
	{
		c.vzeroupper();
	}

	c.call(x86::rax);

	if (utils::has_avx())
	{
		c.vzeroupper();
	}

#ifdef _WIN32
	c.movaps(x86::xmm6, x86::oword_ptr(x86::rsp, 0));
	c.movaps(x86::xmm7, x86::oword_ptr(x86::rsp, 0x10));
	c.movaps(x86::xmm8, x86::oword_ptr(x86::rsp, 0x20));
	c.movaps(x86::xmm9, x86::oword_ptr(x86::rsp, 0x30));
	c.movaps(x86::xmm10, x86::oword_ptr(x86::rsp, 0x40));
	c.movaps(x86::xmm11, x86::oword_ptr(x86::rsp, 0x50));
	c.movaps(x86::xmm12, x86::oword_ptr(x86::rsp, 0x60));
	c.movaps(x86::xmm13, x86::oword_ptr(x86::rsp, 0x70));
	c.movaps(x86::xmm14, x86::oword_ptr(x86::rsp, 0x80));
	c.movaps(x86::xmm15, x86::oword_ptr(x86::rsp, 0x90));
	c.add(x86::rsp, 0xa8);
	c.pop(x86::rbx);
	c.pop(x86::rbp);
	c.pop(x86::rdi);
	c.pop(x86::rsi);
	c.pop(x86::r12);
	c.pop(x86::r13);
	c.pop(x86::r14);
	c.pop(x86::r15);
#else
	c.add(x86::rsp, +8);
	c.pop(x86::rbx);
	c.pop(x86::r12);
	c.pop(x86::r13);
	c.pop(x86::r14);
	c.pop(x86::r15);
	c.pop(x86::rbp);
#endif

	c.ret();
#else
	// See https://github.com/ghc/ghc/blob/master/rts/include/stg/MachRegs.h
	// for GHC calling convention definitions on Aarch64
	// and https://developer.arm.com/documentation/den0024/a/The-ABI-for-ARM-64-bit-Architecture/Register-use-in-the-AArch64-Procedure-Call-Standard/Parameters-in-general-purpose-registers
	// for AArch64 calling convention

	// PPU function argument layout:
	// x19 = m_exec
	// x20 = m_thread,
	// x21 = seg0
	// x22 = m_base
	// x23 - x25 = gpr[0] - gpr[3]

	// Push callee saved registers to the hv context
	// Assume our LLVM compiled code is unsafe and can clobber our stack. GHC on aarch64 treats stack as scratch.
	// We also want to store the register context at a fixed place so we can read the hypervisor state from any location.
	// We need to save x18-x30 = 13 x 8B each + 8 bytes for 16B alignment = 112B

	// Pre-context save
	// Layout:
	// pc, sp
	// x18, x19...x30
	// NOTE: Do not touch x19..x30 before saving the registers!
	const u64 hv_register_array_offset = OFFSET_OF(ppu_thread, hv_ctx.regs);
	Label hv_ctx_pc = c.newLabel(); // Used to hold the far jump return address

	// Sanity
	ensure(hv_register_array_offset < 4096); // Imm10

	c.mov(a64::x15, args[0]);
	c.add(a64::x14, a64::x15, Imm(hv_register_array_offset)); // Per-thread context save
	c.adr(a64::x15, hv_ctx_pc); // x15 = pc
	c.mov(a64::x13, a64::sp);   // x16 = sp

	c.stp(a64::x15, a64::x13, arm::Mem(a64::x14));
	c.stp(a64::x18, a64::x19, arm::Mem(a64::x14, 16));
	c.stp(a64::x20, a64::x21, arm::Mem(a64::x14, 32));
	c.stp(a64::x22, a64::x23, arm::Mem(a64::x14, 48));
	c.stp(a64::x24, a64::x25, arm::Mem(a64::x14, 64));
	c.stp(a64::x26, a64::x27, arm::Mem(a64::x14, 80));
	c.stp(a64::x28, a64::x29, arm::Mem(a64::x14, 96));
	c.str(a64::x30, arm::Mem(a64::x14, 112));

	// Load REG_Base - use absolute jump target to bypass rel jmp range limits
	c.mov(a64::x19, Imm(reinterpret_cast<u64>(&vm::g_exec_addr)));
	c.ldr(a64::x19, arm::Mem(a64::x19));

	// Load PPUThread struct base -> REG_Sp
	const arm::GpX ppu_t_base = a64::x20;
	c.mov(ppu_t_base, args[0]);

	// Load PC
	const arm::GpX pc = a64::x15;
	const arm::GpX cia_addr_reg = a64::x11;

	// Load offset value
	c.mov(cia_addr_reg, Imm(static_cast<u64>(OFFSET_OF(ppu_thread, cia))));

	// Load cia
	c.ldr(pc.w(), arm::Mem(ppu_t_base, cia_addr_reg));

	// Multiply by 2 to index into ptr table
	c.add(pc, pc, pc);

	// Load call target
	const arm::GpX call_target = a64::x13;
	c.ldr(call_target, arm::Mem(a64::x19, pc));

	// Compute REG_Hp
	const arm::GpX reg_hp = a64::x21;
	c.mov(reg_hp, call_target);
	c.lsr(reg_hp, reg_hp, 48);
	c.lsl(reg_hp.w(), reg_hp.w(), 13);

	// Zero top 16 bits of call target
	c.lsl(call_target, call_target, Imm(16));
	c.lsr(call_target, call_target, Imm(16));

	// Load registers
	c.mov(a64::x22, Imm(reinterpret_cast<u64>(&vm::g_base_addr)));
	c.ldr(a64::x22, arm::Mem(a64::x22));

	const arm::GpX gpr_addr_reg = a64::x9;
	c.mov(gpr_addr_reg, Imm(static_cast<u64>(OFFSET_OF(ppu_thread, gpr))));
	c.add(gpr_addr_reg, gpr_addr_reg, ppu_t_base);
	c.ldr(a64::x23, arm::Mem(gpr_addr_reg));
	c.ldr(a64::x24, arm::Mem(gpr_addr_reg, 8));
	c.ldr(a64::x25, arm::Mem(gpr_addr_reg, 16));

	// Thread context save. This is needed for PPU because different functions can switch between x19 and x20 for the base register.
	// We need a different solution to ensure that no matter which version, we get the right value on far return.
	c.mov(a64::x26, ppu_t_base);

	// Save thread pointer to stack. SP is the only register preserved across GHC calls.
	c.sub(a64::sp, a64::sp, Imm(16));
	c.str(a64::x20, arm::Mem(a64::sp));

	// GHC scratchpad mem. If managed correctly (i.e. no returns ever), GHC functions should never require a stack frame.
	// We allocate a slab to use for all functions as they tail-call into each other.
	c.sub(a64::sp, a64::sp, Imm(8192));

	// Execute LLE call
	c.blr(call_target);

	// Return address after far jump. Reset sp and start unwinding...
	c.bind(hv_ctx_pc);

	// Clear scratchpad allocation
	c.add(a64::sp, a64::sp, Imm(8192));

	c.ldr(a64::x20, arm::Mem(a64::sp));
	c.add(a64::sp, a64::sp, Imm(16));

	// We either got here through normal "ret" which keeps our x20 intact, or we jumped here and the escape reset our x20 reg
	// Either way, x26 contains our thread base and we forcefully reset the stack pointer
	c.add(a64::x14, a64::x20, Imm(hv_register_array_offset)); // Per-thread context save

	c.ldr(a64::x15, arm::Mem(a64::x14, 8));
	c.ldp(a64::x18, a64::x19, arm::Mem(a64::x14, 16));
	c.ldp(a64::x20, a64::x21, arm::Mem(a64::x14, 32));
	c.ldp(a64::x22, a64::x23, arm::Mem(a64::x14, 48));
	c.ldp(a64::x24, a64::x25, arm::Mem(a64::x14, 64));
	c.ldp(a64::x26, a64::x27, arm::Mem(a64::x14, 80));
	c.ldp(a64::x28, a64::x29, arm::Mem(a64::x14, 96));
	c.ldr(a64::x30, arm::Mem(a64::x14, 112));

	// Return
	c.mov(a64::sp, a64::x15);
	c.ret(a64::x30);
#endif
});
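
// Reading aid (inferred from the gateway code above): on x86-64 the GHC convention used by
// recompiled code keeps rbp = ppu_thread*, r13 = vm::g_exec_addr, rbx = vm::g_base_addr,
// r12d = segment/relocation base decoded from the top 16 bits of the call target,
// r14/rsi/rdi = gpr[0..2] and edx = cia; the call target itself is the low 48 bits of the
// 8-byte executable-cache entry selected by cia * 2.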

const extern auto ppu_escape = build_function_asm<void(*)(ppu_thread*)>("ppu_escape", [](native_asm& c, auto& args)
{
	using namespace asmjit;

#if defined(ARCH_X64)
	// Restore native stack pointer (longjmp emulation)
	c.mov(x86::rsp, x86::qword_ptr(args[0], OFFSET_OF(ppu_thread, hv_ctx.regs)));

	// Return to the return location
	c.sub(x86::rsp, 8);
	c.ret();
#else
	// We really shouldn't be using this, but an implementation shouldn't hurt
	// Far jump return. Only clobbers x30.
	const arm::GpX ppu_t_base = a64::x20;
	const u64 hv_register_array_offset = OFFSET_OF(ppu_thread, hv_ctx.regs);

	c.mov(ppu_t_base, args[0]);
	c.mov(a64::x30, Imm(hv_register_array_offset));
	c.ldr(a64::x30, arm::Mem(ppu_t_base, a64::x30));
	c.ret(a64::x30);
#endif
});

void ppu_recompiler_fallback(ppu_thread& ppu);

#if defined(ARCH_X64)
const auto ppu_recompiler_fallback_ghc = build_function_asm<void(*)(ppu_thread& ppu)>("", [](native_asm& c, auto& args)
{
	using namespace asmjit;

	c.mov(args[0], x86::rbp);
	c.jmp(ppu_recompiler_fallback);
});
#elif defined(ARCH_ARM64)
const auto ppu_recompiler_fallback_ghc = build_function_asm<void(*)(ppu_thread& ppu)>("", [](native_asm& c, auto& args)
{
	using namespace asmjit;

	Label fallback_fn = c.newLabel();
	Label escape_fn = c.newLabel();

	// This is called as GHC so the first arg is in x20.
	// Fix up the arg registers and call the real function.
	c.mov(args[0], a64::x20);
	c.ldr(a64::x13, arm::ptr(fallback_fn));
	c.blr(a64::x13);

	// There is no call-stack to return to in arm64 GHC. Escape to host.
	c.mov(a64::x0, a64::x20);
	c.ldr(a64::x13, arm::ptr(escape_fn));
	c.br(a64::x13);

	c.bind(fallback_fn);
	c.embedUInt64(reinterpret_cast<u64>(ppu_recompiler_fallback));
	c.bind(escape_fn);
	c.embedUInt64(reinterpret_cast<u64>(ppu_escape));
});
#endif

// Get pointer to executable cache
static inline u8* ppu_ptr(u32 addr)
{
	return vm::g_exec_addr + u64{addr} * 2;
}

static inline ppu_intrp_func_t ppu_read(u32 addr)
{
	return read_from_ptr<ppu_intrp_func_t>(ppu_ptr(addr));
}

// Get interpreter cache value
static ppu_intrp_func_t ppu_cache(u32 addr)
{
	if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm_legacy)
	{
		fmt::throw_exception("Invalid PPU decoder");
	}

	return g_fxo->get<ppu_interpreter_rt>().decode(vm::read32(addr));
}
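
// Note on the layout assumed by the helpers above: every guest instruction address owns
// 8 bytes in vm::g_exec_addr (hence the "* 2" scaling of a 4-byte-aligned address). In
// recompiler mode the low 48 bits of an entry hold the native function pointer and the
// top 16 bits carry the segment base, while in static interpreter mode the entry is
// simply a ppu_intrp_func_t pointer.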

static ppu_intrp_func ppu_ret = {[](ppu_thread& ppu, ppu_opcode_t, be_t<u32>* this_op, ppu_intrp_func*)
{
	// Fix PC and return (step execution)
	ppu.cia = vm::get_addr(this_op);
	return;
}};

static void ppu_fallback(ppu_thread& ppu, ppu_opcode_t op, be_t<u32>* this_op, ppu_intrp_func* next_fn)
{
	const auto _pc = vm::get_addr(this_op);
	const auto _fn = ppu_cache(_pc);
	write_to_ptr<ppu_intrp_func_t>(ppu_ptr(_pc), _fn);
	return _fn(ppu, op, this_op, next_fn);
}

// TODO: Make this a dispatch call
void ppu_recompiler_fallback(ppu_thread& ppu)
{
	perf_meter<"PPUFALL1"_u64> perf0;

	if (g_cfg.core.ppu_debug)
	{
		ppu_log.error("Unregistered PPU Function (LR=0x%x)", ppu.lr);
	}

	const auto& table = g_fxo->get<ppu_interpreter_rt>();

	while (true)
	{
		if (uptr func = uptr(ppu_read(ppu.cia)); (func << 16 >> 16) != reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc))
		{
			// We found a recompiler function at cia, return
			break;
		}

		// Run one instruction in interpreter (TODO)
		const u32 op = vm::read32(ppu.cia);
		table.decode(op)(ppu, {op}, vm::_ptr<u32>(ppu.cia), &ppu_ret);

		if (ppu.test_stopped())
		{
			break;
		}
	}
}
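
// Added note on the loop above: only the low 48 bits of the cache entry are compared
// ((func << 16 >> 16)) because the top 16 bits store the segment base rather than part
// of the pointer; instructions are interpreted one at a time until the entry no longer
// points at ppu_recompiler_fallback_ghc, i.e. until a recompiled function has been
// published for the current cia.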

void ppu_reservation_fallback(ppu_thread& ppu)
{
	perf_meter<"PPUFALL2"_u64> perf0;

	const auto& table = g_fxo->get<ppu_interpreter_rt>();

	while (true)
	{
		// Run one instruction in interpreter (TODO)
		const u32 op = vm::read32(ppu.cia);
		table.decode(op)(ppu, {op}, vm::_ptr<u32>(ppu.cia), &ppu_ret);

		if (!ppu.raddr || !ppu.use_full_rdata)
		{
			// We've escaped from reservation, return.
			return;
		}

		if (ppu.test_stopped())
		{
			return;
		}
	}
}

u32 ppu_read_mmio_aware_u32(u8* vm_base, u32 eal)
{
	if (eal >= RAW_SPU_BASE_ADDR)
	{
		// RawSPU MMIO
		auto thread = idm::get_unlocked<named_thread<spu_thread>>(spu_thread::find_raw_spu((eal - RAW_SPU_BASE_ADDR) / RAW_SPU_OFFSET));

		if (!thread)
		{
			// Access Violation
		}
		else if ((eal - RAW_SPU_BASE_ADDR) % RAW_SPU_OFFSET + sizeof(u32) - 1 < SPU_LS_SIZE) // LS access
		{
		}
		else if (u32 value{}; thread->read_reg(eal, value))
		{
			return std::bit_cast<be_t<u32>>(value);
		}
		else
		{
			fmt::throw_exception("Invalid RawSPU MMIO offset (addr=0x%x)", eal);
		}
	}

	// Value is assumed to be swapped
	return read_from_ptr<u32>(vm_base + eal);
}

void ppu_write_mmio_aware_u32(u8* vm_base, u32 eal, u32 value)
{
	if (eal >= RAW_SPU_BASE_ADDR)
	{
		// RawSPU MMIO
		auto thread = idm::get_unlocked<named_thread<spu_thread>>(spu_thread::find_raw_spu((eal - RAW_SPU_BASE_ADDR) / RAW_SPU_OFFSET));

		if (!thread)
		{
			// Access Violation
		}
		else if ((eal - RAW_SPU_BASE_ADDR) % RAW_SPU_OFFSET + sizeof(u32) - 1 < SPU_LS_SIZE) // LS access
		{
		}
		else if (thread->write_reg(eal, std::bit_cast<be_t<u32>>(value)))
		{
			return;
		}
		else
		{
			fmt::throw_exception("Invalid RawSPU MMIO offset (addr=0x%x)", eal);
		}
	}

	// Value is assumed swapped
	write_to_ptr<u32>(vm_base + eal, value);
}

extern bool ppu_test_address_may_be_mmio(std::span<const be_t<u32>> insts)
{
	std::set<u32> reg_offsets;

	bool found_raw_spu_base = false;
	bool found_spu_area_offset_element = false;

	for (u32 inst : insts)
	{
		// Common around MMIO (orders IO)
		if (inst == ppu_instructions::EIEIO())
		{
			return true;
		}

		const u32 op_imm16 = (inst & 0xfc00ffff);

		// RawSPU MMIO base
		// 0xe0000000 is a common constant so try to find an ORIS 0x10 or ADDIS 0x10 nearby (for multiplying SPU ID by it)
		if (op_imm16 == ppu_instructions::ADDIS({}, {}, -0x2000) || op_imm16 == ppu_instructions::ORIS({}, {}, 0xe000) || op_imm16 == ppu_instructions::XORIS({}, {}, 0xe000))
		{
			found_raw_spu_base = true;

			if (found_spu_area_offset_element)
			{
				// Found both
				return true;
			}
		}
		else if (op_imm16 == ppu_instructions::ORIS({}, {}, 0x10) || op_imm16 == ppu_instructions::ADDIS({}, {}, 0x10))
		{
			found_spu_area_offset_element = true;

			if (found_raw_spu_base)
			{
				// Found both
				return true;
			}
		}
		// RawSPU MMIO base + problem state offset
		else if (op_imm16 == ppu_instructions::ADDIS({}, {}, -0x1ffc))
		{
			return true;
		}
		else if (op_imm16 == ppu_instructions::ORIS({}, {}, 0xe004))
		{
			return true;
		}
		else if (op_imm16 == ppu_instructions::XORIS({}, {}, 0xe004))
		{
			return true;
		}
		// RawSPU MMIO base + problem state offset + 64k of SNR1 offset
		else if (op_imm16 == ppu_instructions::ADDIS({}, {}, -0x1ffb))
		{
			return true;
		}
		else if (op_imm16 == ppu_instructions::ORIS({}, {}, 0xe005))
		{
			return true;
		}
		else if (op_imm16 == ppu_instructions::XORIS({}, {}, 0xe005))
		{
			return true;
		}
		// RawSPU MMIO base + problem state offset + 264k of SNR2 offset (STW allows 32K+- offset so in order to access SNR2 it needs to first add another 64k)
		// SNR2 is the only currently implemented register that has its 0x80000 bit set, so it is the only one whose hardcoded access is done this way
		else if (op_imm16 == ppu_instructions::ADDIS({}, {}, -0x1ffa))
		{
			return true;
		}
		else if (op_imm16 == ppu_instructions::ORIS({}, {}, 0xe006))
		{
			return true;
		}
		else if (op_imm16 == ppu_instructions::XORIS({}, {}, 0xe006))
		{
			return true;
		}
		// Try to detect a function that receives RawSPU problem state base pointer as an argument
		else if ((op_imm16 & ~0xffff) == ppu_instructions::LWZ({}, {}, 0) ||
			(op_imm16 & ~0xffff) == ppu_instructions::STW({}, {}, 0) ||
			(op_imm16 & ~0xffff) == ppu_instructions::ADDI({}, {}, 0))
		{
			const bool is_load = (op_imm16 & ~0xffff) == ppu_instructions::LWZ({}, {}, 0);
			const bool is_store = (op_imm16 & ~0xffff) == ppu_instructions::STW({}, {}, 0);
			const bool is_neither = !is_store && !is_load;
			const bool is_snr = (is_store || is_neither) && ((op_imm16 & 0xffff) == (SPU_RdSigNotify2_offs & 0xffff) || (op_imm16 & 0xffff) == (SPU_RdSigNotify1_offs & 0xffff));

			if (is_snr || spu_thread::test_is_problem_state_register_offset(op_imm16 & 0xffff, is_load || is_neither, is_store || is_neither))
			{
				reg_offsets.insert(op_imm16 & 0xffff);

				if (reg_offsets.size() >= 2)
				{
					// Assume high MMIO likelihood if more than one offset appears in nearby code
					// Such as common IN_MBOX + OUT_MBOX
					return true;
				}
			}
		}
	}

	return false;
}
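
// Illustration of what the heuristic above matches (hypothetical guest snippet): code
// that builds a RawSPU problem-state address typically materializes the base and the
// per-SPU stride in separate halves, e.g.
//
//   lis  r9, -0x2000    // ADDIS with -0x2000 -> 0xe0000000 (RawSPU MMIO base)
//   oris r9, r9, 0x10   // 0x00100000 stride selecting the SPU slot
//
// followed by loads/stores whose 16-bit displacement matches a problem-state register
// offset; finding either combination (or an eieio barrier) flags the block as possible MMIO.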

struct ppu_toc_manager
{
	std::unordered_map<u32, u32> toc_map;

	shared_mutex mutex;
};

static void ppu_check_toc(ppu_thread& ppu, ppu_opcode_t op, be_t<u32>* this_op, ppu_intrp_func* next_fn)
{
	ppu.cia = vm::get_addr(this_op);

	{
		auto& toc_manager = g_fxo->get<ppu_toc_manager>();

		reader_lock lock(toc_manager.mutex);

		auto& ppu_toc = toc_manager.toc_map;

		const auto found = ppu_toc.find(ppu.cia);

		if (found != ppu_toc.end())
		{
			const u32 toc = atomic_storage<u32>::load(found->second);

			// Compare TOC with expected value
			if (toc != umax && ppu.gpr[2] != toc)
			{
				ppu_log.error("Unexpected TOC (0x%x, expected 0x%x)", ppu.gpr[2], toc);
				atomic_storage<u32>::exchange(found->second, u32{umax});
			}
		}
	}

	// Fallback to the interpreter function
	return ppu_cache(ppu.cia)(ppu, op, this_op, next_fn);
}

extern void ppu_register_range(u32 addr, u32 size)
{
	if (!size)
	{
		ppu_log.error("ppu_register_range(0x%x): empty range", addr);
		return;
	}

	size = utils::align(size + addr % 0x10000, 0x10000);
	addr &= -0x10000;

	// Register executable range at
	utils::memory_commit(ppu_ptr(addr), u64{size} * 2, utils::protection::rw);
	ensure(vm::page_protect(addr, size, 0, vm::page_executable));

	if (g_cfg.core.ppu_debug)
	{
		utils::memory_commit(vm::g_stat_addr + addr, size);
	}

	const u64 seg_base = addr;

	while (size)
	{
		if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm_legacy)
		{
			// Assume addr is the start of first segment of PRX
			const uptr entry_value = reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc) | (seg_base << (32 + 3));
			write_to_ptr<uptr>(ppu_ptr(addr), entry_value);
		}
		else
		{
			write_to_ptr<ppu_intrp_func_t>(ppu_ptr(addr), ppu_fallback);
		}

		addr += 4;
		size -= 4;
	}
}

static void ppu_far_jump(ppu_thread&, ppu_opcode_t, be_t<u32>*, ppu_intrp_func*);

extern void ppu_register_function_at(u32 addr, u32 size, ppu_intrp_func_t ptr = nullptr)
{
	// Initialize specific function
	if (ptr)
	{
		write_to_ptr<uptr>(ppu_ptr(addr), (reinterpret_cast<uptr>(ptr) & 0xffff'ffff'ffffu) | (uptr(ppu_read(addr)) & ~0xffff'ffff'ffffu));
		return;
	}

	if (!size)
	{
		if (g_cfg.core.ppu_debug)
		{
			ppu_log.error("ppu_register_function_at(0x%x): empty range", addr);
		}

		return;
	}

	if (g_cfg.core.ppu_decoder != ppu_decoder_type::_static)
	{
		return;
	}

	// Initialize interpreter cache
	while (size)
	{
		if (auto old = ppu_read(addr); old != ppu_break && old != ppu_far_jump)
		{
			write_to_ptr<ppu_intrp_func_t>(ppu_ptr(addr), ppu_cache(addr));
		}

		addr += 4;
		size -= 4;
	}
}

extern void ppu_register_function_at(u32 addr, u32 size, u64 ptr)
{
	return ppu_register_function_at(addr, size, reinterpret_cast<ppu_intrp_func_t>(ptr));
}

u32 ppu_get_exported_func_addr(u32 fnid, const std::string& module_name);

void ppu_return_from_far_jump(ppu_thread& ppu, ppu_opcode_t, be_t<u32>*, ppu_intrp_func*)
{
	auto& calls_info = ppu.hle_func_calls_with_toc_info;
	ensure(!calls_info.empty());

	// Branch to next instruction after far jump call entry with restored R2 and LR
	const auto restore_info = &calls_info.back();
	ppu.cia = restore_info->cia + 4;
	ppu.lr = restore_info->saved_lr;
	ppu.gpr[2] = restore_info->saved_r2;

	calls_info.pop_back();
}

static const bool s_init_return_far_jump_func = []
{
	REG_HIDDEN_FUNC_PURE(ppu_return_from_far_jump);
	return true;
}();

struct ppu_far_jumps_t
{
	struct all_info_t
	{
		u32 target;
		bool link;
		bool with_toc;
		std::string module_name;
		ppu_intrp_func_t func;

		u32 get_target(u32 pc, ppu_thread* ppu = nullptr) const
		{
			u32 direct_target = this->target;

			bool to_link = this->link;
			bool from_opd = this->with_toc;

			if (!this->module_name.empty())
			{
				direct_target = ppu_get_exported_func_addr(direct_target, this->module_name);
			}

			if (from_opd && !vm::check_addr<sizeof(ppu_func_opd_t)>(direct_target))
			{
				// Avoid reading unmapped memory under mutex
				from_opd = false;
			}

			if (from_opd)
			{
				auto& opd = vm::_ref<ppu_func_opd_t>(direct_target);
				direct_target = opd.addr;

				// We modify LR to custom values here
				to_link = false;

				if (ppu)
				{
					auto& calls_info = ppu->hle_func_calls_with_toc_info;

					// Save LR and R2
					// Set LR to the ppu_return_from_far_jump branch for restoration of registers
					// NOTE: In order to clean up this information all calls must return in order
					auto& saved_info = calls_info.emplace_back();
					saved_info.cia = pc;
					saved_info.saved_lr = std::exchange(ppu->lr, g_fxo->get<ppu_function_manager>().func_addr(FIND_FUNC(ppu_return_from_far_jump), true));
					saved_info.saved_r2 = std::exchange(ppu->gpr[2], opd.rtoc);
				}
			}

			if (to_link && ppu)
			{
				ppu->lr = pc + 4;
			}

			return direct_target;
		}
	};

	ppu_far_jumps_t(int) noexcept {}

	std::map<u32, all_info_t> vals;

	::jit_runtime rt;

	mutable shared_mutex mutex;

	// Get target address, 'ppu' is used in ppu_far_jump in order to modify registers
	u32 get_target(u32 pc, ppu_thread* ppu = nullptr)
	{
		reader_lock lock(mutex);

		if (auto it = vals.find(pc); it != vals.end())
		{
			all_info_t& all_info = it->second;
			return all_info.get_target(pc, ppu);
		}

		return {};
	}

	// Get function patches in range (entry -> target)
	std::vector<std::pair<u32, u32>> get_targets(u32 pc, u32 size)
	{
		std::vector<std::pair<u32, u32>> targets;

		reader_lock lock(mutex);

		auto it = vals.lower_bound(pc);

		if (it == vals.end())
		{
			return targets;
		}

		if (it->first >= pc + size)
		{
			return targets;
		}

		for (auto end = vals.lower_bound(pc + size); it != end; it++)
		{
			all_info_t& all_info = it->second;

			if (u32 target = all_info.get_target(it->first))
			{
				targets.emplace_back(it->first, target);
			}
		}

		return targets;
	}

	// Generate a mini-function which updates PC (for LLVM) and jumps to ppu_far_jump to handle redirections
	template <bool Locked = true>
	ppu_intrp_func_t gen_jump(u32 pc)
	{
		[[maybe_unused]] std::conditional_t<Locked, std::lock_guard<shared_mutex>, const shared_mutex&> lock(mutex);

		auto it = vals.find(pc);

		if (it == vals.end())
		{
			return nullptr;
		}

		if (!it->second.func)
		{
			it->second.func = build_function_asm<ppu_intrp_func_t>("", [&](native_asm& c, auto& args)
			{
				using namespace asmjit;

#ifdef ARCH_X64
				c.mov(args[0], x86::rbp);
				c.mov(x86::dword_ptr(args[0], OFFSET_OF(ppu_thread, cia)), pc);
				c.jmp(ppu_far_jump);
#else
				Label jmp_address = c.newLabel();
				Label imm_address = c.newLabel();

				c.ldr(args[1].w(), arm::ptr(imm_address));
				c.str(args[1].w(), arm::Mem(args[0], OFFSET_OF(ppu_thread, cia)));
				c.ldr(args[1], arm::ptr(jmp_address));
				c.br(args[1]);

				c.align(AlignMode::kCode, 16);
				c.bind(jmp_address);
				c.embedUInt64(reinterpret_cast<u64>(ppu_far_jump));
				c.bind(imm_address);
				c.embedUInt32(pc);
#endif
			},
			&rt);
		}

		return it->second.func;
	}
};

u32 ppu_get_far_jump(u32 pc)
{
	if (!g_fxo->is_init<ppu_far_jumps_t>())
	{
		return 0;
	}

	return g_fxo->get<ppu_far_jumps_t>().get_target(pc);
}

static void ppu_far_jump(ppu_thread& ppu, ppu_opcode_t, be_t<u32>*, ppu_intrp_func*)
{
	const u32 cia = g_fxo->get<ppu_far_jumps_t>().get_target(ppu.cia, &ppu);

	if (!vm::check_addr(cia, vm::page_executable))
	{
		fmt::throw_exception("PPU far jump failed! (returned cia = 0x%08x)", cia);
	}

	ppu.cia = cia;
}

bool ppu_form_branch_to_code(u32 entry, u32 target, bool link, bool with_toc, std::string module_name)
{
	// Force align entry and target
	entry &= -4;

	// Exported functions are using target as FNID, must not be changed
	if (module_name.empty())
	{
		target &= -4;

		u32 cia_target = target;

		if (with_toc)
		{
			ppu_func_opd_t opd{};

			if (!vm::try_access(target, &opd, sizeof(opd), false))
			{
				// Cannot access function descriptor
				return false;
			}

			// For now allow situations where OPD is changed later by patches or by the program itself
			// cia_target = opd.addr;

			// So force a valid target (executable, yet not equal to entry)
			cia_target = entry ^ 8;
		}

		// Target CIA must be aligned, executable and not equal with the entry
		if (cia_target % 4 || entry == cia_target || !vm::check_addr(cia_target, vm::page_executable))
		{
			return false;
		}
	}

	// Entry must be executable
	if (!vm::check_addr(entry, vm::page_executable))
	{
		return false;
	}

	g_fxo->init<ppu_far_jumps_t>(0);

	if (!module_name.empty())
	{
		// Always use function descriptor for exported functions
		with_toc = true;
	}

	if (with_toc)
	{
		// Always link for calls with function descriptor
		link = true;
	}

	// Register branch target in host memory, not guest memory
	auto& jumps = g_fxo->get<ppu_far_jumps_t>();

	std::lock_guard lock(jumps.mutex);

	jumps.vals.insert_or_assign(entry, ppu_far_jumps_t::all_info_t{target, link, with_toc, std::move(module_name)});

	ppu_register_function_at(entry, 4, g_cfg.core.ppu_decoder != ppu_decoder_type::llvm_legacy ? &ppu_far_jump : ensure(g_fxo->get<ppu_far_jumps_t>().gen_jump<false>(entry)));

	return true;
}

bool ppu_form_branch_to_code(u32 entry, u32 target, bool link, bool with_toc)
{
	return ppu_form_branch_to_code(entry, target, link, with_toc, std::string{});
}

bool ppu_form_branch_to_code(u32 entry, u32 target, bool link)
{
	return ppu_form_branch_to_code(entry, target, link, false);
}

bool ppu_form_branch_to_code(u32 entry, u32 target)
{
	return ppu_form_branch_to_code(entry, target, false);
}
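
// Illustrative calls (addresses are hypothetical): redirect guest code at 0x10000 so that
// executing it lands on 0x20000 instead, optionally behaving like a "bl":
//
//   ppu_form_branch_to_code(0x10000, 0x20000);        // plain branch
//   ppu_form_branch_to_code(0x10000, 0x20000, true);  // branch-and-link (LR = entry + 4)
//
// Both addresses must already be mapped as executable or the call returns false.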

void ppu_remove_hle_instructions(u32 addr, u32 size)
{
	if (Emu.IsStopped() || !g_fxo->is_init<ppu_far_jumps_t>())
	{
		return;
	}

	auto& jumps = g_fxo->get<ppu_far_jumps_t>();

	std::lock_guard lock(jumps.mutex);

	for (auto it = jumps.vals.begin(); it != jumps.vals.end();)
	{
		if (it->first >= addr && it->first <= addr + size - 1 && size)
		{
			it = jumps.vals.erase(it);
			continue;
		}

		it++;
	}
}

atomic_t<bool> g_debugger_pause_all_threads_on_bp = false;

// Breakpoint entry point
static void ppu_break(ppu_thread& ppu, ppu_opcode_t, be_t<u32>* this_op, ppu_intrp_func* next_fn)
{
	const bool pause_all = g_debugger_pause_all_threads_on_bp;

	const u32 old_cia = vm::get_addr(this_op);
	ppu.cia = old_cia;

	// Pause
	ppu.state.atomic_op([&](bs_t<cpu_flag>& state)
	{
		if (pause_all)
			state += cpu_flag::dbg_global_pause;

		if (pause_all || !(state & cpu_flag::dbg_step))
			state += cpu_flag::dbg_pause;
	});

	if (pause_all)
	{
		// Pause all other threads
		Emu.CallFromMainThread([]()
		{
			Emu.Pause();
		});
	}

	if (ppu.check_state() || old_cia != atomic_storage<u32>::load(ppu.cia))
	{
		// Do not execute if PC changed
		return;
	}

	// Fallback to the interpreter function
	return ppu_cache(ppu.cia)(ppu, {*this_op}, this_op, ppu.state ? &ppu_ret : next_fn);
}

// Set or remove breakpoint
extern bool ppu_breakpoint(u32 addr, bool is_adding)
{
	if (addr % 4 || !vm::check_addr(addr, vm::page_executable) || g_cfg.core.ppu_decoder == ppu_decoder_type::llvm_legacy)
	{
		return false;
	}

	// Remove breakpoint parameters
	ppu_intrp_func_t func_original = 0;
	ppu_intrp_func_t breakpoint = &ppu_break;

	if (u32 hle_addr{}; g_fxo->is_init<ppu_function_manager>() && (hle_addr = g_fxo->get<ppu_function_manager>().addr))
	{
		// HLE function index
		const u32 index = (addr - hle_addr) / 8;

		if (addr % 8 == 4 && index < ppu_function_manager::get().size())
		{
			// HLE function placement
			func_original = ppu_function_manager::get()[index];
		}
	}

	if (!func_original)
	{
		// If not an HLE function use regular instruction function
		func_original = ppu_cache(addr);
	}

	if (is_adding)
	{
		if (ppu_read(addr) == ppu_fallback)
		{
			ppu_log.error("Unregistered instruction replaced with a breakpoint at 0x%08x", addr);
			func_original = ppu_fallback;
		}

		if (ppu_read(addr) != func_original)
		{
			return false;
		}

		write_to_ptr<ppu_intrp_func_t>(ppu_ptr(addr), breakpoint);
		return true;
	}

	if (ppu_read(addr) != breakpoint)
	{
		return false;
	}

	write_to_ptr<ppu_intrp_func_t>(ppu_ptr(addr), func_original);
	return true;
}
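
// Usage note (hypothetical address): ppu_breakpoint(0x12344, true) swaps the
// executable-cache entry for ppu_break, and ppu_breakpoint(0x12344, false) restores the
// previous handler; either call returns false if the entry does not currently hold the
// expected function.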

extern bool ppu_patch(u32 addr, u32 value)
{
	if (addr % 4)
	{
		ppu_log.fatal("Patch failed at 0x%x: unaligned memory address.", addr);
		return false;
	}

	vm::writer_lock rlock;

	if (!vm::check_addr(addr))
	{
		ppu_log.fatal("Patch failed at 0x%x: invalid memory address.", addr);
		return false;
	}

	const bool is_exec = vm::check_addr(addr, vm::page_executable);

	if (is_exec && g_cfg.core.ppu_decoder == ppu_decoder_type::llvm_legacy && !Emu.IsReady())
	{
		// TODO: support recompilers
		ppu_log.fatal("Patch failed at 0x%x: LLVM recompiler is used.", addr);
		return false;
	}

	*vm::get_super_ptr<u32>(addr) = value;

	if (is_exec)
	{
		if (auto old = ppu_read(addr); old != ppu_break && old != ppu_fallback)
		{
			write_to_ptr<ppu_intrp_func_t>(ppu_ptr(addr), ppu_cache(addr));
		}
	}

	return true;
}
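
// Note: ppu_patch writes the new opcode through the super pointer and, for executable
// pages, refreshes the interpreter-cache entry so the change takes effect, unless a
// breakpoint or the generic fallback handler currently occupies that entry.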

std::array<u32, 2> op_branch_targets(u32 pc, ppu_opcode_t op)
{
	std::array<u32, 2> res{pc + 4, umax};

	if (u32 target = g_fxo->is_init<ppu_far_jumps_t>() ? g_fxo->get<ppu_far_jumps_t>().get_target(pc) : 0)
	{
		res[0] = target;
		return res;
	}

	switch (const auto type = g_ppu_itype.decode(op.opcode))
	{
	case ppu_itype::B:
	case ppu_itype::BC:
	{
		res[type == ppu_itype::BC ? 1 : 0] = ((op.aa ? 0 : pc) + (type == ppu_itype::B ? +op.bt24 : +op.bt14));
		break;
	}
	case ppu_itype::BCCTR:
	case ppu_itype::BCLR:
	case ppu_itype::UNK:
	{
		res[0] = umax;
		break;
	}
	default: break;
	}

	return res;
}
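
// Note on the return convention: res[0] is the fall-through (or the sole target for an
// unconditional B / a registered far jump) and res[1] is the taken target of a
// conditional BC; register-indirect branches (BCCTR/BCLR) and unknown opcodes yield
// {umax, umax} because the destination cannot be determined statically.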
2023-07-12 10:02:12 +02:00
void ppu_thread : : dump_regs ( std : : string & ret , std : : any & custom_data ) const
2020-03-31 02:11:37 +02:00
{
2023-07-10 16:43:59 +02:00
const system_state emu_state = Emu . GetStatus ( false ) ;
const bool is_stopped_or_frozen = state & cpu_flag : : exit | | emu_state = = system_state : : frozen | | emu_state < = system_state : : stopping ;
const ppu_debugger_mode mode = debugger_mode . load ( ) ;
const bool is_decimal = ! is_stopped_or_frozen & & mode = = ppu_debugger_mode : : is_decimal ;
2023-07-12 10:02:12 +02:00
struct dump_registers_data_t
{
u32 preferred_cr_field_index = 7 ;
} ;
dump_registers_data_t * func_data = nullptr ;
func_data = std : : any_cast < dump_registers_data_t > ( & custom_data ) ;
if ( ! func_data )
{
custom_data . reset ( ) ;
custom_data = std : : make_any < dump_registers_data_t > ( ) ;
func_data = ensure ( std : : any_cast < dump_registers_data_t > ( & custom_data ) ) ;
}
2021-10-12 22:12:30 +02:00
PPUDisAsm dis_asm ( cpu_disasm_mode : : normal , vm : : g_sudo_addr ) ;
2020-03-31 02:57:54 +02:00
for ( uint i = 0 ; i < 32 ; + + i )
{
auto reg = gpr [ i ] ;
2021-03-16 14:20:45 +01:00
// Fixup for syscall arguments
2025-04-05 21:50:45 +02:00
if ( current_function & & i > = 3 & & i < = 10 )
reg = syscall_args [ i - 3 ] ;
2021-03-16 14:20:45 +01:00
2022-05-08 12:40:21 +02:00
auto [ is_const , const_value ] = dis_asm . try_get_const_gpr_value ( i , cia ) ;
if ( const_value ! = reg )
{
2022-12-09 19:06:50 +01:00
// Expectation of predictable code path has not been met (such as a branch directly to the instruction)
2022-05-08 12:40:21 +02:00
is_const = false ;
}
2023-02-11 20:32:38 +01:00
fmt : : append ( ret , " r%d%s%s " , i , i < = 9 ? " " : " " , is_const ? " © " : " : " ) ;
bool printed_error = false ;
if ( ( reg > > 31 ) = = 0x1'ffff'ffff )
{
const usz old_size = ret . size ( ) ;
fmt : : append ( ret , " %s (0x%x) " , CellError { static_cast < u32 > ( reg ) } , reg ) ;
// Test if failed to format (appended " 0x8".. in such case)
if ( ret [ old_size ] = = ' 0 ' )
{
// Failed
ret . resize ( old_size ) ;
}
else
{
printed_error = true ;
}
}
if ( ! printed_error )
{
2023-07-10 16:43:59 +02:00
if ( is_decimal )
{
fmt : : append ( ret , " %-11d " , reg ) ;
}
else
{
fmt : : append ( ret , " 0x%-8llx " , reg ) ;
}
2023-02-11 20:32:38 +01:00
}
2020-03-31 02:57:54 +02:00
2020-11-10 18:09:28 +01:00
constexpr u32 max_str_len = 32 ;
constexpr u32 hex_count = 8 ;
2020-03-31 02:57:54 +02:00
2021-05-22 09:35:15 +02:00
if ( reg < = u32 { umax } & & vm : : check_addr < max_str_len > ( static_cast < u32 > ( reg ) ) )
2020-03-31 02:57:54 +02:00
{
2020-04-17 08:15:30 +02:00
bool is_function = false ;
u32 toc = 0 ;
2022-05-18 15:09:27 +02:00
auto is_exec_code = [ & ] ( u32 addr )
{
return addr % 4 = = 0 & & vm : : check_addr ( addr , vm : : page_executable ) & & g_ppu_itype . decode ( * vm : : get_super_ptr < u32 > ( addr ) ) ! = ppu_itype : : UNK ;
} ;
2022-09-29 11:04:38 +02:00
if ( const u32 reg_ptr = * vm : : get_super_ptr < be_t < u32 , 1 > > ( static_cast < u32 > ( reg ) ) ;
2022-05-18 15:09:27 +02:00
vm : : check_addr < 8 > ( reg_ptr ) & & ! vm : : check_addr ( toc , vm : : page_executable ) )
2020-03-31 02:57:54 +02:00
{
2022-05-18 15:09:27 +02:00
// Check executability and alignment
if ( reg % 4 = = 0 & & is_exec_code ( reg_ptr ) )
2020-04-17 08:15:30 +02:00
{
toc = * vm : : get_super_ptr < u32 > ( static_cast < u32 > ( reg + 4 ) ) ;
2022-05-18 17:00:32 +02:00
if ( toc % 4 = = 0 & & ( toc > > 29 ) = = ( reg_ptr > > 29 ) & & vm : : check_addr ( toc ) & & ! vm : : check_addr ( toc , vm : : page_executable ) )
2020-04-17 08:15:30 +02:00
{
is_function = true ;
2020-08-21 19:58:38 +02:00
reg = reg_ptr ;
2020-04-17 08:15:30 +02:00
}
}
2020-03-31 02:57:54 +02:00
}
2023-12-29 18:33:29 +01:00
else if ( is_exec_code ( static_cast < u32 > ( reg ) ) )
2020-04-17 08:15:30 +02:00
{
is_function = true ;
}
2020-03-31 02:57:54 +02:00
2023-12-29 18:33:29 +01:00
const auto gpr_buf = vm : : get_super_ptr < u8 > ( static_cast < u32 > ( reg ) ) ;
2020-03-31 02:57:54 +02:00
std : : string buf_tmp ( gpr_buf , gpr_buf + max_str_len ) ;
2021-02-13 09:13:26 +01:00
std : : string_view sv ( buf_tmp . data ( ) , std : : min < usz > ( buf_tmp . size ( ) , buf_tmp . find_first_of ( " \0 \n " sv ) ) ) ;
2020-04-17 08:15:30 +02:00
if ( is_function )
{
if ( toc )
{
fmt : : append ( ret , " -> func(at=0x%x, toc=0x%x) " , reg , toc ) ;
}
else
{
2023-12-29 18:33:29 +01:00
dis_asm . disasm ( static_cast < u32 > ( reg ) ) ;
2020-11-10 15:57:06 +01:00
fmt : : append ( ret , " -> %s " , dis_asm . last_opcode ) ;
2020-04-17 08:15:30 +02:00
}
}
2021-02-13 09:13:26 +01:00
// NTS: size of 3 and above is required
// If ends with a newline, only one character is required
else if ( ( sv . size ( ) = = buf_tmp . size ( ) | | ( sv . size ( ) > = ( buf_tmp [ sv . size ( ) ] = = ' \n ' ? 1 : 3 ) ) ) & &
2025-04-05 21:50:45 +02:00
std : : all_of ( sv . begin ( ) , sv . end ( ) , [ ] ( u8 c )
{
return std : : isprint ( c ) ;
} ) )
2020-03-31 02:57:54 +02:00
{
2021-02-13 09:13:26 +01:00
fmt : : append ( ret , " -> \" %s \" " , sv ) ;
2020-03-31 02:57:54 +02:00
}
else
{
2020-04-03 10:21:18 +02:00
fmt : : append ( ret , " -> " ) ;
2020-03-31 02:57:54 +02:00
for ( u32 j = 0 ; j < hex_count ; + + j )
{
fmt : : append ( ret , " %02x " , buf_tmp [ j ] ) ;
}
}
}
2023-06-06 08:11:32 +02:00
fmt : : trim_back ( ret ) ;
ret + = ' \n ' ;
2020-03-31 02:57:54 +02:00
}
2020-04-03 10:21:18 +02:00
2023-07-12 10:02:12 +02:00
const u32 current_cia = cia ;
const u32 cr_packed = cr . pack ( ) ;
for ( u32 addr :
2025-04-05 21:50:45 +02:00
{
current_cia ,
current_cia + 4 ,
current_cia + 8 ,
current_cia - 4 ,
current_cia + 12 ,
} )
2023-07-12 10:02:12 +02:00
{
dis_asm . disasm ( addr ) ;
if ( dis_asm . last_opcode . size ( ) < = 4 )
{
continue ;
}
2024-08-14 15:42:21 +02:00
usz index = dis_asm . last_opcode . rfind ( " ,cr " ) ;
2023-07-12 10:02:12 +02:00
2024-08-14 15:42:21 +02:00
if ( index > dis_asm . last_opcode . size ( ) - 4 )
{
index = dis_asm . last_opcode . rfind ( " cr " ) ;
2023-07-12 10:02:12 +02:00
}
2024-08-14 15:42:21 +02:00
if ( index < = dis_asm . last_opcode . size ( ) - 4 )
2023-07-12 10:02:12 +02:00
{
const char result = dis_asm . last_opcode [ index + 3 ] ;
if ( result > = ' 0 ' & & result < = ' 7 ' )
{
func_data - > preferred_cr_field_index = result - ' 0 ' ;
break ;
}
}
if ( dis_asm . last_opcode . find ( " stdcx. " ) ! = umax | | dis_asm . last_opcode . find ( " stwcx. " ) ! = umax )
{
// Modifying CR0
func_data - > preferred_cr_field_index = 0 ;
break ;
}
}
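// The packed CR keeps CR0 in the most significant nibble, so shift the selected field down and mask it out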
const u32 displayed_cr_field = ( cr_packed > > ( ( 7 - func_data - > preferred_cr_field_index ) * 4 ) ) & 0xf ;
fmt : : append ( ret , " CR: 0x%08x, CR%d: [LT=%u GT=%u EQ=%u SO=%u] \n " , cr_packed , func_data - > preferred_cr_field_index , displayed_cr_field > > 3 , ( displayed_cr_field > > 2 ) & 1 , ( displayed_cr_field > > 1 ) & 1 , displayed_cr_field & 1 ) ;
2020-04-03 10:21:18 +02:00
for ( uint i = 0 ; i < 32 ; + + i )
{
2022-05-31 07:36:10 +02:00
const f64 r = fpr [ i ] ;
if ( ! std : : bit_cast < u64 > ( r ) )
{
fmt : : append ( ret , " f%d%s: %-12.6G [%-18s] (f32=0x%x) \n " , i , i < = 9 ? " " : " " , r , " " , std : : bit_cast < u32 > ( f32 ( r ) ) ) ;
continue ;
}
fmt : : append ( ret , " f%d%s: %-12.6G [0x%016x] (f32=0x%x) \n " , i , i < = 9 ? " " : " " , r , std : : bit_cast < u64 > ( r ) , std : : bit_cast < u32 > ( f32 ( r ) ) ) ;
2020-04-03 10:21:18 +02:00
}
2020-11-11 04:59:24 +01:00
for ( uint i = 0 ; i < 32 ; + + i , ret + = ' \n ' )
2020-04-03 10:21:18 +02:00
{
2020-11-11 04:59:24 +01:00
fmt : : append ( ret , " v%d%s: " , i , i < = 9 ? " " : " " ) ;
2020-11-14 07:03:33 +01:00
2020-11-11 04:59:24 +01:00
const auto r = vr [ i ] ;
const u32 i3 = r . u32r [ 0 ] ;
if ( v128 : : from32p ( i3 ) = = r )
{
// Shorthand formatting
fmt : : append ( ret , " %08x " , i3 ) ;
fmt : : append ( ret , " [x: %g] " , r . fr [ 0 ] ) ;
}
else
{
fmt : : append ( ret , " %08x %08x %08x %08x " , r . u32r [ 0 ] , r . u32r [ 1 ] , r . u32r [ 2 ] , r . u32r [ 3 ] ) ;
fmt : : append ( ret , " [x: %g y: %g z: %g w: %g] " , r . fr [ 0 ] , r . fr [ 1 ] , r . fr [ 2 ] , r . fr [ 3 ] ) ;
}
2020-04-03 10:21:18 +02:00
}
2017-04-28 15:28:37 +02:00
2023-07-12 10:02:12 +02:00
fmt : : append ( ret , " CIA: 0x%x \n " , current_cia ) ;
2020-05-05 06:23:12 +02:00
fmt : : append ( ret , " LR: 0x%llx \n " , lr ) ;
fmt : : append ( ret , " CTR: 0x%llx \n " , ctr ) ;
fmt : : append ( ret , " VRSAVE: 0x%08x \n " , vrsave ) ;
2025-04-24 12:41:04 +02:00
fmt : : append ( ret , " XER: [CA=%u | OV=%u | SO=%u | CNT=%u] \n " , xer_ca , xer_ov , xer_so , xer_cnt ) ;
2020-05-05 06:23:12 +02:00
fmt : : append ( ret , " VSCR: [SAT=%u | NJ=%u] \n " , sat , nj ) ;
fmt : : append ( ret , " FPSCR: [FL=%u | FG=%u | FE=%u | FU=%u] \n " , fpscr . fl , fpscr . fg , fpscr . fe , fpscr . fu ) ;
2021-11-21 10:41:05 +01:00
const u32 addr = raddr ;
if ( addr )
2020-05-05 06:23:12 +02:00
fmt : : append ( ret , " Reservation Addr: 0x%x " , addr ) ;
2020-05-01 07:59:15 +02:00
else
2020-05-05 06:23:12 +02:00
fmt : : append ( ret , " Reservation Addr: none " ) ;
2017-04-28 15:28:37 +02:00
2022-05-18 15:09:27 +02:00
fmt : : append ( ret , " \n Reservation Data (entire cache line): \n " ) ;
2021-11-21 10:41:05 +01:00
be_t < u32 > data [ 32 ] { } ;
std : : memcpy ( data , rdata , sizeof ( rdata ) ) ; // Show the data even if the reservation was lost inside the atomic loop
if ( addr & & ! use_full_rdata )
{
const u32 offset = addr & 0x78 ;
fmt : : append ( ret , " [0x%02x] %08x %08x \n " , offset , data [ offset / sizeof ( u32 ) ] , data [ offset / sizeof ( u32 ) + 1 ] ) ;
// Asterisk marks the offset of data that had been given to the guest PPU code
* ( & ret . back ( ) - ( addr & 4 ? 9 : 18 ) ) = ' * ' ;
}
else
{
for ( usz i = 0 ; i < std : : size ( data ) ; i + = 4 )
{
2025-04-05 21:50:45 +02:00
fmt : : append ( ret , " [0x%02x] %08x %08x %08x %08x \n " , i * sizeof ( data [ 0 ] ) , data [ i + 0 ] , data [ i + 1 ] , data [ i + 2 ] , data [ i + 3 ] ) ;
2021-11-21 10:41:05 +01:00
}
if ( addr )
{
// See the note above
* ( & ret . back ( ) - ( 4 - ( addr % 16 / 4 ) ) * 9 - ( 8 - ( addr % 128 / 16 ) ) * std : : size ( " [0x00] " sv ) ) = ' * ' ;
}
}
2020-03-31 02:11:37 +02:00
}
std : : string ppu_thread : : dump_callstack ( ) const
{
std : : string ret ;
fmt : : append ( ret , " Call stack: \n ========= \n 0x%08x (0x0) called \n " , cia ) ;
2020-07-03 06:56:55 +02:00
for ( const auto & sp : dump_callstack_list ( ) )
2020-03-31 02:11:37 +02:00
{
// TODO: function addresses too
2020-11-07 17:12:52 +01:00
fmt : : append ( ret , " > from 0x%08x (sp=0x%08x) \n " , sp . first , sp . second ) ;
2020-03-31 02:11:37 +02:00
}
return ret ;
}
2020-07-03 06:56:55 +02:00
std : : vector < std : : pair < u32 , u32 > > ppu_thread : : dump_callstack_list ( ) const
2020-03-31 02:11:37 +02:00
{
2025-04-05 21:50:45 +02:00
// std::shared_lock rlock(vm::g_mutex); // Needs optimizations
2020-03-19 11:29:50 +01:00
2017-04-28 15:28:37 +02:00
// Determine stack range
2020-08-21 19:49:57 +02:00
const u64 r1 = gpr [ 1 ] ;
2021-05-22 09:35:15 +02:00
if ( r1 > u32 { umax } | | r1 % 0x10 )
2020-08-21 19:49:57 +02:00
{
return { } ;
}
const u32 stack_ptr = static_cast < u32 > ( r1 ) ;
2020-03-19 11:29:50 +01:00
2020-11-10 18:09:28 +01:00
if ( ! vm : : check_addr ( stack_ptr , vm : : page_writable ) )
2020-03-19 11:29:50 +01:00
{
// Normally impossible unless the code does not follow ABI rules
2020-03-31 02:11:37 +02:00
return { } ;
2020-03-19 11:29:50 +01:00
}
2017-04-28 15:28:37 +02:00
u32 stack_min = stack_ptr & ~ 0xfff ;
u32 stack_max = stack_min + 4096 ;
2020-11-10 18:09:28 +01:00
while ( stack_min & & vm : : check_addr ( stack_min - 4096 , vm : : page_writable ) )
2017-04-28 15:28:37 +02:00
{
stack_min - = 4096 ;
}
2020-11-10 18:09:28 +01:00
while ( stack_max + 4096 & & vm : : check_addr ( stack_max , vm : : page_writable ) )
2017-04-28 15:28:37 +02:00
{
stack_max + = 4096 ;
}
2020-07-03 06:56:55 +02:00
std : : vector < std : : pair < u32 , u32 > > call_stack_list ;
2020-03-31 02:11:37 +02:00
2023-08-20 03:24:42 +02:00
bool is_first = true ;
bool skip_single_frame = false ;
const u64 _lr = this - > lr ;
const u32 _cia = this - > cia ;
const u64 gpr0 = this - > gpr [ 0 ] ;
2020-11-07 17:12:52 +01:00
2020-03-31 02:11:37 +02:00
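// Walk stack frames by following the saved back chain starting at r1; the first iteration also analyses the code around CIA to catch leaf functions (or frames where LR has not been stored on the stack yet)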
for (
2020-11-07 17:12:52 +01:00
u64 sp = r1 ;
2020-07-03 05:18:14 +02:00
sp % 0x10 = = 0u & & sp > = stack_min & & sp < = stack_max - ppu_stack_start_offset ;
2025-04-05 21:50:45 +02:00
is_first = false )
2015-02-01 14:52:34 +01:00
{
2020-11-07 17:12:52 +01:00
auto is_invalid = [ ] ( u64 addr )
{
2021-05-22 09:35:15 +02:00
if ( addr > u32 { umax } | | addr % 4 | | ! vm : : check_addr ( static_cast < u32 > ( addr ) , vm : : page_executable ) )
2020-11-07 20:05:44 +01:00
{
return true ;
}
// Ignore HLE stop address
2022-11-19 12:50:31 +01:00
return addr = = g_fxo - > get < ppu_function_manager > ( ) . func_addr ( 1 , true ) ;
2020-11-07 17:12:52 +01:00
} ;
2020-07-03 05:18:14 +02:00
2023-08-20 03:24:42 +02:00
if ( is_first & & ! is_invalid ( _lr ) )
2020-07-03 05:18:14 +02:00
{
2023-08-20 03:24:42 +02:00
// Detect functions with no stack or before LR has been stored
// Tracking if instruction has already been passed through
// Instead of using map or set, use two vectors relative to CIA and resize as needed
std : : vector < be_t < u32 > > inst_neg ;
std : : vector < be_t < u32 > > inst_pos ;
auto get_inst = [ & ] ( u32 pos ) - > be_t < u32 > &
2020-11-07 17:12:52 +01:00
{
2023-08-20 03:24:42 +02:00
static be_t < u32 > s_inst_empty { } ;
if ( pos < _cia )
{
const u32 neg_dist = ( _cia - pos - 4 ) / 4 ;
if ( neg_dist > = inst_neg . size ( ) )
{
const u32 inst_bound = pos & - 256 ;
const usz old_size = inst_neg . size ( ) ;
const usz new_size = neg_dist + ( pos - inst_bound ) / 4 + 1 ;
if ( new_size > = 0x8000 )
{
// Gross lower limit for the function (if it is that size it is unlikely that it is even a leaf function)
return s_inst_empty ;
}
inst_neg . resize ( new_size ) ;
2023-12-30 19:53:07 +01:00
if ( ! vm : : try_access ( inst_bound , & inst_neg [ old_size ] , : : narrow < u32 > ( ( new_size - old_size ) * sizeof ( be_t < u32 > ) ) , false ) )
2023-08-20 03:24:42 +02:00
{
// Read failure (the buffer is left zeroed, which is treated as failure later)
}
2020-11-07 17:12:52 +01:00
2023-08-20 03:24:42 +02:00
// Reverse the newly read block (this buffer is indexed backwards in address)
for ( usz start = old_size , end = new_size - 1 ; start < end ; start + + , end - - )
{
std : : swap ( inst_neg [ start ] , inst_neg [ end ] ) ;
}
}
return inst_neg [ neg_dist ] ;
}
const u32 pos_dist = ( pos - _cia ) / 4 ;
if ( pos_dist > = inst_pos . size ( ) )
2020-11-07 17:12:52 +01:00
{
2023-08-20 03:24:42 +02:00
const u32 inst_bound = utils : : align < u32 > ( pos , 256 ) ;
const usz old_size = inst_pos . size ( ) ;
const usz new_size = pos_dist + ( inst_bound - pos ) / 4 + 1 ;
if ( new_size > = 0x8000 )
{
// Gross upper limit for the function (if it is that size it is unlikely that it is even a leaf function)
return s_inst_empty ;
}
inst_pos . resize ( new_size ) ;
2023-12-30 19:53:07 +01:00
if ( ! vm : : try_access ( pos , & inst_pos [ old_size ] , : : narrow < u32 > ( ( new_size - old_size ) * sizeof ( be_t < u32 > ) ) , false ) )
2023-08-20 03:24:42 +02:00
{
// Read failure (the buffer is left zeroed, which is treated as failure later)
}
}
return inst_pos [ pos_dist ] ;
} ;
bool upper_abort = false ;
struct context_t
{
u32 start_point ;
2025-04-05 21:50:45 +02:00
bool maybe_leaf = false ; // True if the function is leaf or at the very end/start of non-leaf
bool non_leaf = false ; // Absolutely not a leaf
bool about_to_push_frame = false ; // STDU incoming
bool about_to_store_lr = false ; // Link is about to be stored on stack
bool about_to_pop_frame = false ; // ADDI R1 is about to be issued
bool about_to_load_link = false ; // MTLR is about to be issued
2023-08-20 03:24:42 +02:00
bool maybe_use_reg0_instead_of_lr = false ; // Use R0 at the end of a non-leaf function if ADDI has been issued before MTLR
} ;
// Start with CIA
std : : deque < context_t > workload { context_t { _cia } } ;
usz start = 0 ;
for ( ; start < workload . size ( ) ; start + + )
{
for ( u32 wa = workload [ start ] . start_point ; vm : : check_addr ( wa , vm : : page_executable ) ; )
{
be_t < u32 > & opcode = get_inst ( wa ) ;
auto & [ _ , maybe_leaf , non_leaf , about_to_push_frame , about_to_store_lr ,
about_to_pop_frame , about_to_load_link , maybe_use_reg0_instead_of_lr ] = workload [ start ] ;
if ( ! opcode )
{
// Already passed or failure of reading
break ;
}
const ppu_opcode_t op { opcode } ;
// Mark as passed through
opcode = 0 ;
const auto type = g_ppu_itype . decode ( op . opcode ) ;
if ( workload . size ( ) = = 1 & & type = = ppu_itype : : STDU & & op . rs = = 1u & & op . ra = = 1u )
{
if ( op . simm16 > = 0 )
{
// Against ABI
non_leaf = true ;
upper_abort = true ;
break ;
}
// Saving LR to register: this is indeed a new function (ok because LR has not been saved yet)
maybe_leaf = true ;
about_to_push_frame = true ;
about_to_pop_frame = false ;
upper_abort = true ;
break ;
}
if ( workload . size ( ) = = 1 & & type = = ppu_itype : : STD & & op . ra = = 1u & & op . rs = = 0u )
{
bool found_matching_stdu = false ;
for ( u32 back = 1 ; back < 20 ; back + + )
{
be_t < u32 > & opcode = get_inst ( utils : : sub_saturate < u32 > ( _cia , back * 4 ) ) ;
if ( ! opcode )
{
// Already passed or failure of reading
break ;
}
const ppu_opcode_t test_op { opcode } ;
const auto type = g_ppu_itype . decode ( test_op . opcode ) ;
if ( type = = ppu_itype : : BCLR )
{
break ;
}
if ( type = = ppu_itype : : STDU & & test_op . rs = = 1u & & test_op . ra = = 1u )
{
if ( 0 - ( test_op . ds < < 2 ) = = ( op . ds < < 2 ) - 0x10 )
{
found_matching_stdu = true ;
}
break ;
}
}
if ( found_matching_stdu )
{
// Saving LR to stack: this is indeed a new function (ok because LR has not been saved yet)
maybe_leaf = true ;
about_to_store_lr = true ;
about_to_pop_frame = true ;
upper_abort = true ;
break ;
}
}
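// The SPR number is encoded with its two 5-bit halves swapped; 0x8 denotes LR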
const u32 spr = ( ( op . spr > > 5 ) | ( ( op . spr & 0x1f ) < < 5 ) ) ;
// It can be placed before or after STDU, ignore for now
// if (workload.size() == 1 && type == ppu_itype::MFSPR && op.rs == 0u && spr == 0x8)
// {
// // Saving LR to register: this is indeed a new function (ok because LR has not been saved yet)
// maybe_leaf = true;
// about_to_store_lr = true;
// about_to_pop_frame = true;
// }
if ( type = = ppu_itype : : MTSPR & & spr = = 0x8 & & op . rs = = 0u )
{
// Test for special case: if ADDI R1 is not found later in code, it means that LR is not restored and R0 should be used instead
// Can also search for ADDI R1 backwards and pull the value from stack (needs more research if it is more reliable)
maybe_use_reg0_instead_of_lr = true ;
}
if ( type = = ppu_itype : : UNK )
{
// Ignore for now
break ;
}
if ( ( type & ppu_itype : : branch ) & & op . lk )
{
// Gave up on LR before saving
non_leaf = true ;
about_to_pop_frame = true ;
upper_abort = true ;
break ;
}
// Even if BCLR is conditional, it still counts because LR value is ready for return
if ( type = = ppu_itype : : BCLR )
{
// Returned
maybe_leaf = true ;
upper_abort = true ;
break ;
}
if ( type = = ppu_itype : : ADDI & & op . ra = = 1u & & op . rd = = 1u )
{
if ( op . simm16 < 0 )
{
// Against ABI
non_leaf = true ;
upper_abort = true ;
break ;
}
else if ( op . simm16 > 0 )
{
// Remember that SP is about to be restored
about_to_pop_frame = true ;
non_leaf = true ;
upper_abort = true ;
break ;
}
}
const auto results = op_branch_targets ( wa , op ) ;
bool proceeded = false ;
for ( usz res_i = 0 ; res_i < results . size ( ) ; res_i + + )
{
const u32 route_pc = results [ res_i ] ;
if ( route_pc = = umax )
{
continue ;
}
if ( vm : : check_addr ( route_pc , vm : : page_executable ) & & get_inst ( route_pc ) )
{
if ( proceeded )
{
// Remember next route start point
workload . push_back ( context_t { route_pc } ) ;
}
else
{
// Next PC
wa = route_pc ;
proceeded = true ;
}
}
}
}
if ( upper_abort )
{
break ;
2020-11-07 17:12:52 +01:00
}
}
2023-08-20 03:24:42 +02:00
2023-08-21 11:43:05 +02:00
const context_t & res = workload [ std : : min < usz > ( start , workload . size ( ) - 1 ) ] ;
2023-08-20 03:24:42 +02:00
if ( res . maybe_leaf & & ! res . non_leaf )
2020-11-07 17:12:52 +01:00
{
2023-08-20 03:24:42 +02:00
const u32 result = res . maybe_use_reg0_instead_of_lr ? static_cast < u32 > ( gpr0 ) : static_cast < u32 > ( _lr ) ;
// Same stack as far as we know
call_stack_list . emplace_back ( result , static_cast < u32 > ( sp ) ) ;
if ( res . about_to_store_lr )
{
// LR has yet to be stored on stack, ignore the stack value
skip_single_frame = true ;
}
}
if ( res . about_to_pop_frame | | ( res . maybe_leaf & & ! res . non_leaf ) )
{
const u64 temp_sp = * vm : : get_super_ptr < u64 > ( static_cast < u32 > ( sp ) ) ;
if ( temp_sp < = sp )
{
// Ensure inequality and that the old stack pointer is higher than current
break ;
}
// Read the first stack frame so caller addresses can be obtained
sp = temp_sp ;
continue ;
2020-11-07 17:12:52 +01:00
}
2020-07-03 05:18:14 +02:00
}
2023-08-20 03:24:42 +02:00
u64 addr = * vm : : get_super_ptr < u64 > ( static_cast < u32 > ( sp + 16 ) ) ;
if ( skip_single_frame )
{
skip_single_frame = false ;
}
else if ( ! is_invalid ( addr ) )
{
// TODO: function addresses too
call_stack_list . emplace_back ( static_cast < u32 > ( addr ) , static_cast < u32 > ( sp ) ) ;
}
else if ( ! is_first )
{
break ;
}
2023-05-10 10:23:09 +02:00
const u64 temp_sp = * vm : : get_super_ptr < u64 > ( static_cast < u32 > ( sp ) ) ;
if ( temp_sp < = sp )
{
// Ensure inequality and that the old stack pointer is higher than current
break ;
}
sp = temp_sp ;
2023-08-20 03:24:42 +02:00
is_first = false ;
2015-02-01 14:52:34 +01:00
}
2020-03-31 02:11:37 +02:00
return call_stack_list ;
}
std : : string ppu_thread : : dump_misc ( ) const
{
2022-06-22 11:10:40 +02:00
std : : string ret = cpu_thread : : dump_misc ( ) ;
2020-03-31 02:11:37 +02:00
2022-09-25 13:10:59 +02:00
if ( ack_suspend )
{
if ( ret . ends_with ( " \n " ) )
{
ret . pop_back ( ) ;
}
fmt : : append ( ret , " (LV2 suspended) \n " ) ;
}
2023-04-28 19:10:21 +02:00
fmt : : append ( ret , " Priority: %d \n " , prio . load ( ) . prio ) ;
2020-03-31 02:11:37 +02:00
fmt : : append ( ret , " Stack: 0x%x..0x%x \n " , stack_addr , stack_addr + stack_size - 1 ) ;
fmt : : append ( ret , " Joiner: %s \n " , joiner . load ( ) ) ;
2020-04-03 10:21:18 +02:00
if ( const auto size = cmd_queue . size ( ) )
fmt : : append ( ret , " Commands: %u \n " , size ) ;
2020-03-31 02:11:37 +02:00
const char * _func = current_function ;
if ( _func )
{
2020-04-16 20:16:40 +02:00
ret + = " In function: " ;
2020-03-31 02:11:37 +02:00
ret + = _func ;
ret + = ' \n ' ;
2021-03-16 14:41:32 +01:00
for ( u32 i = 3 ; i < = 10 ; i + + )
2021-03-16 14:20:45 +01:00
if ( u64 v = gpr [ i ] ; v ! = syscall_args [ i - 3 ] )
fmt : : append ( ret , " ** r%d: 0x%llx \n " , i , v ) ;
2020-03-31 02:11:37 +02:00
}
2022-09-25 13:10:59 +02:00
else if ( is_paused ( ) | | is_stopped ( ) )
2020-03-31 02:11:37 +02:00
{
if ( const auto last_func = last_function )
{
_func = last_func ;
ret + = " Last function: " ;
ret + = _func ;
ret + = ' \n ' ;
}
}
if ( const auto _time = start_time )
{
fmt : : append ( ret , " Waiting: %fs \n " , ( get_guest_system_time ( ) - _time ) / 1000000. ) ;
}
else
{
ret + = ' \n ' ;
}
if ( ! _func )
{
ret + = ' \n ' ;
}
2016-07-27 23:43:22 +02:00
return ret ;
2015-08-10 21:39:52 +02:00
}
2022-06-22 11:00:06 +02:00
void ppu_thread : : dump_all ( std : : string & ret ) const
2021-07-10 10:56:48 +02:00
{
2022-06-22 11:00:06 +02:00
cpu_thread : : dump_all ( ret ) ;
2021-07-10 10:56:48 +02:00
2023-05-19 17:41:17 +02:00
if ( call_history . data . size ( ) > 1 )
2021-07-10 10:56:48 +02:00
{
ret + =
" \n Calling History: "
" \n ================ " ;
fmt : : append ( ret , " %s " , call_history ) ;
}
2023-07-09 07:45:15 +02:00
if ( syscall_history . data . size ( ) > 1 )
{
ret + =
" \n HLE/LV2 History: "
" \n ================ " ;
fmt : : append ( ret , " %s " , syscall_history ) ;
}
2021-07-10 10:56:48 +02:00
}
2025-04-05 21:50:45 +02:00
extern thread_local std : : string ( * g_tls_log_prefix ) ( ) ;
2016-04-27 00:27:24 +02:00
2016-07-27 23:43:22 +02:00
void ppu_thread : : cpu_task ( )
2012-11-15 00:39:56 +01:00
{
2017-02-07 14:14:44 +01:00
std : : fesetround ( FE_TONEAREST ) ;
2015-03-16 19:44:49 +01:00
2021-12-30 17:39:18 +01:00
if ( g_cfg . core . set_daz_and_ftz )
{
gv_set_zeroing_denormals ( ) ;
}
else
2018-05-09 22:35:05 +02:00
{
2021-12-30 17:39:18 +01:00
gv_unset_zeroing_denormals ( ) ;
2018-05-09 22:35:05 +02:00
}
2016-07-27 23:43:22 +02:00
// Execute cmd_queue
2016-08-09 16:14:41 +02:00
while ( cmd64 cmd = cmd_wait ( ) )
2016-07-27 23:43:22 +02:00
{
const u32 arg = cmd . arg2 < u32 > ( ) ; // 32-bit arg extracted
2016-08-09 16:14:41 +02:00
switch ( auto type = cmd . arg1 < ppu_cmd > ( ) )
2016-07-27 23:43:22 +02:00
{
case ppu_cmd : : opcode :
{
2021-12-30 17:39:18 +01:00
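// Decode a single instruction and execute it through the interpreter function table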
cmd_pop ( ) , g_fxo - > get < ppu_interpreter_rt > ( ) . decode ( arg ) ( * this , { arg } , vm : : _ptr < u32 > ( cia - 4 ) , & ppu_ret ) ;
2016-07-27 23:43:22 +02:00
break ;
}
case ppu_cmd : : set_gpr :
{
if ( arg > = 32 )
{
2020-12-09 16:04:52 +01:00
fmt : : throw_exception ( " Invalid ppu_cmd::set_gpr arg (0x%x) " , arg ) ;
2016-07-27 23:43:22 +02:00
}
2016-08-09 16:14:41 +02:00
gpr [ arg % 32 ] = cmd_get ( 1 ) . as < u64 > ( ) ;
2016-07-27 23:43:22 +02:00
cmd_pop ( 1 ) ;
break ;
}
case ppu_cmd : : set_args :
{
if ( arg > 8 )
{
2020-12-09 16:04:52 +01:00
fmt : : throw_exception ( " Unsupported ppu_cmd::set_args size (0x%x) " , arg ) ;
2016-07-27 23:43:22 +02:00
}
for ( u32 i = 0 ; i < arg ; i + + )
{
2016-08-09 16:14:41 +02:00
gpr [ i + 3 ] = cmd_get ( 1 + i ) . as < u64 > ( ) ;
2016-07-27 23:43:22 +02:00
}
cmd_pop ( arg ) ;
break ;
}
case ppu_cmd : : lle_call :
{
2022-06-18 04:54:54 +02:00
# ifdef __APPLE__
pthread_jit_write_protect_np ( true ) ;
# endif
2016-07-27 23:43:22 +02:00
const vm : : ptr < u32 > opd ( arg < 32 ? vm : : cast ( gpr [ arg ] ) : vm : : cast ( arg ) ) ;
cmd_pop ( ) , fast_call ( opd [ 0 ] , opd [ 1 ] ) ;
break ;
}
2023-06-07 13:34:39 +02:00
case ppu_cmd : : entry_call :
{
# ifdef __APPLE__
pthread_jit_write_protect_np ( true ) ;
# endif
cmd_pop ( ) , fast_call ( entry_func . addr , entry_func . rtoc , true ) ;
break ;
}
2016-07-27 23:43:22 +02:00
case ppu_cmd : : hle_call :
{
2022-09-19 14:57:51 +02:00
cmd_pop ( ) , : : at32 ( ppu_function_manager : : get ( ) , arg ) ( * this , { arg } , vm : : _ptr < u32 > ( cia - 4 ) , & ppu_ret ) ;
2016-07-27 23:43:22 +02:00
break ;
}
2020-04-08 13:26:31 +02:00
case ppu_cmd : : opd_call :
{
2022-06-18 04:54:54 +02:00
# ifdef __APPLE__
pthread_jit_write_protect_np ( true ) ;
# endif
2020-04-11 10:16:28 +02:00
const ppu_func_opd_t opd = cmd_get ( 1 ) . as < ppu_func_opd_t > ( ) ;
2020-04-08 13:26:31 +02:00
cmd_pop ( 1 ) , fast_call ( opd . addr , opd . rtoc ) ;
break ;
}
2018-10-11 00:17:19 +02:00
case ppu_cmd : : ptr_call :
{
2021-12-30 17:39:18 +01:00
const ppu_intrp_func_t func = cmd_get ( 1 ) . as < ppu_intrp_func_t > ( ) ;
cmd_pop ( 1 ) , func ( * this , { } , vm : : _ptr < u32 > ( cia - 4 ) , & ppu_ret ) ;
2018-10-11 00:17:19 +02:00
break ;
}
2022-07-04 15:02:17 +02:00
case ppu_cmd : : cia_call :
{
loaded_from_savestate = true ;
2023-06-07 13:34:39 +02:00
cmd_pop ( ) , fast_call ( std : : exchange ( cia , 0 ) , gpr [ 2 ] , true ) ;
2022-07-04 15:02:17 +02:00
break ;
}
2017-01-22 20:03:57 +01:00
case ppu_cmd : : initialize :
{
2022-06-14 14:28:38 +02:00
# ifdef __APPLE__
pthread_jit_write_protect_np ( false ) ;
# endif
2021-03-21 17:55:47 +01:00
cmd_pop ( ) ;
2023-08-28 14:40:18 +02:00
ppu_initialize ( ) ;
if ( Emu . IsStopped ( ) )
{
return ;
}
spu_cache : : initialize ( ) ;
2021-04-06 20:05:16 +02:00
2022-06-14 14:28:38 +02:00
# ifdef __APPLE__
pthread_jit_write_protect_np ( true ) ;
# endif
# ifdef ARCH_ARM64
// Flush all cache lines after potentially writing executable code
asm ( " ISB " ) ;
asm ( " DSB ISH " ) ;
# endif
2022-07-04 15:02:17 +02:00
// Wait until the progress dialog is closed.
// We don't want to open a cell dialog while a native progress dialog is still open.
2023-07-31 22:57:26 +02:00
while ( u32 v = g_progr_ptotal )
{
2023-08-06 11:24:32 +02:00
if ( Emu . IsStopped ( ) )
{
return ;
}
2023-07-31 22:57:26 +02:00
g_progr_ptotal . wait ( v ) ;
}
2023-06-15 23:58:28 +02:00
g_fxo - > get < progress_dialog_workaround > ( ) . show_overlay_message_only = true ;
2022-07-04 15:02:17 +02:00
2022-07-05 13:12:21 +02:00
// Sadly we can't postpone initializing guest time because we need to run PPU threads
2022-07-04 15:02:17 +02:00
// (the farther it's postponed, the less accuracy of guest time has been lost)
Emu . FixGuestTime ( ) ;
2022-07-14 21:07:02 +02:00
// Run SPUs waiting on a syscall (savestates related)
idm : : select < named_thread < spu_thread > > ( [ & ] ( u32 , named_thread < spu_thread > & spu )
{
2025-04-05 21:50:45 +02:00
if ( spu . group & & spu . index = = spu . group - > waiter_spu_index )
2022-07-14 21:07:02 +02:00
{
2025-04-05 21:50:45 +02:00
if ( std : : exchange ( spu . stop_flag_removal_protection , false ) )
{
return ;
}
2022-07-14 21:07:02 +02:00
2025-04-05 21:50:45 +02:00
ensure ( spu . state . test_and_reset ( cpu_flag : : stop ) ) ;
spu . state . notify_one ( ) ;
}
} ) ;
2022-07-14 21:07:02 +02:00
2022-07-04 15:02:17 +02:00
// Check if this is the only PPU left to initialize (savestates related)
if ( lv2_obj : : is_scheduler_ready ( ) )
{
if ( Emu . IsStarting ( ) )
{
Emu . FinalizeRunRequest ( ) ;
}
}
2017-01-22 20:03:57 +01:00
break ;
}
2017-02-06 19:36:46 +01:00
case ppu_cmd : : sleep :
{
2017-02-22 11:10:55 +01:00
cmd_pop ( ) , lv2_obj : : sleep ( * this ) ;
2017-02-06 19:36:46 +01:00
break ;
}
2018-03-06 03:36:33 +01:00
case ppu_cmd : : reset_stack :
{
2020-07-03 05:18:14 +02:00
cmd_pop ( ) , gpr [ 1 ] = stack_addr + stack_size - ppu_stack_start_offset ;
2018-03-06 03:36:33 +01:00
break ;
}
2016-07-27 23:43:22 +02:00
default :
{
2020-12-09 16:04:52 +01:00
fmt : : throw_exception ( " Unknown ppu_cmd(0x%x) " , static_cast < u32 > ( type ) ) ;
2016-07-27 23:43:22 +02:00
}
}
}
2016-06-25 07:16:15 +02:00
}
2014-04-23 13:59:14 +02:00
2018-04-03 16:19:07 +02:00
void ppu_thread : : cpu_sleep ( )
{
2020-10-30 14:32:49 +01:00
// Clear reservation
raddr = 0 ;
2018-04-03 16:19:07 +02:00
2020-10-30 14:32:49 +01:00
// Setup wait flag and memory flags to relock itself
2020-11-14 07:03:33 +01:00
state + = g_use_rtm ? cpu_flag : : wait : cpu_flag : : wait + cpu_flag : : memory ;
2018-04-03 16:19:07 +02:00
2020-10-30 14:32:49 +01:00
if ( auto ptr = vm : : g_tls_locked )
{
ptr - > compare_and_swap ( this , nullptr ) ;
}
lv2_obj : : awake ( this ) ;
2018-04-03 16:19:07 +02:00
}
2021-02-13 15:50:07 +01:00
void ppu_thread : : cpu_on_stop ( )
{
2024-03-28 12:35:11 +01:00
if ( current_function & & is_stopped ( ) )
2021-02-13 15:50:07 +01:00
{
if ( start_time )
{
ppu_log . warning ( " '%s' aborted (%fs) " , current_function , ( get_guest_system_time ( ) - start_time ) / 1000000. ) ;
}
else
{
ppu_log . warning ( " '%s' aborted " , current_function ) ;
}
}
2022-08-26 09:50:58 +02:00
2024-03-28 12:35:11 +01:00
current_function = { } ;
2022-08-26 09:50:58 +02:00
// TODO: More conditions
2023-07-10 16:43:59 +02:00
if ( Emu . IsStopped ( ) & & g_cfg . core . ppu_debug )
2022-08-26 09:50:58 +02:00
{
std : : string ret ;
dump_all ( ret ) ;
ppu_log . notice ( " thread context: %s " , ret ) ;
}
2024-03-06 16:28:07 +01:00
if ( is_stopped ( ) )
{
if ( last_succ = = 0 & & last_fail = = 0 & & exec_bytes = = 0 )
{
perf_log . notice ( " PPU thread perf stats are not available. " ) ;
}
else
{
perf_log . notice ( " Perf stats for STCX reload: success %u, failure %u " , last_succ , last_fail ) ;
perf_log . notice ( " Perf stats for instructions: total %u " , exec_bytes / 4 ) ;
}
}
2021-02-13 15:50:07 +01:00
}
2024-11-30 15:46:50 +01:00
void ppu_thread : : cpu_wait ( bs_t < cpu_flag > old )
{
// While waiting, notify SPU waiters
if ( u32 addr = res_notify )
{
res_notify = 0 ;
if ( res_notify_time = = vm : : reservation_notifier_count_index ( addr ) . second )
{
vm : : reservation_notifier_notify ( addr ) ;
}
}
if ( old ! = state )
{
return ;
}
state . wait ( old ) ;
}
2025-04-24 12:41:04 +02:00
// static_assert(offsetof(ppu_thread, gpr[0]) == 24);
2016-07-27 23:43:22 +02:00
void ppu_thread : : exec_task ( )
2016-06-25 07:16:15 +02:00
{
2025-04-24 12:41:04 +02:00
if ( g_cfg . core . ppu_decoder = = ppu_decoder_type : : llvm_legacy )
2016-06-07 22:24:20 +02:00
{
2024-08-04 04:09:06 +02:00
// HVContext push to allow recursion. This happens with guest callback invocations.
const auto old_hv_ctx = hv_ctx ;
2021-01-31 19:38:47 +01:00
while ( true )
2017-06-22 23:52:09 +02:00
{
2021-01-31 19:38:47 +01:00
if ( state ) [[unlikely]]
{
if ( check_state ( ) )
break ;
}
ppu_gateway ( this ) ;
2017-06-22 23:52:09 +02:00
}
2017-12-19 22:01:03 +01:00
2024-08-04 04:09:06 +02:00
// HVContext pop
hv_ctx = old_hv_ctx ;
2017-03-22 21:23:47 +01:00
return ;
2016-06-07 22:24:20 +02:00
}
2021-12-30 17:39:18 +01:00
const auto mem_ = vm : : g_base_addr ;
2012-11-15 00:39:56 +01:00
2025-04-24 12:41:04 +02:00
if ( g_cfg . core . ppu_decoder = = ppu_decoder_type : : interpreter )
{
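// Plain interpreter path: check the stop flag, then fetch and execute one big-endian instruction at CIA per iteration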
static PPUInterpreter interpreter ;
while ( true )
{
if ( test_stopped ( ) ) [[unlikely]]
{
return ;
}
std : : uint32_t inst = * reinterpret_cast < be_t < std : : uint32_t > * > ( mem_ + std : : uint64_t { cia } ) ;
interpreter . interpret ( * this , inst ) ;
}
return ;
}
const auto cache = vm : : g_exec_addr ;
2016-04-14 01:09:41 +02:00
while ( true )
{
2021-12-30 17:39:18 +01:00
if ( test_stopped ( ) ) [[unlikely]]
2016-04-14 01:09:41 +02:00
{
2021-12-30 17:39:18 +01:00
return ;
2016-05-13 15:55:34 +02:00
}
2016-04-14 01:09:41 +02:00
2021-12-30 17:39:18 +01:00
gv_zeroupper ( ) ;
2017-02-10 13:20:54 +01:00
2021-12-30 17:39:18 +01:00
// Execute instruction (may be step; execute only one instruction if state)
const auto op = reinterpret_cast < be_t < u32 > * > ( mem_ + u64 { cia } ) ;
const auto fn = reinterpret_cast < ppu_intrp_func * > ( cache + u64 { cia } * 2 ) ;
fn - > fn ( * this , { * op } , op , state ? & ppu_ret : fn + 1 ) ;
2016-04-14 01:09:41 +02:00
}
2012-11-15 00:39:56 +01:00
}
2016-07-27 23:43:22 +02:00
ppu_thread : : ~ ppu_thread ( )
2012-11-15 00:39:56 +01:00
{
2016-07-27 23:43:22 +02:00
}
2016-05-13 15:55:34 +02:00
2023-04-28 19:10:21 +02:00
ppu_thread : : ppu_thread ( const ppu_thread_params & param , std : : string_view name , u32 _prio , int detached )
2025-04-05 21:50:45 +02:00
: cpu_thread ( idm : : last_id ( ) ) , stack_size ( param . stack_size ) , stack_addr ( param . stack_addr ) , joiner ( detached ! = 0 ? ppu_join_status : : detached : ppu_join_status : : joinable ) , entry_func ( param . entry ) , start_time ( get_guest_system_time ( ) ) , is_interrupt_thread ( detached < 0 ) , ppu_tname ( make_single < std : : string > ( name ) )
2016-07-27 23:43:22 +02:00
{
2023-04-28 19:10:21 +02:00
prio . raw ( ) . prio = _prio ;
2024-08-04 04:09:06 +02:00
memset ( & hv_ctx , 0 , sizeof ( hv_ctx ) ) ;
2020-07-03 05:18:14 +02:00
gpr [ 1 ] = stack_addr + stack_size - ppu_stack_start_offset ;
2018-10-11 00:17:19 +02:00
gpr [ 13 ] = param . tls_addr ;
2020-04-29 07:03:07 +02:00
if ( detached > = 0 )
{
// Initialize thread args
gpr [ 3 ] = param . arg0 ;
gpr [ 4 ] = param . arg1 ;
2018-10-11 00:17:19 +02:00
}
2022-07-05 13:12:21 +02:00
optional_savestate_state = std : : make_shared < utils : : serial > ( ) ;
2017-02-06 19:36:46 +01:00
// Trigger the scheduler
2018-05-14 22:07:36 +02:00
state + = cpu_flag : : suspend ;
if ( ! g_use_rtm )
{
state + = cpu_flag : : memory ;
}
2021-07-10 10:56:48 +02:00
2023-05-19 17:41:17 +02:00
call_history . data . resize ( g_cfg . core . ppu_call_history ? call_history_max_size : 1 ) ;
syscall_history . data . resize ( g_cfg . core . ppu_call_history ? syscall_history_max_size : 1 ) ;
syscall_history . count_debug_arguments = static_cast < u32 > ( g_cfg . core . ppu_call_history ? std : : size ( syscall_history . data [ 0 ] . args ) : 0 ) ;
2022-06-14 14:28:38 +02:00
# ifdef __APPLE__
pthread_jit_write_protect_np ( true ) ;
# endif
# ifdef ARCH_ARM64
// Flush all cache lines after potentially writing executable code
asm ( " ISB " ) ;
asm ( " DSB ISH " ) ;
# endif
2016-07-27 23:43:22 +02:00
}
2016-05-13 15:55:34 +02:00
2022-07-04 15:02:17 +02:00
struct disable_precomp_t
{
atomic_t < bool > disable = false ;
} ;
void vdecEntry ( ppu_thread & ppu , u32 vid ) ;
bool ppu_thread : : savable ( ) const
{
if ( joiner = = ppu_join_status : : exited )
{
return false ;
}
if ( cia = = g_fxo - > get < ppu_function_manager > ( ) . func_addr ( FIND_FUNC ( vdecEntry ) ) )
{
// Do not attempt to save the state of HLE VDEC threads
return false ;
}
return true ;
}
void ppu_thread : : serialize_common ( utils : : serial & ar )
{
2023-04-28 19:10:21 +02:00
[[maybe_unused]] const s32 version = GET_OR_USE_SERIALIZATION_VERSION ( ar . is_writing ( ) , ppu ) ;
2025-04-24 12:41:04 +02:00
// ar(gpr, fpr, cr, fpscr.bits, lr, ctr, vrsave, cia, xer, sat, nj, prio.raw().all);
2023-04-28 19:10:21 +02:00
2024-03-25 04:08:09 +01:00
if ( cia % 4 | | ( cia > > 28 ) > = 0xCu )
2023-11-15 20:07:42 +01:00
{
fmt : : throw_exception ( " Failed to serialize PPU thread ID=0x%x (cia=0x%x, ar=%s) " , this - > id , cia , ar ) ;
}
2023-04-28 19:10:21 +02:00
ar ( optional_savestate_state , vr ) ;
2022-07-05 13:12:21 +02:00
2023-11-25 18:06:13 +01:00
if ( ! ar . is_writing ( ) )
2022-07-05 13:12:21 +02:00
{
2023-11-25 18:06:13 +01:00
if ( optional_savestate_state - > data . empty ( ) )
{
optional_savestate_state - > clear ( ) ;
}
optional_savestate_state - > set_reading_state ( ) ;
2022-07-05 13:12:21 +02:00
}
2022-07-04 15:02:17 +02:00
}
2024-03-26 12:16:23 +01:00
struct save_lv2_tag
{
atomic_t < bool > saved = false ;
atomic_t < bool > loaded = false ;
} ;
2022-07-04 15:02:17 +02:00
ppu_thread : : ppu_thread ( utils : : serial & ar )
: cpu_thread ( idm : : last_id ( ) ) // last_id() is passed to the constructor during deserialization
2025-04-05 21:50:45 +02:00
,
stack_size ( ar ) , stack_addr ( ar ) , joiner ( ar . pop < ppu_join_status > ( ) ) , entry_func ( std : : bit_cast < ppu_func_opd_t , u64 > ( ar ) ) , is_interrupt_thread ( ar )
2022-07-04 15:02:17 +02:00
{
2024-03-26 12:16:23 +01:00
[[maybe_unused]] const s32 version = GET_SERIALIZATION_VERSION ( ppu ) ;
2022-07-04 15:02:17 +02:00
struct init_pushed
{
bool pushed = false ;
2023-07-31 22:57:26 +02:00
atomic_t < u32 > inited = false ;
2022-07-04 15:02:17 +02:00
} ;
2023-05-19 17:41:17 +02:00
call_history . data . resize ( g_cfg . core . ppu_call_history ? call_history_max_size : 1 ) ;
syscall_history . data . resize ( g_cfg . core . ppu_call_history ? syscall_history_max_size : 1 ) ;
syscall_history . count_debug_arguments = static_cast < u32 > ( g_cfg . core . ppu_call_history ? std : : size ( syscall_history . data [ 0 ] . args ) : 0 ) ;
2024-03-26 12:16:23 +01:00
if ( version > = 2 & & ! g_fxo - > get < save_lv2_tag > ( ) . loaded . exchange ( true ) )
{
ar ( lv2_obj : : g_priority_order_tag ) ;
}
2024-03-27 13:44:33 +01:00
if ( version > = 3 )
{
// Function and module for HLE function relocation
// TODO: Use it
ar . pop < std : : string > ( ) ;
ar . pop < std : : string > ( ) ;
}
2022-07-04 15:02:17 +02:00
serialize_common ( ar ) ;
// Restore jm_mask
jm_mask = nj ? 0x7F800000 : 0x7fff'ffff ;
auto queue_intr_entry = [ & ] ( )
{
if ( is_interrupt_thread )
{
void ppu_interrupt_thread_entry ( ppu_thread & , ppu_opcode_t , be_t < u32 > * , struct ppu_intrp_func * ) ;
2025-04-05 21:50:45 +02:00
cmd_list ( { { ppu_cmd : : ptr_call , 0 } ,
std : : bit_cast < u64 > ( & ppu_interrupt_thread_entry ) } ) ;
2022-07-04 15:02:17 +02:00
}
} ;
2025-01-28 17:49:19 +01:00
const u32 status = ar . pop < u32 > ( ) ;
switch ( status )
2022-07-04 15:02:17 +02:00
{
case PPU_THREAD_STATUS_IDLE :
{
stop_flag_removal_protection = true ;
break ;
}
case PPU_THREAD_STATUS_RUNNABLE :
case PPU_THREAD_STATUS_ONPROC :
{
2024-03-26 12:16:23 +01:00
if ( version > = 2 )
{
const u32 order = ar . pop < u32 > ( ) ;
struct awake_pushed
{
bool pushed = false ;
shared_mutex dummy ;
std : : map < u32 , ppu_thread * > awake_ppus ;
} ;
g_fxo - > get < awake_pushed > ( ) . awake_ppus [ order ] = this ;
if ( ! std : : exchange ( g_fxo - > get < awake_pushed > ( ) . pushed , true ) )
{
Emu . PostponeInitCode ( [ this ] ( )
{
2025-04-05 21:50:45 +02:00
u32 prev = umax ;
2024-03-26 12:16:23 +01:00
2025-04-05 21:50:45 +02:00
for ( auto ppu : g_fxo - > get < awake_pushed > ( ) . awake_ppus )
{
ensure ( prev + 1 = = ppu . first ) ;
prev = ppu . first ;
lv2_obj : : awake ( ppu . second ) ;
}
g_fxo - > get < awake_pushed > ( ) . awake_ppus . clear ( ) ;
} ) ;
2024-03-26 12:16:23 +01:00
}
}
else
{
lv2_obj : : awake ( this ) ;
}
2022-07-04 15:02:17 +02:00
[[fallthrough]] ;
}
case PPU_THREAD_STATUS_SLEEP :
{
if ( std : : exchange ( g_fxo - > get < init_pushed > ( ) . pushed , true ) )
{
2025-04-05 21:50:45 +02:00
cmd_list ( { { ppu_cmd : : ptr_call , 0 } , + [ ] ( ppu_thread & ) - > bool
2022-07-04 15:02:17 +02:00
{
while ( ! Emu . IsStopped ( ) & & ! g_fxo - > get < init_pushed > ( ) . inited )
{
2023-07-31 22:57:26 +02:00
thread_ctrl : : wait_on ( g_fxo - > get < init_pushed > ( ) . inited , 0 ) ;
2022-07-04 15:02:17 +02:00
}
return false ;
2025-04-05 21:50:45 +02:00
} } ) ;
2022-07-04 15:02:17 +02:00
}
else
{
g_fxo - > init < disable_precomp_t > ( ) ;
g_fxo - > get < disable_precomp_t > ( ) . disable = true ;
cmd_push ( { ppu_cmd : : initialize , 0 } ) ;
2025-04-05 21:50:45 +02:00
cmd_list ( { { ppu_cmd : : ptr_call , 0 } , + [ ] ( ppu_thread & ) - > bool
2022-07-04 15:02:17 +02:00
{
auto & inited = g_fxo - > get < init_pushed > ( ) . inited ;
2023-07-31 22:57:26 +02:00
inited = 1 ;
2022-07-04 15:02:17 +02:00
inited . notify_all ( ) ;
return true ;
2025-04-05 21:50:45 +02:00
} } ) ;
2022-07-04 15:02:17 +02:00
}
if ( status = = PPU_THREAD_STATUS_SLEEP )
{
2025-04-05 21:50:45 +02:00
cmd_list ( { { ppu_cmd : : ptr_call , 0 } ,
2022-07-04 15:02:17 +02:00
+ [ ] ( ppu_thread & ppu ) - > bool
{
2022-07-05 13:12:21 +02:00
const u32 op = vm : : read32 ( ppu . cia ) ;
const auto & table = g_fxo - > get < ppu_interpreter_rt > ( ) ;
2022-07-04 15:02:17 +02:00
ppu . loaded_from_savestate = true ;
2024-03-26 12:16:23 +01:00
ppu . prio . raw ( ) . preserve_bit = 1 ;
2022-07-05 13:12:21 +02:00
table . decode ( op ) ( ppu , { op } , vm : : _ptr < u32 > ( ppu . cia ) , & ppu_ret ) ;
2024-03-27 15:38:25 +01:00
ppu . prio . raw ( ) . preserve_bit = 0 ;
2022-07-05 13:12:21 +02:00
ppu . optional_savestate_state - > clear ( ) ; // Reset to writing state
2022-07-04 15:02:17 +02:00
ppu . loaded_from_savestate = false ;
return true ;
2025-04-05 21:50:45 +02:00
} } ) ;
2022-07-04 15:02:17 +02:00
lv2_obj : : set_future_sleep ( this ) ;
}
queue_intr_entry ( ) ;
cmd_push ( { ppu_cmd : : cia_call , 0 } ) ;
break ;
}
case PPU_THREAD_STATUS_ZOMBIE :
{
state + = cpu_flag : : exit ;
break ;
}
case PPU_THREAD_STATUS_STOP :
{
queue_intr_entry ( ) ;
break ;
}
}
// Trigger the scheduler
state + = cpu_flag : : suspend ;
if ( ! g_use_rtm )
{
state + = cpu_flag : : memory ;
}
2023-11-15 20:07:42 +01:00
ppu_tname = make_single < std : : string > ( ar . pop < std : : string > ( ) ) ;
2025-01-28 17:49:19 +01:00
ppu_log . notice ( " Loading PPU Thread [0x%x: %s]: cia=0x%x, state=%s, status=%s " , id , * ppu_tname . load ( ) , cia , + state , ppu_thread_status { status } ) ;
2022-07-04 15:02:17 +02:00
}
void ppu_thread : : save ( utils : : serial & ar )
{
2025-01-28 17:49:19 +01:00
// For debugging purposes, capture the state flags as soon as this function is entered
const bs_t < cpu_flag > state_flags = state ;
2022-07-05 13:12:21 +02:00
USING_SERIALIZATION_VERSION ( ppu ) ;
2022-07-04 15:02:17 +02:00
const u64 entry = std : : bit_cast < u64 > ( entry_func ) ;
ppu_join_status _joiner = joiner ;
if ( _joiner > = ppu_join_status : : max )
{
// Joining thread should recover this member properly
2022-09-13 15:08:55 +02:00
_joiner = ppu_join_status : : joinable ;
2022-07-04 15:02:17 +02:00
}
ar ( stack_size , stack_addr , _joiner , entry , is_interrupt_thread ) ;
2024-03-26 12:16:23 +01:00
const bool is_null = ar . m_file_handler & & ar . m_file_handler - > is_null ( ) ;
if ( ! is_null & & ! g_fxo - > get < save_lv2_tag > ( ) . saved . exchange ( true ) )
{
ar ( lv2_obj : : g_priority_order_tag ) ;
}
2024-03-27 13:44:33 +01:00
if ( current_module & & current_module [ 0 ] )
{
ar ( std : : string { current_module } ) ;
ar ( std : : string { last_function } ) ;
}
else
{
ar ( std : : string { } ) ;
ar ( std : : string { } ) ;
}
2022-07-04 15:02:17 +02:00
serialize_common ( ar ) ;
2024-03-26 12:16:23 +01:00
auto [ status , order ] = lv2_obj : : ppu_state ( this , false ) ;
2022-07-04 15:02:17 +02:00
if ( status = = PPU_THREAD_STATUS_SLEEP & & cpu_flag : : again - state )
{
// Hack for sys_fs
status = PPU_THREAD_STATUS_RUNNABLE ;
}
ar ( status ) ;
2024-03-26 12:16:23 +01:00
if ( status = = PPU_THREAD_STATUS_RUNNABLE | | status = = PPU_THREAD_STATUS_ONPROC )
{
ar ( order ) ;
}
2022-07-04 15:02:17 +02:00
ar ( * ppu_tname . load ( ) ) ;
2025-01-28 17:49:19 +01:00
if ( current_module & & current_module [ 0 ] )
{
ppu_log . notice ( " Saving PPU Thread [0x%x: %s]: cia=0x%x, state=%s, status=%s (at function: %s) " , id , * ppu_tname . load ( ) , cia , state_flags , ppu_thread_status { status } , last_function ) ;
}
else
{
ppu_log . notice ( " Saving PPU Thread [0x%x: %s]: cia=0x%x, state=%s, status=%s " , id , * ppu_tname . load ( ) , cia , state_flags , ppu_thread_status { status } ) ;
}
2022-07-04 15:02:17 +02:00
}
2021-05-01 08:34:52 +02:00
ppu_thread : : thread_name_t : : operator std : : string ( ) const
{
std : : string thread_name = fmt : : format ( " PPU[0x%x] " , _this - > id ) ;
if ( const std : : string name = * _this - > ppu_tname . load ( ) ; ! name . empty ( ) )
{
fmt : : append ( thread_name , " %s " , name ) ;
}
return thread_name ;
}
2016-08-09 16:14:41 +02:00
void ppu_thread : : cmd_push ( cmd64 cmd )
2016-07-27 23:43:22 +02:00
{
// Reserve queue space
const u32 pos = cmd_queue . push_begin ( ) ;
2016-05-13 15:55:34 +02:00
2016-07-27 23:43:22 +02:00
// Write single command
cmd_queue [ pos ] = cmd ;
}
2016-05-13 15:55:34 +02:00
2016-08-09 16:14:41 +02:00
void ppu_thread : : cmd_list ( std : : initializer_list < cmd64 > list )
2016-07-27 23:43:22 +02:00
{
// Reserve queue space
const u32 pos = cmd_queue . push_begin ( static_cast < u32 > ( list . size ( ) ) ) ;
2016-05-13 15:55:34 +02:00
2016-07-27 23:43:22 +02:00
// Write command tail in relaxed manner
for ( u32 i = 1 ; i < list . size ( ) ; i + + )
2016-05-13 15:55:34 +02:00
{
2016-07-27 23:43:22 +02:00
cmd_queue [ pos + i ] . raw ( ) = list . begin ( ) [ i ] ;
2016-05-13 15:55:34 +02:00
}
2016-07-27 23:43:22 +02:00
// Write command head after all
cmd_queue [ pos ] = * list . begin ( ) ;
2012-11-15 00:39:56 +01:00
}
2016-07-27 23:43:22 +02:00
void ppu_thread : : cmd_pop ( u32 count )
2012-11-15 00:39:56 +01:00
{
2016-07-27 23:43:22 +02:00
// Get current position
const u32 pos = cmd_queue . peek ( ) ;
// Clean command buffer for command tail
for ( u32 i = 1 ; i < = count ; i + + )
2016-04-14 01:09:41 +02:00
{
2016-08-09 16:14:41 +02:00
cmd_queue [ pos + i ] . raw ( ) = cmd64 { } ;
2016-04-14 01:09:41 +02:00
}
2016-07-27 23:43:22 +02:00
// Free
cmd_queue . pop_end ( count + 1 ) ;
2014-04-10 00:54:32 +02:00
}
2014-08-15 14:50:59 +02:00
2016-08-09 16:14:41 +02:00
cmd64 ppu_thread : : cmd_wait ( )
2016-04-25 12:49:12 +02:00
{
2016-07-27 23:43:22 +02:00
while ( true )
{
2021-02-13 15:50:07 +01:00
if ( cmd64 result = cmd_queue [ cmd_queue . peek ( ) ] . exchange ( cmd64 { } ) )
2016-07-27 23:43:22 +02:00
{
2021-02-13 15:50:07 +01:00
return result ;
2016-07-27 23:43:22 +02:00
}
2021-02-13 15:50:07 +01:00
if ( is_stopped ( ) )
2016-07-27 23:43:22 +02:00
{
2021-02-13 15:50:07 +01:00
return { } ;
2016-07-27 23:43:22 +02:00
}
2021-02-13 15:50:07 +01:00
thread_ctrl : : wait_on ( cmd_notify , 0 ) ;
cmd_notify = 0 ;
2016-07-27 23:43:22 +02:00
}
2016-04-25 12:49:12 +02:00
}
2016-07-27 23:43:22 +02:00
be_t < u64 > * ppu_thread : : get_stack_arg ( s32 i , u64 align )
2014-08-23 16:51:51 +02:00
{
2025-04-05 21:50:45 +02:00
if ( align ! = 1 & & align ! = 2 & & align ! = 4 & & align ! = 8 & & align ! = 16 )
fmt : : throw_exception ( " Unsupported alignment: 0x%llx " , align ) ;
2020-12-09 16:04:52 +01:00
return vm : : _ptr < u64 > ( vm : : cast ( ( gpr [ 1 ] + 0x30 + 0x8 * ( i - 1 ) ) & ( 0 - align ) ) ) ;
2014-08-23 16:51:51 +02:00
}
2023-06-07 13:34:39 +02:00
void ppu_thread : : fast_call ( u32 addr , u64 rtoc , bool is_thread_entry )
2014-08-19 20:17:20 +02:00
{
2017-02-09 23:51:29 +01:00
const auto old_cia = cia ;
2016-07-27 23:43:22 +02:00
const auto old_rtoc = gpr [ 2 ] ;
const auto old_lr = lr ;
2019-07-09 19:44:07 +02:00
const auto old_func = current_function ;
2016-07-27 23:43:22 +02:00
const auto old_fmt = g_tls_log_prefix ;
2015-07-19 13:36:32 +02:00
2022-07-04 15:02:17 +02:00
interrupt_thread_executing = true ;
2016-07-27 23:43:22 +02:00
cia = addr ;
gpr [ 2 ] = rtoc ;
2022-11-19 12:50:31 +01:00
lr = g_fxo - > get < ppu_function_manager > ( ) . func_addr ( 1 , true ) ; // HLE stop address
2019-07-09 19:44:07 +02:00
current_function = nullptr ;
2014-08-19 20:17:20 +02:00
2022-07-04 15:02:17 +02:00
if ( std : : exchange ( loaded_from_savestate , false ) )
{
lr = old_lr ;
}
2016-07-27 23:43:22 +02:00
g_tls_log_prefix = [ ]
{
2017-02-09 23:51:29 +01:00
const auto _this = static_cast < ppu_thread * > ( get_current_cpu_thread ( ) ) ;
2020-02-28 08:43:37 +01:00
2020-11-26 10:30:51 +01:00
static thread_local shared_ptr < std : : string > name_cache ;
2020-02-28 08:43:37 +01:00
if ( ! _this - > ppu_tname . is_equal ( name_cache ) ) [[unlikely]]
{
2020-12-06 09:13:34 +01:00
_this - > ppu_tname . peek_op ( [ & ] ( const shared_ptr < std : : string > & ptr )
{
2025-04-05 21:50:45 +02:00
if ( ptr ! = name_cache )
{
name_cache = ptr ;
}
} ) ;
2020-02-28 08:43:37 +01:00
}
2020-08-27 17:40:13 +02:00
const auto cia = _this - > cia ;
2024-06-23 17:34:18 +02:00
if ( _this - > current_function & & g_fxo - > get < ppu_function_manager > ( ) . is_func ( cia ) )
2020-08-27 17:40:13 +02:00
{
2020-10-08 15:13:55 +02:00
return fmt : : format ( " PPU[0x%x] Thread (%s) [HLE:0x%08x, LR:0x%08x] " , _this - > id , * name_cache . get ( ) , cia , _this - > lr ) ;
2020-08-27 17:40:13 +02:00
}
2022-10-02 11:59:41 +02:00
extern const char * get_prx_name_by_cia ( u32 addr ) ;
if ( auto name = get_prx_name_by_cia ( cia ) )
{
2022-12-24 15:15:29 +01:00
return fmt : : format ( " PPU[0x%x] Thread (%s) [%s: 0x%08x] " , _this - > id , * name_cache . get ( ) , name , cia ) ;
2022-10-02 11:59:41 +02:00
}
2020-08-27 17:40:13 +02:00
return fmt : : format ( " PPU[0x%x] Thread (%s) [0x%08x] " , _this - > id , * name_cache . get ( ) , cia ) ;
2016-07-27 23:43:22 +02:00
} ;
2019-11-09 17:11:01 +01:00
auto at_ret = [ & ] ( )
2015-07-01 00:25:52 +02:00
{
2023-06-06 13:05:07 +02:00
if ( old_cia )
2016-06-25 07:16:15 +02:00
{
2022-07-05 13:12:21 +02:00
if ( state & cpu_flag : : again )
2022-07-04 15:02:17 +02:00
{
ppu_log . error ( " HLE callstack savestate is not implemented! " ) ;
}
2017-02-09 23:51:29 +01:00
cia = old_cia ;
gpr [ 2 ] = old_rtoc ;
lr = old_lr ;
2016-06-25 07:16:15 +02:00
}
2023-06-07 13:34:39 +02:00
else if ( state & cpu_flag : : ret & & cia = = g_fxo - > get < ppu_function_manager > ( ) . func_addr ( 1 , true ) + 4 & & is_thread_entry )
2023-06-06 13:05:07 +02:00
{
std : : string ret ;
dump_all ( ret ) ;
ppu_log . error ( " Returning from the thread entry function! (func=0x%x) " , entry_func . addr ) ;
ppu_log . notice ( " Thread context: %s " , ret ) ;
lv2_obj : : sleep ( * this ) ;
2023-06-07 13:34:39 +02:00
// For savestates
state + = cpu_flag : : again ;
std : : memcpy ( syscall_args , & gpr [ 3 ] , sizeof ( syscall_args ) ) ;
2023-11-15 20:07:42 +01:00
}
if ( ! old_cia & & state & cpu_flag : : again )
{
// Fixup argument registers and CIA for reloading
std : : memcpy ( & gpr [ 3 ] , syscall_args , sizeof ( syscall_args ) ) ;
cia - = 4 ;
2023-06-06 13:05:07 +02:00
}
2022-07-04 15:02:17 +02:00
current_function = old_func ;
g_tls_log_prefix = old_fmt ;
state - = cpu_flag : : ret ;
2019-11-09 17:11:01 +01:00
} ;
2020-03-09 17:18:39 +01:00
exec_task ( ) ;
2019-11-09 17:11:01 +01:00
at_ret ( ) ;
2014-08-19 20:17:20 +02:00
}
2016-06-07 22:24:20 +02:00
2021-06-26 13:15:10 +02:00
std : : pair < vm : : addr_t , u32 > ppu_thread : : stack_push ( u32 size , u32 align_v )
2016-08-09 16:14:41 +02:00
{
2021-05-20 06:00:22 +02:00
if ( auto cpu = get_current_cpu_thread < ppu_thread > ( ) )
2016-08-09 16:14:41 +02:00
{
ppu_thread & context = static_cast < ppu_thread & > ( * cpu ) ;
2020-12-09 16:04:52 +01:00
const u32 old_pos = vm : : cast ( context . gpr [ 1 ] ) ;
2025-04-05 21:50:45 +02:00
context . gpr [ 1 ] - = size ; // reserve room of the minimal possible size
2019-12-02 22:31:34 +01:00
context . gpr [ 1 ] & = ~ ( u64 { align_v } - 1 ) ; // fix stack alignment
2016-08-09 16:14:41 +02:00
2021-06-26 13:15:10 +02:00
auto is_stack = [ & ] ( u64 addr )
{
return addr > = context . stack_addr & & addr < context . stack_addr + context . stack_size ;
} ;
// TODO: This check does not care about custom stack memory
if ( is_stack ( old_pos ) ! = is_stack ( context . gpr [ 1 ] ) )
2016-08-09 16:14:41 +02:00
{
2020-12-09 16:04:52 +01:00
fmt : : throw_exception ( " Stack overflow (size=0x%x, align=0x%x, SP=0x%llx, stack=*0x%x) " , size , align_v , old_pos , context . stack_addr ) ;
2016-08-09 16:14:41 +02:00
}
else
{
const u32 addr = static_cast < u32 > ( context . gpr [ 1 ] ) ;
std : : memset ( vm : : base ( addr ) , 0 , size ) ;
2021-06-26 13:15:10 +02:00
return { vm : : cast ( addr ) , old_pos - addr } ;
2016-08-09 16:14:41 +02:00
}
}
2020-12-09 16:04:52 +01:00
fmt : : throw_exception ( " Invalid thread " ) ;
2016-08-09 16:14:41 +02:00
}
void ppu_thread : : stack_pop_verbose ( u32 addr , u32 size ) noexcept
{
2021-05-20 06:00:22 +02:00
if ( auto cpu = get_current_cpu_thread < ppu_thread > ( ) )
2016-08-09 16:14:41 +02:00
{
ppu_thread & context = static_cast < ppu_thread & > ( * cpu ) ;
if ( context . gpr [ 1 ] ! = addr )
{
2020-02-01 09:31:27 +01:00
ppu_log . error ( " Stack inconsistency (addr=0x%x, SP=0x%llx, size=0x%x) " , addr , context . gpr [ 1 ] , size ) ;
2016-08-09 16:14:41 +02:00
return ;
}
2021-06-26 13:15:10 +02:00
context . gpr [ 1 ] + = size ;
2016-08-09 16:14:41 +02:00
return ;
}
2020-12-09 16:04:52 +01:00
ppu_log . error ( " Invalid thread " ) ;
2016-08-09 16:14:41 +02:00
}
2021-12-30 17:39:18 +01:00
extern ppu_intrp_func_t ppu_get_syscall ( u64 code ) ;
2016-06-07 22:24:20 +02:00
2020-05-15 17:57:48 +02:00
void ppu_trap ( ppu_thread & ppu , u64 addr )
2016-06-07 22:24:20 +02:00
{
2021-05-22 09:35:15 +02:00
ensure ( ( addr & ( ~ u64 { 0xffff'ffff } | 0x3 ) ) = = 0 ) ;
2020-05-15 17:57:48 +02:00
ppu . cia = static_cast < u32 > ( addr ) ;
u32 add = static_cast < u32 > ( g_cfg . core . stub_ppu_traps ) * 4 ;
// If stubbing is enabled, check current instruction and the following
2020-11-10 18:09:28 +01:00
if ( ! add | | ! vm : : check_addr ( ppu . cia , vm : : page_executable ) | | ! vm : : check_addr ( ppu . cia + add , vm : : page_executable ) )
2020-05-15 17:57:48 +02:00
{
2021-09-30 20:33:55 +02:00
fmt : : throw_exception ( " PPU Trap! Sometimes tweaking the setting \" Stub PPU Traps \" can be a workaround to this crash. \n Best values depend on game code, if unsure try 1. " ) ;
2020-05-15 17:57:48 +02:00
}
ppu_log . error ( " PPU Trap: Stubbing %d instructions %s. " , std : : abs ( static_cast < s32 > ( add ) / 4 ) , add > > 31 ? " backwards " : " forwards " ) ;
ppu . cia + = add ; // Skip instructions, hope for valid code (interpreter may be invoked temporarily)
2016-06-22 15:37:51 +02:00
}
2016-06-07 22:24:20 +02:00
2021-09-22 22:42:40 +02:00
static void ppu_error ( ppu_thread & ppu , u64 addr , u32 /*op*/ )
2016-07-07 20:42:39 +02:00
{
2017-06-22 23:52:09 +02:00
ppu . cia = : : narrow < u32 > ( addr ) ;
2021-09-01 12:38:17 +02:00
ppu_recompiler_fallback ( ppu ) ;
2016-07-07 20:42:39 +02:00
}
2017-02-13 18:51:37 +01:00
static void ppu_check ( ppu_thread & ppu , u64 addr )
{
2017-06-22 23:52:09 +02:00
ppu . cia = : : narrow < u32 > ( addr ) ;
2018-10-11 00:17:19 +02:00
if ( ppu . test_stopped ( ) )
2023-08-01 00:10:16 +02:00
{
return ;
}
2017-02-13 18:51:37 +01:00
}
2016-06-22 15:37:51 +02:00
static void ppu_trace ( u64 addr )
2016-06-07 22:24:20 +02:00
{
2020-02-01 09:31:27 +01:00
ppu_log . notice ( " Trace: 0x%llx " , addr ) ;
2016-06-22 15:37:51 +02:00
}
2016-06-07 22:24:20 +02:00
2018-04-28 19:09:35 +02:00
template < typename T >
static T ppu_load_acquire_reservation ( ppu_thread & ppu , u32 addr )
2016-06-22 15:37:51 +02:00
{
2020-10-29 19:46:50 +01:00
perf_meter < " LARX " _u32 > perf0 ;
2020-09-14 07:08:26 +02:00
// Do not allow stores accessed from the same cache line to past reservation load
2020-12-06 10:10:00 +01:00
atomic_fence_seq_cst ( ) ;
2020-09-14 07:08:26 +02:00
2020-09-10 05:27:55 +02:00
if ( addr % sizeof ( T ) )
{
2020-12-09 16:04:52 +01:00
fmt : : throw_exception ( " PPU %s: Unaligned address: 0x%08x " , sizeof ( T ) = = 4 ? " LWARX " : " LDARX " , addr ) ;
2020-09-10 05:27:55 +02:00
}
// Always load aligned 64-bit value
2018-09-14 12:53:50 +02:00
auto & data = vm : : _ref < const atomic_be_t < u64 > > ( addr & - 8 ) ;
const u64 size_off = ( sizeof ( T ) * 8 ) & 63 ;
const u64 data_off = ( addr & 7 ) * 8 ;
2018-04-28 19:09:35 +02:00
2017-02-17 20:35:57 +01:00
ppu . raddr = addr ;
2018-04-28 19:09:35 +02:00
2020-10-29 23:22:28 +01:00
u32 addr_mask = - 1 ;
2020-04-07 19:29:11 +02:00
if ( const s32 max = g_cfg . core . ppu_128_reservations_loop_max_length )
{
// If we use it in HLE it means we want the accurate version
ppu . use_full_rdata = max < 0 | | ppu . current_function | | [ & ] ( )
{
const u32 cia = ppu . cia ;
if ( ( cia & 0xffff ) > = 0x10000u - max * 4 )
{
// Do not cross 64k boundary
2020-10-29 23:22:28 +01:00
return false ;
2020-04-07 19:29:11 +02:00
}
const auto inst = vm : : _ptr < const nse_t < u32 > > ( cia ) ;
// Search for STWCX or STDCX nearby (LDARX-STWCX and LWARX-STDCX loops will use accurate 128-byte reservations)
2020-12-13 14:34:45 +01:00
constexpr u32 store_cond = stx : : se_storage < u32 > : : swap ( sizeof ( T ) = = 8 ? 0x7C00012D : 0x7C0001AD ) ;
constexpr u32 mask = stx : : se_storage < u32 > : : swap ( 0xFC0007FF ) ;
2020-04-07 19:29:11 +02:00
const auto store_vec = v128 : : from32p ( store_cond ) ;
const auto mask_vec = v128 : : from32p ( mask ) ;
s32 i = 2 ;
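// Scan four instructions per iteration using 128-bit compares, then check the remaining tail one by one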
for ( const s32 _max = max - 3 ; i < _max ; i + = 4 )
{
const auto _inst = v128 : : loadu ( inst + i ) & mask_vec ;
2021-12-30 17:39:18 +01:00
if ( ! gv_testz ( gv_eq32 ( _inst , store_vec ) ) )
2020-04-07 19:29:11 +02:00
{
return false ;
}
}
for ( ; i < max ; i + + )
{
const u32 val = inst [ i ] & mask ;
if ( val = = store_cond )
{
return false ;
}
}
return true ;
} ( ) ;
2020-10-29 23:22:28 +01:00
if ( ppu . use_full_rdata )
{
addr_mask = - 128 ;
}
2020-04-07 19:29:11 +02:00
}
else
{
ppu . use_full_rdata = false ;
}
2023-06-12 03:47:20 +02:00
if ( ppu_log . trace & & ( addr & addr_mask ) = = ( ppu . last_faddr & addr_mask ) )
2020-10-29 23:22:28 +01:00
{
ppu_log . trace ( u8 " LARX after fail: addr=0x%x, faddr=0x%x, time=%u c " , addr , ppu . last_faddr , ( perf0 . get ( ) - ppu . last_ftsc ) ) ;
}
2021-03-05 20:05:37 +01:00
if ( ( addr & addr_mask ) = = ( ppu . last_faddr & addr_mask ) & & ( perf0 . get ( ) - ppu . last_ftsc ) < 600 & & ( vm : : reservation_acquire ( addr ) & - 128 ) = = ppu . last_ftime )
2020-10-29 23:22:28 +01:00
{
be_t < u64 > rdata ;
std : : memcpy ( & rdata , & ppu . rdata [ addr & 0x78 ] , 8 ) ;
if ( rdata = = data . load ( ) )
{
ppu . rtime = ppu . last_ftime ;
ppu . raddr = ppu . last_faddr ;
2020-10-30 23:52:24 +01:00
ppu . last_ftime = 0 ;
2020-10-29 23:22:28 +01:00
return static_cast < T > ( rdata < < data_off > > size_off ) ;
}
ppu . last_fail + + ;
ppu . last_faddr = 0 ;
}
else
{
// Silent failure
ppu . last_faddr = 0 ;
}
// PPU/cellSpurs: MGS4: Fix cellSpursAddUrgentCommand race condition
// cellSpursAddUrgentCommand searches 4 slots for an empty one to put the command in.
// At first glance it appears to do so unordered, yet the SPU side expects an ordering between all
// the commands because it pops them in FIFO manner, without tracking how many commands are queued in total.
// On closer observation of cellSpursAddUrgentCommand, something odd takes place:
// reservation loops are normally independent, and each is expected to complete without the previous
// loop being affected by the following one. Here, however, after a single failure the entire
// operation is reset: a chain of 4 reservation operations restarts completely.
// This suggests the hardware expects something else here, perhaps caching the reservation internally.
// After adjusting LDARX and STDCX to cache the reservation between succeeding loops, Metal Gear Solid 4 no longer freezes!
2025-03-23 05:58:35 +01:00
const u32 res_cached = ppu . res_cached ;
if ( ( addr & - 128 ) = = ( res_cached & - 128 ) )
{
// Reload the "cached" reservation left by the previously successful conditional store
// This appears to be a hardware feature, judging by the cellSpursAddUrgentCommand function
ppu . rtime - = 128 ;
}
else
{
ppu . rtime = vm : : reservation_acquire ( addr ) & - 128 ;
}
2020-10-30 03:17:00 +01:00
2020-10-30 07:40:58 +01:00
be_t < u64 > rdata ;
2020-10-17 13:55:31 +02:00
2020-10-30 07:40:58 +01:00
if ( ! ppu . use_full_rdata )
2018-05-18 17:51:48 +02:00
{
2020-10-30 07:40:58 +01:00
rdata = data . load ( ) ;
2020-09-02 23:58:29 +02:00
2020-10-30 07:40:58 +01:00
// Store only 64 bits of reservation data
std : : memcpy ( & ppu . rdata [ addr & 0x78 ] , & rdata , 8 ) ;
}
else
{
mov_rdata ( ppu . rdata , vm : : _ref < spu_rdata_t > ( addr & - 128 ) ) ;
2020-12-06 10:10:00 +01:00
atomic_fence_acquire ( ) ;
2020-10-13 19:23:10 +02:00
2020-10-30 07:40:58 +01:00
// Load relevant 64 bits of reservation data
std : : memcpy ( & rdata , & ppu . rdata [ addr & 0x78 ] , 8 ) ;
2018-05-18 17:51:48 +02:00
}
2020-10-17 13:55:31 +02:00
return static_cast < T > ( rdata < < data_off > > size_off ) ;
2018-04-28 19:09:35 +02:00
}
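// Worked example of the sub-word extraction in the function above (illustrative numbers):
// ppu_lwarx at addr = 0x10014 loads the aligned big-endian qword at 0x10010 (converted to host
// order by be_t), then data_off = (addr & 7) * 8 = 32 and size_off = (sizeof(u32) * 8) & 63 = 32, so
//     static_cast<u32>(value << 32 >> 32)
// keeps only the low 32 bits of the host-order qword, i.e. guest bytes 0x10014..0x10017 read as a
// big-endian u32. For ldarx both shifts are 0 and the full qword is returned unchanged.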
extern u32 ppu_lwarx ( ppu_thread & ppu , u32 addr )
{
return ppu_load_acquire_reservation < u32 > ( ppu , addr ) ;
2016-06-22 15:37:51 +02:00
}
2016-06-07 22:24:20 +02:00
2017-02-26 16:56:31 +01:00
extern u64 ppu_ldarx ( ppu_thread & ppu , u32 addr )
2016-06-22 15:37:51 +02:00
{
2018-04-28 19:09:35 +02:00
return ppu_load_acquire_reservation < u64 > ( ppu , addr ) ;
2016-06-22 15:37:51 +02:00
}
2016-06-07 22:24:20 +02:00
2025-04-05 21:50:45 +02:00
const auto ppu_stcx_accurate_tx = build_function_asm < u64 ( * ) ( u32 raddr , u64 rtime , const void * _old , u64 _new ) > ( " ppu_stcx_accurate_tx " , [ ] ( native_asm & c , auto & args )
{
using namespace asmjit ;
2020-04-07 19:29:11 +02:00
2021-12-30 17:39:18 +01:00
# if defined(ARCH_X64)
2025-04-05 21:50:45 +02:00
Label fall = c . newLabel ( ) ;
Label fail = c . newLabel ( ) ;
Label _ret = c . newLabel ( ) ;
Label load = c . newLabel ( ) ;
// if (utils::has_avx() && !s_tsx_avx)
//{
// c.vzeroupper();
// }
// Create stack frame if necessary (Windows ABI has only 6 volatile vector registers)
c . push ( x86 : : rbp ) ;
c . push ( x86 : : r14 ) ;
c . sub ( x86 : : rsp , 40 ) ;
2020-04-07 19:29:11 +02:00
# ifdef _WIN32
2025-04-05 21:50:45 +02:00
if ( ! s_tsx_avx )
{
c . movups ( x86 : : oword_ptr ( x86 : : rsp , 0 ) , x86 : : xmm6 ) ;
c . movups ( x86 : : oword_ptr ( x86 : : rsp , 16 ) , x86 : : xmm7 ) ;
}
2020-04-07 19:29:11 +02:00
# endif
2025-04-05 21:50:45 +02:00
// Prepare registers
build_swap_rdx_with ( c , args , x86 : : r10 ) ;
c . mov ( x86 : : rbp , x86 : : qword_ptr ( reinterpret_cast < u64 > ( & vm : : g_sudo_addr ) ) ) ;
c . lea ( x86 : : rbp , x86 : : qword_ptr ( x86 : : rbp , args [ 0 ] ) ) ;
c . and_ ( x86 : : rbp , - 128 ) ;
c . prefetchw ( x86 : : byte_ptr ( x86 : : rbp , 0 ) ) ;
c . prefetchw ( x86 : : byte_ptr ( x86 : : rbp , 64 ) ) ;
c . movzx ( args [ 0 ] . r32 ( ) , args [ 0 ] . r16 ( ) ) ;
c . shr ( args [ 0 ] . r32 ( ) , 1 ) ;
c . lea ( x86 : : r11 , x86 : : qword_ptr ( reinterpret_cast < u64 > ( + vm : : g_reservations ) , args [ 0 ] ) ) ;
c . and_ ( x86 : : r11 , - 128 / 2 ) ;
c . and_ ( args [ 0 ] . r32 ( ) , 63 ) ;
// Prepare data
if ( s_tsx_avx )
{
c . vmovups ( x86 : : ymm0 , x86 : : ymmword_ptr ( args [ 2 ] , 0 ) ) ;
c . vmovups ( x86 : : ymm1 , x86 : : ymmword_ptr ( args [ 2 ] , 32 ) ) ;
c . vmovups ( x86 : : ymm2 , x86 : : ymmword_ptr ( args [ 2 ] , 64 ) ) ;
c . vmovups ( x86 : : ymm3 , x86 : : ymmword_ptr ( args [ 2 ] , 96 ) ) ;
}
else
{
c . movaps ( x86 : : xmm0 , x86 : : oword_ptr ( args [ 2 ] , 0 ) ) ;
c . movaps ( x86 : : xmm1 , x86 : : oword_ptr ( args [ 2 ] , 16 ) ) ;
c . movaps ( x86 : : xmm2 , x86 : : oword_ptr ( args [ 2 ] , 32 ) ) ;
c . movaps ( x86 : : xmm3 , x86 : : oword_ptr ( args [ 2 ] , 48 ) ) ;
c . movaps ( x86 : : xmm4 , x86 : : oword_ptr ( args [ 2 ] , 64 ) ) ;
c . movaps ( x86 : : xmm5 , x86 : : oword_ptr ( args [ 2 ] , 80 ) ) ;
c . movaps ( x86 : : xmm6 , x86 : : oword_ptr ( args [ 2 ] , 96 ) ) ;
c . movaps ( x86 : : xmm7 , x86 : : oword_ptr ( args [ 2 ] , 112 ) ) ;
}
2020-04-07 19:29:11 +02:00
2025-04-05 21:50:45 +02:00
// Alloc r14 to stamp0
const auto stamp0 = x86 : : r14 ;
build_get_tsc ( c , stamp0 ) ;
2020-10-30 23:52:24 +01:00
2025-04-05 21:50:45 +02:00
Label fail2 = c . newLabel ( ) ;
2020-04-07 19:29:11 +02:00
2025-04-05 21:50:45 +02:00
Label tx1 = build_transaction_enter ( c , fall , [ & ] ( )
{
build_get_tsc ( c ) ;
c . sub ( x86 : : rax , stamp0 ) ;
c . cmp ( x86 : : rax , x86 : : qword_ptr ( reinterpret_cast < u64 > ( & g_rtm_tx_limit2 ) ) ) ;
c . jae ( fall ) ;
} ) ;
// Check pause flag
2025-04-24 12:41:04 +02:00
c . bt ( x86 : : dword_ptr ( args [ 2 ] , OFFSET_OF ( ppu_thread , state ) - OFFSET_OF ( ppu_thread , rdata ) ) , static_cast < u32 > ( cpu_flag : : pause ) ) ;
2025-04-05 21:50:45 +02:00
c . jc ( fall ) ;
c . xbegin ( tx1 ) ;
if ( s_tsx_avx )
{
c . vxorps ( x86 : : ymm0 , x86 : : ymm0 , x86 : : ymmword_ptr ( x86 : : rbp , 0 ) ) ;
c . vxorps ( x86 : : ymm1 , x86 : : ymm1 , x86 : : ymmword_ptr ( x86 : : rbp , 32 ) ) ;
c . vxorps ( x86 : : ymm2 , x86 : : ymm2 , x86 : : ymmword_ptr ( x86 : : rbp , 64 ) ) ;
c . vxorps ( x86 : : ymm3 , x86 : : ymm3 , x86 : : ymmword_ptr ( x86 : : rbp , 96 ) ) ;
c . vorps ( x86 : : ymm0 , x86 : : ymm0 , x86 : : ymm1 ) ;
c . vorps ( x86 : : ymm1 , x86 : : ymm2 , x86 : : ymm3 ) ;
c . vorps ( x86 : : ymm0 , x86 : : ymm1 , x86 : : ymm0 ) ;
c . vptest ( x86 : : ymm0 , x86 : : ymm0 ) ;
}
else
{
c . xorps ( x86 : : xmm0 , x86 : : oword_ptr ( x86 : : rbp , 0 ) ) ;
c . xorps ( x86 : : xmm1 , x86 : : oword_ptr ( x86 : : rbp , 16 ) ) ;
c . xorps ( x86 : : xmm2 , x86 : : oword_ptr ( x86 : : rbp , 32 ) ) ;
c . xorps ( x86 : : xmm3 , x86 : : oword_ptr ( x86 : : rbp , 48 ) ) ;
c . xorps ( x86 : : xmm4 , x86 : : oword_ptr ( x86 : : rbp , 64 ) ) ;
c . xorps ( x86 : : xmm5 , x86 : : oword_ptr ( x86 : : rbp , 80 ) ) ;
c . xorps ( x86 : : xmm6 , x86 : : oword_ptr ( x86 : : rbp , 96 ) ) ;
c . xorps ( x86 : : xmm7 , x86 : : oword_ptr ( x86 : : rbp , 112 ) ) ;
c . orps ( x86 : : xmm0 , x86 : : xmm1 ) ;
c . orps ( x86 : : xmm2 , x86 : : xmm3 ) ;
c . orps ( x86 : : xmm4 , x86 : : xmm5 ) ;
c . orps ( x86 : : xmm6 , x86 : : xmm7 ) ;
c . orps ( x86 : : xmm0 , x86 : : xmm2 ) ;
c . orps ( x86 : : xmm4 , x86 : : xmm6 ) ;
c . orps ( x86 : : xmm0 , x86 : : xmm4 ) ;
c . ptest ( x86 : : xmm0 , x86 : : xmm0 ) ;
}
c . jnz ( fail ) ;
// Store 8 bytes
c . mov ( x86 : : qword_ptr ( x86 : : rbp , args [ 0 ] , 1 , 0 ) , args [ 3 ] ) ;
c . xend ( ) ;
c . lock ( ) . add ( x86 : : qword_ptr ( x86 : : r11 ) , 64 ) ;
2020-10-30 23:52:24 +01:00
build_get_tsc ( c ) ;
2021-12-18 16:12:37 +01:00
c . sub ( x86 : : rax , stamp0 ) ;
2025-04-05 21:50:45 +02:00
c . jmp ( _ret ) ;
2020-10-15 18:24:00 +02:00
2025-04-05 21:50:45 +02:00
// XABORT is expensive so try to finish with xend instead
c . bind ( fail ) ;
2020-04-07 19:29:11 +02:00
2025-04-05 21:50:45 +02:00
// Load old data to store back in rdata
if ( s_tsx_avx )
{
c . vmovaps ( x86 : : ymm0 , x86 : : ymmword_ptr ( x86 : : rbp , 0 ) ) ;
c . vmovaps ( x86 : : ymm1 , x86 : : ymmword_ptr ( x86 : : rbp , 32 ) ) ;
c . vmovaps ( x86 : : ymm2 , x86 : : ymmword_ptr ( x86 : : rbp , 64 ) ) ;
c . vmovaps ( x86 : : ymm3 , x86 : : ymmword_ptr ( x86 : : rbp , 96 ) ) ;
}
else
{
c . movaps ( x86 : : xmm0 , x86 : : oword_ptr ( x86 : : rbp , 0 ) ) ;
c . movaps ( x86 : : xmm1 , x86 : : oword_ptr ( x86 : : rbp , 16 ) ) ;
c . movaps ( x86 : : xmm2 , x86 : : oword_ptr ( x86 : : rbp , 32 ) ) ;
c . movaps ( x86 : : xmm3 , x86 : : oword_ptr ( x86 : : rbp , 48 ) ) ;
c . movaps ( x86 : : xmm4 , x86 : : oword_ptr ( x86 : : rbp , 64 ) ) ;
c . movaps ( x86 : : xmm5 , x86 : : oword_ptr ( x86 : : rbp , 80 ) ) ;
c . movaps ( x86 : : xmm6 , x86 : : oword_ptr ( x86 : : rbp , 96 ) ) ;
c . movaps ( x86 : : xmm7 , x86 : : oword_ptr ( x86 : : rbp , 112 ) ) ;
}
2020-10-20 07:22:25 +02:00
2025-04-05 21:50:45 +02:00
c . xend ( ) ;
c . jmp ( fail2 ) ;
2020-10-20 07:22:25 +02:00
2025-04-05 21:50:45 +02:00
c . bind ( fall ) ;
c . mov ( x86 : : rax , - 1 ) ;
c . jmp ( _ret ) ;
2020-04-07 19:29:11 +02:00
2025-04-05 21:50:45 +02:00
c . bind ( fail2 ) ;
c . lock ( ) . sub ( x86 : : qword_ptr ( x86 : : r11 ) , 64 ) ;
c . bind ( load ) ;
2020-10-29 23:22:28 +01:00
2025-04-05 21:50:45 +02:00
// Store previous data back to rdata
if ( s_tsx_avx )
{
c . vmovaps ( x86 : : ymmword_ptr ( args [ 2 ] , 0 ) , x86 : : ymm0 ) ;
c . vmovaps ( x86 : : ymmword_ptr ( args [ 2 ] , 32 ) , x86 : : ymm1 ) ;
c . vmovaps ( x86 : : ymmword_ptr ( args [ 2 ] , 64 ) , x86 : : ymm2 ) ;
c . vmovaps ( x86 : : ymmword_ptr ( args [ 2 ] , 96 ) , x86 : : ymm3 ) ;
}
else
{
c . movaps ( x86 : : oword_ptr ( args [ 2 ] , 0 ) , x86 : : xmm0 ) ;
c . movaps ( x86 : : oword_ptr ( args [ 2 ] , 16 ) , x86 : : xmm1 ) ;
c . movaps ( x86 : : oword_ptr ( args [ 2 ] , 32 ) , x86 : : xmm2 ) ;
c . movaps ( x86 : : oword_ptr ( args [ 2 ] , 48 ) , x86 : : xmm3 ) ;
c . movaps ( x86 : : oword_ptr ( args [ 2 ] , 64 ) , x86 : : xmm4 ) ;
c . movaps ( x86 : : oword_ptr ( args [ 2 ] , 80 ) , x86 : : xmm5 ) ;
c . movaps ( x86 : : oword_ptr ( args [ 2 ] , 96 ) , x86 : : xmm6 ) ;
c . movaps ( x86 : : oword_ptr ( args [ 2 ] , 112 ) , x86 : : xmm7 ) ;
}
2020-10-29 23:22:28 +01:00
2025-04-05 21:50:45 +02:00
c . mov ( x86 : : rax , - 1 ) ;
2025-04-24 12:41:04 +02:00
c . mov ( x86 : : qword_ptr ( args [ 2 ] , OFFSET_OF ( ppu_thread , last_ftime ) - OFFSET_OF ( ppu_thread , rdata ) ) , x86 : : rax ) ;
2025-04-05 21:50:45 +02:00
c . xor_ ( x86 : : eax , x86 : : eax ) ;
// c.jmp(_ret);
2020-04-07 19:29:11 +02:00
2025-04-05 21:50:45 +02:00
c . bind ( _ret ) ;
2020-04-07 19:29:11 +02:00
# ifdef _WIN32
2025-04-05 21:50:45 +02:00
if ( ! s_tsx_avx )
{
c . vmovups ( x86 : : xmm6 , x86 : : oword_ptr ( x86 : : rsp , 0 ) ) ;
c . vmovups ( x86 : : xmm7 , x86 : : oword_ptr ( x86 : : rsp , 16 ) ) ;
}
2020-04-07 19:29:11 +02:00
# endif
2025-04-05 21:50:45 +02:00
if ( s_tsx_avx )
{
c . vzeroupper ( ) ;
}
2020-04-07 19:29:11 +02:00
2025-04-05 21:50:45 +02:00
c . add ( x86 : : rsp , 40 ) ;
c . pop ( x86 : : r14 ) ;
c . pop ( x86 : : rbp ) ;
2021-12-24 18:33:32 +01:00
2025-04-05 21:50:45 +02:00
maybe_flush_lbr ( c ) ;
c . ret ( ) ;
2021-12-30 17:39:18 +01:00
# else
2025-04-05 21:50:45 +02:00
UNUSED ( args ) ;
2024-08-19 21:44:32 +02:00
2025-04-05 21:50:45 +02:00
// Unimplemented should fail.
c . brk ( Imm ( 0x42 ) ) ;
c . ret ( a64 : : x30 ) ;
2021-12-30 17:39:18 +01:00
# endif
2025-04-05 21:50:45 +02:00
} ) ;
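// The generated routine above follows the usual RTM pattern: compare the whole 128-byte line and
// perform the 8-byte store inside one transaction, and fall back to the heavyweight path on abort.
// A hedged C++ sketch of that control flow using the RTM intrinsics (assumes <immintrin.h>, a
// TSX-capable CPU and -mrtm; this is not what the emitter produces, only the shape of it):
//
// static bool try_tx_store(volatile u64* dst, u64 value)
// {
//     if (_xbegin() == _XBEGIN_STARTED)
//     {
//         *dst = value; // becomes visible atomically when the transaction commits
//         _xend();
//         return true;
//     }
//     // Aborted (conflict, capacity, pause flag, ...): caller takes the fallback/locked path
//     return false;
// }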
2020-04-07 19:29:11 +02:00
2020-04-13 12:29:01 +02:00
template < typename T >
2020-05-01 22:52:10 +02:00
static bool ppu_store_reservation ( ppu_thread & ppu , u32 addr , u64 reg_value )
2016-06-22 15:37:51 +02:00
{
2020-10-18 14:00:10 +02:00
perf_meter < " STCX " _u32 > perf0 ;
2020-09-10 05:27:55 +02:00
if ( addr % sizeof ( T ) )
{
2020-12-09 16:04:52 +01:00
fmt : : throw_exception ( " PPU %s: Unaligned address: 0x%08x " , sizeof ( T ) = = 4 ? " STWCX " : " STDCX " , addr ) ;
2020-09-10 05:27:55 +02:00
}
2020-05-01 22:52:10 +02:00
auto & data = vm : : _ref < atomic_be_t < u64 > > ( addr & - 8 ) ;
2021-03-05 20:05:37 +01:00
auto & res = vm : : reservation_acquire ( addr ) ;
2020-04-07 19:29:11 +02:00
const u64 rtime = ppu . rtime ;
2017-02-17 20:35:57 +01:00
2020-10-29 19:46:50 +01:00
be_t < u64 > old_data = 0 ;
std : : memcpy ( & old_data , & ppu . rdata [ addr & 0x78 ] , sizeof ( old_data ) ) ;
be_t < u64 > new_data = old_data ;
2020-05-01 22:52:10 +02:00
if constexpr ( sizeof ( T ) = = sizeof ( u32 ) )
{
// Rebuild reg_value to be 32-bits of new data and 32-bits of old data
2020-10-29 19:46:50 +01:00
const be_t < u32 > reg32 = static_cast < u32 > ( reg_value ) ;
std : : memcpy ( reinterpret_cast < char * > ( & new_data ) + ( addr & 4 ) , & reg32 , sizeof ( u32 ) ) ;
}
else
{
new_data = reg_value ;
2020-05-01 22:52:10 +02:00
}
2020-04-07 19:29:11 +02:00
// Test if store address is on the same aligned 8-bytes memory as load
2025-03-23 05:58:35 +01:00
if ( const u32 raddr = ppu . raddr ; raddr / 8 ! = addr / 8 )
2017-02-17 20:35:57 +01:00
{
2020-04-07 19:29:11 +02:00
// If not, and it is within the same aligned 128-byte block, proceed only if 128-byte reservations are enabled
// On real hardware the store address can be anywhere within the 128-byte cache line of the reservation
if ( raddr / 128 ! = addr / 128 | | ! ppu . use_full_rdata )
2020-09-10 05:27:55 +02:00
{
2020-04-07 19:29:11 +02:00
// Even when the reservation address does not match, the target address must still be valid
2020-11-10 18:09:28 +01:00
if ( ! vm : : check_addr ( addr , vm : : page_writable ) )
2020-04-07 19:29:11 +02:00
{
// Raise an access violation
data + = 0 ;
}
2025-03-23 05:58:35 +01:00
ppu . raddr = 0 ;
ppu . res_cached = 0 ;
2020-04-07 19:29:11 +02:00
return false ;
2020-09-10 05:27:55 +02:00
}
2020-04-07 19:29:11 +02:00
}
2020-09-10 05:27:55 +02:00
2020-04-07 19:29:11 +02:00
if ( old_data ! = data | | rtime ! = ( res & - 128 ) )
{
2025-03-23 05:58:35 +01:00
ppu . raddr = 0 ;
ppu . res_cached = 0 ;
2020-05-08 19:41:15 +02:00
return false ;
}
2020-04-07 19:29:11 +02:00
if ( [ & ] ( )
2020-05-08 19:41:15 +02:00
{
2025-04-05 21:50:45 +02:00
if ( ppu . use_full_rdata ) [[unlikely]]
2021-12-18 16:12:37 +01:00
{
2025-04-05 21:50:45 +02:00
auto [ _oldd , _ok ] = res . fetch_op ( [ & ] ( u64 & r )
{
if ( ( r & - 128 ) ! = rtime | | ( r & 127 ) )
{
return false ;
}
r + = vm : : rsrv_unique_lock ;
return true ;
} ) ;
if ( ! _ok )
2021-12-18 16:12:37 +01:00
{
2025-04-05 21:50:45 +02:00
// Already locked or updated: give up
2021-12-18 16:12:37 +01:00
return false ;
}
2025-04-05 21:50:45 +02:00
if ( g_use_rtm ) [[likely]]
{
switch ( u64 count = ppu_stcx_accurate_tx ( addr & - 8 , rtime , ppu . rdata , std : : bit_cast < u64 > ( new_data ) ) )
{
case umax :
{
auto & all_data = * vm : : get_super_ptr < spu_rdata_t > ( addr & - 128 ) ;
auto & sdata = * vm : : get_super_ptr < atomic_be_t < u64 > > ( addr & - 8 ) ;
2021-12-18 16:12:37 +01:00
2025-04-05 21:50:45 +02:00
const bool ok = cpu_thread : : suspend_all < + 3 > ( & ppu , { all_data , all_data + 64 , & res } , [ & ]
{
if ( ( res & - 128 ) = = rtime & & cmp_rdata ( ppu . rdata , all_data ) )
{
sdata . release ( new_data ) ;
res + = 64 ;
return true ;
}
2021-12-18 16:12:37 +01:00
2025-04-05 21:50:45 +02:00
mov_rdata_nt ( ppu . rdata , all_data ) ;
res - = 64 ;
return false ;
} ) ;
if ( ok )
{
break ;
}
2020-10-29 23:22:28 +01:00
2025-04-05 21:50:45 +02:00
ppu . last_ftime = - 1 ;
[[fallthrough]] ;
}
case 0 :
2020-10-30 03:17:00 +01:00
{
2025-04-05 21:50:45 +02:00
if ( ppu . last_faddr = = addr )
2020-10-29 23:22:28 +01:00
{
2025-04-05 21:50:45 +02:00
ppu . last_fail + + ;
2020-10-29 23:22:28 +01:00
}
2025-04-05 21:50:45 +02:00
if ( ppu . last_ftime ! = umax )
{
ppu . last_faddr = 0 ;
return false ;
}
2020-10-29 23:22:28 +01:00
2025-04-05 21:50:45 +02:00
utils : : prefetch_read ( ppu . rdata ) ;
utils : : prefetch_read ( ppu . rdata + 64 ) ;
ppu . last_faddr = addr ;
ppu . last_ftime = res . load ( ) & - 128 ;
ppu . last_ftsc = utils : : get_tsc ( ) ;
return false ;
}
default :
2020-10-29 23:22:28 +01:00
{
2025-04-05 21:50:45 +02:00
if ( count > 20000 & & g_cfg . core . perf_report ) [[unlikely]]
{
perf_log . warning ( " STCX: took too long: %.3fus (%u c) " , count / ( utils : : get_tsc_freq ( ) / 1000'000 . ) , count ) ;
}
2020-10-29 23:22:28 +01:00
break ;
}
}
2025-04-05 21:50:45 +02:00
if ( ppu . last_faddr = = addr )
2020-10-30 23:52:24 +01:00
{
2025-04-05 21:50:45 +02:00
ppu . last_succ + + ;
2020-10-30 23:52:24 +01:00
}
2025-04-05 21:50:45 +02:00
ppu . last_faddr = 0 ;
return true ;
2020-10-19 14:31:10 +02:00
}
2025-04-05 21:50:45 +02:00
// Align address: we do not need the lower 7 bits anymore
addr & = - 128 ;
// Cache line data
// auto& cline_data = vm::_ref<spu_rdata_t>(addr);
data + = 0 ;
auto range_lock = vm : : alloc_range_lock ( ) ;
bool success = false ;
2020-04-07 19:29:11 +02:00
{
2025-04-05 21:50:45 +02:00
rsx : : reservation_lock rsx_lock ( addr , 128 ) ;
auto & super_data = * vm : : get_super_ptr < spu_rdata_t > ( addr ) ;
success = [ & ] ( )
2020-10-19 14:31:10 +02:00
{
2025-04-05 21:50:45 +02:00
// Full lock (heavyweight)
// TODO: vm::check_addr
vm : : writer_lock lock ( addr , range_lock ) ;
2020-10-19 14:31:10 +02:00
2025-04-05 21:50:45 +02:00
if ( cmp_rdata ( ppu . rdata , super_data ) )
{
data . release ( new_data ) ;
res + = 64 ;
return true ;
}
2020-05-08 19:41:15 +02:00
2025-04-05 21:50:45 +02:00
res - = 64 ;
return false ;
} ( ) ;
2020-10-29 23:22:28 +01:00
}
2025-04-05 21:50:45 +02:00
vm : : free_range_lock ( range_lock ) ;
2020-05-01 22:52:10 +02:00
2025-04-05 21:50:45 +02:00
return success ;
2020-04-07 19:29:11 +02:00
}
2025-04-05 21:50:45 +02:00
if ( new_data = = old_data )
{
ppu . last_faddr = 0 ;
return res . compare_and_swap_test ( rtime , rtime + 128 ) ;
}
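// Reservation word layout assumed throughout this file (sketch, not authoritative):
//   bits [6:0]  - transient lock/dirty bits (e.g. vm::rsrv_unique_lock)
//   bits [63:7] - "time", advanced by 128 for every committed store to the 128-byte line
// Hence the unchanged-data fast path above only needs to bump the time:
//   res.compare_and_swap_test(rtime, rtime + 128);
// while the 8-byte path below adds the lock bits first, then either completes the bump with
// res += 128 - lock_bits on success or undoes it with res -= lock_bits on failure.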
2020-04-07 19:29:11 +02:00
2025-04-05 21:50:45 +02:00
// Aligned 8-byte reservations will be used here
addr & = - 8 ;
2020-04-07 19:29:11 +02:00
2025-04-05 21:50:45 +02:00
const u64 lock_bits = vm : : rsrv_unique_lock ;
2020-04-07 19:29:11 +02:00
2025-04-05 21:50:45 +02:00
auto [ _oldd , _ok ] = res . fetch_op ( [ & ] ( u64 & r )
2020-04-07 19:29:11 +02:00
{
2025-04-05 21:50:45 +02:00
if ( ( r & - 128 ) ! = rtime | | ( r & 127 ) )
2024-03-27 09:36:00 +01:00
{
2025-04-05 21:50:45 +02:00
return false ;
2024-03-27 09:36:00 +01:00
}
2025-04-05 21:50:45 +02:00
r + = lock_bits ;
return true ;
} ) ;
2020-10-31 18:27:28 +01:00
2025-04-05 21:50:45 +02:00
// Give up if reservation has been locked or updated
if ( ! _ok )
2020-10-08 15:13:55 +02:00
{
2025-04-05 21:50:45 +02:00
ppu . last_faddr = 0 ;
2020-10-08 15:13:55 +02:00
return false ;
}
2025-04-05 21:50:45 +02:00
// Store previous value in old_data on failure
if ( data . compare_exchange ( old_data , new_data ) )
{
res + = 128 - lock_bits ;
return true ;
}
2020-10-08 15:13:55 +02:00
2025-04-05 21:50:45 +02:00
const u64 old_rtime = res . fetch_sub ( lock_bits ) ;
2020-10-29 23:22:28 +01:00
2025-04-05 21:50:45 +02:00
// TODO: this fail-tracking path is disabled when 128-byte reservations are enabled, since it is dangerous to mix the two
if ( ! g_cfg . core . ppu_128_reservations_loop_max_length )
2020-10-29 23:22:28 +01:00
{
2025-04-05 21:50:45 +02:00
// Store old_data on failure
if ( ppu . last_faddr = = addr )
{
ppu . last_fail + + ;
}
2020-10-29 23:22:28 +01:00
2025-04-05 21:50:45 +02:00
ppu . last_faddr = addr ;
ppu . last_ftime = old_rtime & - 128 ;
ppu . last_ftsc = utils : : get_tsc ( ) ;
std : : memcpy ( & ppu . rdata [ addr & 0x78 ] , & old_data , 8 ) ;
}
2020-10-29 23:22:28 +01:00
2025-04-05 21:50:45 +02:00
return false ;
} ( ) )
2017-02-17 20:35:57 +01:00
{
2022-09-03 05:46:16 +02:00
extern atomic_t < u32 > liblv2_begin , liblv2_end ;
2022-08-18 15:10:24 +02:00
2023-08-16 08:47:45 +02:00
// Avoid notifications from lwmutex or sys_spinlock
2024-04-12 07:40:40 +02:00
if ( new_data ! = old_data & & ( ppu . cia < liblv2_begin | | ppu . cia > = liblv2_end ) )
2022-08-18 15:10:24 +02:00
{
2024-11-30 09:46:33 +01:00
u32 notify = ppu . res_notify ;
2024-04-12 07:40:40 +02:00
if ( notify )
{
2024-11-30 09:46:33 +01:00
if ( ppu . res_notify_time = = vm : : reservation_notifier_count_index ( notify ) . second )
2024-06-29 21:00:19 +02:00
{
2024-06-30 16:05:30 +02:00
ppu . state + = cpu_flag : : wait ;
2024-08-13 14:30:34 +02:00
vm : : reservation_notifier_notify ( notify ) ;
2024-06-30 16:05:30 +02:00
}
else
2024-06-29 21:00:19 +02:00
{
2024-11-30 09:46:33 +01:00
notify = 0 ;
2024-06-29 21:00:19 +02:00
}
2024-11-30 09:46:33 +01:00
ppu . res_notify = 0 ;
2024-04-12 07:40:40 +02:00
}
2024-11-30 09:46:33 +01:00
if ( ( addr ^ notify ) & - 128 )
2023-08-16 08:47:45 +02:00
{
// Try to postpone the notification until the PPU is asleep, or merge notifications on the same address
// This also optimizes mutexes: do not notify right after the lock is acquired (which would prolong the critical section), only notify on unlock
2024-11-30 09:46:33 +01:00
const auto [ count , index ] = vm : : reservation_notifier_count_index ( addr ) ;
switch ( count )
{
case 0 :
{
// Nothing to do
break ;
}
case 1 :
{
if ( ! notify )
{
ppu . res_notify = addr ;
ppu . res_notify_time = index ;
break ;
}
// Notify both
[[fallthrough]] ;
}
default :
2024-06-29 21:00:19 +02:00
{
2024-11-30 09:46:33 +01:00
if ( ! notify )
{
ppu . state + = cpu_flag : : wait ;
}
vm : : reservation_notifier_notify ( addr ) ;
break ;
}
2024-06-29 21:00:19 +02:00
}
2023-08-16 08:47:45 +02:00
}
2024-11-30 09:46:33 +01:00
static_cast < void > ( ppu . test_stopped ( ) ) ;
2022-08-18 15:10:24 +02:00
}
2020-10-29 23:22:28 +01:00
if ( addr = = ppu . last_faddr )
{
ppu . last_succ + + ;
}
ppu . last_faddr = 0 ;
// Cache this successful reservation for the next loop iteration (see the cellSpursAddUrgentCommand note above)
2025-03-23 05:58:35 +01:00
ppu . res_cached = ppu . raddr ;
ppu . rtime + = 128 ;
ppu . raddr = 0 ;
2020-04-07 19:29:11 +02:00
return true ;
2018-05-21 19:25:05 +02:00
}
2017-02-17 20:35:57 +01:00
2023-08-16 08:47:45 +02:00
const u32 notify = ppu . res_notify ;
// Do not risk postponing the notification for too long (this is most likely an indefinite retry loop)
// On failure there is spare time to send it before retrying anyway
if ( notify & & ( ( addr ^ notify ) & - 128 ) )
{
2024-11-30 09:46:33 +01:00
if ( ppu . res_notify_time = = vm : : reservation_notifier_count_index ( notify ) . second )
2024-06-29 21:00:19 +02:00
{
2024-06-30 16:05:30 +02:00
ppu . state + = cpu_flag : : wait ;
2024-08-13 14:30:34 +02:00
vm : : reservation_notifier_notify ( notify ) ;
2024-06-30 16:05:30 +02:00
static_cast < void > ( ppu . test_stopped ( ) ) ;
2024-06-29 21:00:19 +02:00
}
2023-08-16 08:47:45 +02:00
ppu . res_notify = 0 ;
}
2025-03-23 05:58:35 +01:00
ppu . raddr = 0 ;
ppu . res_cached = 0 ;
2020-04-07 19:29:11 +02:00
return false ;
2016-06-22 15:37:51 +02:00
}
2016-06-07 22:24:20 +02:00
2020-04-13 12:29:01 +02:00
extern bool ppu_stwcx ( ppu_thread & ppu , u32 addr , u32 reg_value )
{
return ppu_store_reservation < u32 > ( ppu , addr , reg_value ) ;
}
extern bool ppu_stdcx ( ppu_thread & ppu , u32 addr , u64 reg_value )
{
return ppu_store_reservation < u64 > ( ppu , addr , reg_value ) ;
}
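// Illustrative only: how a guest lwarx/stwcx. retry loop maps onto the two helpers above
// (the wrapper name is hypothetical, not part of the emulator):
//
// static u32 emulated_fetch_add_u32(ppu_thread& ppu, u32 addr, u32 delta)
// {
//     u32 old;
//     do
//     {
//         old = ppu_lwarx(ppu, addr);               // establish the reservation
//     }
//     while (!ppu_stwcx(ppu, addr, old + delta));   // retry until the conditional store succeeds
//     return old;
// }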
2023-09-09 12:28:33 +02:00
struct jit_core_allocator
{
2023-09-11 11:52:10 +02:00
const s16 thread_count = g_cfg . core . llvm_threads ? std : : min < s32 > ( g_cfg . core . llvm_threads , limit ( ) ) : limit ( ) ;
2023-09-09 12:28:33 +02:00
// Initialize global semaphore with the max number of threads
2023-09-11 11:52:10 +02:00
: : semaphore < 0x7fff > sem { std : : max < s16 > ( thread_count , 1 ) } ;
2023-09-09 12:28:33 +02:00
2025-01-10 16:34:24 +01:00
// Mutex for special extra-large modules to compile alone
shared_mutex shared_mtx ;
2023-09-11 11:52:10 +02:00
static s16 limit ( )
2023-09-09 12:28:33 +02:00
{
2023-09-11 11:52:10 +02:00
return static_cast < s16 > ( std : : min < s32 > ( 0x7fff , utils : : get_thread_count ( ) ) ) ;
2023-09-09 12:28:33 +02:00
}
} ;
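// The allocator above throttles how many PPU modules are LLVM-compiled concurrently. A minimal
// sketch of the same idea with the standard library (the real code uses the project's own
// ::semaphore and acquires it via std::lock_guard; names and the limit below are illustrative):
//
// #include <semaphore>
// std::counting_semaphore<0x7fff> jit_slots{8}; // e.g. allow 8 concurrent compilations
// void compile_one_module()
// {
//     jit_slots.acquire();   // blocks while the limit is reached
//     // ... run LLVM codegen for one PPU module ...
//     jit_slots.release();
// }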
2021-01-27 14:08:43 +01:00
# ifdef LLVM_AVAILABLE
namespace
{
// Compiled PPU module info
struct jit_module
{
2025-04-05 21:50:45 +02:00
std : : vector < void ( * ) ( u8 * , u64 ) > symbol_resolvers ;
2025-01-10 16:34:24 +01:00
std : : vector < std : : shared_ptr < jit_compiler > > pjit ;
2021-01-31 19:38:47 +01:00
bool init = false ;
2021-01-27 14:08:43 +01:00
} ;
struct jit_module_manager
{
2023-09-09 21:00:52 +02:00
struct bucket_t
{
shared_mutex mutex ;
std : : unordered_map < std : : string , jit_module > map ;
} ;
std : : array < bucket_t , 30 > buckets ;
bucket_t & get_bucket ( std : : string_view sv )
{
return buckets [ std : : hash < std : : string_view > ( ) ( sv ) % std : : size ( buckets ) ] ;
}
2021-01-27 14:08:43 +01:00
jit_module & get ( const std : : string & name )
{
2023-09-09 21:00:52 +02:00
bucket_t & bucket = get_bucket ( name ) ;
std : : lock_guard lock ( bucket . mutex ) ;
return bucket . map . emplace ( name , jit_module { } ) . first - > second ;
2021-01-27 14:08:43 +01:00
}
void remove ( const std : : string & name ) noexcept
{
2023-09-09 21:00:52 +02:00
bucket_t & bucket = get_bucket ( name ) ;
2021-01-27 14:08:43 +01:00
2023-09-09 21:00:52 +02:00
jit_module to_destroy { } ;
2021-01-27 14:08:43 +01:00
2023-09-09 21:00:52 +02:00
std : : lock_guard lock ( bucket . mutex ) ;
const auto found = bucket . map . find ( name ) ;
if ( found = = bucket . map . end ( ) ) [[unlikely]]
2021-01-27 14:08:43 +01:00
{
ppu_log . error ( " Failed to remove module %s " , name ) ;
2024-03-30 18:25:03 +01:00
for ( auto & buck : buckets )
{
for ( auto & mod : buck . map )
{
ppu_log . notice ( " But there is module %s " , mod . first ) ;
}
}
2021-01-27 14:08:43 +01:00
return ;
}
2023-09-09 21:00:52 +02:00
to_destroy . pjit = std : : move ( found - > second . pjit ) ;
2025-01-10 16:34:24 +01:00
to_destroy . symbol_resolvers = std : : move ( found - > second . symbol_resolvers ) ;
2023-09-09 21:00:52 +02:00
bucket . map . erase ( found ) ;
2021-01-27 14:08:43 +01:00
}
2025-01-21 12:27:10 +01:00
jit_module_manager & operator = ( thread_state s ) noexcept
{
if ( s = = thread_state : : destroying_context )
{
for ( auto & buck : buckets )
{
for ( auto & mod : buck . map )
{
for ( auto & jit : mod . second . pjit )
{
* jit = s ;
}
}
}
}
return * this ;
}
2021-01-27 14:08:43 +01:00
} ;
2025-04-05 21:50:45 +02:00
} // namespace
2021-01-27 14:08:43 +01:00
# endif
2021-01-29 11:32:19 +01:00
namespace
{
// Read-only file view starting with specified offset (for MSELF)
struct file_view : fs : : file_base
{
2023-10-29 01:46:52 +02:00
const fs : : file m_storage ;
const fs : : file & m_file ;
2021-01-29 11:32:19 +01:00
const u64 m_off ;
2023-10-29 01:46:52 +02:00
const u64 m_max_size ;
2021-01-29 11:32:19 +01:00
u64 m_pos ;
2023-10-29 01:46:52 +02:00
explicit file_view ( const fs : : file & _file , u64 offset , u64 max_size ) noexcept
2025-04-05 21:50:45 +02:00
: m_storage ( fs : : file ( ) ) , m_file ( _file ) , m_off ( offset ) , m_max_size ( max_size ) , m_pos ( 0 )
2023-10-29 01:46:52 +02:00
{
}
explicit file_view ( fs : : file & & _file , u64 offset , u64 max_size ) noexcept
2025-04-05 21:50:45 +02:00
: m_storage ( std : : move ( _file ) ) , m_file ( m_storage ) , m_off ( offset ) , m_max_size ( max_size ) , m_pos ( 0 )
2021-01-29 11:32:19 +01:00
{
}
~ file_view ( ) override
{
}
2023-07-11 20:40:30 +02:00
fs : : stat_t get_stat ( ) override
2021-01-29 11:32:19 +01:00
{
2023-11-28 12:04:03 +01:00
fs : : stat_t stat = m_file . get_stat ( ) ;
stat . size = std : : min < u64 > ( utils : : sub_saturate < u64 > ( stat . size , m_off ) , m_max_size ) ;
stat . is_writable = false ;
return stat ;
2021-01-29 11:32:19 +01:00
}
2021-03-05 20:05:37 +01:00
bool trunc ( u64 ) override
2021-01-29 11:32:19 +01:00
{
return false ;
}
u64 read ( void * buffer , u64 size ) override
{
2023-10-29 01:46:52 +02:00
const u64 result = file_view : : read_at ( m_pos , buffer , size ) ;
2021-01-29 11:32:19 +01:00
m_pos + = result ;
return result ;
}
2022-12-24 15:15:29 +01:00
u64 read_at ( u64 offset , void * buffer , u64 size ) override
{
2023-11-28 12:04:03 +01:00
return m_file . read_at ( offset + m_off , buffer , std : : min < u64 > ( size , utils : : sub_saturate < u64 > ( m_max_size , offset ) ) ) ;
2022-12-24 15:15:29 +01:00
}
2021-03-05 20:05:37 +01:00
u64 write ( const void * , u64 ) override
2021-01-29 11:32:19 +01:00
{
return 0 ;
}
u64 seek ( s64 offset , fs : : seek_mode whence ) override
{
const s64 new_pos =
whence = = fs : : seek_set ? offset :
whence = = fs : : seek_cur ? offset + m_pos :
2025-04-05 21:50:45 +02:00
whence = = fs : : seek_end ? offset + size ( ) :
- 1 ;
2021-01-29 11:32:19 +01:00
if ( new_pos < 0 )
{
fs : : g_tls_error = fs : : error : : inval ;
return - 1 ;
}
m_pos = new_pos ;
return m_pos ;
}
u64 size ( ) override
{
2023-11-28 12:04:03 +01:00
return std : : min < u64 > ( utils : : sub_saturate < u64 > ( m_file . size ( ) , m_off ) , m_max_size ) ;
2021-01-29 11:32:19 +01:00
}
} ;
2025-04-05 21:50:45 +02:00
} // namespace
2021-01-29 11:32:19 +01:00
2023-10-29 01:46:52 +02:00
extern fs : : file make_file_view ( const fs : : file & _file , u64 offset , u64 max_size = umax )
{
fs : : file file ;
file . reset ( std : : make_unique < file_view > ( _file , offset , max_size ) ) ;
return file ;
}
extern fs : : file make_file_view ( fs : : file & & _file , u64 offset , u64 max_size = umax )
2022-07-04 15:02:17 +02:00
{
fs : : file file ;
2023-10-29 01:46:52 +02:00
file . reset ( std : : make_unique < file_view > ( std : : move ( _file ) , offset , max_size ) ) ;
2022-07-04 15:02:17 +02:00
return file ;
}
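// Example usage (illustrative): read an SPRX embedded in an MSELF without copying it out.
//   fs::file mself{"/path/to/archive.mself"};                  // hypothetical path
//   fs::file sprx = make_file_view(mself, rec.off, rec.size);  // offsets from an mself_record
//   // sprx now reads and seeks relative to rec.off, and size()/get_stat() are clamped to rec.size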
2024-12-22 19:59:48 +01:00
extern void ppu_finalize ( const ppu_module < lv2_obj > & info , bool force_mem_release )
2021-01-27 14:08:43 +01:00
{
2024-04-01 10:21:36 +02:00
if ( info . segs . empty ( ) )
{
// HLEd modules
return ;
}
2024-03-30 15:20:08 +01:00
if ( ! force_mem_release & & info . name . empty ( ) )
2021-01-27 14:08:43 +01:00
{
// Don't remove main module from memory
return ;
}
2024-03-30 15:20:08 +01:00
if ( ! force_mem_release & & Emu . GetCat ( ) = = " 1P " )
2022-11-09 21:41:46 +01:00
{
return ;
}
2021-01-27 14:08:43 +01:00
2024-03-30 15:20:08 +01:00
const bool may_be_elf = fmt : : to_lower ( info . path . substr ( std : : max < usz > ( info . path . size ( ) , 3 ) - 3 ) ) ! = " prx " ;
2024-03-30 18:25:03 +01:00
const std : : string dev_flash = vfs : : get ( " /dev_flash/ " ) ;
2024-03-30 15:20:08 +01:00
if ( ! may_be_elf )
{
2024-03-30 18:25:03 +01:00
if ( ! force_mem_release & & info . path . starts_with ( dev_flash + " sys/external/ " ) )
2024-03-30 15:20:08 +01:00
{
// Don't remove dev_flash prx from memory
return ;
}
}
2025-04-24 12:41:04 +02:00
if ( g_cfg . core . ppu_decoder ! = ppu_decoder_type : : llvm_legacy )
2024-03-30 18:25:03 +01:00
{
return ;
}
2022-11-09 21:41:46 +01:00
// Get cache path for this executable
2024-05-07 00:56:15 +02:00
std : : string cache_path = rpcs3 : : utils : : get_cache_dir ( info . path ) ;
2021-01-27 14:08:43 +01:00
2022-11-09 21:41:46 +01:00
// Add PPU hash and filename
fmt : : append ( cache_path , " ppu-%s-%s/ " , fmt : : base57 ( info . sha1 ) , info . path . substr ( info . path . find_last_of ( ' / ' ) + 1 ) ) ;
2021-01-27 14:08:43 +01:00
# ifdef LLVM_AVAILABLE
2023-08-06 20:04:48 +02:00
g_fxo - > get < jit_module_manager > ( ) . remove ( cache_path + " _ " + std : : to_string ( std : : bit_cast < usz > ( info . segs [ 0 ] . ptr ) ) ) ;
2021-01-27 14:08:43 +01:00
# endif
}
2024-12-22 19:59:48 +01:00
extern void ppu_precompile ( std : : vector < std : : string > & dir_queue , std : : vector < ppu_module < lv2_obj > * > * loaded_modules )
2021-01-29 11:32:19 +01:00
{
2025-04-24 12:41:04 +02:00
if ( g_cfg . core . ppu_decoder ! = ppu_decoder_type : : llvm_legacy )
2021-02-23 15:09:23 +01:00
{
return ;
}
2022-07-04 15:02:17 +02:00
if ( auto dis = g_fxo - > try_get < disable_precomp_t > ( ) ; dis & & dis - > disable )
{
return ;
}
2024-11-15 03:24:03 +01:00
std : : optional < scoped_progress_dialog > progress_dialog ( std : : in_place , get_localized_string ( localized_string_id : : PROGRESS_DIALOG_SCANNING_PPU_EXECUTABLE ) ) ;
2023-11-28 18:41:14 +01:00
2021-01-30 16:18:10 +01:00
// Make sure we only have one '/' at the end and remove duplicates.
for ( std : : string & dir : dir_queue )
{
while ( dir . back ( ) = = ' / ' | | dir . back ( ) = = ' \\ ' )
dir . pop_back ( ) ;
dir + = ' / ' ;
}
2023-11-28 18:41:14 +01:00
2021-01-30 14:08:22 +01:00
std : : sort ( dir_queue . begin ( ) , dir_queue . end ( ) ) ;
dir_queue . erase ( std : : unique ( dir_queue . begin ( ) , dir_queue . end ( ) ) , dir_queue . end ( ) ) ;
const std : : string firmware_sprx_path = vfs : : get ( " /dev_flash/sys/external/ " ) ;
2023-09-05 20:15:52 +02:00
struct file_info
{
std : : string path ;
u64 offset ;
u64 file_size ;
file_info ( ) noexcept = default ;
file_info ( std : : string _path , u64 offs , u64 size ) noexcept
2025-04-05 21:50:45 +02:00
: path ( std : : move ( _path ) ) , offset ( offs ) , file_size ( size )
2023-09-05 20:15:52 +02:00
{
}
} ;
std : : vector < file_info > file_queue ;
2021-01-29 11:32:19 +01:00
file_queue . reserve ( 2000 ) ;
2021-01-30 15:25:21 +01:00
// Find all .sprx files recursively
2021-01-29 11:32:19 +01:00
for ( usz i = 0 ; i < dir_queue . size ( ) ; i + + )
{
if ( Emu . IsStopped ( ) )
{
2021-01-30 14:08:22 +01:00
file_queue . clear ( ) ;
2021-01-29 11:32:19 +01:00
break ;
}
ppu_log . notice ( " Scanning directory: %s " , dir_queue [ i ] ) ;
for ( auto & & entry : fs : : dir ( dir_queue [ i ] ) )
{
if ( Emu . IsStopped ( ) )
{
2021-01-30 14:08:22 +01:00
file_queue . clear ( ) ;
2021-01-29 11:32:19 +01:00
break ;
}
if ( entry . is_directory )
{
if ( entry . name ! = " . " & & entry . name ! = " .. " )
{
dir_queue . emplace_back ( dir_queue [ i ] + entry . name + ' / ' ) ;
}
continue ;
}
2023-09-05 20:15:52 +02:00
// SCE header size
if ( entry . size < = 0x20 )
{
continue ;
}
2021-01-30 14:08:22 +01:00
std : : string upper = fmt : : to_upper ( entry . name ) ;
2022-07-04 15:02:17 +02:00
// Skip already loaded modules or HLEd ones
2022-09-13 15:08:55 +02:00
auto is_ignored = [ & ] ( s64 /*offset*/ ) - > bool
2021-01-29 11:32:19 +01:00
{
2022-07-04 15:02:17 +02:00
if ( dir_queue [ i ] ! = firmware_sprx_path )
2021-01-30 14:08:22 +01:00
{
2022-07-04 15:02:17 +02:00
return false ;
}
2021-01-30 14:08:22 +01:00
2022-07-04 15:02:17 +02:00
if ( loaded_modules )
{
2024-12-22 19:59:48 +01:00
if ( std : : any_of ( loaded_modules - > begin ( ) , loaded_modules - > end ( ) , [ & ] ( ppu_module < lv2_obj > * obj )
2025-04-05 21:50:45 +02:00
{
return obj - > name = = entry . name ;
} ) )
2021-01-30 14:08:22 +01:00
{
2022-07-04 15:02:17 +02:00
return true ;
2021-01-30 14:08:22 +01:00
}
2022-07-04 15:02:17 +02:00
}
2021-01-30 14:08:22 +01:00
2022-07-04 15:02:17 +02:00
if ( g_cfg . core . libraries_control . get_set ( ) . count ( entry . name + " :lle " ) )
{
// Force LLE
return false ;
}
else if ( g_cfg . core . libraries_control . get_set ( ) . count ( entry . name + " :hle " ) )
{
// Force HLE
return true ;
}
2021-01-30 14:08:22 +01:00
2022-07-04 15:02:17 +02:00
extern const std : : map < std : : string_view , int > g_prx_list ;
// Use list
2022-09-19 14:57:51 +02:00
return g_prx_list . count ( entry . name ) & & : : at32 ( g_prx_list , entry . name ) ! = 0 ;
2022-07-04 15:02:17 +02:00
} ;
2023-08-30 15:08:27 +02:00
// Check PRX filename
if ( upper . ends_with ( " .PRX " ) | | ( upper . ends_with ( " .SPRX " ) & & entry . name ! = " libfs_utility_init.sprx " sv ) )
2022-07-04 15:02:17 +02:00
{
if ( is_ignored ( 0 ) )
{
continue ;
2021-01-30 14:08:22 +01:00
}
2021-01-29 11:32:19 +01:00
// Get full path
2023-09-05 20:15:52 +02:00
file_queue . emplace_back ( dir_queue [ i ] + entry . name , 0 , entry . size ) ;
2021-01-30 14:08:22 +01:00
continue ;
}
2023-08-30 15:08:27 +02:00
// Check ELF filename
2023-09-02 08:01:35 +02:00
if ( ( upper . ends_with ( " .ELF " ) | | upper . ends_with ( " .SELF " ) ) & & Emu . GetBoot ( ) ! = dir_queue [ i ] + entry . name )
2021-01-30 14:08:22 +01:00
{
// Get full path
2023-09-05 20:15:52 +02:00
file_queue . emplace_back ( dir_queue [ i ] + entry . name , 0 , entry . size ) ;
2021-01-29 11:32:19 +01:00
continue ;
}
// Check .mself filename
2021-01-30 14:08:22 +01:00
if ( upper . ends_with ( " .MSELF " ) )
2021-01-29 11:32:19 +01:00
{
if ( fs : : file mself { dir_queue [ i ] + entry . name } )
{
mself_header hdr { } ;
if ( mself . read ( hdr ) & & hdr . get_count ( mself . size ( ) ) )
{
2021-01-29 19:06:49 +01:00
for ( u32 j = 0 ; j < hdr . count ; j + + )
2021-01-29 11:32:19 +01:00
{
mself_record rec { } ;
2024-02-16 08:05:14 +01:00
std : : set < u64 > offs ;
2021-01-29 11:32:19 +01:00
if ( mself . read ( rec ) & & rec . get_pos ( mself . size ( ) ) )
{
2024-02-16 08:05:14 +01:00
if ( rec . size < = 0x20 )
{
continue ;
}
if ( ! offs . emplace ( rec . off ) . second )
{
// Duplicate
continue ;
}
2024-02-15 18:18:30 +01:00
// Read characters safely
std : : string name ( sizeof ( rec . name ) , ' \0 ' ) ;
std : : memcpy ( name . data ( ) , rec . name , name . size ( ) ) ;
name = std : : string ( name . c_str ( ) ) ;
2021-01-29 11:32:19 +01:00
2021-01-30 14:08:22 +01:00
upper = fmt : : to_upper ( name ) ;
2024-02-16 08:14:47 +01:00
if ( upper . find ( " .SPRX " ) ! = umax | | upper . find ( " .PRX " ) ! = umax )
2021-01-29 11:32:19 +01:00
{
// .sprx inside .mself found
2023-09-05 20:15:52 +02:00
file_queue . emplace_back ( dir_queue [ i ] + entry . name , rec . off , rec . size ) ;
2021-01-30 14:08:22 +01:00
continue ;
}
2024-02-16 08:14:47 +01:00
if ( upper . find ( " .SELF " ) ! = umax | | upper . find ( " .ELF " ) ! = umax )
2021-01-30 14:08:22 +01:00
{
// .self inside .mself found
2023-09-05 20:15:52 +02:00
file_queue . emplace_back ( dir_queue [ i ] + entry . name , rec . off , rec . size ) ;
2021-01-30 14:08:22 +01:00
continue ;
2021-01-29 11:32:19 +01:00
}
}
else
{
ppu_log . error ( " MSELF file is possibly truncated " ) ;
break ;
}
}
}
}
}
}
}
2023-12-30 19:53:07 +01:00
g_progr_ftotal + = : : size32 ( file_queue ) ;
2023-09-05 20:15:52 +02:00
u64 total_files_size = 0 ;
for ( const file_info & info : file_queue )
{
total_files_size + = info . file_size ;
}
g_progr_ftotal_bits + = total_files_size ;
2024-11-15 03:24:03 +01:00
* progress_dialog = get_localized_string ( localized_string_id : : PROGRESS_DIALOG_COMPILING_PPU_MODULES ) ;
2021-01-30 14:08:22 +01:00
2021-01-29 11:32:19 +01:00
atomic_t < usz > fnext = 0 ;
2023-09-05 20:15:52 +02:00
lf_queue < file_info > possible_exec_file_paths ;
2021-01-29 11:32:19 +01:00
2025-03-22 02:28:00 +01:00
concurent_memory_limit memory_limit ( utils : : get_total_memory ( ) / 3 ) ;
2023-09-11 14:03:39 +02:00
2024-03-30 15:20:08 +01:00
const u32 software_thread_limit = std : : min < u32 > ( g_cfg . core . llvm_threads ? g_cfg . core . llvm_threads : u32 { umax } , : : size32 ( file_queue ) ) ;
const u32 cpu_thread_limit = utils : : get_thread_count ( ) > 8u ? std : : max < u32 > ( utils : : get_thread_count ( ) , 2 ) - 1 : utils : : get_thread_count ( ) ; // One LLVM thread less
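// Worked example: on a 16-thread CPU cpu_thread_limit is 15 (one hardware thread kept free for the
// rest of the emulator); with 8 or fewer hardware threads all of them may be used.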
2025-02-23 18:04:14 +01:00
std : : vector < u128 > decrypt_klics ;
if ( loaded_modules )
{
for ( auto mod : * loaded_modules )
{
for ( const auto & [ stub , data_vec ] : mod - > stub_addr_to_constant_state_of_registers )
{
if ( decrypt_klics . size ( ) > = 4u )
{
break ;
}
for ( const auto & [ reg_mask , constant_value ] : data_vec )
{
if ( decrypt_klics . size ( ) > = 4u )
{
break ;
}
if ( constant_value > u32 { umax } )
{
continue ;
}
// R3 - first argument
if ( reg_mask . mask & ( 1u < < 3 ) )
{
// Sizeof KLIC
if ( auto klic_ptr = mod - > get_ptr < const u8 > ( static_cast < u32 > ( constant_value ) , 16 ) )
{
// Try to read from that address
if ( const u128 klic_value = read_from_ptr < u128 > ( klic_ptr ) )
{
if ( ! std : : count_if ( decrypt_klics . begin ( ) , decrypt_klics . end ( ) , FN ( std : : memcmp ( & x , & klic_value , 16 ) = = 0 ) ) )
{
decrypt_klics . emplace_back ( klic_value ) ;
}
}
}
}
}
}
}
}
2024-03-30 15:20:08 +01:00
named_thread_group workers ( " SPRX Worker " , std : : min < u32 > ( software_thread_limit , cpu_thread_limit ) , [ & ]
2025-04-05 21:50:45 +02:00
{
2022-06-14 14:28:38 +02:00
# ifdef __APPLE__
2025-04-05 21:50:45 +02:00
pthread_jit_write_protect_np ( false ) ;
2022-06-14 14:28:38 +02:00
# endif
2025-04-05 21:50:45 +02:00
// Set low priority
thread_ctrl : : scoped_priority low_prio ( - 1 ) ;
u32 inc_fdone = 1 ;
2021-01-30 16:18:10 +01:00
2025-04-05 21:50:45 +02:00
for ( usz func_i = fnext + + ; func_i < file_queue . size ( ) ; func_i = fnext + + , g_progr_fdone + = std : : exchange ( inc_fdone , 1 ) )
2021-01-30 14:08:22 +01:00
{
2025-04-05 21:50:45 +02:00
if ( Emu . IsStopped ( ) )
{
continue ;
}
2021-01-30 14:08:22 +01:00
2025-04-05 21:50:45 +02:00
auto & [ path , offset , file_size ] = file_queue [ func_i ] ;
2025-03-22 02:28:00 +01:00
2025-04-05 21:50:45 +02:00
ppu_log . notice ( " Trying to load: %s " , path ) ;
2021-01-29 11:32:19 +01:00
2025-04-05 21:50:45 +02:00
auto file_allocation = memory_limit . acquire ( file_size * 2 ) ;
2021-01-30 14:08:22 +01:00
2025-04-05 21:50:45 +02:00
// Load MSELF, SPRX or SELF
fs : : file src { path } ;
2021-01-29 19:06:49 +01:00
2025-04-05 21:50:45 +02:00
if ( ! src )
2025-02-23 18:04:14 +01:00
{
2025-04-05 21:50:45 +02:00
ppu_log . error ( " Failed to open '%s' (%s) " , path , fs : : g_tls_error ) ;
continue ;
2025-02-23 18:04:14 +01:00
}
2025-04-05 21:50:45 +02:00
if ( u64 off = offset )
2025-02-23 18:04:14 +01:00
{
2025-04-05 21:50:45 +02:00
// Adjust offset for MSELF
src = make_file_view ( std : : move ( src ) , offset , file_size ) ;
// Adjust path for MSELF too
fmt : : append ( path , " _x%x " , off ) ;
2025-02-23 18:04:14 +01:00
}
2021-01-29 11:32:19 +01:00
2025-04-05 21:50:45 +02:00
for ( usz i = 0 ; ; i + + )
2025-03-01 10:45:03 +01:00
{
2025-04-05 21:50:45 +02:00
if ( i > decrypt_klics . size ( ) )
2025-03-01 10:45:03 +01:00
{
2025-04-05 21:50:45 +02:00
src . close ( ) ;
break ;
}
2025-03-01 10:45:03 +01:00
2025-04-05 21:50:45 +02:00
// Some files may fail to decrypt due to the lack of klic
u128 key = i = = decrypt_klics . size ( ) ? u128 { } : decrypt_klics [ i ] ;
if ( auto result = decrypt_self ( src , i = = decrypt_klics . size ( ) ? nullptr : reinterpret_cast < const u8 * > ( & key ) ) )
{
src = std : : move ( result ) ;
break ;
2025-03-01 10:45:03 +01:00
}
}
2025-04-05 21:50:45 +02:00
if ( ! src & & ! Emu . klic . empty ( ) & & src . open ( path ) )
{
src = decrypt_self ( src , reinterpret_cast < u8 * > ( & Emu . klic [ 0 ] ) ) ;
2025-03-01 10:45:03 +01:00
2025-04-05 21:50:45 +02:00
if ( src )
{
ppu_log . error ( " Possible missed KLIC for precompilation of '%s', please report to developers. " , path ) ;
2025-03-01 10:45:03 +01:00
2025-04-05 21:50:45 +02:00
// Ignore executables larger than 500KB to prevent a long pause on exitspawn
if ( src . size ( ) > = 500000 )
{
g_progr_ftotal_bits - = file_size ;
2021-01-30 14:08:22 +01:00
2025-04-05 21:50:45 +02:00
continue ;
}
}
}
2021-01-29 11:32:19 +01:00
2025-04-05 21:50:45 +02:00
if ( ! src )
2021-01-29 11:32:19 +01:00
{
2025-04-05 21:50:45 +02:00
ppu_log . notice ( " Failed to decrypt '%s' " , path ) ;
g_progr_ftotal_bits - = file_size ;
2021-01-29 11:32:19 +01:00
continue ;
}
2021-01-30 14:08:22 +01:00
2025-04-05 21:50:45 +02:00
elf_error prx_err { } , ovl_err { } ;
2021-01-30 16:18:10 +01:00
2025-04-05 21:50:45 +02:00
if ( ppu_prx_object obj = src ; ( prx_err = obj , obj = = elf_error : : ok ) )
2021-01-30 14:08:22 +01:00
{
2025-04-05 21:50:45 +02:00
if ( auto prx = ppu_load_prx ( obj , true , path , offset ) )
2023-09-11 14:03:39 +02:00
{
2025-04-05 21:50:45 +02:00
obj . clear ( ) , src . close ( ) ; // Clear decrypted file and elf object memory
file_allocation = { } ; // release used file memory
ppu_initialize ( * prx , false , file_size , memory_limit ) ;
ppu_finalize ( * prx , true ) ;
continue ;
2023-09-11 14:03:39 +02:00
}
2025-04-05 21:50:45 +02:00
// Log error
prx_err = elf_error : : header_type ;
}
2021-01-30 14:08:22 +01:00
2025-04-05 21:50:45 +02:00
if ( ppu_exec_object obj = src ; ( ovl_err = obj , obj = = elf_error : : ok ) )
{
while ( ovl_err = = elf_error : : ok )
2021-01-30 14:08:22 +01:00
{
2025-04-05 21:50:45 +02:00
if ( Emu . IsStopped ( ) )
{
break ;
}
const auto [ ovlm , error ] = ppu_load_overlay ( obj , true , path , offset ) ;
if ( error )
{
if ( error = = CELL_CANCEL + 0u )
{
// Emulation stopped
break ;
}
// Abort
ovl_err = elf_error : : header_type ;
break ;
}
obj . clear ( ) , src . close ( ) ; // Clear decrypted file and elf object memory
file_allocation = { } ; // release used file memory
// Participate in thread execution limitation (takes a long time)
if ( std : : lock_guard lock ( g_fxo - > get < jit_core_allocator > ( ) . sem ) ; ! ovlm - > analyse ( 0 , ovlm - > entry , ovlm - > seg0_code_end , ovlm - > applied_patches , std : : vector < u32 > { } , [ ] ( )
{
return Emu . IsStopped ( ) ;
} ) )
2023-07-14 16:57:43 +02:00
{
// Emulation stopped
break ;
}
2025-04-05 21:50:45 +02:00
ppu_initialize ( * ovlm , false , file_size , memory_limit ) ;
ppu_finalize ( * ovlm , true ) ;
2021-01-30 14:08:22 +01:00
break ;
}
2021-01-29 11:32:19 +01:00
2025-04-05 21:50:45 +02:00
if ( ovl_err = = elf_error : : ok )
2021-01-30 14:08:22 +01:00
{
2025-04-05 21:50:45 +02:00
continue ;
2021-01-30 14:08:22 +01:00
}
}
2025-04-05 21:50:45 +02:00
ppu_log . notice ( " Failed to precompile '%s' (prx: %s, ovl: %s): Attempting compilation as executable file " , path , prx_err , ovl_err ) ;
possible_exec_file_paths . push ( path , offset , file_size ) ;
inc_fdone = 0 ;
2021-01-30 14:08:22 +01:00
}
2025-04-05 21:50:45 +02:00
} ) ;
2021-01-29 11:32:19 +01:00
// Join every thread
workers . join ( ) ;
2021-01-30 14:08:22 +01:00
2023-06-25 14:53:42 +02:00
named_thread exec_worker ( " PPU Exec Worker " , [ & ]
{
2025-04-05 21:50:45 +02:00
if ( ! possible_exec_file_paths )
{
return ;
}
2023-06-25 14:53:42 +02:00
# ifdef __APPLE__
2025-04-05 21:50:45 +02:00
pthread_jit_write_protect_np ( false ) ;
2023-06-25 14:53:42 +02:00
# endif
2025-04-05 21:50:45 +02:00
// Set low priority
thread_ctrl : : scoped_priority low_prio ( - 1 ) ;
2023-06-25 14:53:42 +02:00
2025-04-05 21:50:45 +02:00
auto slice = possible_exec_file_paths . pop_all ( ) ;
2023-06-25 14:53:42 +02:00
2025-04-05 21:50:45 +02:00
auto main_module = std : : move ( g_fxo - > get < main_ppu_module < lv2_obj > > ( ) ) ;
2023-06-25 14:53:42 +02:00
2025-04-05 21:50:45 +02:00
for ( ; slice ; slice . pop_front ( ) , g_progr_fdone + + )
2023-06-25 14:53:42 +02:00
{
2025-04-05 21:50:45 +02:00
if ( Emu . IsStopped ( ) )
{
continue ;
}
2023-06-25 14:53:42 +02:00
2025-04-05 21:50:45 +02:00
const auto & [ path , _ , file_size ] = * slice ;
2023-06-25 14:53:42 +02:00
2025-04-05 21:50:45 +02:00
ppu_log . notice ( " Trying to load as executable: %s " , path ) ;
2023-06-25 14:53:42 +02:00
2025-04-05 21:50:45 +02:00
// Load SELF
fs : : file src { path } ;
2025-03-22 02:28:00 +01:00
2025-04-05 21:50:45 +02:00
if ( ! src )
2025-02-23 18:04:14 +01:00
{
2025-04-05 21:50:45 +02:00
ppu_log . error ( " Failed to open '%s' (%s) " , path , fs : : g_tls_error ) ;
continue ;
2025-02-23 18:04:14 +01:00
}
2025-04-05 21:50:45 +02:00
auto file_allocation = memory_limit . acquire ( file_size * 2 ) ;
2025-02-23 18:04:14 +01:00
2025-04-05 21:50:45 +02:00
for ( usz i = 0 ; ; i + + )
2025-02-23 18:04:14 +01:00
{
2025-04-05 21:50:45 +02:00
if ( i > decrypt_klics . size ( ) )
{
src . close ( ) ;
break ;
}
// Some files may fail to decrypt due to the lack of klic
u128 key = i = = decrypt_klics . size ( ) ? u128 { } : decrypt_klics [ i ] ;
if ( auto result = decrypt_self ( src , i = = decrypt_klics . size ( ) ? nullptr : reinterpret_cast < const u8 * > ( & key ) ) )
{
src = std : : move ( result ) ;
break ;
}
2025-02-23 18:04:14 +01:00
}
2023-06-25 14:53:42 +02:00
2025-04-05 21:50:45 +02:00
if ( ! src & & ! Emu . klic . empty ( ) & & src . open ( path ) )
2025-03-01 10:45:03 +01:00
{
2025-04-05 21:50:45 +02:00
src = decrypt_self ( src , reinterpret_cast < u8 * > ( & Emu . klic [ 0 ] ) ) ;
if ( src )
{
ppu_log . error ( " Possible missed KLIC for precompilation of '%s', please report to developers. " , path ) ;
}
2025-03-01 10:45:03 +01:00
}
2025-04-05 21:50:45 +02:00
if ( ! src )
{
ppu_log . notice ( " Failed to decrypt '%s' " , path ) ;
2025-03-01 10:45:03 +01:00
2025-04-05 21:50:45 +02:00
g_progr_ftotal_bits - = file_size ;
2025-03-01 10:45:03 +01:00
2025-04-05 21:50:45 +02:00
continue ;
}
2023-06-25 14:53:42 +02:00
2025-04-05 21:50:45 +02:00
elf_error exec_err { } ;
2023-06-25 14:53:42 +02:00
2025-04-05 21:50:45 +02:00
if ( ppu_exec_object obj = src ; ( exec_err = obj , obj = = elf_error : : ok ) )
2023-06-25 14:53:42 +02:00
{
2025-04-05 21:50:45 +02:00
while ( exec_err = = elf_error : : ok )
{
main_ppu_module < lv2_obj > & _main = g_fxo - > get < main_ppu_module < lv2_obj > > ( ) ;
_main = { } ;
2023-06-29 07:42:21 +02:00
2025-04-05 21:50:45 +02:00
auto current_cache = std : : move ( g_fxo - > get < spu_cache > ( ) ) ;
2023-09-01 14:07:46 +02:00
2025-04-05 21:50:45 +02:00
if ( ! ppu_load_exec ( obj , true , path ) )
{
// Abort
exec_err = elf_error : : header_type ;
break ;
}
2023-06-25 14:53:42 +02:00
2025-04-05 21:50:45 +02:00
if ( std : : memcmp ( main_module . sha1 , _main . sha1 , sizeof ( _main . sha1 ) ) = = 0 )
{
g_fxo - > get < spu_cache > ( ) = std : : move ( current_cache ) ;
break ;
}
if ( ! _main . analyse ( 0 , _main . elf_entry , _main . seg0_code_end , _main . applied_patches , std : : vector < u32 > { } , [ ] ( )
{
return Emu . IsStopped ( ) ;
} ) )
{
g_fxo - > get < spu_cache > ( ) = std : : move ( current_cache ) ;
break ;
}
obj . clear ( ) , src . close ( ) ; // Clear decrypted file and elf object memory
file_allocation = { } ;
_main . name = ' ' ; // Make ppu_finalize work
Emu . ConfigurePPUCache ( ) ;
ppu_initialize ( _main , false , file_size , memory_limit ) ;
spu_cache : : initialize ( false ) ;
ppu_finalize ( _main , true ) ;
_main = { } ;
2023-09-01 14:07:46 +02:00
g_fxo - > get < spu_cache > ( ) = std : : move ( current_cache ) ;
2023-08-06 08:43:13 +02:00
break ;
2023-08-04 19:14:52 +02:00
}
2025-04-05 21:50:45 +02:00
if ( exec_err = = elf_error : : ok )
2023-06-25 14:53:42 +02:00
{
2025-04-05 21:50:45 +02:00
continue ;
2023-06-25 14:53:42 +02:00
}
}
2025-04-05 21:50:45 +02:00
ppu_log . notice ( " Failed to precompile '%s' as executable (%s) " , path , exec_err ) ;
2023-06-25 14:53:42 +02:00
}
2025-04-05 21:50:45 +02:00
g_fxo - > get < main_ppu_module < lv2_obj > > ( ) = std : : move ( main_module ) ;
g_fxo - > get < spu_cache > ( ) . collect_funcs_to_precompile = true ;
Emu . ConfigurePPUCache ( ) ;
} ) ;
2023-06-25 14:53:42 +02:00
exec_worker ( ) ;
2021-01-29 11:32:19 +01:00
}
2017-02-26 16:56:31 +01:00
extern void ppu_initialize ( )
2016-06-22 15:37:51 +02:00
{
2024-12-22 19:59:48 +01:00
if ( ! g_fxo - > is_init < main_ppu_module < lv2_obj > > ( ) )
2017-02-26 16:56:31 +01:00
{
return ;
}
2017-01-22 20:03:57 +01:00
2018-05-04 23:01:27 +02:00
if ( Emu . IsStopped ( ) )
{
return ;
}
2021-01-31 18:00:04 +01:00
2024-12-22 19:59:48 +01:00
auto & _main = g_fxo - > get < main_ppu_module < lv2_obj > > ( ) ;
2022-07-04 15:02:17 +02:00
2024-11-15 03:24:03 +01:00
std : : optional < scoped_progress_dialog > progress_dialog ( std : : in_place , get_localized_string ( localized_string_id : : PROGRESS_DIALOG_ANALYZING_PPU_EXECUTABLE ) ) ;
2023-04-08 17:03:05 +02:00
// Analyse executable
2025-04-05 21:50:45 +02:00
if ( ! _main . analyse ( 0 , _main . elf_entry , _main . seg0_code_end , _main . applied_patches , std : : vector < u32 > { } , [ ] ( )
{
return Emu . IsStopped ( ) ;
} ) )
2023-04-08 17:03:05 +02:00
{
return ;
}
// Validate analyser results (not required)
_main . validate ( 0 ) ;
2024-11-15 03:24:03 +01:00
* progress_dialog = get_localized_string ( localized_string_id : : PROGRESS_DIALOG_SCANNING_PPU_MODULES ) ;
2019-12-04 23:17:57 +01:00
2021-01-30 14:08:22 +01:00
bool compile_main = false ;
2018-05-04 23:01:27 +02:00
2021-01-30 14:08:22 +01:00
// Check main module cache
2021-03-02 12:59:19 +01:00
if ( ! _main . segs . empty ( ) )
2019-09-26 16:06:43 +02:00
{
2021-03-02 12:59:19 +01:00
compile_main = ppu_initialize ( _main , true ) ;
2019-09-26 16:06:43 +02:00
}
2017-02-26 16:56:31 +01:00
2024-12-22 19:59:48 +01:00
std : : vector < ppu_module < lv2_obj > * > module_list ;
2025-02-23 18:04:14 +01:00
module_list . emplace_back ( & g_fxo - > get < main_ppu_module < lv2_obj > > ( ) ) ;
2017-04-22 15:00:23 +02:00
2022-07-04 15:02:17 +02:00
const std : : string firmware_sprx_path = vfs : : get ( " /dev_flash/sys/external/ " ) ;
// If empty we have no indication for firmware cache state, check everything
2023-08-05 20:40:11 +02:00
bool compile_fw = ! Emu . IsVsh ( ) ;
2022-07-04 15:02:17 +02:00
idm : : select < lv2_obj , lv2_prx > ( [ & ] ( u32 , lv2_prx & _module )
2023-01-15 21:12:54 +01:00
{
2025-04-05 21:50:45 +02:00
if ( _module . get_funcs ( ) . empty ( ) )
{
return ;
}
2023-01-15 21:12:54 +01:00
2025-04-05 21:50:45 +02:00
if ( _module . path . starts_with ( firmware_sprx_path ) )
{
// Postpone testing
compile_fw = false ;
}
2022-07-04 15:02:17 +02:00
2025-04-05 21:50:45 +02:00
module_list . emplace_back ( & _module ) ;
} ) ;
2017-04-22 15:00:23 +02:00
2022-07-04 15:02:17 +02:00
idm : : select < lv2_obj , lv2_overlay > ( [ & ] ( u32 , lv2_overlay & _module )
2025-04-05 21:50:45 +02:00
{
module_list . emplace_back ( & _module ) ;
} ) ;
2021-01-30 14:08:22 +01:00
// Check preloaded libraries cache
2022-07-04 15:02:17 +02:00
if ( ! compile_fw )
2021-01-30 14:08:22 +01:00
{
2022-07-04 15:02:17 +02:00
for ( auto ptr : module_list )
{
if ( ptr - > path . starts_with ( firmware_sprx_path ) )
{
compile_fw | = ppu_initialize ( * ptr , true ) ;
2022-11-05 16:14:34 +01:00
// Fixup for compatibility with old savestates
if ( Emu . DeserialManager ( ) & & ptr - > name = = " liblv2.sprx " )
{
static_cast < lv2_prx * > ( ptr ) - > state = PRX_STATE_STARTED ;
static_cast < lv2_prx * > ( ptr ) - > load_exports ( ) ;
}
2022-07-04 15:02:17 +02:00
}
}
2021-01-30 14:08:22 +01:00
}
std : : vector < std : : string > dir_queue ;
2022-05-12 07:08:36 +02:00
const std : : string mount_point = vfs : : get ( " /dev_flash/ " ) ;
2023-08-26 10:23:42 +02:00
bool dev_flash_located = ! Emu . GetCat ( ) . ends_with ( ' P ' ) & & Emu . IsPathInsideDir ( Emu . GetBoot ( ) , mount_point ) & & g_cfg . core . llvm_precompilation ;
2022-05-12 07:08:36 +02:00
if ( compile_fw | | dev_flash_located )
2021-01-30 14:08:22 +01:00
{
2022-05-12 07:08:36 +02:00
if ( dev_flash_located )
{
const std : : string eseibrd = mount_point + " /vsh/module/eseibrd.sprx " ;
2023-06-25 14:53:42 +02:00
if ( auto prx = ppu_load_prx ( ppu_prx_object { decrypt_self ( fs : : file { eseibrd } ) } , true , eseibrd , 0 ) )
2022-05-12 07:08:36 +02:00
{
// Check if cache exists for this infinitesimally small prx
dev_flash_located = ppu_initialize ( * prx , true ) ;
}
}
2024-03-30 18:25:03 +01:00
const std : : string firmware_sprx_path = vfs : : get ( dev_flash_located ? " /dev_flash/ " sv : " /dev_flash/sys/external/ " sv ) ;
2021-01-30 14:08:22 +01:00
dir_queue . emplace_back ( firmware_sprx_path ) ;
}
// Avoid compilation if main's cache exists or it is a standalone SELF with no PARAM.SFO
2023-08-26 10:23:42 +02:00
if ( compile_main & & g_cfg . core . llvm_precompilation & & ! Emu . GetTitleID ( ) . empty ( ) & & ! Emu . IsChildProcess ( ) )
2021-01-30 14:08:22 +01:00
{
2021-01-30 16:18:10 +01:00
// Try to add all related directories
const std : : set < std : : string > dirs = Emu . GetGameDirs ( ) ;
dir_queue . insert ( std : : end ( dir_queue ) , std : : begin ( dirs ) , std : : end ( dirs ) ) ;
2021-01-30 14:08:22 +01:00
}
2024-11-15 03:24:03 +01:00
progress_dialog . reset ( ) ;
2023-11-28 18:41:14 +01:00
2022-07-04 15:02:17 +02:00
ppu_precompile ( dir_queue , & module_list ) ;
2021-01-30 14:08:22 +01:00
if ( Emu . IsStopped ( ) )
{
return ;
}
// Initialize main module cache
2021-03-02 12:59:19 +01:00
if ( ! _main . segs . empty ( ) )
2021-01-30 14:08:22 +01:00
{
2021-03-02 12:59:19 +01:00
ppu_initialize ( _main ) ;
2021-01-30 14:08:22 +01:00
}
2017-06-22 23:52:09 +02:00
// Initialize preloaded libraries
2022-07-04 15:02:17 +02:00
for ( auto ptr : module_list )
2017-02-26 16:56:31 +01:00
{
2021-01-30 14:08:22 +01:00
if ( Emu . IsStopped ( ) )
{
return ;
}
2017-06-22 23:52:09 +02:00
ppu_initialize ( * ptr ) ;
2017-04-22 15:00:23 +02:00
}
2017-02-26 16:56:31 +01:00
}
2025-04-05 21:50:45 +02:00
bool ppu_initialize ( const ppu_module < lv2_obj > & info , bool check_only , u64 file_size , concurent_memory_limit & memory_limit )
2017-02-26 16:56:31 +01:00
{
2025-04-24 12:41:04 +02:00
if ( g_cfg . core . ppu_decoder ! = ppu_decoder_type : : llvm_legacy )
2017-02-26 16:56:31 +01:00
{
2023-07-24 11:40:01 +02:00
if ( check_only | | vm : : base ( info . segs [ 0 ] . addr ) ! = info . segs [ 0 ] . ptr )
2021-01-30 14:08:22 +01:00
{
return false ;
}
2023-06-19 17:05:50 +02:00
auto & toc_manager = g_fxo - > get < ppu_toc_manager > ( ) ;
std : : lock_guard lock ( toc_manager . mutex ) ;
auto & ppu_toc = toc_manager . toc_map ;
2017-04-22 15:00:23 +02:00
2025-01-10 16:34:24 +01:00
for ( const auto & func : info . get_funcs ( ) )
2017-02-26 16:56:31 +01:00
{
2024-03-21 14:56:31 +01:00
if ( func . size & & func . blocks . empty ( ) )
{
ppu_register_function_at ( func . addr , func . size ) ;
}
2017-04-08 22:58:00 +02:00
for ( auto & block : func . blocks )
{
2024-03-21 14:56:31 +01:00
if ( ! block . second )
{
continue ;
}
2023-08-07 20:33:36 +02:00
if ( g_fxo - > is_init < ppu_far_jumps_t > ( ) & & ! g_fxo - > get < ppu_far_jumps_t > ( ) . get_targets ( block . first , block . second ) . empty ( ) )
2023-08-07 17:33:47 +02:00
{
// Replace the block with ppu_far_jump
continue ;
}
2021-09-01 12:38:17 +02:00
ppu_register_function_at ( block . first , block . second ) ;
2017-04-08 22:58:00 +02:00
}
2023-08-07 17:33:47 +02:00
if ( g_cfg . core . ppu_debug & & func . size & & func . toc ! = umax & & ! ppu_get_far_jump ( func . addr ) )
2017-04-08 22:58:00 +02:00
{
2023-07-24 11:40:01 +02:00
ppu_toc [ func . addr ] = func . toc ;
2024-03-21 14:56:31 +01:00
write_to_ptr < ppu_intrp_func_t > ( ppu_ptr ( func . addr ) , & ppu_check_toc ) ;
2017-04-08 22:58:00 +02:00
}
2017-02-26 16:56:31 +01:00
}
2021-01-30 14:08:22 +01:00
return false ;
2017-02-26 16:56:31 +01:00
}
2017-06-24 17:36:49 +02:00
// Link table
static const std : : unordered_map < std : : string , u64 > s_link_table = [ ] ( )
2017-02-26 16:56:31 +01:00
{
2025-04-05 21:50:45 +02:00
std : : unordered_map < std : : string , u64 > link_table {
{ " sys_game_set_system_sw_version " , reinterpret_cast < u64 > ( ppu_execute_syscall ) } ,
{ " __trap " , reinterpret_cast < u64 > ( & ppu_trap ) } ,
{ " __error " , reinterpret_cast < u64 > ( & ppu_error ) } ,
{ " __check " , reinterpret_cast < u64 > ( & ppu_check ) } ,
{ " __trace " , reinterpret_cast < u64 > ( & ppu_trace ) } ,
{ " __syscall " , reinterpret_cast < u64 > ( ppu_execute_syscall ) } ,
{ " __get_tb " , reinterpret_cast < u64 > ( get_timebased_time ) } ,
{ " __lwarx " , reinterpret_cast < u64 > ( ppu_lwarx ) } ,
{ " __ldarx " , reinterpret_cast < u64 > ( ppu_ldarx ) } ,
{ " __stwcx " , reinterpret_cast < u64 > ( ppu_stwcx ) } ,
{ " __stdcx " , reinterpret_cast < u64 > ( ppu_stdcx ) } ,
{ " __dcbz " , reinterpret_cast < u64 > ( + [ ] ( u32 addr )
{
alignas ( 64 ) static constexpr u8 z [ 128 ] { } ;
do_cell_atomic_128_store ( addr , z ) ;
} ) } ,
{ " __resupdate " , reinterpret_cast < u64 > ( vm : : reservation_update ) } ,
{ " __resinterp " , reinterpret_cast < u64 > ( ppu_reservation_fallback ) } ,
{ " __escape " , reinterpret_cast < u64 > ( + ppu_escape ) } ,
{ " __read_maybe_mmio32 " , reinterpret_cast < u64 > ( + ppu_read_mmio_aware_u32 ) } ,
{ " __write_maybe_mmio32 " , reinterpret_cast < u64 > ( + ppu_write_mmio_aware_u32 ) } ,
2017-02-26 16:56:31 +01:00
} ;
for ( u64 index = 0 ; index < 1024 ; index + + )
{
2021-01-12 11:01:06 +01:00
if ( ppu_get_syscall ( index ) )
2017-02-26 16:56:31 +01:00
{
2020-03-02 18:17:48 +01:00
link_table . emplace ( fmt : : format ( " %s " , ppu_syscall_code ( index ) ) , reinterpret_cast < u64 > ( ppu_execute_syscall ) ) ;
link_table . emplace ( fmt : : format ( " syscall_%u " , index ) , reinterpret_cast < u64 > ( ppu_execute_syscall ) ) ;
2017-02-26 16:56:31 +01:00
}
}
2017-06-24 17:36:49 +02:00
return link_table ;
} ( ) ;
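// The names above are bound when compiled PPU modules reference them as external symbols; the table is passed to every jit_compiler instance created below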
2017-07-10 21:22:54 +02:00
// Get cache path for this executable
std : : string cache_path ;
2023-08-04 19:14:52 +02:00
if ( ! info . cache . empty ( ) )
2017-07-10 21:22:54 +02:00
{
2019-01-13 18:06:30 +01:00
cache_path = info . cache ;
2017-07-10 21:22:54 +02:00
}
else
{
2019-01-13 18:06:30 +01:00
// New PPU cache location
2024-05-07 00:56:15 +02:00
cache_path = rpcs3 : : utils : : get_cache_dir ( info . path ) ;
2019-01-13 18:06:30 +01:00
// Add PPU hash and filename
fmt : : append ( cache_path , " ppu-%s-%s/ " , fmt : : base57 ( info . sha1 ) , info . path . substr ( info . path . find_last_of ( ' / ' ) + 1 ) ) ;
if ( ! fs : : create_path ( cache_path ) )
2017-07-10 21:22:54 +02:00
{
2019-01-13 18:06:30 +01:00
fmt : : throw_exception ( " Failed to create cache directory: %s (%s) " , cache_path , fs : : g_tls_error ) ;
2017-07-10 21:22:54 +02:00
}
}
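// Illustrative example of the result: "<cache_dir>/ppu-<base57 sha1>-EBOOT.BIN/", where the last component is taken from the executable's file name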
2017-06-24 17:36:49 +02:00
# ifdef LLVM_AVAILABLE
2024-11-15 03:24:03 +01:00
std : : optional < scoped_progress_dialog > progress_dialog ;
2021-03-31 15:31:21 +02:00
2019-12-04 23:17:57 +01:00
if ( ! check_only )
{
// Initialize progress dialog
2024-11-15 03:24:03 +01:00
progress_dialog . emplace ( get_localized_string ( localized_string_id : : PROGRESS_DIALOG_LOADING_PPU_MODULES ) ) ;
2019-12-04 23:17:57 +01:00
}
2018-05-30 19:34:36 +02:00
2017-07-10 21:22:54 +02:00
// Permanently loaded compiled PPU modules (name -> data)
2023-08-06 20:04:48 +02:00
jit_module & jit_mod = g_fxo - > get < jit_module_manager > ( ) . get ( cache_path + " _ " + std : : to_string ( std : : bit_cast < usz > ( info . segs [ 0 ] . ptr ) ) ) ;
2017-07-10 21:22:54 +02:00
// Compiler instance (deferred initialization)
2025-01-10 16:34:24 +01:00
std : : vector < std : : shared_ptr < jit_compiler > > & jits = jit_mod . pjit ;
2017-06-24 17:36:49 +02:00
2017-06-22 23:52:09 +02:00
// Split module into fragments for separate compilation
2020-12-18 08:39:54 +01:00
usz fpos = 0 ;
2017-02-26 16:56:31 +01:00
2025-01-10 16:34:24 +01:00
// Modules counted so far
usz module_counter = 0 ;
2017-07-01 01:08:51 +02:00
// Difference between function name and current location
2025-01-31 13:09:22 +01:00
const u32 reloc = info . is_relocatable ? : : at32 ( info . segs , 0 ) . addr : 0 ;
2016-06-07 22:24:20 +02:00
2020-03-03 20:37:29 +01:00
// Info sent to threads
2024-12-22 19:59:48 +01:00
std : : vector < std : : pair < std : : string , ppu_module < lv2_obj > > > workload ;
2020-03-03 20:37:29 +01:00
// Info to load to main JIT instance (true - compiled)
std : : vector < std : : pair < std : : string , bool > > link_workload ;
// Sync variable to acquire workloads
atomic_t < u32 > work_cv = 0 ;
2021-01-30 14:08:22 +01:00
bool compiled_new = false ;
2021-06-25 09:50:42 +02:00
bool has_mfvscr = false ;
2023-07-14 16:05:27 +02:00
const bool is_being_used_in_emulation = vm : : base ( info . segs [ 0 ] . addr ) = = info . segs [ 0 ] . ptr ;
const cpu_thread * cpu = cpu_thread : : get_current ( ) ;
2025-01-10 16:34:24 +01:00
for ( auto & func : info . get_funcs ( ) )
2021-06-25 09:50:42 +02:00
{
if ( func . size = = 0 )
{
continue ;
}
2025-01-10 16:34:24 +01:00
for ( const auto [ addr , size ] : func )
2021-06-25 09:50:42 +02:00
{
if ( size = = 0 )
{
continue ;
}
2023-06-25 14:53:42 +02:00
auto i_ptr = ensure ( info . get_ptr < u32 > ( addr ) ) ;
for ( u32 i = addr ; i < addr + size ; i + = 4 , i_ptr + + )
2021-06-25 09:50:42 +02:00
{
2023-06-25 14:53:42 +02:00
if ( g_ppu_itype . decode ( * i_ptr ) = = ppu_itype : : MFVSCR )
2021-06-25 09:50:42 +02:00
{
ppu_log . warning ( " MFVSCR found " ) ;
has_mfvscr = true ;
break ;
}
}
if ( has_mfvscr )
{
break ;
}
}
if ( has_mfvscr )
{
break ;
}
}
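// Note: a single MFVSCR anywhere is enough; the whole module is then compiled with VSCR.SAT tracking (ppu_attr::has_mfvscr is added below when the SAT bit option is enabled), so the scan stops at the first match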
2025-01-10 16:34:24 +01:00
// Limit how many modules go into each JIT instance
// Advantage of lowering the limit:
// 1. Lower contiguous memory requirements for allocations
// Its disadvantage:
// 1. A B instruction can reach up to 16MB relative to its location,
// so each additional split of the JIT instance results in a downgraded version of roughly (100% / (N-1)) - (100% / N) of the instructions,
// where N is the total amount of JIT instances
// Subject to change
constexpr u32 c_moudles_per_jit = 100 ;
std : : shared_ptr < std : : pair < u32 , u32 > > local_jit_bounds = std : : make_shared < std : : pair < u32 , u32 > > ( u32 { umax } , 0 ) ;
const auto shared_runtime = make_shared < jit_runtime > ( ) ;
const auto shared_map = make_shared < std : : unordered_map < u32 , u64 > > ( ) ;
2025-01-22 10:15:55 +01:00
const auto full_sample = make_shared < u64 > ( 0 ) ;
2025-01-10 16:34:24 +01:00
const auto shared_mtx = make_shared < shared_mutex > ( ) ;
2025-01-22 10:15:55 +01:00
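// symbols_cement lazily resolves "__0x<addr>" names requested by the JIT linker: it parses the guest address, then emits (and caches in shared_map) a small trampoline that stores the PPU CIA and tail-calls through the exec table entry for that address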
auto symbols_cement = [ runtime = shared_runtime , reloc , seg0 = info . segs [ 0 ] . addr , bound = info . segs [ 0 ] . addr + info . segs [ 0 ] . size - reloc , func_map = shared_map , shared_mtx , full_sample ] ( const std : : string & name ) - > u64
2017-06-22 23:52:09 +02:00
{
2025-01-10 16:34:24 +01:00
u32 func_addr = umax ;
if ( name . starts_with ( " __0x " ) )
{
u32 addr = umax ;
auto res = std : : from_chars ( name . c_str ( ) + 4 , name . c_str ( ) + name . size ( ) , addr , 16 ) ;
if ( res . ec = = std : : errc ( ) & & res . ptr = = name . c_str ( ) + name . size ( ) & & addr < bound )
{
func_addr = addr + reloc ;
}
}
if ( func_addr = = umax )
{
return { } ;
}
reader_lock rlock ( * shared_mtx ) ;
if ( auto it = func_map - > find ( func_addr ) ; it ! = func_map - > end ( ) )
2017-12-31 13:45:12 +01:00
{
2025-01-10 16:34:24 +01:00
return it - > second ;
2017-12-31 13:45:12 +01:00
}
2017-07-10 21:22:54 +02:00
2025-01-10 16:34:24 +01:00
rlock . upgrade ( ) ;
u64 & code_ptr = ( * func_map ) [ func_addr ] ;
if ( code_ptr )
{
return + code_ptr ;
}
2025-04-05 21:50:45 +02:00
[[maybe_unused]] constexpr auto abs_diff = [ ] ( u64 a , u64 b )
{
return a < = b ? b - a : a - b ;
} ;
2025-01-22 10:15:55 +01:00
2025-03-31 09:44:38 +02:00
[[maybe_unused]] auto write_le = [ ] ( u8 * & code , auto value )
2025-01-22 10:15:55 +01:00
{
write_to_ptr < le_t < std : : remove_cvref_t < decltype ( value ) > > > ( code , value ) ;
code + = sizeof ( value ) ;
} ;
# if defined(ARCH_X64)
// Try to make the code fit in 16 bytes, may fail and fallback
if ( * full_sample & & ( * full_sample < = s32 { smax } | | abs_diff ( * full_sample , reinterpret_cast < u64 > ( jit_runtime : : peek ( true ) ) ) < = s32 { smax } ) )
{
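// Fast path layout (illustrative): "BA imm32" (mov edx, pc; 5 bytes) followed by either "E9 rel32" (5 bytes, near jump into the shared tail at *full_sample) or "B8 imm32" plus "FF E0" (mov eax + jmp rax; 7 bytes)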
u8 * code = jit_runtime : : alloc ( 16 , 8 , true ) ;
code_ptr = reinterpret_cast < u64 > ( code ) ;
// mov edx, func_addr
* code + + = 0xba ;
write_le ( code , func_addr - seg0 ) ;
const u64 diff_for_jump = abs_diff ( reinterpret_cast < u64 > ( code + 5 ) , * full_sample ) ;
if ( diff_for_jump < = s32 { smax } )
{
// jmp (rel32) full_sample
* code + + = 0xe9 ;
write_le ( code , static_cast < s32 > ( * full_sample - reinterpret_cast < u64 > ( code + 4 ) ) ) ;
return code_ptr ;
}
else if ( * full_sample < = s32 { smax } )
{
// mov eax, full_sample
* code + + = 0xb8 ;
write_le ( code , static_cast < s32 > ( * full_sample ) ) ;
// jmp rax
* code + + = 0xff ;
* code + + = 0xe0 ;
return code_ptr ;
}
else // fallback (requiring more than 16 bytes)
{
// movabs rax, full_sample
// *code++ = 0x48;
// *code++ = 0xb8;
// write_le(code, *full_sample);
// // jmp rax
// *code++ = 0xff;
// *code++ = 0xea;
// return code_ptr;
ppu_log . error ( " JIT symbol trampoline failed. " ) ;
}
}
2025-01-25 11:03:22 +01:00
# elif 0
2025-01-22 10:15:55 +01:00
// Try to make the code fit in 16 bytes, may fail and fallback
if ( * full_sample & & abs_diff ( * full_sample , reinterpret_cast < u64 > ( jit_runtime : : peek ( true ) + 3 * 4 ) ) < ( 128u < < 20 ) )
{
# ifdef __APPLE__
pthread_jit_write_protect_np ( false ) ;
# endif
u8 * code = jit_runtime : : alloc ( 12 , 4 , true ) ;
code_ptr = reinterpret_cast < u64 > ( code ) ;
union arm_op
{
u32 op ;
bf_t < u32 , 0 , 26 > b_target ;
bf_t < u32 , 5 , 16 > mov_imm16 ;
} ;
const u64 diff_for_jump = abs_diff ( reinterpret_cast < u64 > ( code + 3 * 4 ) , * full_sample ) ;
if ( diff_for_jump < ( 128u < < 20 ) )
{
// MOVZ w15, func_addr
arm_op mov_pcl { 0x5280000F } ;
mov_pcl . mov_imm16 = func_addr & 0xffff ;
write_le ( code , mov_pcl . op ) ;
// MOVK w15, func_addr >> 16, LSL #16
arm_op mov_pch { 0x72A0000F } ;
mov_pch . mov_imm16 = func_addr > > 16 ;
write_le ( code , mov_pch . op ) ;
const s64 branch_offset = ( * full_sample - reinterpret_cast < u64 > ( code + 4 ) ) ;
// B full_sample
arm_op b_sample { 0x14000000 } ;
b_sample . b_target = static_cast < s32 > ( branch_offset / 4 ) ;
write_le ( code , b_sample . op ) ;
return code_ptr ;
}
else // fallback
{
ppu_log . error ( " JIT symbol trampoline failed. " ) ;
}
}
# endif
2025-01-10 16:34:24 +01:00
using namespace asmjit ;
2025-01-22 10:15:55 +01:00
usz code_size_until_jump = umax ;
2025-04-05 21:50:45 +02:00
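// Slow path: build a full trampoline with asmjit. It stores CIA, loads the 8-byte entry at vm::g_exec_addr + addr * 2, splits it into the call target (low 48 bits) and the relocation base (high 16 bits shifted left by 13), then tail-calls the target. code_size_until_jump records where the address-independent tail starts so it can be reused as *full_sample by the short stubs above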
auto func = build_function_asm < u8 * ( * ) ( ppu_thread & , u64 , u8 * , u64 , u64 , u64 ) > ( name , [ & ] ( native_asm & c , auto & /*args*/ )
{
2025-01-10 16:34:24 +01:00
# if defined(ARCH_X64)
2025-04-05 21:50:45 +02:00
c . mov ( x86 : : edx , func_addr - seg0 ) ; // Load PC
2025-01-22 10:15:55 +01:00
2025-04-05 21:50:45 +02:00
const auto buf_start = reinterpret_cast < const u8 * > ( c . bufferData ( ) ) ;
const auto buf_end = reinterpret_cast < const u8 * > ( c . bufferPtr ( ) ) ;
2025-01-22 10:15:55 +01:00
2025-04-05 21:50:45 +02:00
code_size_until_jump = buf_end - buf_start ;
2025-01-22 10:15:55 +01:00
2025-04-05 21:50:45 +02:00
c . add ( x86 : : edx , seg0 ) ;
c . mov ( x86 : : rax , x86 : : qword_ptr ( reinterpret_cast < u64 > ( & vm : : g_exec_addr ) ) ) ;
2025-04-24 12:41:04 +02:00
c . mov ( x86 : : dword_ptr ( x86 : : rbp , OFFSET_OF ( ppu_thread , cia ) ) , x86 : : edx ) ;
2025-01-10 16:34:24 +01:00
2025-04-05 21:50:45 +02:00
c . mov ( x86 : : rax , x86 : : qword_ptr ( x86 : : rax , x86 : : rdx , 1 , 0 ) ) ; // Load call target
c . mov ( x86 : : rdx , x86 : : rax ) ;
c . shl ( x86 : : rax , 16 ) ;
c . shr ( x86 : : rax , 16 ) ;
c . shr ( x86 : : rdx , 48 ) ;
c . shl ( x86 : : edx , 13 ) ;
c . mov ( x86 : : r12d , x86 : : edx ) ; // Load relocation base
c . jmp ( x86 : : rax ) ;
2025-01-10 16:34:24 +01:00
# else
2025-04-05 21:50:45 +02:00
// Load REG_Base - use absolute jump target to bypass rel jmp range limits
// X19 contains vm::g_exec_addr
const arm : : GpX exec_addr = a64 : : x19 ;
2025-01-10 16:34:24 +01:00
2025-04-05 21:50:45 +02:00
// X20 contains ppu_thread*
const arm : : GpX ppu_t_base = a64 : : x20 ;
2025-01-10 16:34:24 +01:00
2025-04-05 21:50:45 +02:00
// Load PC
const arm : : GpX pc = a64 : : x15 ;
const arm : : GpX cia_addr_reg = a64 : : x11 ;
2025-01-10 16:34:24 +01:00
2025-04-05 21:50:45 +02:00
// Load CIA
c . mov ( pc . w ( ) , func_addr ) ;
2025-01-22 10:15:55 +01:00
2025-04-05 21:50:45 +02:00
const auto buf_start = reinterpret_cast < const u8 * > ( c . bufferData ( ) ) ;
const auto buf_end = reinterpret_cast < const u8 * > ( c . bufferPtr ( ) ) ;
2025-01-22 10:15:55 +01:00
2025-04-05 21:50:45 +02:00
code_size_until_jump = buf_end - buf_start ;
2025-01-22 10:15:55 +01:00
2025-04-05 21:50:45 +02:00
// Load offset value
2025-04-24 12:41:04 +02:00
c . mov ( cia_addr_reg , static_cast < u64 > ( OFFSET_OF ( ppu_thread , cia ) ) ) ;
2025-01-10 16:34:24 +01:00
2025-04-05 21:50:45 +02:00
// Update CIA
c . str ( pc . w ( ) , arm : : Mem ( ppu_t_base , cia_addr_reg ) ) ;
2025-01-10 16:34:24 +01:00
2025-04-05 21:50:45 +02:00
// Multiply by 2 to index into ptr table
c . add ( pc , pc , pc ) ;
2025-01-10 16:34:24 +01:00
2025-04-05 21:50:45 +02:00
// Load call target
const arm : : GpX call_target = a64 : : x13 ;
c . ldr ( call_target , arm : : Mem ( exec_addr , pc ) ) ;
2025-01-10 16:34:24 +01:00
2025-04-05 21:50:45 +02:00
// Compute REG_Hp
const arm : : GpX reg_hp = a64 : : x21 ;
c . mov ( reg_hp , call_target ) ;
c . lsr ( reg_hp , reg_hp , 48 ) ;
c . lsl ( reg_hp . w ( ) , reg_hp . w ( ) , 13 ) ;
2025-01-10 16:34:24 +01:00
2025-04-05 21:50:45 +02:00
// Zero top 16 bits of call target
c . lsl ( call_target , call_target , 16 ) ;
c . lsr ( call_target , call_target , 16 ) ;
2025-01-10 16:34:24 +01:00
2025-04-05 21:50:45 +02:00
// Execute LLE call
c . br ( call_target ) ;
2025-01-10 16:34:24 +01:00
# endif
2025-04-05 21:50:45 +02:00
} ,
runtime . get ( ) , true ) ;
2025-01-10 16:34:24 +01:00
2025-01-22 10:15:55 +01:00
// Full sample may exist already, but is very far away
// So in this case, a new sample is written
ensure ( code_size_until_jump ! = umax ) ;
* full_sample = reinterpret_cast < u64 > ( func ) + code_size_until_jump ;
2025-01-10 16:34:24 +01:00
code_ptr = reinterpret_cast < u64 > ( func ) ;
return code_ptr ;
} ;
if ( has_mfvscr & & g_cfg . core . ppu_set_sat_bit )
{
info . attr + = ppu_attr : : has_mfvscr ;
}
while ( ! jit_mod . init & & fpos < info . get_funcs ( ) . size ( ) )
{
// Copy module information
2024-12-22 19:59:48 +01:00
ppu_module < lv2_obj > part ;
2017-10-10 15:40:46 +02:00
part . copy_part ( info ) ;
2017-06-22 23:52:09 +02:00
2017-07-01 01:08:51 +02:00
// Overall block size in bytes
2020-12-18 08:39:54 +01:00
usz bsize = 0 ;
2021-01-19 18:40:15 +01:00
usz bcount = 0 ;
2017-07-01 01:08:51 +02:00
2025-01-10 16:34:24 +01:00
while ( fpos < info . get_funcs ( ) . size ( ) )
2017-06-22 23:52:09 +02:00
{
2025-01-10 16:34:24 +01:00
auto & func = info . get_funcs ( ) [ fpos ] ;
2017-06-22 23:52:09 +02:00
2021-01-19 18:40:15 +01:00
if ( ! func . size )
{
fpos + + ;
continue ;
}
2019-01-13 18:06:30 +01:00
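// Close the current fragment only once it already holds at least 1000 functions and adding the next one would push it past ~100 KiB of guest code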
if ( bsize + func . size > 100 * 1024 & & bsize )
2017-06-22 23:52:09 +02:00
{
2021-01-19 18:40:15 +01:00
if ( bcount > = 1000 )
{
break ;
}
2017-06-22 23:52:09 +02:00
}
2022-12-09 19:06:50 +01:00
if ( g_fxo - > is_init < ppu_far_jumps_t > ( ) )
2022-08-17 15:53:05 +02:00
{
2022-12-09 19:06:50 +01:00
auto targets = g_fxo - > get < ppu_far_jumps_t > ( ) . get_targets ( func . addr , func . size ) ;
for ( auto [ source , target ] : targets )
{
auto far_jump = ensure ( g_fxo - > get < ppu_far_jumps_t > ( ) . gen_jump ( source ) ) ;
2025-01-10 16:34:24 +01:00
if ( source = = func . addr )
2022-12-09 19:06:50 +01:00
{
2025-01-27 16:09:42 +01:00
( * shared_map ) [ func . addr ] = reinterpret_cast < u64 > ( far_jump ) ;
2022-12-09 19:06:50 +01:00
}
ppu_register_function_at ( source , 4 , far_jump ) ;
}
2022-08-17 15:53:05 +02:00
2022-12-09 19:06:50 +01:00
if ( ! targets . empty ( ) )
2022-08-17 15:53:05 +02:00
{
// Replace the function with ppu_far_jump
fpos + + ;
continue ;
}
}
2025-01-10 16:34:24 +01:00
local_jit_bounds - > first = std : : min < u32 > ( local_jit_bounds - > first , func . addr ) ;
local_jit_bounds - > second = std : : max < u32 > ( local_jit_bounds - > second , func . addr + func . size ) ;
2021-01-19 18:40:15 +01:00
2025-01-10 16:34:24 +01:00
part . local_bounds . first = std : : min < u32 > ( part . local_bounds . first , func . addr ) ;
part . local_bounds . second = std : : max < u32 > ( part . local_bounds . second , func . addr + func . size ) ;
2021-01-19 18:40:15 +01:00
bsize + = func . size ;
2017-06-22 23:52:09 +02:00
fpos + + ;
2021-01-19 18:40:15 +01:00
bcount + + ;
2017-06-22 23:52:09 +02:00
}
2019-01-13 18:06:30 +01:00
// Compute module hash to generate (hopefully) unique object name
std : : string obj_name ;
2017-06-24 17:36:49 +02:00
{
sha1_context ctx ;
u8 output [ 20 ] ;
sha1_starts ( & ctx ) ;
2017-06-22 23:52:09 +02:00
2020-09-25 16:29:25 +02:00
int has_dcbz = ! ! g_cfg . core . accurate_cache_line_stores ;
2025-01-10 16:34:24 +01:00
for ( const auto & func : part . get_funcs ( ) )
2017-06-24 17:36:49 +02:00
{
if ( func . size = = 0 )
{
continue ;
}
2017-06-22 23:52:09 +02:00
2017-07-01 01:08:51 +02:00
const be_t < u32 > addr = func . addr - reloc ;
2017-06-24 17:36:49 +02:00
const be_t < u32 > size = func . size ;
sha1_update ( & ctx , reinterpret_cast < const u8 * > ( & addr ) , sizeof ( addr ) ) ;
sha1_update ( & ctx , reinterpret_cast < const u8 * > ( & size ) , sizeof ( size ) ) ;
2017-06-22 23:52:09 +02:00
2025-01-10 16:34:24 +01:00
for ( const auto block : func )
2017-06-24 17:36:49 +02:00
{
2017-07-01 01:08:51 +02:00
if ( block . second = = 0 | | reloc )
2017-06-24 17:36:49 +02:00
{
continue ;
}
2017-06-22 23:52:09 +02:00
2018-03-17 18:41:35 +01:00
// Find relevant relocations
auto low = std : : lower_bound ( part . relocs . cbegin ( ) , part . relocs . cend ( ) , block . first ) ;
auto high = std : : lower_bound ( low , part . relocs . cend ( ) , block . first + block . second ) ;
auto addr = block . first ;
for ( ; low ! = high ; + + low )
{
// Aligned relocation address
const u32 roff = low - > addr & ~ 3 ;
if ( roff > addr )
{
// Hash from addr to the beginning of the relocation
2023-06-25 14:53:42 +02:00
sha1_update ( & ctx , ensure ( info . get_ptr < const u8 > ( addr ) ) , roff - addr ) ;
2018-03-17 18:41:35 +01:00
}
// Hash relocation type instead
const be_t < u32 > type = low - > type ;
sha1_update ( & ctx , reinterpret_cast < const u8 * > ( & type ) , sizeof ( type ) ) ;
// Set the next addr
addr = roff + 4 ;
}
2020-09-25 16:29:25 +02:00
if ( has_dcbz = = 1 )
{
2023-06-25 14:53:42 +02:00
auto i_ptr = ensure ( info . get_ptr < u32 > ( addr ) ) ;
for ( u32 i = addr , end = block . second + block . first - 1 ; i < = end ; i + = 4 , i_ptr + + )
2020-09-25 16:29:25 +02:00
{
2023-06-25 14:53:42 +02:00
if ( g_ppu_itype . decode ( * i_ptr ) = = ppu_itype : : DCBZ )
2020-09-25 16:29:25 +02:00
{
has_dcbz = 2 ;
break ;
}
}
}
2018-03-17 18:41:35 +01:00
// Hash from addr to the end of the block
2023-06-25 14:53:42 +02:00
sha1_update ( & ctx , ensure ( info . get_ptr < const u8 > ( addr ) ) , block . second - ( addr - block . first ) ) ;
2017-06-24 17:36:49 +02:00
}
2017-07-01 01:08:51 +02:00
if ( reloc )
{
continue ;
}
2020-09-25 16:29:25 +02:00
if ( has_dcbz = = 1 )
{
2023-06-25 14:53:42 +02:00
auto i_ptr = ensure ( info . get_ptr < u32 > ( func . addr ) ) ;
for ( u32 i = func . addr , end = func . addr + func . size - 1 ; i < = end ; i + = 4 , i_ptr + + )
2020-09-25 16:29:25 +02:00
{
2023-06-25 14:53:42 +02:00
if ( g_ppu_itype . decode ( * i_ptr ) = = ppu_itype : : DCBZ )
2020-09-25 16:29:25 +02:00
{
has_dcbz = 2 ;
break ;
}
}
}
2023-06-25 14:53:42 +02:00
sha1_update ( & ctx , ensure ( info . get_ptr < const u8 > ( func . addr ) ) , func . size ) ;
2017-06-22 23:52:09 +02:00
}
2017-02-26 16:56:31 +01:00
2025-01-10 16:34:24 +01:00
if ( fpos > = info . get_funcs ( ) . size ( ) | | module_counter % c_moudles_per_jit = = c_moudles_per_jit - 1 )
2025-01-07 12:41:41 +01:00
{
// Hash all of the grouped function addresses to preserve the integrity of the symbol resolver function
// These can potentially change when patches are applied
// Avoid doing it for files with a single module such as most PRX
2025-01-07 18:39:37 +01:00
std : : vector < be_t < u32 > > addrs ;
2025-01-07 12:41:41 +01:00
2025-04-05 21:50:45 +02:00
constexpr auto compare = [ ] ( const ppu_function & a , u32 addr )
{
return a . addr < addr ;
} ;
2025-01-10 16:34:24 +01:00
const auto start = std : : lower_bound ( info . funcs . begin ( ) , info . funcs . end ( ) , local_jit_bounds - > first , compare ) ;
2025-04-05 21:50:45 +02:00
std : : span < const ppu_function > span_range { start , std : : lower_bound ( start , info . funcs . end ( ) , local_jit_bounds - > second , compare ) } ;
2025-01-10 16:34:24 +01:00
for ( const ppu_function & func : span_range )
2025-01-07 12:41:41 +01:00
{
2025-01-07 18:39:37 +01:00
if ( func . size = = 0 )
{
continue ;
}
addrs . emplace_back ( func . addr - reloc ) ;
2025-01-07 12:41:41 +01:00
}
2025-01-07 18:39:37 +01:00
// Hash its size too
addrs . emplace_back ( : : size32 ( addrs ) ) ;
2025-01-07 12:41:41 +01:00
2025-01-10 16:34:24 +01:00
if ( module_counter ! = 0 )
{
sha1_update ( & ctx , reinterpret_cast < const u8 * > ( addrs . data ( ) ) , addrs . size ( ) * sizeof ( be_t < u32 > ) ) ;
}
2025-04-05 21:50:45 +02:00
part . jit_bounds = std : : move ( local_jit_bounds ) ;
2025-01-10 16:34:24 +01:00
local_jit_bounds = std : : make_shared < std : : pair < u32 , u32 > > ( u32 { umax } , 0 ) ;
2025-01-07 12:41:41 +01:00
}
2019-01-13 18:06:30 +01:00
if ( false )
2017-07-22 15:39:39 +02:00
{
2017-09-25 17:52:34 +02:00
const be_t < u64 > forced_upd = 3 ;
2017-07-22 15:39:39 +02:00
sha1_update ( & ctx , reinterpret_cast < const u8 * > ( & forced_upd ) , sizeof ( forced_upd ) ) ;
}
2017-06-24 17:36:49 +02:00
sha1_finish ( & ctx , output ) ;
2019-01-13 18:06:30 +01:00
// Settings: should be populated by settings which affect codegen (TODO)
enum class ppu_settings : u32
{
2023-04-08 14:21:22 +02:00
platform_bit ,
2021-12-30 17:39:18 +01:00
accurate_dfma ,
fixup_vnan ,
2022-01-15 12:30:13 +01:00
fixup_nj_denormals ,
2020-09-25 16:29:25 +02:00
accurate_cache_line_stores ,
2020-04-07 19:29:11 +02:00
reservations_128_byte ,
2021-01-19 18:40:15 +01:00
greedy_mode ,
2021-12-30 17:39:18 +01:00
accurate_sat ,
accurate_fpcc ,
accurate_vnan ,
2022-01-15 12:30:13 +01:00
accurate_nj_mode ,
2024-03-18 15:14:45 +01:00
contains_symbol_resolver ,
2019-01-13 18:06:30 +01:00
__bitset_enum_max
} ;
be_t < bs_t < ppu_settings > > settings { } ;
2023-04-08 14:21:22 +02:00
# if !defined(_WIN32) && !defined(__APPLE__)
settings + = ppu_settings : : platform_bit ;
2019-01-13 18:06:30 +01:00
# endif
2021-12-30 17:39:18 +01:00
if ( g_cfg . core . use_accurate_dfma )
settings + = ppu_settings : : accurate_dfma ;
if ( g_cfg . core . ppu_fix_vnan )
settings + = ppu_settings : : fixup_vnan ;
2022-01-15 12:30:13 +01:00
if ( g_cfg . core . ppu_llvm_nj_fixup )
settings + = ppu_settings : : fixup_nj_denormals ;
2020-09-25 16:29:25 +02:00
if ( has_dcbz = = 2 )
settings + = ppu_settings : : accurate_cache_line_stores ;
2020-10-13 21:36:00 +02:00
if ( g_cfg . core . ppu_128_reservations_loop_max_length )
2020-04-07 19:29:11 +02:00
settings + = ppu_settings : : reservations_128_byte ;
2021-01-19 18:40:15 +01:00
if ( g_cfg . core . ppu_llvm_greedy_mode )
settings + = ppu_settings : : greedy_mode ;
2021-12-30 17:39:18 +01:00
if ( has_mfvscr & & g_cfg . core . ppu_set_sat_bit )
settings + = ppu_settings : : accurate_sat ;
if ( g_cfg . core . ppu_set_fpcc )
settings + = ppu_settings : : accurate_fpcc , fmt : : throw_exception ( " FPCC Not implemented " ) ;
if ( g_cfg . core . ppu_set_vnan )
2022-01-15 12:30:13 +01:00
settings + = ppu_settings : : accurate_vnan , settings - = ppu_settings : : fixup_vnan , fmt : : throw_exception ( " VNAN Not implemented " ) ;
if ( g_cfg . core . ppu_use_nj_bit )
settings + = ppu_settings : : accurate_nj_mode , settings - = ppu_settings : : fixup_nj_denormals , fmt : : throw_exception ( " NJ Not implemented " ) ;
2025-01-10 16:34:24 +01:00
if ( fpos > = info . get_funcs ( ) . size ( ) | | module_counter % c_moudles_per_jit = = c_moudles_per_jit - 1 )
2024-03-18 15:14:45 +01:00
settings + = ppu_settings : : contains_symbol_resolver ; // Avoid invalidating all modules for this purpose
2019-01-13 18:06:30 +01:00
// Write version, hash, CPU, settings
2023-05-24 20:22:18 +02:00
fmt : : append ( obj_name , " v6-kusa-%s-%s-%s.obj " , fmt : : base57 ( output , 16 ) , fmt : : base57 ( settings ) , jit_compiler : : cpu ( g_cfg . core . llvm_cpu ) ) ;
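// Illustrative result: "v6-kusa-<base57 code hash>-<base57 settings>-<llvm cpu>.obj"; changing any codegen-affecting setting or the target CPU therefore selects a different cached object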
2017-06-24 17:36:49 +02:00
}
2017-06-22 23:52:09 +02:00
2023-09-10 16:39:55 +02:00
if ( cpu ? cpu - > state . all_of ( cpu_flag : : exit ) : Emu . IsStopped ( ) )
2017-02-26 16:56:31 +01:00
{
2017-06-24 17:36:49 +02:00
break ;
}
2017-06-22 23:52:09 +02:00
2025-01-10 16:34:24 +01:00
module_counter + + ;
2021-01-30 23:04:07 +01:00
if ( ! check_only )
2017-07-01 01:08:51 +02:00
{
2021-01-30 23:04:07 +01:00
link_workload . emplace_back ( obj_name , false ) ;
}
2020-03-03 20:37:29 +01:00
2017-06-24 17:36:49 +02:00
// Check object file
2020-04-07 15:09:47 +02:00
if ( jit_compiler : : check ( cache_path + obj_name ) )
2017-06-24 17:36:49 +02:00
{
2025-01-10 16:34:24 +01:00
if ( ! is_being_used_in_emulation & & ! check_only )
2018-03-17 18:41:35 +01:00
{
2020-03-03 20:37:29 +01:00
ppu_log . success ( " LLVM: Module exists: %s " , obj_name ) ;
2025-01-10 16:34:24 +01:00
link_workload . pop_back ( ) ;
2021-07-29 21:31:45 +02:00
}
2017-06-24 17:36:49 +02:00
continue ;
}
2017-02-26 16:56:31 +01:00
2021-01-30 14:08:22 +01:00
if ( check_only )
{
return true ;
}
2021-01-30 23:04:07 +01:00
// Remember, used in ppu_initialize(void)
compiled_new = true ;
2020-03-03 20:37:29 +01:00
// Adjust information (is_compiled)
link_workload . back ( ) . second = true ;
// Fill workload list for compilation
workload . emplace_back ( std : : move ( obj_name ) , std : : move ( part ) ) ;
}
2021-01-30 23:04:59 +01:00
if ( check_only )
{
return false ;
}
2023-09-05 20:15:52 +02:00
if ( g_progr_ftotal_bits & & file_size )
2023-09-05 13:20:50 +02:00
{
2023-09-05 20:15:52 +02:00
g_progr_fknown_bits + = file_size ;
2023-09-05 13:20:50 +02:00
}
2023-09-09 12:28:33 +02:00
// Create worker threads for compilation
2019-12-04 23:17:57 +01:00
if ( ! workload . empty ( ) )
{
2025-01-21 10:42:50 +01:00
// Update progress dialog
2025-01-25 15:05:02 +01:00
g_progr_ptotal + = : : size32 ( workload ) ;
2025-01-21 10:42:50 +01:00
2024-11-15 03:24:03 +01:00
* progress_dialog = get_localized_string ( localized_string_id : : PROGRESS_DIALOG_COMPILING_PPU_MODULES ) ;
2019-12-04 23:17:57 +01:00
2025-01-25 15:05:02 +01:00
const u32 thread_count = std : : min ( : : size32 ( workload ) , rpcs3 : : utils : : get_max_threads ( ) ) ;
2020-03-03 20:37:29 +01:00
struct thread_index_allocator
{
atomic_t < u64 > index = 0 ;
} ;
2018-05-30 19:34:36 +02:00
2023-09-09 12:28:33 +02:00
struct thread_op
2017-06-24 17:36:49 +02:00
{
2025-04-05 21:50:45 +02:00
concurent_memory_limit & memory_limit ;
2023-09-09 12:28:33 +02:00
atomic_t < u32 > & work_cv ;
2024-12-22 19:59:48 +01:00
std : : vector < std : : pair < std : : string , ppu_module < lv2_obj > > > & workload ;
const ppu_module < lv2_obj > & main_module ;
2023-09-09 12:28:33 +02:00
const std : : string & cache_path ;
const cpu_thread * cpu ;
2017-12-19 22:01:03 +01:00
2023-09-09 12:28:33 +02:00
std : : unique_lock < decltype ( jit_core_allocator : : sem ) > core_lock ;
2025-04-05 21:50:45 +02:00
thread_op ( concurent_memory_limit & memory_limit , atomic_t < u32 > & work_cv , std : : vector < std : : pair < std : : string , ppu_module < lv2_obj > > > & workload , const cpu_thread * cpu , const ppu_module < lv2_obj > & main_module , const std : : string & cache_path , decltype ( jit_core_allocator : : sem ) & sem ) noexcept
2023-09-09 12:28:33 +02:00
2025-04-05 21:50:45 +02:00
: memory_limit ( memory_limit ) , work_cv ( work_cv ) , workload ( workload ) , main_module ( main_module ) , cache_path ( cache_path ) , cpu ( cpu )
2017-02-26 16:56:31 +01:00
{
2023-09-09 12:28:33 +02:00
// Save mutex
core_lock = std : : unique_lock { sem , std : : defer_lock } ;
}
thread_op ( const thread_op & other ) noexcept
2025-04-05 21:50:45 +02:00
: memory_limit ( other . memory_limit ) , work_cv ( other . work_cv ) , workload ( other . workload ) , main_module ( other . main_module ) , cache_path ( other . cache_path ) , cpu ( other . cpu )
2023-09-09 12:28:33 +02:00
{
if ( auto mtx = other . core_lock . mutex ( ) )
2021-01-30 23:03:20 +01:00
{
2023-09-09 12:28:33 +02:00
// Save mutex
core_lock = std : : unique_lock { * mtx , std : : defer_lock } ;
2021-01-30 23:03:20 +01:00
}
2023-09-09 12:28:33 +02:00
}
2021-01-30 23:03:20 +01:00
2023-09-09 12:28:33 +02:00
thread_op ( thread_op & & other ) noexcept = default ;
2020-03-03 20:37:29 +01:00
2023-09-09 12:28:33 +02:00
void operator ( ) ( )
{
// Set low priority
thread_ctrl : : scoped_priority low_prio ( - 1 ) ;
2017-06-24 17:36:49 +02:00
2025-04-05 21:50:45 +02:00
# ifdef __APPLE__
2023-09-09 12:28:33 +02:00
pthread_jit_write_protect_np ( false ) ;
2025-04-05 21:50:45 +02:00
# endif
2023-09-09 12:28:33 +02:00
for ( u32 i = work_cv + + ; i < workload . size ( ) ; i = work_cv + + , g_progr_pdone + + )
2022-10-03 22:20:03 +02:00
{
2023-09-09 12:28:33 +02:00
if ( cpu ? cpu - > state . all_of ( cpu_flag : : exit ) : Emu . IsStopped ( ) )
{
continue ;
}
2022-10-03 22:20:03 +02:00
2023-09-09 12:28:33 +02:00
// Keep allocating workload
const auto & [ obj_name , part ] = std : : as_const ( workload ) [ i ] ;
2020-03-03 20:37:29 +01:00
2025-03-22 02:28:00 +01:00
std : : size_t total_fn_size = 0 ;
2025-04-05 21:50:45 +02:00
for ( auto & fn : part . get_funcs ( ) )
2025-03-22 02:28:00 +01:00
{
total_fn_size + = fn . size ;
}
ppu_log . warning ( " LLVM: reporting used memory %u (free/total: %u/%u) by %s%s " , total_fn_size * 1024 * 16 , memory_limit . free_memory ( ) , memory_limit . total_memory ( ) , cache_path , obj_name ) ;
auto used_memory = memory_limit . acquire ( total_fn_size * 1024 * 16 ) ;
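// Rough compilation budget: assume up to 16 KiB of host memory per byte of guest code in this fragment; the returned handle presumably releases the reservation when it goes out of scope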
2025-01-10 16:34:24 +01:00
std : : shared_lock rlock ( g_fxo - > get < jit_core_allocator > ( ) . shared_mtx , std : : defer_lock ) ;
std : : unique_lock lock ( g_fxo - > get < jit_core_allocator > ( ) . shared_mtx , std : : defer_lock ) ;
2025-01-21 11:28:23 +01:00
if ( false & & part . jit_bounds & & part . parent - > funcs . size ( ) > = 0x8000 )
2025-01-10 16:34:24 +01:00
{
// Make a large symbol-resolving function compile alone because it has massive memory requirements
lock . lock ( ) ;
}
else
{
rlock . lock ( ) ;
}
2023-09-09 12:28:33 +02:00
ppu_log . warning ( " LLVM: Compiling module %s%s " , cache_path , obj_name ) ;
2017-06-22 23:52:09 +02:00
2025-01-10 16:34:24 +01:00
{
// Use another JIT instance
jit_compiler jit2 ( { } , g_cfg . core . llvm_cpu , 0x1 ) ;
ppu_initialize2 ( jit2 , part , cache_path , obj_name ) ;
}
2023-09-09 12:28:33 +02:00
ppu_log . success ( " LLVM: Compiled module %s " , obj_name ) ;
}
core_lock . unlock ( ) ;
2017-02-26 16:56:31 +01:00
}
2023-09-09 12:28:33 +02:00
} ;
// Prevent watchdog thread from terminating
g_watchdog_hold_ctr + + ;
2025-04-05 21:50:45 +02:00
named_thread_group threads ( fmt : : format ( " PPUW.%u. " , + + g_fxo - > get < thread_index_allocator > ( ) . index ) , thread_count , thread_op ( memory_limit , work_cv , workload , cpu , info , cache_path , g_fxo - > get < jit_core_allocator > ( ) . sem ) , [ & ] ( u32 /*thread_index*/ , thread_op & op )
{
// Allocate "core"
op . core_lock . lock ( ) ;
2023-09-09 12:28:33 +02:00
2025-04-05 21:50:45 +02:00
// Second check before creating another thread
return work_cv < workload . size ( ) & & ( cpu ? ! cpu - > state . all_of ( cpu_flag : : exit ) : ! Emu . IsStopped ( ) ) ;
} ) ;
2020-03-03 20:37:29 +01:00
threads . join ( ) ;
2020-06-01 01:27:33 +02:00
g_watchdog_hold_ctr - - ;
2023-09-09 12:28:33 +02:00
}
2020-06-01 01:27:33 +02:00
2025-01-10 16:34:24 +01:00
// Initialize compiler instance
while ( jits . size ( ) < utils : : aligned_div < u64 > ( module_counter , c_moudles_per_jit ) & & is_being_used_in_emulation )
{
jits . emplace_back ( std : : make_shared < jit_compiler > ( s_link_table , g_cfg . core . llvm_cpu , 0 , symbols_cement ) ) ;
2025-01-27 16:09:42 +01:00
for ( const auto & [ addr , func ] : * shared_map )
{
jits . back ( ) - > update_global_mapping ( fmt : : format ( " __0x%x " , addr - reloc ) , func ) ;
}
2025-01-10 16:34:24 +01:00
}
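// One "__resolve_symbols" entry point is tracked per JIT instance; they are invoked further below to patch the exec table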
if ( jit_mod . symbol_resolvers . empty ( ) & & is_being_used_in_emulation )
{
jit_mod . symbol_resolvers . resize ( jits . size ( ) ) ;
}
2024-05-29 17:16:50 +02:00
bool failed_to_load = false ;
2023-09-09 12:28:33 +02:00
{
2023-07-14 16:05:27 +02:00
if ( ! is_being_used_in_emulation | | ( cpu ? cpu - > state . all_of ( cpu_flag : : exit ) : Emu . IsStopped ( ) ) )
2020-03-03 20:37:29 +01:00
{
2021-01-30 14:08:22 +01:00
return compiled_new ;
2020-03-03 20:37:29 +01:00
}
2025-01-21 10:42:50 +01:00
* progress_dialog = get_localized_string ( localized_string_id : : PROGRESS_DIALOG_LINKING_PPU_MODULES ) ;
// Because linking is faster than compiling, count each group of module linkages as a single module compilation for progress purposes
const bool divide_by_twenty = ! workload . empty ( ) ;
const usz increment_link_count_at = ( divide_by_twenty ? 20 : 1 ) ;
2025-01-25 15:05:02 +01:00
g_progr_ptotal + = static_cast < u32 > ( utils : : aligned_div < u64 > ( link_workload . size ( ) , increment_link_count_at ) ) ;
2019-12-04 23:17:57 +01:00
2025-01-10 16:34:24 +01:00
usz mod_index = umax ;
2023-08-22 23:31:08 +02:00
for ( const auto & [ obj_name , is_compiled ] : link_workload )
2020-03-03 20:37:29 +01:00
{
2025-01-10 16:34:24 +01:00
mod_index + + ;
2023-07-14 16:05:27 +02:00
if ( cpu ? cpu - > state . all_of ( cpu_flag : : exit ) : Emu . IsStopped ( ) )
2017-06-25 14:16:07 +02:00
{
2020-03-03 20:37:29 +01:00
break ;
2017-06-25 14:16:07 +02:00
}
2025-01-10 16:34:24 +01:00
if ( ! failed_to_load & & ! jits [ mod_index / c_moudles_per_jit ] - > add ( cache_path + obj_name ) )
2024-05-29 17:16:50 +02:00
{
ppu_log . error ( " LLVM: Failed to load module %s " , obj_name ) ;
failed_to_load = true ;
}
2025-01-21 10:42:50 +01:00
if ( mod_index % increment_link_count_at = = ( link_workload . size ( ) - 1 ) % increment_link_count_at )
2024-05-29 17:16:50 +02:00
{
2025-01-21 10:42:50 +01:00
// Increment 'pdone' N times in total, where N is the link workload size ceil-divided by increment_link_count_at
g_progr_pdone + + ;
}
2024-05-29 17:16:50 +02:00
2025-01-21 10:42:50 +01:00
if ( failed_to_load )
{
2024-05-29 17:16:50 +02:00
continue ;
}
2018-12-31 19:25:19 +01:00
2020-03-03 20:37:29 +01:00
if ( ! is_compiled )
{
ppu_log . success ( " LLVM: Loaded module %s " , obj_name ) ;
}
}
2016-06-07 22:24:20 +02:00
}
2024-05-29 17:16:50 +02:00
if ( failed_to_load | | ! is_being_used_in_emulation | | ( cpu ? cpu - > state . all_of ( cpu_flag : : exit ) : Emu . IsStopped ( ) ) )
2017-06-25 14:16:07 +02:00
{
2021-01-30 14:08:22 +01:00
return compiled_new ;
2017-06-25 14:16:07 +02:00
}
2017-09-10 06:04:29 +02:00
// Jit can be null if the loop doesn't ever enter.
2022-06-14 14:28:38 +02:00
# ifdef __APPLE__
pthread_jit_write_protect_np ( false ) ;
# endif
2023-09-04 21:20:23 +02:00
// Try to patch all single and unregistered BLRs with the same function (TODO: Maybe generalize it into PIC code detection and patching)
ppu_intrp_func_t BLR_func = nullptr ;
2024-11-15 12:25:37 +01:00
const bool showing_only_apply_stage = ! g_progr_text . operator bool ( ) & & ! g_progr_ptotal & & ! g_progr_ftotal & & g_progr_ptotal . compare_and_swap_test ( 0 , 1 ) ;
2024-03-18 15:14:45 +01:00
2024-11-15 03:24:03 +01:00
progress_dialog = get_localized_string ( localized_string_id : : PROGRESS_DIALOG_APPLYING_PPU_CODE ) ;
2024-03-18 15:14:45 +01:00
2025-01-10 16:34:24 +01:00
if ( jits . empty ( ) )
2024-04-18 14:44:42 +02:00
{
// No functions - nothing to do
2025-01-10 16:34:24 +01:00
ensure ( info . get_funcs ( ) . empty ( ) ) ;
2024-04-18 14:44:42 +02:00
return compiled_new ;
}
const bool is_first = ! jit_mod . init ;
2023-11-28 18:41:14 +01:00
if ( is_first )
2017-12-19 22:01:03 +01:00
{
2025-01-10 16:34:24 +01:00
for ( auto & jit : jits )
{
jit - > fin ( ) ;
}
2023-11-28 18:41:14 +01:00
}
2017-07-10 21:22:54 +02:00
2024-08-24 03:55:51 +02:00
# ifdef __APPLE__
// Symbol resolver is in JIT mem, so we must enable execution
pthread_jit_write_protect_np ( true ) ;
# endif
2025-01-10 16:34:24 +01:00
{
usz index = umax ;
2024-08-24 03:55:51 +02:00
2025-01-10 16:34:24 +01:00
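// Run each module group's generated "__resolve_symbols" function (see GetSymbolResolver in ppu_initialize2): it writes the compiled function pointers for its range into the exec table, using segment 0 as the base address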
for ( auto & sim : jit_mod . symbol_resolvers )
{
index + + ;
2025-04-05 21:50:45 +02:00
sim = ensure ( ! is_first ? sim : reinterpret_cast < void ( * ) ( u8 * , u64 ) > ( jits [ index ] - > get ( " __resolve_symbols " ) ) ) ;
2025-01-10 16:34:24 +01:00
sim ( vm : : g_exec_addr , info . segs [ 0 ] . addr ) ;
}
}
2023-09-04 21:20:23 +02:00
2024-08-24 03:55:51 +02:00
# ifdef __APPLE__
// Restore write access to JIT memory now that the symbol resolvers have run
pthread_jit_write_protect_np ( false ) ;
# endif
2024-03-18 15:14:45 +01:00
// Find a BLR-only function in order to copy it to all BLRs (some games need it)
2025-01-10 16:34:24 +01:00
for ( const auto & func : info . get_funcs ( ) )
2023-11-28 18:41:14 +01:00
{
2024-03-18 15:14:45 +01:00
if ( func . size = = 4 & & * info . get_ptr < u32 > ( func . addr ) = = ppu_instructions : : BLR ( ) )
2023-11-28 18:41:14 +01:00
{
2024-03-21 14:56:31 +01:00
BLR_func = ppu_read ( func . addr ) ;
2023-11-28 18:41:14 +01:00
break ;
2017-07-10 21:22:54 +02:00
}
2023-11-28 18:41:14 +01:00
}
2017-07-10 21:22:54 +02:00
2024-03-18 15:14:45 +01:00
if ( is_first )
2023-11-28 18:41:14 +01:00
{
jit_mod . init = true ;
2017-06-29 16:25:39 +02:00
}
2021-01-30 14:08:22 +01:00
2023-09-04 21:20:23 +02:00
if ( BLR_func )
{
auto inst_ptr = info . get_ptr < u32 > ( info . segs [ 0 ] . addr ) ;
for ( u32 addr = info . segs [ 0 ] . addr ; addr < info . segs [ 0 ] . addr + info . segs [ 0 ] . size ; addr + = 4 , inst_ptr + + )
{
2024-03-21 14:56:31 +01:00
if ( * inst_ptr = = ppu_instructions : : BLR ( ) & & ( reinterpret_cast < uptr > ( ppu_read ( addr ) ) < < 16 > > 16 ) = = reinterpret_cast < uptr > ( ppu_recompiler_fallback_ghc ) )
2023-09-04 21:20:23 +02:00
{
2024-03-21 14:56:31 +01:00
write_to_ptr < ppu_intrp_func_t > ( ppu_ptr ( addr ) , BLR_func ) ;
2023-09-04 21:20:23 +02:00
}
}
}
2024-03-18 15:14:45 +01:00
if ( showing_only_apply_stage )
{
// Done
g_progr_pdone + + ;
}
2021-01-30 14:08:22 +01:00
return compiled_new ;
2017-07-23 09:54:00 +02:00
# else
fmt : : throw_exception ( " LLVM is not available in this build. " ) ;
2017-06-24 21:01:27 +02:00
# endif
2017-06-24 17:36:49 +02:00
}
2025-03-22 02:28:00 +01:00
bool ppu_initialize ( const ppu_module < lv2_obj > & info , bool check_only , u64 file_size )
{
concurent_memory_limit memory_limit ( utils : : aligned_div < u64 > ( utils : : get_total_memory ( ) , 2 ) ) ;
return ppu_initialize ( info , check_only , file_size , memory_limit ) ;
}
2025-01-10 16:34:24 +01:00
static void ppu_initialize2 ( jit_compiler & jit , const ppu_module < lv2_obj > & module_part , const std : : string & cache_path , const std : : string & obj_name )
2017-06-24 17:36:49 +02:00
{
2017-06-24 21:01:27 +02:00
# ifdef LLVM_AVAILABLE
2017-06-24 17:36:49 +02:00
using namespace llvm ;
2017-06-22 23:52:09 +02:00
// Create LLVM module
2020-05-06 17:18:30 +02:00
std : : unique_ptr < Module > _module = std : : make_unique < Module > ( obj_name , jit . get_context ( ) ) ;
2017-06-22 23:52:09 +02:00
// Initialize target
2023-04-08 14:21:22 +02:00
_module - > setTargetTriple ( jit_compiler : : triple1 ( ) ) ;
2020-05-06 17:18:30 +02:00
_module - > setDataLayout ( jit . get_engine ( ) . getTargetMachine ( ) - > createDataLayout ( ) ) ;
2017-12-19 22:01:03 +01:00
2017-06-22 23:52:09 +02:00
// Initialize translator
2020-05-06 17:18:30 +02:00
PPUTranslator translator ( jit . get_context ( ) , _module . get ( ) , module_part , jit . get_engine ( ) ) ;
2017-06-22 23:52:09 +02:00
// Define some types
2021-01-31 19:38:47 +01:00
const auto _func = FunctionType : : get ( translator . get_type < void > ( ) , {
2025-04-05 21:50:45 +02:00
translator . get_type < u8 * > ( ) , // Exec base
translator . GetContextType ( ) - > getPointerTo ( ) , // PPU context
translator . get_type < u64 > ( ) , // Segment address (for PRX)
translator . get_type < u8 * > ( ) , // Memory base
translator . get_type < u64 > ( ) , // r0
translator . get_type < u64 > ( ) , // r1
translator . get_type < u64 > ( ) , // r2
} ,
false ) ;
2017-06-22 23:52:09 +02:00
2025-01-10 16:34:24 +01:00
// Difference between function name and current location
2025-01-31 13:09:22 +01:00
const u32 reloc = module_part . is_relocatable ? : : at32 ( module_part . segs , 0 ) . addr : 0 ;
2025-01-10 16:34:24 +01:00
2017-06-22 23:52:09 +02:00
// Initialize function list
2025-01-10 16:34:24 +01:00
for ( const auto & func : module_part . get_funcs ( ) )
2017-01-22 20:03:57 +01:00
{
2017-06-22 23:52:09 +02:00
if ( func . size )
2017-01-22 20:03:57 +01:00
{
2025-01-15 18:49:56 +01:00
const auto f = cast < Function > ( _module - > getOrInsertFunction ( fmt : : format ( " __0x%x " , func . addr - reloc ) , _func ) . getCallee ( ) ) ;
2021-01-31 19:38:47 +01:00
f - > setCallingConv ( CallingConv : : GHC ) ;
2023-03-10 23:57:21 +01:00
f - > addParamAttr ( 1 , llvm : : Attribute : : NoAlias ) ;
2021-01-19 18:40:15 +01:00
f - > addFnAttr ( Attribute : : NoUnwind ) ;
2017-06-22 23:52:09 +02:00
}
}
2017-01-22 20:03:57 +01:00
2016-06-07 22:24:20 +02:00
{
2023-03-11 20:08:27 +01:00
if ( g_cfg . core . ppu_debug )
{
translator . build_interpreter ( ) ;
}
2025-03-19 23:15:46 +01:00
# ifdef ARCH_X64
2023-11-29 19:53:38 +01:00
// Create the analysis managers.
// These must be declared in this order so that they are destroyed in the
// correct order due to inter-analysis-manager references.
LoopAnalysisManager lam ;
FunctionAnalysisManager fam ;
CGSCCAnalysisManager cgam ;
ModuleAnalysisManager mam ;
// Create the new pass manager builder.
// Take a look at the PassBuilder constructor parameters for more
// customization, e.g. specifying a TargetMachine or various debugging
// options.
PassBuilder pb ;
// Register all the basic analyses with the managers.
pb . registerModuleAnalyses ( mam ) ;
pb . registerCGSCCAnalyses ( cgam ) ;
pb . registerFunctionAnalyses ( fam ) ;
pb . registerLoopAnalyses ( lam ) ;
pb . crossRegisterProxies ( lam , fam , cgam , mam ) ;
FunctionPassManager fpm ;
// Basic optimizations
fpm . addPass ( EarlyCSEPass ( ) ) ;
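// Only EarlyCSE is run at the IR level here; heavier optimization is presumably left to the JIT engine's codegen when the module is added below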
2025-03-19 23:15:46 +01:00
# endif
2025-01-10 16:34:24 +01:00
u32 guest_code_size = 0 ;
u32 min_addr = umax ;
u32 max_addr = 0 ;
u32 num_func = 0 ;
2017-06-22 23:52:09 +02:00
// Translate functions
2025-01-10 16:34:24 +01:00
// Start with the lowest bound of the module, function list is sorted
for ( const auto & mod_func : module_part . get_funcs ( ) )
2016-06-07 22:24:20 +02:00
{
2017-06-22 23:52:09 +02:00
if ( Emu . IsStopped ( ) )
2017-01-22 20:03:57 +01:00
{
2020-02-01 09:31:27 +01:00
ppu_log . success ( " LLVM: Translation cancelled " ) ;
2017-06-22 23:52:09 +02:00
return ;
}
2017-01-22 20:03:57 +01:00
2025-01-10 16:34:24 +01:00
if ( mod_func . size )
2017-06-22 23:52:09 +02:00
{
2025-01-10 16:34:24 +01:00
num_func + + ;
guest_code_size + = mod_func . size ;
max_addr = std : : max < u32 > ( max_addr , mod_func . addr + mod_func . size ) ;
min_addr = std : : min < u32 > ( min_addr , mod_func . addr ) ;
2017-06-22 23:52:09 +02:00
// Translate
2025-03-31 09:44:38 +02:00
if ( [[maybe_unused]] const auto func = translator . Translate ( mod_func ) )
2017-07-01 01:08:51 +02:00
{
2024-08-04 04:09:06 +02:00
# ifdef ARCH_X64 // TODO
2025-04-05 21:50:45 +02:00
// Run optimization passes
2023-11-29 19:53:38 +01:00
fpm . run ( * func , fam ) ;
2024-08-04 04:09:06 +02:00
# endif // ARCH_X64
2017-07-01 01:08:51 +02:00
}
else
{
Emu . Pause ( ) ;
return ;
2017-12-19 22:01:03 +01:00
}
2016-06-25 07:16:15 +02:00
}
2016-06-07 22:24:20 +02:00
}
2025-01-10 16:34:24 +01:00
// Generate the symbol resolver in only one module per JIT group (the one that carries jit_bounds for all compiled functions)
if ( module_part . jit_bounds )
2024-03-18 15:14:45 +01:00
{
2025-03-31 09:44:38 +02:00
if ( [[maybe_unused]] const auto func = translator . GetSymbolResolver ( module_part ) )
2024-03-18 15:14:45 +01:00
{
2024-08-04 04:09:06 +02:00
# ifdef ARCH_X64 // TODO
2025-04-05 21:50:45 +02:00
// Run optimization passes
2023-11-29 19:53:38 +01:00
fpm . run ( * func , fam ) ;
2024-08-04 04:09:06 +02:00
# endif // ARCH_X64
2024-03-18 15:14:45 +01:00
}
else
{
Emu . Pause ( ) ;
return ;
}
}
2025-04-05 21:50:45 +02:00
// legacy::PassManager mpm;
2016-06-07 22:24:20 +02:00
2017-06-22 23:52:09 +02:00
// Remove unused functions, structs, global variables, etc
2025-04-05 21:50:45 +02:00
// mpm.add(createStripDeadPrototypesPass());
// mpm.add(createFunctionInliningPass());
// mpm.add(createDeadInstEliminationPass());
// mpm.run(*module);
2025-03-19 23:15:46 +01:00
# ifndef ANDROID
2017-06-22 23:52:09 +02:00
std : : string result ;
raw_string_ostream out ( result ) ;
2016-06-07 22:24:20 +02:00
2017-06-22 23:52:09 +02:00
if ( g_cfg . core . llvm_logs )
{
2020-05-06 17:18:30 +02:00
out < < * _module ; // print IR
2024-06-10 10:49:00 +02:00
fs : : write_file ( cache_path + obj_name + " .log " , fs : : rewrite , out . str ( ) ) ;
2017-06-22 23:52:09 +02:00
result . clear ( ) ;
}
2016-06-07 22:24:20 +02:00
2020-05-06 17:18:30 +02:00
if ( verifyModule ( * _module , & out ) )
2016-06-07 22:24:20 +02:00
{
2017-06-22 23:52:09 +02:00
out . flush ( ) ;
2020-02-01 09:31:27 +01:00
ppu_log . error ( " LLVM: Verification failed for %s: \n %s " , obj_name , result ) ;
2025-04-05 21:50:45 +02:00
Emu . CallFromMainThread ( [ ]
{
Emu . GracefulShutdown ( false , true ) ;
} ) ;
2017-06-22 23:52:09 +02:00
return ;
2016-06-07 22:24:20 +02:00
}
2025-03-19 23:15:46 +01:00
# endif
2025-01-10 16:34:24 +01:00
ppu_log . notice ( " LLVM: %zu functions generated (code_size=0x%x, num_func=%d, max_addr(-)min_addr=0x%x) " , _module - > getFunctionList ( ) . size ( ) , guest_code_size , num_func , max_addr - min_addr ) ;
2016-06-07 22:24:20 +02:00
}
2017-06-24 17:36:49 +02:00
// Load or compile module
2020-05-06 17:18:30 +02:00
jit . add ( std : : move ( _module ) , cache_path ) ;
2017-06-24 17:36:49 +02:00
# endif // LLVM_AVAILABLE
2016-06-22 15:37:51 +02:00
}