#include "stdafx.h"
#include "Utilities/JIT.h"
#include "Utilities/StrUtil.h"
#include "util/serialization.hpp"
#include "Crypto/sha1.h"
#include "Crypto/unself.h"
#include "Loader/ELF.h"
#include "Loader/mself.hpp"
#include "Emu/perf_meter.hpp"
#include "Emu/Memory/vm_reservation.h"
#include "Emu/Memory/vm_locking.h"
#include "Emu/RSX/Core/RSXReservationLock.hpp"
#include "Emu/VFS.h"
#include "Emu/vfs_config.h"
#include "Emu/system_progress.hpp"
#include "Emu/system_utils.hpp"
#include "PPUThread.h"
#include "PPUInterpreter.h"
#include "PPUAnalyser.h"
#include "PPUModule.h"
#include "PPUDisAsm.h"
#include "SPURecompiler.h"
#include "timers.hpp"
#include "lv2/sys_sync.h"
#include "lv2/sys_prx.h"
#include "lv2/sys_overlay.h"
#include "lv2/sys_process.h"
#include "lv2/sys_spu.h"

#ifdef LLVM_AVAILABLE
#ifdef _MSC_VER
#pragma warning(push, 0)
#else
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wmissing-noreturn"
#endif
#include <llvm/IR/Verifier.h>
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
#if LLVM_VERSION_MAJOR < 17
#include <llvm/Support/FormattedStream.h>
#include <llvm/TargetParser/Host.h>
#include <llvm/Object/ObjectFile.h>
#include <llvm/IR/InstIterator.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/Transforms/Scalar.h>
#else
#include <llvm/Analysis/CGSCCPassManager.h>
#include <llvm/Analysis/LoopAnalysisManager.h>
#include <llvm/Passes/PassBuilder.h>
#include <llvm/Transforms/Scalar/EarlyCSE.h>
#endif
#ifdef _MSC_VER
#pragma warning(pop)
#else
#pragma GCC diagnostic pop
#endif
#include "PPUTranslator.h"
#endif

#include <cfenv>
#include <cctype>
#include <span>
#include <optional>

#include "util/asm.hpp"
#include "util/vm.hpp"
#include "util/v128.hpp"
#include "util/simd.hpp"
#include "util/sysinfo.hpp"

#ifdef __APPLE__
#include <libkern/OSCacheControl.h>
#endif

extern atomic_t<u64> g_watchdog_hold_ctr;

// Should be of the same type
using spu_rdata_t = decltype(ppu_thread::rdata);

extern void mov_rdata(spu_rdata_t& _dst, const spu_rdata_t& _src);
extern void mov_rdata_nt(spu_rdata_t& _dst, const spu_rdata_t& _src);
extern bool cmp_rdata(const spu_rdata_t& _lhs, const spu_rdata_t& _rhs);

// Verify AVX availability for TSX transactions
static const bool s_tsx_avx = utils::has_avx();

template <>
void fmt_class_string<ppu_join_status>::format(std::string& out, u64 arg)
{
	format_enum(out, arg, [](ppu_join_status js)
	{
		switch (js)
		{
		case ppu_join_status::joinable: return "none";
		case ppu_join_status::detached: return "detached";
		case ppu_join_status::zombie: return "zombie";
		case ppu_join_status::exited: return "exited";
		case ppu_join_status::max: break;
		}

		return unknown;
	});
}

template <>
void fmt_class_string<ppu_thread_status>::format(std::string& out, u64 arg)
{
	format_enum(out, arg, [](ppu_thread_status s)
	{
		switch (s)
		{
		case PPU_THREAD_STATUS_IDLE: return "IDLE";
		case PPU_THREAD_STATUS_RUNNABLE: return "RUN";
		case PPU_THREAD_STATUS_ONPROC: return "ONPROC";
		case PPU_THREAD_STATUS_SLEEP: return "SLEEP";
		case PPU_THREAD_STATUS_STOP: return "STOP";
		case PPU_THREAD_STATUS_ZOMBIE: return "Zombie";
		case PPU_THREAD_STATUS_DELETED: return "Deleted";
		case PPU_THREAD_STATUS_UNKNOWN: break;
		}

		return unknown;
	});
}

template <>
void fmt_class_string<typename ppu_thread::call_history_t>::format(std::string& out, u64 arg)
{
	const auto& history = get_object(arg);

	PPUDisAsm dis_asm(cpu_disasm_mode::normal, vm::g_sudo_addr);

	for (u64 count = 0, idx = history.index - 1; idx != umax && count < history.data.size(); count++, idx--)
	{
		const u32 pc = history.data[idx % history.data.size()];
		dis_asm.disasm(pc);
		fmt::append(out, "\n(%u) 0x%08x: %s", count, pc, dis_asm.last_opcode);
	}
}

template <>
void fmt_class_string<typename ppu_thread::syscall_history_t>::format(std::string& out, u64 arg)
{
	const auto& history = get_object(arg);

	for (u64 count = 0, idx = history.index - 1; idx != umax && count < history.data.size(); count++, idx--)
	{
		const auto& entry = history.data[idx % history.data.size()];
		fmt::append(out, "\n(%u) 0x%08x: %s, 0x%x, r3=0x%x, r4=0x%x, r5=0x%x, r6=0x%x", count, entry.cia, entry.func_name, entry.error, entry.args[0], entry.args[1], entry.args[2], entry.args[3]);
	}
}

extern const ppu_decoder<ppu_itype> g_ppu_itype{};
extern const ppu_decoder<ppu_iname> g_ppu_iname{};

template <>
bool serialize<ppu_thread::cr_bits>(utils::serial& ar, typename ppu_thread::cr_bits& o)
{
	if (ar.is_writing())
	{
		ar(o.pack());
	}
	else
	{
		o.unpack(ar);
	}

	return true;
}

extern void ppu_initialize();
extern void ppu_finalize(const ppu_module& info, bool force_mem_release = false);
extern bool ppu_initialize(const ppu_module& info, bool check_only = false, u64 file_size = 0);
static void ppu_initialize2(class jit_compiler& jit, const ppu_module& module_part, const std::string& cache_path, const std::string& obj_name, const ppu_module& whole_module);
extern bool ppu_load_exec(const ppu_exec_object&, bool virtual_load, const std::string&, utils::serial* = nullptr);
extern std::pair<std::shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_exec_object&, bool virtual_load, const std::string& path, s64 file_offset, utils::serial* = nullptr);
extern void ppu_unload_prx(const lv2_prx&);
extern std::shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object&, bool virtual_load, const std::string&, s64 file_offset, utils::serial* = nullptr);
extern void ppu_execute_syscall(ppu_thread& ppu, u64 code);
static void ppu_break(ppu_thread&, ppu_opcode_t, be_t<u32>*, ppu_intrp_func*);

extern void do_cell_atomic_128_store(u32 addr, const void* to_write);

const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway", [](native_asm& c, auto& args)
{
	// Gateway for PPU, converts from native to GHC calling convention, also saves RSP value for escape
	using namespace asmjit;
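
	// Note (informational, inferred rather than documented here): the register roles
	// set up below appear to follow LLVM's GHC calling convention on x86-64
	// (Base=r13, Sp=rbp, Hp=r12, R1=rbx, R2=r14, R3=rsi, R4=rdi), which is why the
	// thread context travels in rbp rather than in the usual argument register.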
#if defined(ARCH_X64)
#ifdef _WIN32
	c.push(x86::r15);
	c.push(x86::r14);
	c.push(x86::r13);
	c.push(x86::r12);
	c.push(x86::rsi);
	c.push(x86::rdi);
	c.push(x86::rbp);
	c.push(x86::rbx);
	c.sub(x86::rsp, 0xa8);
	c.movaps(x86::oword_ptr(x86::rsp, 0x90), x86::xmm15);
	c.movaps(x86::oword_ptr(x86::rsp, 0x80), x86::xmm14);
	c.movaps(x86::oword_ptr(x86::rsp, 0x70), x86::xmm13);
	c.movaps(x86::oword_ptr(x86::rsp, 0x60), x86::xmm12);
	c.movaps(x86::oword_ptr(x86::rsp, 0x50), x86::xmm11);
	c.movaps(x86::oword_ptr(x86::rsp, 0x40), x86::xmm10);
	c.movaps(x86::oword_ptr(x86::rsp, 0x30), x86::xmm9);
	c.movaps(x86::oword_ptr(x86::rsp, 0x20), x86::xmm8);
	c.movaps(x86::oword_ptr(x86::rsp, 0x10), x86::xmm7);
	c.movaps(x86::oword_ptr(x86::rsp, 0), x86::xmm6);
#else
	c.push(x86::rbp);
	c.push(x86::r15);
	c.push(x86::r14);
	c.push(x86::r13);
	c.push(x86::r12);
	c.push(x86::rbx);
	c.push(x86::rax);
#endif

	// Save native stack pointer for longjmp emulation
	c.mov(x86::qword_ptr(args[0], ::offset32(&ppu_thread::saved_native_sp)), x86::rsp);

	// Initialize args
	c.mov(x86::r13, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_exec_addr)));
	c.mov(x86::rbp, args[0]);
	c.mov(x86::edx, x86::dword_ptr(x86::rbp, ::offset32(&ppu_thread::cia))); // Load PC

	c.mov(x86::rax, x86::qword_ptr(x86::r13, x86::edx, 1, 0)); // Load call target
	c.mov(x86::rdx, x86::rax);
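
	// Each exec-cache entry packs the host code pointer in its low 48 bits and
	// (segment base >> 13) in its high 16 bits; the shift pairs below split the
	// entry into the call target (rax) and the relocation base (edx).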
	c.shl(x86::rax, 16);
	c.shr(x86::rax, 16);
	c.shr(x86::rdx, 48);
	c.shl(x86::edx, 13);

	c.mov(x86::r12d, x86::edx); // Load relocation base
	c.mov(x86::rbx, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_base_addr)));
	c.mov(x86::r14, x86::qword_ptr(x86::rbp, ::offset32(&ppu_thread::gpr, 0))); // Load some registers
	c.mov(x86::rsi, x86::qword_ptr(x86::rbp, ::offset32(&ppu_thread::gpr, 1)));
	c.mov(x86::rdi, x86::qword_ptr(x86::rbp, ::offset32(&ppu_thread::gpr, 2)));

	if (utils::has_avx())
	{
		c.vzeroupper();
	}

	c.call(x86::rax);

	if (utils::has_avx())
	{
		c.vzeroupper();
	}

#ifdef _WIN32
	c.movaps(x86::xmm6, x86::oword_ptr(x86::rsp, 0));
	c.movaps(x86::xmm7, x86::oword_ptr(x86::rsp, 0x10));
	c.movaps(x86::xmm8, x86::oword_ptr(x86::rsp, 0x20));
	c.movaps(x86::xmm9, x86::oword_ptr(x86::rsp, 0x30));
	c.movaps(x86::xmm10, x86::oword_ptr(x86::rsp, 0x40));
	c.movaps(x86::xmm11, x86::oword_ptr(x86::rsp, 0x50));
	c.movaps(x86::xmm12, x86::oword_ptr(x86::rsp, 0x60));
	c.movaps(x86::xmm13, x86::oword_ptr(x86::rsp, 0x70));
	c.movaps(x86::xmm14, x86::oword_ptr(x86::rsp, 0x80));
	c.movaps(x86::xmm15, x86::oword_ptr(x86::rsp, 0x90));
	c.add(x86::rsp, 0xa8);
	c.pop(x86::rbx);
	c.pop(x86::rbp);
	c.pop(x86::rdi);
	c.pop(x86::rsi);
	c.pop(x86::r12);
	c.pop(x86::r13);
	c.pop(x86::r14);
	c.pop(x86::r15);
#else
	c.add(x86::rsp, +8);
	c.pop(x86::rbx);
	c.pop(x86::r12);
	c.pop(x86::r13);
	c.pop(x86::r14);
	c.pop(x86::r15);
	c.pop(x86::rbp);
#endif

	c.ret();
#else
	// See https://github.com/ghc/ghc/blob/master/rts/include/stg/MachRegs.h
	// for GHC calling convention definitions on Aarch64
	// and https://developer.arm.com/documentation/den0024/a/The-ABI-for-ARM-64-bit-Architecture/Register-use-in-the-AArch64-Procedure-Call-Standard/Parameters-in-general-purpose-registers
	// for the AArch64 calling convention

	// Save sp for native longjmp emulation
	Label native_sp_offset = c.newLabel();
	c.ldr(a64::x10, arm::Mem(native_sp_offset));
	// sp is not allowed to be used in loads/stores directly
	c.mov(a64::x15, a64::sp);
	c.str(a64::x15, arm::Mem(args[0], a64::x10));

	// Push callee-saved registers to the stack
	// We need to save x18-x30 = 13 x 8B each + 8 bytes for 16B alignment = 112B
	c.sub(a64::sp, a64::sp, Imm(112));
	c.stp(a64::x18, a64::x19, arm::Mem(a64::sp));
	c.stp(a64::x20, a64::x21, arm::Mem(a64::sp, 16));
	c.stp(a64::x22, a64::x23, arm::Mem(a64::sp, 32));
	c.stp(a64::x24, a64::x25, arm::Mem(a64::sp, 48));
	c.stp(a64::x26, a64::x27, arm::Mem(a64::sp, 64));
	c.stp(a64::x28, a64::x29, arm::Mem(a64::sp, 80));
	c.str(a64::x30, arm::Mem(a64::sp, 96));

	// Load REG_Base - use an absolute jump target to bypass relative jump range limits
	Label exec_addr = c.newLabel();
	c.ldr(a64::x19, arm::Mem(exec_addr));
	c.ldr(a64::x19, arm::Mem(a64::x19));

	// Load PPUThread struct base -> REG_Sp
	const arm::GpX ppu_t_base = a64::x20;
	c.mov(ppu_t_base, args[0]);

	// Load PC
	const arm::GpX pc = a64::x15;
	Label cia_offset = c.newLabel();
	const arm::GpX cia_addr_reg = a64::x11;

	// Load offset value
	c.ldr(cia_addr_reg, arm::Mem(cia_offset));

	// Load cia
	c.ldr(a64::w15, arm::Mem(ppu_t_base, cia_addr_reg));

	// Multiply by 2 to index into the pointer table
	const arm::GpX index_shift = a64::x12;
	c.mov(index_shift, Imm(2));
	c.mul(pc, pc, index_shift);

	// Load call target
	const arm::GpX call_target = a64::x13;
	c.ldr(call_target, arm::Mem(a64::x19, pc));

	// Compute REG_Hp
	const arm::GpX reg_hp = a64::x21;
	c.mov(reg_hp, call_target);
	c.lsr(reg_hp, reg_hp, 48);
	c.lsl(a64::w21, a64::w21, 13);

	// Zero the top 16 bits of the call target
	c.lsl(call_target, call_target, Imm(16));
	c.lsr(call_target, call_target, Imm(16));

	// Load registers
	Label base_addr = c.newLabel();
	c.ldr(a64::x22, arm::Mem(base_addr));
	c.ldr(a64::x22, arm::Mem(a64::x22));

	Label gpr_addr_offset = c.newLabel();
	const arm::GpX gpr_addr_reg = a64::x9;
	c.ldr(gpr_addr_reg, arm::Mem(gpr_addr_offset));
	c.add(gpr_addr_reg, gpr_addr_reg, ppu_t_base);
	c.ldr(a64::x23, arm::Mem(gpr_addr_reg));
	c.ldr(a64::x24, arm::Mem(gpr_addr_reg, 8));
	c.ldr(a64::x25, arm::Mem(gpr_addr_reg, 16));

	// Execute LLE call
	c.blr(call_target);

	// Restore registers from the stack
	c.ldp(a64::x18, a64::x19, arm::Mem(a64::sp));
	c.ldp(a64::x20, a64::x21, arm::Mem(a64::sp, 16));
	c.ldp(a64::x22, a64::x23, arm::Mem(a64::sp, 32));
	c.ldp(a64::x24, a64::x25, arm::Mem(a64::sp, 48));
	c.ldp(a64::x26, a64::x27, arm::Mem(a64::sp, 64));
	c.ldp(a64::x28, a64::x29, arm::Mem(a64::sp, 80));
	c.ldr(a64::x30, arm::Mem(a64::sp, 96));

	// Restore stack ptr
	c.add(a64::sp, a64::sp, Imm(112));

	// Return
	c.ret(a64::x30);

	c.bind(exec_addr);
	c.embedUInt64(reinterpret_cast<u64>(&vm::g_exec_addr));
	c.bind(base_addr);
	c.embedUInt64(reinterpret_cast<u64>(&vm::g_base_addr));
	c.bind(cia_offset);
	c.embedUInt64(static_cast<u64>(::offset32(&ppu_thread::cia)));
	c.bind(gpr_addr_offset);
	c.embedUInt64(static_cast<u64>(::offset32(&ppu_thread::gpr)));
	c.bind(native_sp_offset);
	c.embedUInt64(static_cast<u64>(::offset32(&ppu_thread::saved_native_sp)));
#endif
});

const extern auto ppu_escape = build_function_asm<void(*)(ppu_thread*)>("ppu_escape", [](native_asm& c, auto& args)
{
	using namespace asmjit;

#if defined(ARCH_X64)
	// Restore native stack pointer (longjmp emulation)
	c.mov(x86::rsp, x86::qword_ptr(args[0], ::offset32(&ppu_thread::saved_native_sp)));

	// Return to the return location
	c.sub(x86::rsp, 8);
	c.ret();
#endif
});

void ppu_recompiler_fallback(ppu_thread& ppu);

#if defined(ARCH_X64)
const auto ppu_recompiler_fallback_ghc = build_function_asm<void(*)(ppu_thread& ppu)>("", [](native_asm& c, auto& args)
{
	using namespace asmjit;

	c.mov(args[0], x86::rbp);
	c.jmp(ppu_recompiler_fallback);
});
#elif defined(ARCH_ARM64)
const auto ppu_recompiler_fallback_ghc = &ppu_recompiler_fallback;
#endif

// Get pointer to executable cache
static inline u8* ppu_ptr(u32 addr)
{
	return vm::g_exec_addr + u64{addr} * 2;
}

static inline ppu_intrp_func_t ppu_read(u32 addr)
{
	return read_from_ptr<ppu_intrp_func_t>(ppu_ptr(addr));
}
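
// Note: the executable cache reserves 8 bytes per 4-byte PPU instruction (hence
// addr * 2 above): each slot holds either a plain ppu_intrp_func_t or a recompiler
// entry with the relocation base packed into the upper 16 bits (see ppu_gateway).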

// Get interpreter cache value
static ppu_intrp_func_t ppu_cache(u32 addr)
{
	if (g_cfg.core.ppu_decoder != ppu_decoder_type::_static)
	{
		fmt::throw_exception("Invalid PPU decoder");
	}

	return g_fxo->get<ppu_interpreter_rt>().decode(vm::read32(addr));
}

static ppu_intrp_func ppu_ret = {[](ppu_thread& ppu, ppu_opcode_t, be_t<u32>* this_op, ppu_intrp_func*)
{
	// Fix PC and return (step execution)
	ppu.cia = vm::get_addr(this_op);
	return;
}};

static void ppu_fallback(ppu_thread& ppu, ppu_opcode_t op, be_t<u32>* this_op, ppu_intrp_func* next_fn)
{
	const auto _pc = vm::get_addr(this_op);
	const auto _fn = ppu_cache(_pc);
	write_to_ptr<ppu_intrp_func_t>(ppu_ptr(_pc), _fn);
	return _fn(ppu, op, this_op, next_fn);
}

// TODO: Make this a dispatch call
void ppu_recompiler_fallback(ppu_thread& ppu)
{
	perf_meter<"PPUFALL1"_u64> perf0;

	if (g_cfg.core.ppu_debug)
	{
		ppu_log.error("Unregistered PPU Function (LR=0x%x)", ppu.lr);
	}

	const auto& table = g_fxo->get<ppu_interpreter_rt>();

	while (true)
	{
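		// Interpret until a recompiled (non-fallback) entry appears at CIA.
		// (func << 16 >> 16) masks off the relocation base packed into the top
		// 16 bits of the entry before comparing raw host pointers.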
		if (uptr func = uptr(ppu_read(ppu.cia)); (func << 16 >> 16) != reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc))
		{
			// We found a recompiler function at cia, return
			break;
		}

		// Run one instruction in interpreter (TODO)
		const u32 op = vm::read32(ppu.cia);
		table.decode(op)(ppu, {op}, vm::_ptr<u32>(ppu.cia), &ppu_ret);

		if (ppu.test_stopped())
		{
			break;
		}
	}
}

void ppu_reservation_fallback(ppu_thread& ppu)
{
	perf_meter<"PPUFALL2"_u64> perf0;

	const auto& table = g_fxo->get<ppu_interpreter_rt>();

	while (true)
	{
		// Run one instruction in interpreter (TODO)
		const u32 op = vm::read32(ppu.cia);
		table.decode(op)(ppu, {op}, vm::_ptr<u32>(ppu.cia), &ppu_ret);

		if (!ppu.raddr || !ppu.use_full_rdata)
		{
			// We've escaped from reservation, return.
			return;
		}

		if (ppu.test_stopped())
		{
			return;
		}
	}
}
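
// MMIO-aware access helpers: a 32-bit guest access at or above RAW_SPU_BASE_ADDR
// may target RawSPU problem-state registers and is routed through
// spu_thread::read_reg/write_reg; all other cases (including LS-mapped ranges and
// missing RawSPU threads) fall through to a direct access on vm_base.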

u32 ppu_read_mmio_aware_u32(u8* vm_base, u32 eal)
{
	if (eal >= RAW_SPU_BASE_ADDR)
	{
		// RawSPU MMIO
		auto thread = idm::get<named_thread<spu_thread>>(spu_thread::find_raw_spu((eal - RAW_SPU_BASE_ADDR) / RAW_SPU_OFFSET));

		if (!thread)
		{
			// Access Violation
		}
		else if ((eal - RAW_SPU_BASE_ADDR) % RAW_SPU_OFFSET + sizeof(u32) - 1 < SPU_LS_SIZE) // LS access
		{
		}
		else if (u32 value{}; thread->read_reg(eal, value))
		{
			return std::bit_cast<be_t<u32>>(value);
		}
		else
		{
			fmt::throw_exception("Invalid RawSPU MMIO offset (addr=0x%x)", eal);
		}
	}

	// Value is assumed to be swapped
	return read_from_ptr<u32>(vm_base + eal);
}

void ppu_write_mmio_aware_u32(u8* vm_base, u32 eal, u32 value)
{
	if (eal >= RAW_SPU_BASE_ADDR)
	{
		// RawSPU MMIO
		auto thread = idm::get<named_thread<spu_thread>>(spu_thread::find_raw_spu((eal - RAW_SPU_BASE_ADDR) / RAW_SPU_OFFSET));

		if (!thread)
		{
			// Access Violation
		}
		else if ((eal - RAW_SPU_BASE_ADDR) % RAW_SPU_OFFSET + sizeof(u32) - 1 < SPU_LS_SIZE) // LS access
		{
		}
		else if (thread->write_reg(eal, std::bit_cast<be_t<u32>>(value)))
		{
			return;
		}
		else
		{
			fmt::throw_exception("Invalid RawSPU MMIO offset (addr=0x%x)", eal);
		}
	}

	// Value is assumed swapped
	write_to_ptr<u32>(vm_base + eal, value);
}
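
// Heuristic scan: look for immediates commonly used to materialize RawSPU MMIO
// addresses (the 0xE0000000 base via ADDIS -0x2000 / ORIS 0xE000, the 0x00100000
// per-SPU stride via ORIS/ADDIS 0x10, and problem-state variants), for EIEIO
// barriers, or for known problem-state register offsets in load/store displacements.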
extern bool ppu_test_address_may_be_mmio(std::span<const be_t<u32>> insts)
{
	std::set<u32> reg_offsets;
	bool found_raw_spu_base = false;
	bool found_spu_area_offset_element = false;

	for (u32 inst : insts)
	{
		// Common around MMIO (orders IO)
		if (inst == ppu_instructions::EIEIO())
		{
			return true;
		}

		const u32 op_imm16 = (inst & 0xfc00ffff);

		// RawSPU MMIO base
		// 0xe0000000 is a common constant, so also try to find an ORIS 0x10 or ADDIS 0x10 nearby (for multiplying the SPU ID by that stride)
		if (op_imm16 == ppu_instructions::ADDIS({}, {}, -0x2000) || op_imm16 == ppu_instructions::ORIS({}, {}, 0xe000) || op_imm16 == ppu_instructions::XORIS({}, {}, 0xe000))
		{
			found_raw_spu_base = true;

			if (found_spu_area_offset_element)
			{
				// Found both
				return true;
			}
		}
		else if (op_imm16 == ppu_instructions::ORIS({}, {}, 0x10) || op_imm16 == ppu_instructions::ADDIS({}, {}, 0x10))
		{
			found_spu_area_offset_element = true;

			if (found_raw_spu_base)
			{
				// Found both
				return true;
			}
		}
		// RawSPU MMIO base + problem state offset
		else if (op_imm16 == ppu_instructions::ADDIS({}, {}, -0x1ffc))
		{
			return true;
		}
		else if (op_imm16 == ppu_instructions::ORIS({}, {}, 0xe004))
		{
			return true;
		}
		else if (op_imm16 == ppu_instructions::XORIS({}, {}, 0xe004))
		{
			return true;
		}
		// RawSPU MMIO base + problem state offset + 64k of SNR1 offset
		else if (op_imm16 == ppu_instructions::ADDIS({}, {}, -0x1ffb))
		{
			return true;
		}
		else if (op_imm16 == ppu_instructions::ORIS({}, {}, 0xe005))
		{
			return true;
		}
		else if (op_imm16 == ppu_instructions::XORIS({}, {}, 0xe005))
		{
			return true;
		}
		// RawSPU MMIO base + problem state offset + 264k of SNR2 offset (STW allows a +-32K displacement, so in order to access SNR2 it needs to first add another 64k)
		// SNR2 is the only implemented register with its 0x80000 bit set, so it is the only one whose hardcoded access is done this way
		else if (op_imm16 == ppu_instructions::ADDIS({}, {}, -0x1ffa))
		{
			return true;
		}
		else if (op_imm16 == ppu_instructions::ORIS({}, {}, 0xe006))
		{
			return true;
		}
		else if (op_imm16 == ppu_instructions::XORIS({}, {}, 0xe006))
		{
			return true;
		}
		// Try to detect a function that receives a RawSPU problem state base pointer as an argument
		else if ((op_imm16 & ~0xffff) == ppu_instructions::LWZ({}, {}, 0) ||
			(op_imm16 & ~0xffff) == ppu_instructions::STW({}, {}, 0) ||
			(op_imm16 & ~0xffff) == ppu_instructions::ADDI({}, {}, 0))
		{
			const bool is_load = (op_imm16 & ~0xffff) == ppu_instructions::LWZ({}, {}, 0);
			const bool is_store = (op_imm16 & ~0xffff) == ppu_instructions::STW({}, {}, 0);
			const bool is_neither = !is_store && !is_load;
			const bool is_snr = (is_store || is_neither) && ((op_imm16 & 0xffff) == (SPU_RdSigNotify2_offs & 0xffff) || (op_imm16 & 0xffff) == (SPU_RdSigNotify1_offs & 0xffff));

			if (is_snr || spu_thread::test_is_problem_state_register_offset(op_imm16 & 0xffff, is_load || is_neither, is_store || is_neither))
			{
				reg_offsets.insert(op_imm16 & 0xffff);

				if (reg_offsets.size() >= 2)
				{
					// Assume high MMIO likelihood if more than one offset appears in nearby code
					// Such as the common IN_MBOX + OUT_MBOX pair
					return true;
				}
			}
		}
	}

	return false;
}

struct ppu_toc_manager
{
	std::unordered_map<u32, u32> toc_map;

	shared_mutex mutex;
};

static void ppu_check_toc(ppu_thread& ppu, ppu_opcode_t op, be_t<u32>* this_op, ppu_intrp_func* next_fn)
{
	ppu.cia = vm::get_addr(this_op);

	{
		auto& toc_manager = g_fxo->get<ppu_toc_manager>();

		reader_lock lock(toc_manager.mutex);

		auto& ppu_toc = toc_manager.toc_map;

		const auto found = ppu_toc.find(ppu.cia);

		if (found != ppu_toc.end())
		{
			const u32 toc = atomic_storage<u32>::load(found->second);

			// Compare TOC with expected value
			if (toc != umax && ppu.gpr[2] != toc)
			{
				ppu_log.error("Unexpected TOC (0x%x, expected 0x%x)", ppu.gpr[2], toc);
				atomic_storage<u32>::exchange(found->second, u32{umax});
			}
		}
	}

	// Fallback to the interpreter function
	return ppu_cache(ppu.cia)(ppu, op, this_op, next_fn);
}

extern void ppu_register_range(u32 addr, u32 size)
{
	if (!size)
	{
		ppu_log.error("ppu_register_range(0x%x): empty range", addr);
		return;
	}

	size = utils::align(size + addr % 0x10000, 0x10000);
	addr &= -0x10000;

	// Register executable range at
	utils::memory_commit(ppu_ptr(addr), u64{size} * 2, utils::protection::rw);
	ensure(vm::page_protect(addr, size, 0, vm::page_executable));

	if (g_cfg.core.ppu_debug)
	{
		utils::memory_commit(vm::g_stat_addr + addr, size);
	}

	const u64 seg_base = addr;

	while (size)
	{
		if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm)
		{
			// Assume addr is the start of first segment of PRX
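			// (seg_base << 35) lands (seg_base >> 13) in the entry's top 16 bits;
			// ppu_gateway reverses this with shr(rdx, 48) followed by shl(edx, 13).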
			const uptr entry_value = reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc) | (seg_base << (32 + 3));
			write_to_ptr<uptr>(ppu_ptr(addr), entry_value);
		}
		else
		{
			write_to_ptr<ppu_intrp_func_t>(ppu_ptr(addr), ppu_fallback);
		}

		addr += 4;
		size -= 4;
	}
}

static void ppu_far_jump(ppu_thread&, ppu_opcode_t, be_t<u32>*, ppu_intrp_func*);

extern void ppu_register_function_at(u32 addr, u32 size, ppu_intrp_func_t ptr = nullptr)
{
	// Initialize specific function
	if (ptr)
	{
		write_to_ptr<uptr>(ppu_ptr(addr), (reinterpret_cast<uptr>(ptr) & 0xffff'ffff'ffffu) | (uptr(ppu_read(addr)) & ~0xffff'ffff'ffffu));
		return;
	}

	if (!size)
	{
		if (g_cfg.core.ppu_debug)
		{
			ppu_log.error("ppu_register_function_at(0x%x): empty range", addr);
		}

		return;
	}

	if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm)
	{
		return;
	}

	// Initialize interpreter cache
	while (size)
	{
		if (auto old = ppu_read(addr); old != ppu_break && old != ppu_far_jump)
		{
			write_to_ptr<ppu_intrp_func_t>(ppu_ptr(addr), ppu_cache(addr));
		}

		addr += 4;
		size -= 4;
	}
}

extern void ppu_register_function_at(u32 addr, u32 size, u64 ptr)
{
	return ppu_register_function_at(addr, size, reinterpret_cast<ppu_intrp_func_t>(ptr));
}

u32 ppu_get_exported_func_addr(u32 fnid, const std::string& module_name);

void ppu_return_from_far_jump(ppu_thread& ppu, ppu_opcode_t, be_t<u32>*, ppu_intrp_func*)
{
	auto& calls_info = ppu.hle_func_calls_with_toc_info;
	ensure(!calls_info.empty());

	// Branch to next instruction after far jump call entry with restored R2 and LR
	const auto restore_info = &calls_info.back();
	ppu.cia = restore_info->cia + 4;
	ppu.lr = restore_info->saved_lr;
	ppu.gpr[2] = restore_info->saved_r2;

	calls_info.pop_back();
}

static const bool s_init_return_far_jump_func = []
{
	REG_HIDDEN_FUNC_PURE(ppu_return_from_far_jump);
	return true;
}();
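
// Registry of far-jump redirections: maps a patched guest entry PC to its real
// target, optionally resolving an exported FNID at call time and optionally going
// through a function descriptor (OPD), which also swaps R2/LR for the callee.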

struct ppu_far_jumps_t
{
	struct all_info_t
	{
		u32 target;
		bool link;
		bool with_toc;
		std::string module_name;
		ppu_intrp_func_t func;

		u32 get_target(u32 pc, ppu_thread* ppu = nullptr) const
		{
			u32 direct_target = this->target;

			bool to_link = this->link;
			bool from_opd = this->with_toc;

			if (!this->module_name.empty())
			{
				direct_target = ppu_get_exported_func_addr(direct_target, this->module_name);
			}

			if (from_opd && !vm::check_addr<sizeof(ppu_func_opd_t)>(direct_target))
			{
				// Avoid reading unmapped memory under mutex
				from_opd = false;
			}

			if (from_opd)
			{
				auto& opd = vm::_ref<ppu_func_opd_t>(direct_target);
				direct_target = opd.addr;

				// We modify LR to custom values here
				to_link = false;

				if (ppu)
				{
					auto& calls_info = ppu->hle_func_calls_with_toc_info;

					// Save LR and R2
					// Set LR to the ppu_return_from_far_jump branch for restoration of registers
					// NOTE: In order to clean up this information all calls must return in order
					auto& saved_info = calls_info.emplace_back();
					saved_info.cia = pc;
					saved_info.saved_lr = std::exchange(ppu->lr, g_fxo->get<ppu_function_manager>().func_addr(FIND_FUNC(ppu_return_from_far_jump), true));
					saved_info.saved_r2 = std::exchange(ppu->gpr[2], opd.rtoc);
				}
			}

			if (to_link && ppu)
			{
				ppu->lr = pc + 4;
			}

			return direct_target;
		}
	};

	ppu_far_jumps_t(int) noexcept {}

	std::map<u32, all_info_t> vals;

	::jit_runtime rt;

	mutable shared_mutex mutex;

	// Get target address, 'ppu' is used in ppu_far_jump in order to modify registers
	u32 get_target(u32 pc, ppu_thread* ppu = nullptr)
	{
		reader_lock lock(mutex);

		if (auto it = vals.find(pc); it != vals.end())
		{
			all_info_t& all_info = it->second;
			return all_info.get_target(pc, ppu);
		}

		return {};
	}

	// Get function patches in range (entry -> target)
	std::vector<std::pair<u32, u32>> get_targets(u32 pc, u32 size)
	{
		std::vector<std::pair<u32, u32>> targets;

		reader_lock lock(mutex);

		auto it = vals.lower_bound(pc);

		if (it == vals.end())
		{
			return targets;
		}

		if (it->first >= pc + size)
		{
			return targets;
		}

		for (auto end = vals.lower_bound(pc + size); it != end; it++)
		{
			all_info_t& all_info = it->second;

			if (u32 target = all_info.get_target(it->first))
			{
				targets.emplace_back(it->first, target);
			}
		}

		return targets;
	}

	// Generate a mini-function which updates PC (for LLVM) and jumps to ppu_far_jump to handle redirections
	template <bool Locked = true>
	ppu_intrp_func_t gen_jump(u32 pc)
	{
		[[maybe_unused]] std::conditional_t<Locked, std::lock_guard<shared_mutex>, const shared_mutex&> lock(mutex);

		auto it = vals.find(pc);

		if (it == vals.end())
		{
			return nullptr;
		}

		if (!it->second.func)
		{
			it->second.func = build_function_asm<ppu_intrp_func_t>("", [&](native_asm& c, auto& args)
			{
				using namespace asmjit;

#ifdef ARCH_X64
				c.mov(args[0], x86::rbp);
				c.mov(x86::dword_ptr(args[0], ::offset32(&ppu_thread::cia)), pc);
				c.jmp(ppu_far_jump);
#else
				Label jmp_address = c.newLabel();
				Label imm_address = c.newLabel();

				c.ldr(args[1].w(), arm::ptr(imm_address));
				c.str(args[1].w(), arm::Mem(args[0], ::offset32(&ppu_thread::cia)));

				c.ldr(args[1], arm::ptr(jmp_address));
				c.br(args[1]);

				c.align(AlignMode::kCode, 16);
				c.bind(jmp_address);
				c.embedUInt64(reinterpret_cast<u64>(ppu_far_jump));
				c.bind(imm_address);
				c.embedUInt32(pc);
#endif
			}, &rt);
		}

		return it->second.func;
	}
};

u32 ppu_get_far_jump(u32 pc)
{
	if (!g_fxo->is_init<ppu_far_jumps_t>())
	{
		return 0;
	}

	return g_fxo->get<ppu_far_jumps_t>().get_target(pc);
}

static void ppu_far_jump(ppu_thread& ppu, ppu_opcode_t, be_t<u32>*, ppu_intrp_func*)
{
	const u32 cia = g_fxo->get<ppu_far_jumps_t>().get_target(ppu.cia, &ppu);

	if (!vm::check_addr(cia, vm::page_executable))
	{
		fmt::throw_exception("PPU far jump failed! (returned cia = 0x%08x)", cia);
	}

	ppu.cia = cia;
}

bool ppu_form_branch_to_code(u32 entry, u32 target, bool link, bool with_toc, std::string module_name)
{
	// Force-align entry and target
	entry &= -4;

	// Exported functions use target as an FNID, which must not be changed
	if (module_name.empty())
	{
		target &= -4;

		u32 cia_target = target;

		if (with_toc)
		{
			ppu_func_opd_t opd{};

			if (!vm::try_access(target, &opd, sizeof(opd), false))
			{
				// Cannot access function descriptor
				return false;
			}

			// For now allow situations where OPD is changed later by patches or by the program itself
			//cia_target = opd.addr;

			// So force a valid target (executable, yet not equal to entry)
			cia_target = entry ^ 8;
		}

		// Target CIA must be aligned, executable, and not equal to entry
		if (cia_target % 4 || entry == cia_target || !vm::check_addr(cia_target, vm::page_executable))
		{
			return false;
		}
	}

	// Entry must be executable
	if (!vm::check_addr(entry, vm::page_executable))
	{
		return false;
	}

	g_fxo->init<ppu_far_jumps_t>(0);

	if (!module_name.empty())
	{
		// Always use a function descriptor for exported functions
		with_toc = true;
	}

	if (with_toc)
	{
		// Always link for calls with a function descriptor
		link = true;
	}

	// Register branch target in host memory, not guest memory
	auto& jumps = g_fxo->get<ppu_far_jumps_t>();

	std::lock_guard lock(jumps.mutex);

	jumps.vals.insert_or_assign(entry, ppu_far_jumps_t::all_info_t{target, link, with_toc, std::move(module_name)});

	ppu_register_function_at(entry, 4, g_cfg.core.ppu_decoder == ppu_decoder_type::_static ? &ppu_far_jump : ensure(g_fxo->get<ppu_far_jumps_t>().gen_jump<false>(entry)));

	return true;
}

bool ppu_form_branch_to_code(u32 entry, u32 target, bool link, bool with_toc)
{
	return ppu_form_branch_to_code(entry, target, link, with_toc, std::string{});
}

bool ppu_form_branch_to_code(u32 entry, u32 target, bool link)
{
	return ppu_form_branch_to_code(entry, target, link, false);
}

bool ppu_form_branch_to_code(u32 entry, u32 target)
{
	return ppu_form_branch_to_code(entry, target, false);
}

void ppu_remove_hle_instructions(u32 addr, u32 size)
{
	if (Emu.IsStopped() || !g_fxo->is_init<ppu_far_jumps_t>())
	{
		return;
	}

	auto& jumps = g_fxo->get<ppu_far_jumps_t>();

	std::lock_guard lock(jumps.mutex);

	for (auto it = jumps.vals.begin(); it != jumps.vals.end();)
	{
		if (it->first >= addr && it->first <= addr + size - 1 && size)
		{
			it = jumps.vals.erase(it);
			continue;
		}

		it++;
	}
}

atomic_t<bool> g_debugger_pause_all_threads_on_bp = false;

// Breakpoint entry point
static void ppu_break(ppu_thread& ppu, ppu_opcode_t, be_t<u32>* this_op, ppu_intrp_func* next_fn)
{
	const bool pause_all = g_debugger_pause_all_threads_on_bp;

	const u32 old_cia = vm::get_addr(this_op);
	ppu.cia = old_cia;

	// Pause
	ppu.state.atomic_op([&](bs_t<cpu_flag>& state)
	{
		if (pause_all) state += cpu_flag::dbg_global_pause;
		if (pause_all || !(state & cpu_flag::dbg_step)) state += cpu_flag::dbg_pause;
	});

	if (pause_all)
	{
		// Pause all other threads
		Emu.CallFromMainThread([]() { Emu.Pause(); });
	}

	if (ppu.check_state() || old_cia != atomic_storage<u32>::load(ppu.cia))
	{
		// Do not execute if PC changed
		return;
	}

	// Fallback to the interpreter function
	return ppu_cache(ppu.cia)(ppu, {*this_op}, this_op, ppu.state ? &ppu_ret : next_fn);
}

// Set or remove breakpoint
extern bool ppu_breakpoint(u32 addr, bool is_adding)
{
	if (addr % 4 || !vm::check_addr(addr, vm::page_executable) || g_cfg.core.ppu_decoder == ppu_decoder_type::llvm)
	{
		return false;
	}

	// Remove breakpoint parameters
	ppu_intrp_func_t func_original = nullptr;
	ppu_intrp_func_t breakpoint = &ppu_break;

	if (u32 hle_addr{}; g_fxo->is_init<ppu_function_manager>() && (hle_addr = g_fxo->get<ppu_function_manager>().addr))
	{
		// HLE function index
		const u32 index = (addr - hle_addr) / 8;

		if (addr % 8 == 4 && index < ppu_function_manager::get().size())
		{
			// HLE function placement
			func_original = ppu_function_manager::get()[index];
		}
	}

	if (!func_original)
	{
		// If not an HLE function, use the regular instruction function
		func_original = ppu_cache(addr);
	}

	if (is_adding)
	{
		if (ppu_read(addr) == ppu_fallback)
		{
			ppu_log.error("Unregistered instruction replaced with a breakpoint at 0x%08x", addr);
			func_original = ppu_fallback;
		}

		if (ppu_read(addr) != func_original)
		{
			return false;
		}

		write_to_ptr<ppu_intrp_func_t>(ppu_ptr(addr), breakpoint);
		return true;
	}

	if (ppu_read(addr) != breakpoint)
	{
		return false;
	}

	write_to_ptr<ppu_intrp_func_t>(ppu_ptr(addr), func_original);
	return true;
}

extern bool ppu_patch(u32 addr, u32 value)
{
	if (addr % 4)
	{
		ppu_log.fatal("Patch failed at 0x%x: unaligned memory address.", addr);
		return false;
	}

	vm::writer_lock rlock;

	if (!vm::check_addr(addr))
	{
		ppu_log.fatal("Patch failed at 0x%x: invalid memory address.", addr);
		return false;
	}

	const bool is_exec = vm::check_addr(addr, vm::page_executable);

	if (is_exec && g_cfg.core.ppu_decoder == ppu_decoder_type::llvm && !Emu.IsReady())
	{
		// TODO: support recompilers
		ppu_log.fatal("Patch failed at 0x%x: LLVM recompiler is used.", addr);
		return false;
	}

	*vm::get_super_ptr<u32>(addr) = value;

	if (is_exec)
	{
		if (auto old = ppu_read(addr); old != ppu_break && old != ppu_fallback)
		{
			write_to_ptr<ppu_intrp_func_t>(ppu_ptr(addr), ppu_cache(addr));
		}
	}

	return true;
}
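
// Returns the possible successor PCs of the instruction at 'pc': res[0] is the
// fall-through or unconditional target, res[1] the taken target of a conditional
// branch; umax marks an unknown or indeterminate target (BCCTR/BCLR/UNK).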
std::array<u32, 2> op_branch_targets(u32 pc, ppu_opcode_t op)
{
	std::array<u32, 2> res{pc + 4, umax};

	if (u32 target = g_fxo->is_init<ppu_far_jumps_t>() ? g_fxo->get<ppu_far_jumps_t>().get_target(pc) : 0)
	{
		res[0] = target;
		return res;
	}

	switch (const auto type = g_ppu_itype.decode(op.opcode))
	{
	case ppu_itype::B:
	case ppu_itype::BC:
	{
		res[type == ppu_itype::BC ? 1 : 0] = ((op.aa ? 0 : pc) + (type == ppu_itype::B ? +op.bt24 : +op.bt14));
		break;
	}
	case ppu_itype::BCCTR:
	case ppu_itype::BCLR:
	case ppu_itype::UNK:
	{
		res[0] = umax;
		break;
	}
	default: break;
	}

	return res;
}

void ppu_thread::dump_regs(std::string& ret, std::any& custom_data) const
{
	const system_state emu_state = Emu.GetStatus(false);
	const bool is_stopped_or_frozen = state & cpu_flag::exit || emu_state == system_state::frozen || emu_state <= system_state::stopping;
	const ppu_debugger_mode mode = debugger_mode.load();
	const bool is_decimal = !is_stopped_or_frozen && mode == ppu_debugger_mode::is_decimal;

	struct dump_registers_data_t
	{
		u32 preferred_cr_field_index = 7;
	};

	dump_registers_data_t* func_data = nullptr;

	func_data = std::any_cast<dump_registers_data_t>(&custom_data);

	if (!func_data)
	{
		custom_data.reset();
		custom_data = std::make_any<dump_registers_data_t>();
		func_data = ensure(std::any_cast<dump_registers_data_t>(&custom_data));
	}

	PPUDisAsm dis_asm(cpu_disasm_mode::normal, vm::g_sudo_addr);

	for (uint i = 0; i < 32; ++i)
	{
		auto reg = gpr[i];

		// Fixup for syscall arguments
		if (current_function && i >= 3 && i <= 10) reg = syscall_args[i - 3];

		auto [is_const, const_value] = dis_asm.try_get_const_gpr_value(i, cia);

		if (const_value != reg)
		{
			// Expectation of predictable code path has not been met (such as a branch directly to the instruction)
			is_const = false;
		}

		fmt::append(ret, "r%d%s%s", i, i <= 9 ? " " : "", is_const ? "©" : ":");

		bool printed_error = false;

		if ((reg >> 31) == 0x1'ffff'ffff)
		{
			const usz old_size = ret.size();

			fmt::append(ret, " %s (0x%x)", CellError{static_cast<u32>(reg)}, reg);

			// Test if failed to format (appended " 0x8".. in such case)
			if (ret[old_size] == '0')
			{
				// Failed
				ret.resize(old_size);
			}
			else
			{
				printed_error = true;
			}
		}

		if (!printed_error)
		{
			if (is_decimal)
			{
				fmt::append(ret, " %-11d", reg);
			}
			else
			{
				fmt::append(ret, " 0x%-8llx", reg);
			}
		}

		constexpr u32 max_str_len = 32;
		constexpr u32 hex_count = 8;

		if (reg <= u32{umax} && vm::check_addr<max_str_len>(static_cast<u32>(reg)))
		{
			bool is_function = false;
			u32 toc = 0;

			auto is_exec_code = [&](u32 addr)
			{
				return addr % 4 == 0 && vm::check_addr(addr, vm::page_executable) && g_ppu_itype.decode(*vm::get_super_ptr<u32>(addr)) != ppu_itype::UNK;
			};

			if (const u32 reg_ptr = *vm::get_super_ptr<be_t<u32, 1>>(static_cast<u32>(reg));
				vm::check_addr<8>(reg_ptr) && !vm::check_addr(toc, vm::page_executable))
			{
				// Check executability and alignment
				if (reg % 4 == 0 && is_exec_code(reg_ptr))
				{
					toc = *vm::get_super_ptr<u32>(static_cast<u32>(reg + 4));

					if (toc % 4 == 0 && (toc >> 29) == (reg_ptr >> 29) && vm::check_addr(toc) && !vm::check_addr(toc, vm::page_executable))
					{
						is_function = true;
						reg = reg_ptr;
					}
				}
			}
			else if (is_exec_code(static_cast<u32>(reg)))
			{
				is_function = true;
			}

			const auto gpr_buf = vm::get_super_ptr<u8>(static_cast<u32>(reg));

			std::string buf_tmp(gpr_buf, gpr_buf + max_str_len);

			std::string_view sv(buf_tmp.data(), std::min<usz>(buf_tmp.size(), buf_tmp.find_first_of("\0\n"sv)));

			if (is_function)
			{
				if (toc)
				{
					fmt::append(ret, " -> func(at=0x%x, toc=0x%x)", reg, toc);
				}
				else
				{
					dis_asm.disasm(static_cast<u32>(reg));
					fmt::append(ret, " -> %s", dis_asm.last_opcode);
				}
			}
			// NTS: size of 3 and above is required
			// If ends with a newline, only one character is required
			else if ((sv.size() == buf_tmp.size() || (sv.size() >= (buf_tmp[sv.size()] == '\n' ? 1 : 3))) &&
				std::all_of(sv.begin(), sv.end(), [](u8 c) { return std::isprint(c); }))
			{
				fmt::append(ret, " -> \"%s\"", sv);
			}
			else
			{
				fmt::append(ret, " ->");

				for (u32 j = 0; j < hex_count; ++j)
				{
					fmt::append(ret, " %02x", buf_tmp[j]);
				}
			}
		}

		fmt::trim_back(ret);
		ret += '\n';
	}
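
	// Choose which CR field to display: scan the instructions around CIA for an
	// explicit "crN" operand and show that field; a nearby store-conditional
	// (stdcx./stwcx.) implies CR0.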

	const u32 current_cia = cia;
	const u32 cr_packed = cr.pack();

	for (u32 addr :
	{
		current_cia,
		current_cia + 4,
		current_cia + 8,
		current_cia - 4,
		current_cia + 12,
	})
	{
		dis_asm.disasm(addr);

		if (dis_asm.last_opcode.size() <= 4)
		{
			continue;
		}

		if (usz index = dis_asm.last_opcode.rfind(",cr"); index < dis_asm.last_opcode.size() - 4)
		{
			const char result = dis_asm.last_opcode[index + 3];

			if (result >= '0' && result <= '7')
			{
				func_data->preferred_cr_field_index = result - '0';
				break;
			}
		}

		if (usz index = dis_asm.last_opcode.rfind(" cr"); index < dis_asm.last_opcode.size() - 4)
		{
			const char result = dis_asm.last_opcode[index + 3];

			if (result >= '0' && result <= '7')
			{
				func_data->preferred_cr_field_index = result - '0';
				break;
			}
		}

		if (dis_asm.last_opcode.find("stdcx.") != umax || dis_asm.last_opcode.find("stwcx.") != umax)
		{
			// Modifying CR0
			func_data->preferred_cr_field_index = 0;
			break;
		}
	}

	const u32 displayed_cr_field = (cr_packed >> ((7 - func_data->preferred_cr_field_index) * 4)) & 0xf;

	fmt::append(ret, "CR: 0x%08x, CR%d: [LT=%u GT=%u EQ=%u SO=%u]\n", cr_packed, func_data->preferred_cr_field_index, displayed_cr_field >> 3, (displayed_cr_field >> 2) & 1, (displayed_cr_field >> 1) & 1, displayed_cr_field & 1);

	for (uint i = 0; i < 32; ++i)
	{
		const f64 r = fpr[i];

		if (!std::bit_cast<u64>(r))
		{
			fmt::append(ret, "f%d%s: %-12.6G [%-18s] (f32=0x%x)\n", i, i <= 9 ? " " : "", r, "", std::bit_cast<u32>(f32(r)));
			continue;
		}

		fmt::append(ret, "f%d%s: %-12.6G [0x%016x] (f32=0x%x)\n", i, i <= 9 ? " " : "", r, std::bit_cast<u64>(r), std::bit_cast<u32>(f32(r)));
	}

	for (uint i = 0; i < 32; ++i, ret += '\n')
	{
		fmt::append(ret, "v%d%s:", i, i <= 9 ? " " : "");

		const auto r = vr[i];
		const u32 i3 = r.u32r[0];

		if (v128::from32p(i3) == r)
		{
			// Shorthand formatting
			fmt::append(ret, " %08x", i3);
			fmt::append(ret, " [x: %g]", r.fr[0]);
		}
		else
		{
			fmt::append(ret, " %08x %08x %08x %08x", r.u32r[0], r.u32r[1], r.u32r[2], r.u32r[3]);
			fmt::append(ret, " [x: %g y: %g z: %g w: %g]", r.fr[0], r.fr[1], r.fr[2], r.fr[3]);
		}
	}

	fmt::append(ret, "CIA: 0x%x\n", current_cia);
	fmt::append(ret, "LR: 0x%llx\n", lr);
	fmt::append(ret, "CTR: 0x%llx\n", ctr);
	fmt::append(ret, "VRSAVE: 0x%08x\n", vrsave);
	fmt::append(ret, "XER: [CA=%u | OV=%u | SO=%u | CNT=%u]\n", xer.ca, xer.ov, xer.so, xer.cnt);
	fmt::append(ret, "VSCR: [SAT=%u | NJ=%u]\n", sat, nj);
	fmt::append(ret, "FPSCR: [FL=%u | FG=%u | FE=%u | FU=%u]\n", fpscr.fl, fpscr.fg, fpscr.fe, fpscr.fu);

	const u32 addr = raddr;

	if (addr)
		fmt::append(ret, "Reservation Addr: 0x%x", addr);
	else
		fmt::append(ret, "Reservation Addr: none");

	fmt::append(ret, "\nReservation Data (entire cache line):\n");

	be_t<u32> data[32]{};
	std::memcpy(data, rdata, sizeof(rdata)); // Show the data even if the reservation was lost inside the atomic loop

	if (addr && !use_full_rdata)
	{
		const u32 offset = addr & 0x78;

		fmt::append(ret, "[0x%02x] %08x %08x\n", offset, data[offset / sizeof(u32)], data[offset / sizeof(u32) + 1]);

		// Asterisk marks the offset of data that had been given to the guest PPU code
		*(&ret.back() - (addr & 4 ? 9 : 18)) = '*';
	}
	else
	{
		for (usz i = 0; i < std::size(data); i += 4)
		{
			fmt::append(ret, "[0x%02x] %08x %08x %08x %08x\n", i * sizeof(data[0])
				, data[i + 0], data[i + 1], data[i + 2], data[i + 3]);
		}

		if (addr)
		{
			// See the note above
			*(&ret.back() - (4 - (addr % 16 / 4)) * 9 - (8 - (addr % 128 / 16)) * std::size("[0x00] "sv)) = '*';
		}
	}
}
std::string ppu_thread::dump_callstack() const
{
	std::string ret;

	fmt::append(ret, "Call stack:\n=========\n0x%08x (0x0) called\n", cia);

	for (const auto& sp : dump_callstack_list())
	{
		// TODO: function addresses too
		fmt::append(ret, "> from 0x%08x (sp=0x%08x)\n", sp.first, sp.second);
	}

	return ret;
}
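// Reconstructs the guest call stack by walking the ABI back-chain: *(SP) holds
// the caller's SP and *(SP + 16) holds the saved LR of each frame. Informal
// outline of the extra heuristic below (not authoritative): when LR still looks
// valid, the code around CIA is scanned to detect leaf functions, or prologues/
// epilogues where LR has not been stored to (or was already reloaded from) the
// stack, so the innermost caller is neither lost nor duplicated.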
std::vector<std::pair<u32, u32>> ppu_thread::dump_callstack_list() const
{
	//std::shared_lock rlock(vm::g_mutex); // Needs optimizations

	// Determine stack range
	const u64 r1 = gpr[1];

	if (r1 > u32{umax} || r1 % 0x10)
	{
		return {};
	}

	const u32 stack_ptr = static_cast<u32>(r1);

	if (!vm::check_addr(stack_ptr, vm::page_writable))
	{
		// Normally impossible unless the code does not follow ABI rules
		return {};
	}

	u32 stack_min = stack_ptr & ~0xfff;
	u32 stack_max = stack_min + 4096;

	while (stack_min && vm::check_addr(stack_min - 4096, vm::page_writable))
	{
		stack_min -= 4096;
	}

	while (stack_max + 4096 && vm::check_addr(stack_max, vm::page_writable))
	{
		stack_max += 4096;
	}

	std::vector<std::pair<u32, u32>> call_stack_list;

	bool is_first = true;
	bool skip_single_frame = false;

	const u64 _lr = this->lr;
	const u32 _cia = this->cia;
	const u64 gpr0 = this->gpr[0];

	for (
		u64 sp = r1;
		sp % 0x10 == 0u && sp >= stack_min && sp <= stack_max - ppu_stack_start_offset;
		is_first = false
		)
	{
		auto is_invalid = [](u64 addr)
		{
			if (addr > u32{umax} || addr % 4 || !vm::check_addr(static_cast<u32>(addr), vm::page_executable))
			{
				return true;
			}

			// Ignore HLE stop address
			return addr == g_fxo->get<ppu_function_manager>().func_addr(1, true);
		};

		if (is_first && !is_invalid(_lr))
		{
			// Detect functions with no stack frame, or code before LR has been stored
			// Track whether an instruction has already been passed through
			// Instead of using a map or set, use two vectors relative to CIA and resize as needed
			std::vector<be_t<u32>> inst_neg;
			std::vector<be_t<u32>> inst_pos;

			auto get_inst = [&](u32 pos) -> be_t<u32>&
			{
				static be_t<u32> s_inst_empty{};

				if (pos < _cia)
				{
					const u32 neg_dist = (_cia - pos - 4) / 4;

					if (neg_dist >= inst_neg.size())
					{
						const u32 inst_bound = pos & -256;

						const usz old_size = inst_neg.size();
						const usz new_size = neg_dist + (pos - inst_bound) / 4 + 1;

						if (new_size >= 0x8000)
						{
							// Gross lower limit for the function (if it is that size it is unlikely that it is even a leaf function)
							return s_inst_empty;
						}

						inst_neg.resize(new_size);

						if (!vm::try_access(inst_bound, &inst_neg[old_size], ::narrow<u32>((new_size - old_size) * sizeof(be_t<u32>)), false))
						{
							// Failure (this would be detected as failure by zeroes)
						}

						// Reverse the array (because this buffer extends backwards in address)
						for (usz start = old_size, end = new_size - 1; start < end; start++, end--)
						{
							std::swap(inst_neg[start], inst_neg[end]);
						}
					}

					return inst_neg[neg_dist];
				}

				const u32 pos_dist = (pos - _cia) / 4;

				if (pos_dist >= inst_pos.size())
				{
					const u32 inst_bound = utils::align<u32>(pos, 256);

					const usz old_size = inst_pos.size();
					const usz new_size = pos_dist + (inst_bound - pos) / 4 + 1;

					if (new_size >= 0x8000)
					{
						// Gross upper limit for the function (if it is that size it is unlikely that it is even a leaf function)
						return s_inst_empty;
					}

					inst_pos.resize(new_size);

					if (!vm::try_access(pos, &inst_pos[old_size], ::narrow<u32>((new_size - old_size) * sizeof(be_t<u32>)), false))
					{
						// Failure (this would be detected as failure by zeroes)
					}
				}

				return inst_pos[pos_dist];
			};

			bool upper_abort = false;

			struct context_t
			{
				u32 start_point;
				bool maybe_leaf = false; // True if the function is leaf or at the very end/start of non-leaf
				bool non_leaf = false; // Absolutely not a leaf
				bool about_to_push_frame = false; // STDU incoming
				bool about_to_store_lr = false; // Link is about to be stored on stack
				bool about_to_pop_frame = false; // ADDI R1 is about to be issued
				bool about_to_load_link = false; // MTLR is about to be issued
				bool maybe_use_reg0_instead_of_lr = false; // Use R0 at the end of a non-leaf function if ADDI has been issued before MTLR
			};

			// Start with CIA
			std::deque<context_t> workload{context_t{_cia}};

			usz start = 0;

			for (; start < workload.size(); start++)
			{
				for (u32 wa = workload[start].start_point; vm::check_addr(wa, vm::page_executable);)
				{
					be_t<u32>& opcode = get_inst(wa);

					auto& [_, maybe_leaf, non_leaf, about_to_push_frame, about_to_store_lr,
						about_to_pop_frame, about_to_load_link, maybe_use_reg0_instead_of_lr] = workload[start];

					if (!opcode)
					{
						// Already passed through, or a read failure
						break;
					}

					const ppu_opcode_t op{opcode};

					// Mark as passed through
					opcode = 0;

					const auto type = g_ppu_itype.decode(op.opcode);

					if (workload.size() == 1 && type == ppu_itype::STDU && op.rs == 1u && op.ra == 1u)
					{
						if (op.simm16 >= 0)
						{
							// Against ABI
							non_leaf = true;
							upper_abort = true;
							break;
						}

						// Pushing a frame: this is indeed a new function (ok because LR has not been saved yet)
						maybe_leaf = true;
						about_to_push_frame = true;
						about_to_pop_frame = false;
						upper_abort = true;
						break;
					}

					if (workload.size() == 1 && type == ppu_itype::STD && op.ra == 1u && op.rs == 0u)
					{
						bool found_matching_stdu = false;

						for (u32 back = 1; back < 20; back++)
						{
							be_t<u32>& opcode = get_inst(utils::sub_saturate<u32>(_cia, back * 4));

							if (!opcode)
							{
								// Already passed through, or a read failure
								break;
							}

							const ppu_opcode_t test_op{opcode};
							const auto type = g_ppu_itype.decode(test_op.opcode);

							if (type == ppu_itype::BCLR)
							{
								break;
							}

							if (type == ppu_itype::STDU && test_op.rs == 1u && test_op.ra == 1u)
							{
								if (0 - (test_op.ds << 2) == (op.ds << 2) - 0x10)
								{
									found_matching_stdu = true;
								}

								break;
							}
						}

						if (found_matching_stdu)
						{
							// Saving LR to stack: this is indeed a new function (ok because LR has not been saved yet)
							maybe_leaf = true;
							about_to_store_lr = true;
							about_to_pop_frame = true;
							upper_abort = true;
							break;
						}
					}

					const u32 spr = ((op.spr >> 5) | ((op.spr & 0x1f) << 5));

					// It can be placed before or after STDU, ignore for now
					// if (workload.size() == 1 && type == ppu_itype::MFSPR && op.rs == 0u && spr == 0x8)
					// {
					// 	// Saving LR to register: this is indeed a new function (ok because LR has not been saved yet)
					// 	maybe_leaf = true;
					// 	about_to_store_lr = true;
					// 	about_to_pop_frame = true;
					// }

					if (type == ppu_itype::MTSPR && spr == 0x8 && op.rs == 0u)
					{
						// Test for special case: if ADDI R1 is not found later in code, it means that LR is not restored and R0 should be used instead
						// Can also search for ADDI R1 backwards and pull the value from stack (needs more research to tell if it is more reliable)
						maybe_use_reg0_instead_of_lr = true;
					}

					if (type == ppu_itype::UNK)
					{
						// Ignore for now
						break;
					}

					if ((type & ppu_itype::branch) && op.lk)
					{
						// Gave up on LR before saving
						non_leaf = true;
						about_to_pop_frame = true;
						upper_abort = true;
						break;
					}

					// Even if BCLR is conditional, it still counts because the LR value is ready for return
					if (type == ppu_itype::BCLR)
					{
						// Returned
						maybe_leaf = true;
						upper_abort = true;
						break;
					}

					if (type == ppu_itype::ADDI && op.ra == 1u && op.rd == 1u)
					{
						if (op.simm16 < 0)
						{
							// Against ABI
							non_leaf = true;
							upper_abort = true;
							break;
						}
						else if (op.simm16 > 0)
						{
							// Remember that SP is about to be restored
							about_to_pop_frame = true;
							non_leaf = true;
							upper_abort = true;
							break;
						}
					}

					const auto results = op_branch_targets(wa, op);

					bool proceeded = false;

					for (usz res_i = 0; res_i < results.size(); res_i++)
					{
						const u32 route_pc = results[res_i];

						if (route_pc == umax)
						{
							continue;
						}

						if (vm::check_addr(route_pc, vm::page_executable) && get_inst(route_pc))
						{
							if (proceeded)
							{
								// Remember next route start point
								workload.push_back(context_t{route_pc});
							}
							else
							{
								// Next PC
								wa = route_pc;
								proceeded = true;
							}
						}
					}
				}

				if (upper_abort)
				{
					break;
				}
			}

			const context_t& res = workload[std::min<usz>(start, workload.size() - 1)];

			if (res.maybe_leaf && !res.non_leaf)
			{
				const u32 result = res.maybe_use_reg0_instead_of_lr ? static_cast<u32>(gpr0) : static_cast<u32>(_lr);

				// Same stack as far as we know
				call_stack_list.emplace_back(result, static_cast<u32>(sp));

				if (res.about_to_store_lr)
				{
					// LR has yet to be stored on stack, ignore the stack value
					skip_single_frame = true;
				}
			}

			if (res.about_to_pop_frame || (res.maybe_leaf && !res.non_leaf))
			{
				const u64 temp_sp = *vm::get_super_ptr<u64>(static_cast<u32>(sp));

				if (temp_sp <= sp)
				{
					// Ensure inequality and that the old stack pointer is higher than current
					break;
				}

				// Read the first stack frame so caller addresses can be obtained
				sp = temp_sp;
				continue;
			}
		}

		u64 addr = *vm::get_super_ptr<u64>(static_cast<u32>(sp + 16));

		if (skip_single_frame)
		{
			skip_single_frame = false;
		}
		else if (!is_invalid(addr))
		{
			// TODO: function addresses too
			call_stack_list.emplace_back(static_cast<u32>(addr), static_cast<u32>(sp));
		}
		else if (!is_first)
		{
			break;
		}

		const u64 temp_sp = *vm::get_super_ptr<u64>(static_cast<u32>(sp));

		if (temp_sp <= sp)
		{
			// Ensure inequality and that the old stack pointer is higher than current
			break;
		}

		sp = temp_sp;
		is_first = false;
	}

	return call_stack_list;
}
std::string ppu_thread::dump_misc() const
{
	std::string ret = cpu_thread::dump_misc();

	if (ack_suspend)
	{
		if (ret.ends_with("\n"))
		{
			ret.pop_back();
		}

		fmt::append(ret, " (LV2 suspended)\n");
	}

	fmt::append(ret, "Priority: %d\n", prio.load().prio);
	fmt::append(ret, "Stack: 0x%x..0x%x\n", stack_addr, stack_addr + stack_size - 1);
	fmt::append(ret, "Joiner: %s\n", joiner.load());

	if (const auto size = cmd_queue.size())
		fmt::append(ret, "Commands: %u\n", size);

	const char* _func = current_function;

	if (_func)
	{
		ret += "In function: ";
		ret += _func;
		ret += '\n';

		for (u32 i = 3; i <= 10; i++)
			if (u64 v = gpr[i]; v != syscall_args[i - 3])
				fmt::append(ret, " ** r%d: 0x%llx\n", i, v);
	}
	else if (is_paused() || is_stopped())
	{
		if (const auto last_func = last_function)
		{
			_func = last_func;
			ret += "Last function: ";
			ret += _func;
			ret += '\n';
		}
	}

	if (const auto _time = start_time)
	{
		fmt::append(ret, "Waiting: %fs\n", (get_guest_system_time() - _time) / 1000000.);
	}
	else
	{
		ret += '\n';
	}

	if (!_func)
	{
		ret += '\n';
	}

	return ret;
}
void ppu_thread::dump_all(std::string& ret) const
{
	cpu_thread::dump_all(ret);

	if (call_history.data.size() > 1)
	{
		ret +=
			"\nCalling History:"
			"\n================";

		fmt::append(ret, "%s", call_history);
	}

	if (syscall_history.data.size() > 1)
	{
		ret +=
			"\nHLE/LV2 History:"
			"\n================";

		fmt::append(ret, "%s", syscall_history);
	}
}
extern thread_local std::string(*g_tls_log_prefix)();
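// Thread entry point: PPU execution is command-driven. The loop below drains
// cmd_queue and dispatches each ppu_cmd (HLE/LLE calls, register setup,
// one-shot opcodes, initialization), returning when the thread is stopped.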
void ppu_thread::cpu_task()
{
	std::fesetround(FE_TONEAREST);

	if (g_cfg.core.set_daz_and_ftz)
	{
		gv_set_zeroing_denormals();
	}
	else
	{
		gv_unset_zeroing_denormals();
	}

	// Execute cmd_queue
	while (cmd64 cmd = cmd_wait())
	{
		const u32 arg = cmd.arg2<u32>(); // 32-bit arg extracted

		switch (auto type = cmd.arg1<ppu_cmd>())
		{
		case ppu_cmd::opcode:
		{
			cmd_pop(), g_fxo->get<ppu_interpreter_rt>().decode(arg)(*this, {arg}, vm::_ptr<u32>(cia - 4), &ppu_ret);
			break;
		}
		case ppu_cmd::set_gpr:
		{
			if (arg >= 32)
			{
				fmt::throw_exception("Invalid ppu_cmd::set_gpr arg (0x%x)", arg);
			}

			gpr[arg % 32] = cmd_get(1).as<u64>();
			cmd_pop(1);
			break;
		}
		case ppu_cmd::set_args:
		{
			if (arg > 8)
			{
				fmt::throw_exception("Unsupported ppu_cmd::set_args size (0x%x)", arg);
			}

			for (u32 i = 0; i < arg; i++)
			{
				gpr[i + 3] = cmd_get(1 + i).as<u64>();
			}

			cmd_pop(arg);
			break;
		}
		case ppu_cmd::lle_call:
		{
#ifdef __APPLE__
			pthread_jit_write_protect_np(true);
#endif
			const vm::ptr<u32> opd(arg < 32 ? vm::cast(gpr[arg]) : vm::cast(arg));
			cmd_pop(), fast_call(opd[0], opd[1]);
			break;
		}
		case ppu_cmd::entry_call:
		{
#ifdef __APPLE__
			pthread_jit_write_protect_np(true);
#endif
			cmd_pop(), fast_call(entry_func.addr, entry_func.rtoc, true);
			break;
		}
		case ppu_cmd::hle_call:
		{
			cmd_pop(), ::at32(ppu_function_manager::get(), arg)(*this, {arg}, vm::_ptr<u32>(cia - 4), &ppu_ret);
			break;
		}
		case ppu_cmd::opd_call:
		{
#ifdef __APPLE__
			pthread_jit_write_protect_np(true);
#endif
			const ppu_func_opd_t opd = cmd_get(1).as<ppu_func_opd_t>();
			cmd_pop(1), fast_call(opd.addr, opd.rtoc);
			break;
		}
		case ppu_cmd::ptr_call:
		{
			const ppu_intrp_func_t func = cmd_get(1).as<ppu_intrp_func_t>();
			cmd_pop(1), func(*this, {}, vm::_ptr<u32>(cia - 4), &ppu_ret);
			break;
		}
		case ppu_cmd::cia_call:
		{
			loaded_from_savestate = true;
			cmd_pop(), fast_call(std::exchange(cia, 0), gpr[2], true);
			break;
		}
		case ppu_cmd::initialize:
		{
#ifdef __APPLE__
			pthread_jit_write_protect_np(false);
#endif
			cmd_pop();

			ppu_initialize();

			if (Emu.IsStopped())
			{
				return;
			}

			spu_cache::initialize();

#ifdef __APPLE__
			pthread_jit_write_protect_np(true);
#endif
#ifdef ARCH_ARM64
			// Flush all cache lines after potentially writing executable code
			asm("ISB");
			asm("DSB ISH");
#endif

			// Wait until the progress dialog is closed.
			// We don't want to open a cell dialog while a native progress dialog is still open.
			while (u32 v = g_progr_ptotal)
			{
				if (Emu.IsStopped())
				{
					return;
				}

				g_progr_ptotal.wait(v);
			}

			g_fxo->get<progress_dialog_workaround>().show_overlay_message_only = true;

			// Sadly we can't postpone initializing guest time because we need to run PPU threads
			// (the farther it's postponed, the more guest time accuracy is lost)
			Emu.FixGuestTime();

			// Run SPUs waiting on a syscall (savestates related)
			idm::select<named_thread<spu_thread>>([&](u32, named_thread<spu_thread>& spu)
			{
				if (spu.group && spu.index == spu.group->waiter_spu_index)
				{
					if (std::exchange(spu.stop_flag_removal_protection, false))
					{
						return;
					}

					ensure(spu.state.test_and_reset(cpu_flag::stop));
					spu.state.notify_one();
				}
			});

			// Check if this is the only PPU left to initialize (savestates related)
			if (lv2_obj::is_scheduler_ready())
			{
				if (Emu.IsStarting())
				{
					Emu.FinalizeRunRequest();
				}
			}

			break;
		}
		case ppu_cmd::sleep:
		{
			cmd_pop(), lv2_obj::sleep(*this);
			break;
		}
		case ppu_cmd::reset_stack:
		{
			cmd_pop(), gpr[1] = stack_addr + stack_size - ppu_stack_start_offset;
			break;
		}
		default:
		{
			fmt::throw_exception("Unknown ppu_cmd(0x%x)", static_cast<u32>(type));
		}
		}
	}
}
void ppu_thread::cpu_sleep()
{
	// Clear reservation
	raddr = 0;

	// Set up the wait flag and memory flags to relock itself
	state += g_use_rtm ? cpu_flag::wait : cpu_flag::wait + cpu_flag::memory;

	if (auto ptr = vm::g_tls_locked)
	{
		ptr->compare_and_swap(this, nullptr);
	}

	lv2_obj::awake(this);
}
void ppu_thread::cpu_on_stop()
{
	if (current_function && is_stopped())
	{
		if (start_time)
		{
			ppu_log.warning("'%s' aborted (%fs)", current_function, (get_guest_system_time() - start_time) / 1000000.);
		}
		else
		{
			ppu_log.warning("'%s' aborted", current_function);
		}
	}

	current_function = {};

	// TODO: More conditions
	if (Emu.IsStopped() && g_cfg.core.ppu_debug)
	{
		std::string ret;
		dump_all(ret);
		ppu_log.notice("thread context: %s", ret);
	}
}
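// Dispatch loop. With a recompiler selected, control simply bounces through
// ppu_gateway into JIT-compiled code. In static interpreter mode, every 4-byte
// guest instruction at CIA has a paired dispatch entry in vm::g_exec_addr
// (hence the 'cia * 2' scaling below, assuming 8 bytes per entry).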
void ppu_thread::exec_task()
{
	if (g_cfg.core.ppu_decoder != ppu_decoder_type::_static)
	{
		while (true)
		{
			if (state) [[unlikely]]
			{
				if (check_state())
					break;
			}

			ppu_gateway(this);
		}

		return;
	}

	const auto cache = vm::g_exec_addr;
	const auto mem_ = vm::g_base_addr;

	while (true)
	{
		if (test_stopped()) [[unlikely]]
		{
			return;
		}

		gv_zeroupper();

		// Execute instruction (may be a step; execute only one instruction if state is set)
		const auto op = reinterpret_cast<be_t<u32>*>(mem_ + u64{cia});
		const auto fn = reinterpret_cast<ppu_intrp_func*>(cache + u64{cia} * 2);
		fn->fn(*this, {*op}, op, state ? &ppu_ret : fn + 1);
	}
}
ppu_thread::~ppu_thread()
{
	perf_log.notice("Perf stats for STCX reload: success %u, failure %u", last_succ, last_fail);
	perf_log.notice("Perf stats for instructions: total %u", exec_bytes / 4);
}
ppu_thread::ppu_thread(const ppu_thread_params& param, std::string_view name, u32 _prio, int detached)
	: cpu_thread(idm::last_id())
	, stack_size(param.stack_size)
	, stack_addr(param.stack_addr)
	, joiner(detached != 0 ? ppu_join_status::detached : ppu_join_status::joinable)
	, entry_func(param.entry)
	, start_time(get_guest_system_time())
	, is_interrupt_thread(detached < 0)
	, ppu_tname(make_single<std::string>(name))
{
	prio.raw().prio = _prio;

	gpr[1] = stack_addr + stack_size - ppu_stack_start_offset;
	gpr[13] = param.tls_addr;

	if (detached >= 0)
	{
		// Initialize thread args
		gpr[3] = param.arg0;
		gpr[4] = param.arg1;
	}

	optional_savestate_state = std::make_shared<utils::serial>();

	// Trigger the scheduler
	state += cpu_flag::suspend;

	if (!g_use_rtm)
	{
		state += cpu_flag::memory;
	}

	call_history.data.resize(g_cfg.core.ppu_call_history ? call_history_max_size : 1);
	syscall_history.data.resize(g_cfg.core.ppu_call_history ? syscall_history_max_size : 1);
	syscall_history.count_debug_arguments = static_cast<u32>(g_cfg.core.ppu_call_history ? std::size(syscall_history.data[0].args) : 0);

#ifdef __APPLE__
	pthread_jit_write_protect_np(true);
#endif
#ifdef ARCH_ARM64
	// Flush all cache lines after potentially writing executable code
	asm("ISB");
	asm("DSB ISH");
#endif
}
struct disable_precomp_t
{
	atomic_t<bool> disable = false;
};

void vdecEntry(ppu_thread& ppu, u32 vid);

bool ppu_thread::savable() const
{
	if (joiner == ppu_join_status::exited)
	{
		return false;
	}

	if (cia == g_fxo->get<ppu_function_manager>().func_addr(FIND_FUNC(vdecEntry)))
	{
		// Do not attempt to save the state of HLE VDEC threads
		return false;
	}

	return true;
}

void ppu_thread::serialize_common(utils::serial& ar)
{
	[[maybe_unused]] const s32 version = GET_OR_USE_SERIALIZATION_VERSION(ar.is_writing(), ppu);

	ar(gpr, fpr, cr, fpscr.bits, lr, ctr, vrsave, cia, xer, sat, nj, prio.raw().all);

	if (cia % 4 || (cia >> 28) >= 0xCu)
	{
		fmt::throw_exception("Failed to serialize PPU thread ID=0x%x (cia=0x%x, ar=%s)", this->id, cia, ar);
	}

	if (ar.is_writing())
	{
		ppu_log.notice("Saving PPU Thread [0x%x: %s]: cia=0x%x, state=%s", id, *ppu_tname.load(), cia, +state);
	}

	ar(optional_savestate_state, vr);

	if (!ar.is_writing())
	{
		if (optional_savestate_state->data.empty())
		{
			optional_savestate_state->clear();
		}

		optional_savestate_state->set_reading_state();
	}
}
struct save_lv2_tag
{
	atomic_t<bool> saved = false;
	atomic_t<bool> loaded = false;
};
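// Savestate restore path: rebuilds the thread from the serialized image and
// queues commands that replay the saved situation (rerunning the blocking
// syscall for sleeping threads, rescheduling runnable ones in their saved
// priority order) instead of resuming mid-instruction.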
ppu_thread::ppu_thread(utils::serial& ar)
	: cpu_thread(idm::last_id()) // last_id() is passed to the constructor on deserialization
	, stack_size(ar)
	, stack_addr(ar)
	, joiner(ar.pop<ppu_join_status>())
	, entry_func(std::bit_cast<ppu_func_opd_t, u64>(ar))
	, is_interrupt_thread(ar)
{
	[[maybe_unused]] const s32 version = GET_SERIALIZATION_VERSION(ppu);

	struct init_pushed
	{
		bool pushed = false;
		atomic_t<u32> inited = false;
	};

	call_history.data.resize(g_cfg.core.ppu_call_history ? call_history_max_size : 1);
	syscall_history.data.resize(g_cfg.core.ppu_call_history ? syscall_history_max_size : 1);
	syscall_history.count_debug_arguments = static_cast<u32>(g_cfg.core.ppu_call_history ? std::size(syscall_history.data[0].args) : 0);

	if (version >= 2 && !g_fxo->get<save_lv2_tag>().loaded.exchange(true))
	{
		ar(lv2_obj::g_priority_order_tag);
	}

	if (version >= 3)
	{
		// Function and module for HLE function relocation
		// TODO: Use it
		ar.pop<std::string>();
		ar.pop<std::string>();
	}

	serialize_common(ar);

	// Restore jm_mask
	jm_mask = nj ? 0x7F800000 : 0x7fff'ffff;

	auto queue_intr_entry = [&]()
	{
		if (is_interrupt_thread)
		{
			void ppu_interrupt_thread_entry(ppu_thread&, ppu_opcode_t, be_t<u32>*, struct ppu_intrp_func*);

			cmd_list
			({
				{ppu_cmd::ptr_call, 0},
				std::bit_cast<u64>(&ppu_interrupt_thread_entry)
			});
		}
	};

	switch (const u32 status = ar.pop<u32>())
	{
	case PPU_THREAD_STATUS_IDLE:
	{
		stop_flag_removal_protection = true;
		break;
	}
	case PPU_THREAD_STATUS_RUNNABLE:
	case PPU_THREAD_STATUS_ONPROC:
	{
		if (version >= 2)
		{
			const u32 order = ar.pop<u32>();

			struct awake_pushed
			{
				bool pushed = false;
				shared_mutex dummy;
				std::map<u32, ppu_thread*> awake_ppus;
			};

			g_fxo->get<awake_pushed>().awake_ppus[order] = this;

			if (!std::exchange(g_fxo->get<awake_pushed>().pushed, true))
			{
				Emu.PostponeInitCode([this]()
				{
					u32 prev = umax;

					for (auto ppu : g_fxo->get<awake_pushed>().awake_ppus)
					{
						ensure(prev + 1 == ppu.first);
						prev = ppu.first;
						lv2_obj::awake(ppu.second);
					}

					g_fxo->get<awake_pushed>().awake_ppus.clear();
				});
			}
		}
		else
		{
			lv2_obj::awake(this);
		}

		[[fallthrough]];
	}
	case PPU_THREAD_STATUS_SLEEP:
	{
		if (std::exchange(g_fxo->get<init_pushed>().pushed, true))
		{
			cmd_list
			({
				{ppu_cmd::ptr_call, 0}, +[](ppu_thread&) -> bool
				{
					while (!Emu.IsStopped() && !g_fxo->get<init_pushed>().inited)
					{
						thread_ctrl::wait_on(g_fxo->get<init_pushed>().inited, 0);
					}

					return false;
				}
			});
		}
		else
		{
			g_fxo->init<disable_precomp_t>();
			g_fxo->get<disable_precomp_t>().disable = true;

			cmd_push({ppu_cmd::initialize, 0});
			cmd_list
			({
				{ppu_cmd::ptr_call, 0}, +[](ppu_thread&) -> bool
				{
					auto& inited = g_fxo->get<init_pushed>().inited;
					inited = 1;
					inited.notify_all();
					return true;
				}
			});
		}

		if (status == PPU_THREAD_STATUS_SLEEP)
		{
			cmd_list
			({
				{ppu_cmd::ptr_call, 0},
				+[](ppu_thread& ppu) -> bool
				{
					const u32 op = vm::read32(ppu.cia);
					const auto& table = g_fxo->get<ppu_interpreter_rt>();

					ppu.loaded_from_savestate = true;
					ppu.prio.raw().preserve_bit = 1;
					table.decode(op)(ppu, {op}, vm::_ptr<u32>(ppu.cia), &ppu_ret);
					ppu.prio.raw().preserve_bit = 0;

					ppu.optional_savestate_state->clear(); // Reset to writing state
					ppu.loaded_from_savestate = false;
					return true;
				}
			});

			lv2_obj::set_future_sleep(this);
		}

		queue_intr_entry();
		cmd_push({ppu_cmd::cia_call, 0});
		break;
	}
	case PPU_THREAD_STATUS_ZOMBIE:
	{
		state += cpu_flag::exit;
		break;
	}
	case PPU_THREAD_STATUS_STOP:
	{
		queue_intr_entry();
		break;
	}
	}

	// Trigger the scheduler
	state += cpu_flag::suspend;

	if (!g_use_rtm)
	{
		state += cpu_flag::memory;
	}

	ppu_tname = make_single<std::string>(ar.pop<std::string>());

	ppu_log.notice("Loading PPU Thread [0x%x: %s]: cia=0x%x, state=%s", id, *ppu_tname.load(), cia, +state);
}
void ppu_thread::save(utils::serial& ar)
{
	USING_SERIALIZATION_VERSION(ppu);

	const u64 entry = std::bit_cast<u64>(entry_func);

	ppu_join_status _joiner = joiner;

	if (_joiner >= ppu_join_status::max)
	{
		// Joining thread should recover this member properly
		_joiner = ppu_join_status::joinable;
	}

	ar(stack_size, stack_addr, _joiner, entry, is_interrupt_thread);

	const bool is_null = ar.m_file_handler && ar.m_file_handler->is_null();

	if (!is_null && !g_fxo->get<save_lv2_tag>().saved.exchange(true))
	{
		ar(lv2_obj::g_priority_order_tag);
	}

	if (current_module && current_module[0])
	{
		ar(std::string{current_module});
		ar(std::string{last_function});
	}
	else
	{
		ar(std::string{});
		ar(std::string{});
	}

	serialize_common(ar);

	auto [status, order] = lv2_obj::ppu_state(this, false);

	if (status == PPU_THREAD_STATUS_SLEEP && cpu_flag::again - state)
	{
		// Hack for sys_fs
		status = PPU_THREAD_STATUS_RUNNABLE;
	}

	ar(status);

	if (status == PPU_THREAD_STATUS_RUNNABLE || status == PPU_THREAD_STATUS_ONPROC)
	{
		ar(order);
	}

	ar(*ppu_tname.load());
}
ppu_thread::thread_name_t::operator std::string() const
{
	std::string thread_name = fmt::format("PPU[0x%x]", _this->id);

	if (const std::string name = *_this->ppu_tname.load(); !name.empty())
	{
		fmt::append(thread_name, " %s", name);
	}

	return thread_name;
}
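// Command queue producer helpers. Note the publication order in cmd_list: the
// tail entries are written first and the head slot last, so a consumer polling
// the head (see cmd_wait) never observes a partially written list.
//
// Informal usage sketch (the 'ppu' pointer, argument values and 'entry' opd
// address are illustrative only):
//
//   ppu->cmd_list({
//       {ppu_cmd::set_args, 2}, arg0, arg1, // two u64 arguments in r3/r4
//       {ppu_cmd::lle_call, entry},         // then call through the opd
//   });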
void ppu_thread::cmd_push(cmd64 cmd)
{
	// Reserve queue space
	const u32 pos = cmd_queue.push_begin();

	// Write single command
	cmd_queue[pos] = cmd;
}

void ppu_thread::cmd_list(std::initializer_list<cmd64> list)
{
	// Reserve queue space
	const u32 pos = cmd_queue.push_begin(static_cast<u32>(list.size()));

	// Write command tail in relaxed manner
	for (u32 i = 1; i < list.size(); i++)
	{
		cmd_queue[pos + i].raw() = list.begin()[i];
	}

	// Write command head after all
	cmd_queue[pos] = *list.begin();
}

void ppu_thread::cmd_pop(u32 count)
{
	// Get current position
	const u32 pos = cmd_queue.peek();

	// Clean command buffer for command tail
	for (u32 i = 1; i <= count; i++)
	{
		cmd_queue[pos + i].raw() = cmd64{};
	}

	// Free
	cmd_queue.pop_end(count + 1);
}

cmd64 ppu_thread::cmd_wait()
{
	while (true)
	{
		if (cmd64 result = cmd_queue[cmd_queue.peek()].exchange(cmd64{}))
		{
			return result;
		}

		if (is_stopped())
		{
			return {};
		}

		thread_ctrl::wait_on(cmd_notify, 0);
		cmd_notify = 0;
	}
}
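// Returns the address of the i-th (1-based) stack argument. Assumes the PS3
// ABI layout used throughout this file: the parameter save area starts at
// SP + 0x30 with one 8-byte slot per argument.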
be_t<u64>* ppu_thread::get_stack_arg(s32 i, u64 align)
{
	if (align != 1 && align != 2 && align != 4 && align != 8 && align != 16) fmt::throw_exception("Unsupported alignment: 0x%llx", align);
	return vm::_ptr<u64>(vm::cast((gpr[1] + 0x30 + 0x8 * (i - 1)) & (0 - align)));
}
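// Executes guest code at addr/rtoc on top of the current context. CIA, RTOC,
// LR and the log prefix are saved and restored around the call; LR is pointed
// at the HLE stop address so a return from the callee exits exec_task.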
void ppu_thread::fast_call(u32 addr, u64 rtoc, bool is_thread_entry)
{
	const auto old_cia = cia;
	const auto old_rtoc = gpr[2];
	const auto old_lr = lr;
	const auto old_func = current_function;
	const auto old_fmt = g_tls_log_prefix;

	interrupt_thread_executing = true;
	cia = addr;
	gpr[2] = rtoc;
	lr = g_fxo->get<ppu_function_manager>().func_addr(1, true); // HLE stop address
	current_function = nullptr;

	if (std::exchange(loaded_from_savestate, false))
	{
		lr = old_lr;
	}

	g_tls_log_prefix = []
	{
		const auto _this = static_cast<ppu_thread*>(get_current_cpu_thread());

		static thread_local shared_ptr<std::string> name_cache;

		if (!_this->ppu_tname.is_equal(name_cache)) [[unlikely]]
		{
			_this->ppu_tname.peek_op([&](const shared_ptr<std::string>& ptr)
			{
				if (ptr != name_cache)
				{
					name_cache = ptr;
				}
			});
		}

		const auto cia = _this->cia;

		if (_this->current_function && vm::read32(cia) != ppu_instructions::SC(0))
		{
			return fmt::format("PPU[0x%x] Thread (%s) [HLE:0x%08x, LR:0x%08x]", _this->id, *name_cache.get(), cia, _this->lr);
		}

		extern const char* get_prx_name_by_cia(u32 addr);

		if (auto name = get_prx_name_by_cia(cia))
		{
			return fmt::format("PPU[0x%x] Thread (%s) [%s: 0x%08x]", _this->id, *name_cache.get(), name, cia);
		}

		return fmt::format("PPU[0x%x] Thread (%s) [0x%08x]", _this->id, *name_cache.get(), cia);
	};

	auto at_ret = [&]()
	{
		if (old_cia)
		{
			if (state & cpu_flag::again)
			{
				ppu_log.error("HLE callstack savestate is not implemented!");
			}

			cia = old_cia;
			gpr[2] = old_rtoc;
			lr = old_lr;
		}
		else if (state & cpu_flag::ret && cia == g_fxo->get<ppu_function_manager>().func_addr(1, true) + 4 && is_thread_entry)
		{
			std::string ret;
			dump_all(ret);
			ppu_log.error("Returning from the thread entry function! (func=0x%x)", entry_func.addr);
			ppu_log.notice("Thread context: %s", ret);

			lv2_obj::sleep(*this);

			// For savestates
			state += cpu_flag::again;
			std::memcpy(syscall_args, &gpr[3], sizeof(syscall_args));
		}

		if (!old_cia && state & cpu_flag::again)
		{
			// Fix up argument registers and CIA for reloading
			std::memcpy(&gpr[3], syscall_args, sizeof(syscall_args));
			cia -= 4;
		}

		current_function = old_func;
		g_tls_log_prefix = old_fmt;
		state -= cpu_flag::ret;
	};

	exec_task();
	at_ret();
}
std::pair<vm::addr_t, u32> ppu_thread::stack_push(u32 size, u32 align_v)
{
	if (auto cpu = get_current_cpu_thread<ppu_thread>())
	{
		ppu_thread& context = static_cast<ppu_thread&>(*cpu);

		const u32 old_pos = vm::cast(context.gpr[1]);
		context.gpr[1] -= size; // room of minimal possible size
		context.gpr[1] &= ~(u64{align_v} - 1); // fix stack alignment

		auto is_stack = [&](u64 addr)
		{
			return addr >= context.stack_addr && addr < context.stack_addr + context.stack_size;
		};

		// TODO: This check does not care about custom stack memory
		if (is_stack(old_pos) != is_stack(context.gpr[1]))
		{
			fmt::throw_exception("Stack overflow (size=0x%x, align=0x%x, SP=0x%llx, stack=*0x%x)", size, align_v, old_pos, context.stack_addr);
		}
		else
		{
			const u32 addr = static_cast<u32>(context.gpr[1]);
			std::memset(vm::base(addr), 0, size);
			return {vm::cast(addr), old_pos - addr};
		}
	}

	fmt::throw_exception("Invalid thread");
}

void ppu_thread::stack_pop_verbose(u32 addr, u32 size) noexcept
{
	if (auto cpu = get_current_cpu_thread<ppu_thread>())
	{
		ppu_thread& context = static_cast<ppu_thread&>(*cpu);

		if (context.gpr[1] != addr)
		{
			ppu_log.error("Stack inconsistency (addr=0x%x, SP=0x%llx, size=0x%x)", addr, context.gpr[1], size);
			return;
		}

		context.gpr[1] += size;
		return;
	}

	ppu_log.error("Invalid thread");
}
extern ppu_intrp_func_t ppu_get_syscall(u64 code);

void ppu_trap(ppu_thread& ppu, u64 addr)
{
	ensure((addr & (~u64{0xffff'ffff} | 0x3)) == 0);
	ppu.cia = static_cast<u32>(addr);

	u32 add = static_cast<u32>(g_cfg.core.stub_ppu_traps) * 4;

	// If stubbing is enabled, check the current instruction and the following
	if (!add || !vm::check_addr(ppu.cia, vm::page_executable) || !vm::check_addr(ppu.cia + add, vm::page_executable))
	{
		fmt::throw_exception("PPU Trap! Sometimes tweaking the setting \"Stub PPU Traps\" can be a workaround to this crash.\nBest values depend on game code, if unsure try 1.");
	}

	ppu_log.error("PPU Trap: Stubbing %d instructions %s.", std::abs(static_cast<s32>(add) / 4), add >> 31 ? "backwards" : "forwards");
	ppu.cia += add; // Skip instructions, hope for valid code (interpreter may be invoked temporarily)
}

static void ppu_error(ppu_thread& ppu, u64 addr, u32 /*op*/)
{
	ppu.cia = ::narrow<u32>(addr);
	ppu_recompiler_fallback(ppu);
}

static void ppu_check(ppu_thread& ppu, u64 addr)
{
	ppu.cia = ::narrow<u32>(addr);

	if (ppu.test_stopped())
	{
		return;
	}
}

static void ppu_trace(u64 addr)
{
	ppu_log.notice("Trace: 0x%llx", addr);
}
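// LWARX/LDARX emulation. Only 64 bits of reservation data are tracked by
// default; the full 128-byte cache line is snapshotted ("full rdata") when a
// matching STWCX./STDCX. is found within the configured loop length, since
// such loops require accurate 128-byte reservation semantics.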
template <typename T>
static T ppu_load_acquire_reservation(ppu_thread& ppu, u32 addr)
{
	perf_meter<"LARX"_u32> perf0;

	// Do not allow stores accessing the same cache line to pass the reservation load
	atomic_fence_seq_cst();

	if (addr % sizeof(T))
	{
		fmt::throw_exception("PPU %s: Unaligned address: 0x%08x", sizeof(T) == 4 ? "LWARX" : "LDARX", addr);
	}

	// Always load aligned 64-bit value
	auto& data = vm::_ref<const atomic_be_t<u64>>(addr & -8);
	const u64 size_off = (sizeof(T) * 8) & 63;
	const u64 data_off = (addr & 7) * 8;

	ppu.raddr = addr;

	u32 addr_mask = -1;

	if (const s32 max = g_cfg.core.ppu_128_reservations_loop_max_length)
	{
		// If we use it in HLE it means we want the accurate version
		ppu.use_full_rdata = max < 0 || ppu.current_function || [&]()
		{
			const u32 cia = ppu.cia;

			if ((cia & 0xffff) >= 0x10000u - max * 4)
			{
				// Do not cross 64k boundary
				return false;
			}

			const auto inst = vm::_ptr<const nse_t<u32>>(cia);

			// Search for STWCX or STDCX nearby (LDARX-STWCX and LWARX-STDCX loops will use accurate 128-byte reservations)
			constexpr u32 store_cond = stx::se_storage<u32>::swap(sizeof(T) == 8 ? 0x7C00012D : 0x7C0001AD);
			constexpr u32 mask = stx::se_storage<u32>::swap(0xFC0007FF);

			const auto store_vec = v128::from32p(store_cond);
			const auto mask_vec = v128::from32p(mask);

			s32 i = 2;

			for (const s32 _max = max - 3; i < _max; i += 4)
			{
				const auto _inst = v128::loadu(inst + i) & mask_vec;

				if (!gv_testz(gv_eq32(_inst, store_vec)))
				{
					return false;
				}
			}

			for (; i < max; i++)
			{
				const u32 val = inst[i] & mask;

				if (val == store_cond)
				{
					return false;
				}
			}

			return true;
		}();

		if (ppu.use_full_rdata)
		{
			addr_mask = -128;
		}
	}
	else
	{
		ppu.use_full_rdata = false;
	}

	if (ppu_log.trace && (addr & addr_mask) == (ppu.last_faddr & addr_mask))
	{
		ppu_log.trace(u8"LARX after fail: addr=0x%x, faddr=0x%x, time=%u c", addr, ppu.last_faddr, (perf0.get() - ppu.last_ftsc));
	}

	if ((addr & addr_mask) == (ppu.last_faddr & addr_mask) && (perf0.get() - ppu.last_ftsc) < 600 && (vm::reservation_acquire(addr) & -128) == ppu.last_ftime)
	{
		be_t<u64> rdata;
		std::memcpy(&rdata, &ppu.rdata[addr & 0x78], 8);

		if (rdata == data.load())
		{
			ppu.rtime = ppu.last_ftime;
			ppu.raddr = ppu.last_faddr;
			ppu.last_ftime = 0;
			return static_cast<T>(rdata << data_off >> size_off);
		}

		ppu.last_fail++;
		ppu.last_faddr = 0;
	}
	else
	{
		// Silent failure
		ppu.last_faddr = 0;
	}

	ppu.rtime = vm::reservation_acquire(addr) & -128;

	be_t<u64> rdata;

	if (!ppu.use_full_rdata)
	{
		rdata = data.load();

		// Store only 64 bits of reservation data
		std::memcpy(&ppu.rdata[addr & 0x78], &rdata, 8);
	}
	else
	{
		mov_rdata(ppu.rdata, vm::_ref<spu_rdata_t>(addr & -128));
		atomic_fence_acquire();

		// Load relevant 64 bits of reservation data
		std::memcpy(&rdata, &ppu.rdata[addr & 0x78], 8);
	}

	return static_cast<T>(rdata << data_off >> size_off);
}

extern u32 ppu_lwarx(ppu_thread& ppu, u32 addr)
{
	return ppu_load_acquire_reservation<u32>(ppu, addr);
}

extern u64 ppu_ldarx(ppu_thread& ppu, u32 addr)
{
	return ppu_load_acquire_reservation<u64>(ppu, addr);
}
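// TSX path for accurate STCX: inside a hardware transaction, the 128-byte line
// is compared against the reservation snapshot and the 8-byte store is applied
// only on a full match. As far as the visible control flow goes, it returns
// the elapsed TSC delta on success, 0 on data mismatch (the current line data
// is written back to rdata and last_ftime is set), and -1 when the transaction
// could not be completed and the caller must fall back.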
const auto ppu_stcx_accurate_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime, const void* _old, u64 _new)>("ppu_stcx_accurate_tx", [](native_asm& c, auto& args)
{
	using namespace asmjit;

#if defined(ARCH_X64)
	Label fall = c.newLabel();
	Label fail = c.newLabel();
	Label _ret = c.newLabel();
	Label load = c.newLabel();

	//if (utils::has_avx() && !s_tsx_avx)
	//{
	//	c.vzeroupper();
	//}

	// Create stack frame if necessary (Windows ABI has only 6 volatile vector registers)
	c.push(x86::rbp);
	c.push(x86::r14);
	c.sub(x86::rsp, 40);
#ifdef _WIN32
	if (!s_tsx_avx)
	{
		c.movups(x86::oword_ptr(x86::rsp, 0), x86::xmm6);
		c.movups(x86::oword_ptr(x86::rsp, 16), x86::xmm7);
	}
#endif

	// Prepare registers
	build_swap_rdx_with(c, args, x86::r10);
	c.mov(x86::rbp, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_sudo_addr)));
	c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
	c.and_(x86::rbp, -128);
	c.prefetchw(x86::byte_ptr(x86::rbp, 0));
	c.prefetchw(x86::byte_ptr(x86::rbp, 64));
	c.movzx(args[0].r32(), args[0].r16());
	c.shr(args[0].r32(), 1);
	c.lea(x86::r11, x86::qword_ptr(reinterpret_cast<u64>(+vm::g_reservations), args[0]));
	c.and_(x86::r11, -128 / 2);
	c.and_(args[0].r32(), 63);

	// Prepare data
	if (s_tsx_avx)
	{
		c.vmovups(x86::ymm0, x86::ymmword_ptr(args[2], 0));
		c.vmovups(x86::ymm1, x86::ymmword_ptr(args[2], 32));
		c.vmovups(x86::ymm2, x86::ymmword_ptr(args[2], 64));
		c.vmovups(x86::ymm3, x86::ymmword_ptr(args[2], 96));
	}
	else
	{
		c.movaps(x86::xmm0, x86::oword_ptr(args[2], 0));
		c.movaps(x86::xmm1, x86::oword_ptr(args[2], 16));
		c.movaps(x86::xmm2, x86::oword_ptr(args[2], 32));
		c.movaps(x86::xmm3, x86::oword_ptr(args[2], 48));
		c.movaps(x86::xmm4, x86::oword_ptr(args[2], 64));
		c.movaps(x86::xmm5, x86::oword_ptr(args[2], 80));
		c.movaps(x86::xmm6, x86::oword_ptr(args[2], 96));
		c.movaps(x86::xmm7, x86::oword_ptr(args[2], 112));
	}

	// Alloc r14 to stamp0
	const auto stamp0 = x86::r14;
	build_get_tsc(c, stamp0);

	Label fail2 = c.newLabel();

	Label tx1 = build_transaction_enter(c, fall, [&]()
	{
		build_get_tsc(c);
		c.sub(x86::rax, stamp0);
		c.cmp(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&g_rtm_tx_limit2)));
		c.jae(fall);
	});

	// Check pause flag
	c.bt(x86::dword_ptr(args[2], ::offset32(&ppu_thread::state) - ::offset32(&ppu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
	c.jc(fall);
	c.xbegin(tx1);

	if (s_tsx_avx)
	{
		c.vxorps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(x86::rbp, 0));
		c.vxorps(x86::ymm1, x86::ymm1, x86::ymmword_ptr(x86::rbp, 32));
		c.vxorps(x86::ymm2, x86::ymm2, x86::ymmword_ptr(x86::rbp, 64));
		c.vxorps(x86::ymm3, x86::ymm3, x86::ymmword_ptr(x86::rbp, 96));
		c.vorps(x86::ymm0, x86::ymm0, x86::ymm1);
		c.vorps(x86::ymm1, x86::ymm2, x86::ymm3);
		c.vorps(x86::ymm0, x86::ymm1, x86::ymm0);
		c.vptest(x86::ymm0, x86::ymm0);
	}
	else
	{
		c.xorps(x86::xmm0, x86::oword_ptr(x86::rbp, 0));
		c.xorps(x86::xmm1, x86::oword_ptr(x86::rbp, 16));
		c.xorps(x86::xmm2, x86::oword_ptr(x86::rbp, 32));
		c.xorps(x86::xmm3, x86::oword_ptr(x86::rbp, 48));
		c.xorps(x86::xmm4, x86::oword_ptr(x86::rbp, 64));
		c.xorps(x86::xmm5, x86::oword_ptr(x86::rbp, 80));
		c.xorps(x86::xmm6, x86::oword_ptr(x86::rbp, 96));
		c.xorps(x86::xmm7, x86::oword_ptr(x86::rbp, 112));
		c.orps(x86::xmm0, x86::xmm1);
		c.orps(x86::xmm2, x86::xmm3);
		c.orps(x86::xmm4, x86::xmm5);
		c.orps(x86::xmm6, x86::xmm7);
		c.orps(x86::xmm0, x86::xmm2);
		c.orps(x86::xmm4, x86::xmm6);
		c.orps(x86::xmm0, x86::xmm4);
		c.ptest(x86::xmm0, x86::xmm0);
	}

	c.jnz(fail);

	// Store 8 bytes
	c.mov(x86::qword_ptr(x86::rbp, args[0], 1, 0), args[3]);

	c.xend();
	c.lock().add(x86::qword_ptr(x86::r11), 64);
	build_get_tsc(c);
	c.sub(x86::rax, stamp0);
	c.jmp(_ret);

	// XABORT is expensive so try to finish with xend instead
	c.bind(fail);

	// Load old data to store back in rdata
	if (s_tsx_avx)
	{
		c.vmovaps(x86::ymm0, x86::ymmword_ptr(x86::rbp, 0));
		c.vmovaps(x86::ymm1, x86::ymmword_ptr(x86::rbp, 32));
		c.vmovaps(x86::ymm2, x86::ymmword_ptr(x86::rbp, 64));
		c.vmovaps(x86::ymm3, x86::ymmword_ptr(x86::rbp, 96));
	}
	else
	{
		c.movaps(x86::xmm0, x86::oword_ptr(x86::rbp, 0));
		c.movaps(x86::xmm1, x86::oword_ptr(x86::rbp, 16));
		c.movaps(x86::xmm2, x86::oword_ptr(x86::rbp, 32));
		c.movaps(x86::xmm3, x86::oword_ptr(x86::rbp, 48));
		c.movaps(x86::xmm4, x86::oword_ptr(x86::rbp, 64));
		c.movaps(x86::xmm5, x86::oword_ptr(x86::rbp, 80));
		c.movaps(x86::xmm6, x86::oword_ptr(x86::rbp, 96));
		c.movaps(x86::xmm7, x86::oword_ptr(x86::rbp, 112));
	}

	c.xend();
	c.jmp(fail2);

	c.bind(fall);
	c.mov(x86::rax, -1);
	c.jmp(_ret);

	c.bind(fail2);
	c.lock().sub(x86::qword_ptr(x86::r11), 64);
	c.bind(load);

	// Store previous data back to rdata
	if (s_tsx_avx)
	{
		c.vmovaps(x86::ymmword_ptr(args[2], 0), x86::ymm0);
		c.vmovaps(x86::ymmword_ptr(args[2], 32), x86::ymm1);
		c.vmovaps(x86::ymmword_ptr(args[2], 64), x86::ymm2);
		c.vmovaps(x86::ymmword_ptr(args[2], 96), x86::ymm3);
	}
	else
	{
		c.movaps(x86::oword_ptr(args[2], 0), x86::xmm0);
		c.movaps(x86::oword_ptr(args[2], 16), x86::xmm1);
		c.movaps(x86::oword_ptr(args[2], 32), x86::xmm2);
		c.movaps(x86::oword_ptr(args[2], 48), x86::xmm3);
		c.movaps(x86::oword_ptr(args[2], 64), x86::xmm4);
		c.movaps(x86::oword_ptr(args[2], 80), x86::xmm5);
		c.movaps(x86::oword_ptr(args[2], 96), x86::xmm6);
		c.movaps(x86::oword_ptr(args[2], 112), x86::xmm7);
	}

	c.mov(x86::rax, -1);
	c.mov(x86::qword_ptr(args[2], ::offset32(&ppu_thread::last_ftime) - ::offset32(&ppu_thread::rdata)), x86::rax);
	c.xor_(x86::eax, x86::eax);
	//c.jmp(_ret);

	c.bind(_ret);

#ifdef _WIN32
	if (!s_tsx_avx)
	{
		c.vmovups(x86::xmm6, x86::oword_ptr(x86::rsp, 0));
		c.vmovups(x86::xmm7, x86::oword_ptr(x86::rsp, 16));
	}
#endif

	if (s_tsx_avx)
	{
		c.vzeroupper();
	}

	c.add(x86::rsp, 40);
	c.pop(x86::r14);
	c.pop(x86::rbp);

	maybe_flush_lbr(c);
	c.ret();
#else
	// Unimplemented, should fail
	c.brk(Imm(0x42));
	c.ret(a64::x30);
#endif
});
template < typename T >
2020-05-01 22:52:10 +02:00
static bool ppu_store_reservation ( ppu_thread & ppu , u32 addr , u64 reg_value )
2016-06-22 15:37:51 +02:00
{
2020-10-18 14:00:10 +02:00
perf_meter < " STCX " _u32 > perf0 ;
2020-09-10 05:27:55 +02:00
if ( addr % sizeof ( T ) )
{
2020-12-09 16:04:52 +01:00
fmt : : throw_exception ( " PPU %s: Unaligned address: 0x%08x " , sizeof ( T ) = = 4 ? " STWCX " : " STDCX " , addr ) ;
2020-09-10 05:27:55 +02:00
}
2020-05-01 22:52:10 +02:00
auto & data = vm : : _ref < atomic_be_t < u64 > > ( addr & - 8 ) ;
2021-03-05 20:05:37 +01:00
auto & res = vm : : reservation_acquire ( addr ) ;
2020-04-07 19:29:11 +02:00
const u64 rtime = ppu . rtime ;
2017-02-17 20:35:57 +01:00
2020-10-29 19:46:50 +01:00
be_t < u64 > old_data = 0 ;
std : : memcpy ( & old_data , & ppu . rdata [ addr & 0x78 ] , sizeof ( old_data ) ) ;
be_t < u64 > new_data = old_data ;
2020-05-01 22:52:10 +02:00
if constexpr ( sizeof ( T ) = = sizeof ( u32 ) )
{
// Rebuild reg_value to be 32-bits of new data and 32-bits of old data
2020-10-29 19:46:50 +01:00
const be_t < u32 > reg32 = static_cast < u32 > ( reg_value ) ;
std : : memcpy ( reinterpret_cast < char * > ( & new_data ) + ( addr & 4 ) , & reg32 , sizeof ( u32 ) ) ;
}
else
{
new_data = reg_value ;
2020-05-01 22:52:10 +02:00
}
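// Illustrative sketch (not emulator code) of the STWCX merge above, assuming
// addr = 0x10004 so (addr & 4) == 4 selects bytes 4..7 of the dword:
//   be_t<u64> line = old_data;                  // full dword captured by LWARX
//   const be_t<u32> word = static_cast<u32>(reg_value);
//   std::memcpy(reinterpret_cast<char*>(&line) + (addr & 4), &word, sizeof(word));
//   // 'line' now holds 32 bits of new data and 32 bits of old data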
2020-04-07 19:29:11 +02:00
// Test if the store address is within the same aligned 8 bytes of memory as the load
if ( const u32 raddr = std : : exchange ( ppu . raddr , 0 ) ; raddr / 8 ! = addr / 8 )
2017-02-17 20:35:57 +01:00
{
2020-04-07 19:29:11 +02:00
// If not, but it is within the same aligned 128-byte memory, proceed only if 128-byte reservations are enabled
// On real hardware the store address can be anywhere within the 128-byte cache line
if ( raddr / 128 ! = addr / 128 | | ! ppu . use_full_rdata )
2020-09-10 05:27:55 +02:00
{
2020-04-07 19:29:11 +02:00
// Even when the reservation address does not match, the target address must still be valid
2020-11-10 18:09:28 +01:00
if ( ! vm : : check_addr ( addr , vm : : page_writable ) )
2020-04-07 19:29:11 +02:00
{
// Raise an access violation
data + = 0 ;
}
return false ;
2020-09-10 05:27:55 +02:00
}
2020-04-07 19:29:11 +02:00
}
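// Worked example for the check above: raddr = 0x10008 (LWARX) and
// addr = 0x10010 (STWCX) differ at 8-byte granularity (raddr / 8 != addr / 8)
// but share the 128-byte line (raddr / 128 == addr / 128), so the store may
// only proceed when use_full_rdata (128-byte reservations) is enabled.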
2020-09-10 05:27:55 +02:00
2020-04-07 19:29:11 +02:00
if ( old_data ! = data | | rtime ! = ( res & - 128 ) )
{
2020-05-08 19:41:15 +02:00
return false ;
}
2020-04-07 19:29:11 +02:00
if ( [ & ] ( )
2020-05-08 19:41:15 +02:00
{
2020-04-07 19:29:11 +02:00
if ( ppu . use_full_rdata ) [[unlikely]]
2020-05-08 19:41:15 +02:00
{
2021-12-18 16:12:37 +01:00
auto [ _oldd , _ok ] = res . fetch_op ( [ & ] ( u64 & r )
{
if ( ( r & - 128 ) ! = rtime | | ( r & 127 ) )
{
return false ;
}
r + = vm : : rsrv_unique_lock ;
return true ;
} ) ;
if ( ! _ok )
{
// Already locked or updated: give up
return false ;
}
2020-04-07 19:29:11 +02:00
if ( g_use_rtm ) [[likely]]
{
2020-10-30 23:52:24 +01:00
switch ( u64 count = ppu_stcx_accurate_tx ( addr & - 8 , rtime , ppu . rdata , std : : bit_cast < u64 > ( new_data ) ) )
2020-04-07 19:29:11 +02:00
{
2021-05-22 09:35:15 +02:00
case umax :
2020-04-07 19:29:11 +02:00
{
2020-10-30 03:17:00 +01:00
auto & all_data = * vm : : get_super_ptr < spu_rdata_t > ( addr & - 128 ) ;
auto & sdata = * vm : : get_super_ptr < atomic_be_t < u64 > > ( addr & - 8 ) ;
2020-10-29 23:22:28 +01:00
2020-11-09 01:41:56 +01:00
const bool ok = cpu_thread : : suspend_all < + 3 > ( & ppu , { all_data , all_data + 64 , & res } , [ & ]
2020-10-30 03:17:00 +01:00
{
2020-10-29 23:22:28 +01:00
if ( ( res & - 128 ) = = rtime & & cmp_rdata ( ppu . rdata , all_data ) )
{
2020-10-30 03:17:00 +01:00
sdata . release ( new_data ) ;
2021-12-18 16:12:37 +01:00
res + = 64 ;
2020-10-29 23:22:28 +01:00
return true ;
}
2020-10-30 03:17:00 +01:00
mov_rdata_nt ( ppu . rdata , all_data ) ;
2021-12-18 16:12:37 +01:00
res - = 64 ;
2020-10-29 23:22:28 +01:00
return false ;
} ) ;
if ( ok )
{
break ;
}
2020-10-30 23:52:24 +01:00
ppu . last_ftime = - 1 ;
2020-10-29 23:22:28 +01:00
[[fallthrough]] ;
2020-04-07 19:29:11 +02:00
}
2020-10-29 23:22:28 +01:00
case 0 :
2020-10-19 14:31:10 +02:00
{
2020-10-29 23:22:28 +01:00
if ( ppu . last_faddr = = addr )
{
ppu . last_fail + + ;
}
2020-10-30 23:52:24 +01:00
if ( ppu . last_ftime ! = umax )
{
ppu . last_faddr = 0 ;
return false ;
}
2020-11-24 06:18:31 +01:00
utils : : prefetch_read ( ppu . rdata ) ;
utils : : prefetch_read ( ppu . rdata + 64 ) ;
2020-10-29 23:22:28 +01:00
ppu . last_faddr = addr ;
ppu . last_ftime = res . load ( ) & - 128 ;
2021-12-30 17:39:18 +01:00
ppu . last_ftsc = utils : : get_tsc ( ) ;
2020-10-29 23:22:28 +01:00
return false ;
2020-10-19 14:31:10 +02:00
}
default :
2020-04-07 19:29:11 +02:00
{
2020-10-30 23:52:24 +01:00
if ( count > 20000 & & g_cfg . core . perf_report ) [[unlikely]]
2020-10-19 14:31:10 +02:00
{
2020-10-30 23:52:24 +01:00
perf_log . warning ( u8 " STCX: took too long: %.3fµs (%u c) " , count / ( utils : : get_tsc_freq ( ) / 1000'000 . ) , count ) ;
2020-10-19 14:31:10 +02:00
}
2020-10-29 23:22:28 +01:00
break ;
2020-04-07 19:29:11 +02:00
}
}
2020-05-08 19:41:15 +02:00
2020-10-29 23:22:28 +01:00
if ( ppu . last_faddr = = addr )
2020-10-08 15:13:55 +02:00
{
2020-10-29 23:22:28 +01:00
ppu . last_succ + + ;
}
2020-05-01 22:52:10 +02:00
2020-10-29 23:22:28 +01:00
ppu . last_faddr = 0 ;
return true ;
2020-04-07 19:29:11 +02:00
}
// Align address: we do not need the lower 7 bits anymore
addr & = - 128 ;
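// (-128 as u32 is 0xFFFF'FF80: keeps the cache-line base, drops the byte offset)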
// Cache line data
2021-01-12 11:01:06 +01:00
//auto& cline_data = vm::_ref<spu_rdata_t>(addr);
2020-04-07 19:29:11 +02:00
data + = 0 ;
2024-03-27 09:36:00 +01:00
auto range_lock = vm : : alloc_range_lock ( ) ;
2024-03-27 09:40:25 +01:00
bool success = false ;
2020-04-07 19:29:11 +02:00
{
2024-03-27 09:36:00 +01:00
rsx : : reservation_lock rsx_lock ( addr , 128 ) ;
2020-04-07 19:29:11 +02:00
2024-03-27 09:36:00 +01:00
auto & super_data = * vm : : get_super_ptr < spu_rdata_t > ( addr ) ;
2024-03-27 09:40:25 +01:00
success = [ & ] ( )
2020-04-07 19:29:11 +02:00
{
2024-03-27 09:36:00 +01:00
// Full lock (heavyweight)
// TODO: vm::check_addr
vm : : writer_lock lock ( addr , range_lock ) ;
2020-04-07 19:29:11 +02:00
2024-03-27 09:36:00 +01:00
if ( cmp_rdata ( ppu . rdata , super_data ) )
{
data . release ( new_data ) ;
res + = 64 ;
return true ;
}
res - = 64 ;
return false ;
} ( ) ;
}
vm : : free_range_lock ( range_lock ) ;
2020-04-07 19:29:11 +02:00
return success ;
2019-05-18 22:47:35 +02:00
}
2020-04-07 19:29:11 +02:00
2020-10-29 19:46:50 +01:00
if ( new_data = = old_data )
2017-07-18 19:03:47 +02:00
{
2020-10-29 23:22:28 +01:00
ppu . last_faddr = 0 ;
2020-04-07 19:29:11 +02:00
return res . compare_and_swap_test ( rtime , rtime + 128 ) ;
2019-05-18 22:47:35 +02:00
}
2020-04-07 19:29:11 +02:00
// Aligned 8-byte reservations will be used here
addr & = - 8 ;
2021-12-17 19:48:01 +01:00
const u64 lock_bits = vm : : rsrv_unique_lock ;
2020-10-31 18:27:28 +01:00
2020-10-15 11:40:53 +02:00
auto [ _oldd , _ok ] = res . fetch_op ( [ & ] ( u64 & r )
2020-04-07 19:29:11 +02:00
{
2020-10-15 11:40:53 +02:00
if ( ( r & - 128 ) ! = rtime | | ( r & 127 ) )
2020-10-08 15:13:55 +02:00
{
return false ;
}
2020-10-31 18:27:28 +01:00
r + = lock_bits ;
2020-10-15 11:40:53 +02:00
return true ;
} ) ;
2020-10-08 15:13:55 +02:00
2020-10-15 11:40:53 +02:00
// Give up if reservation has been locked or updated
if ( ! _ok )
{
2020-10-29 23:22:28 +01:00
ppu . last_faddr = 0 ;
2020-10-15 11:40:53 +02:00
return false ;
2020-04-07 19:29:11 +02:00
}
2017-02-17 20:35:57 +01:00
2020-10-29 19:46:50 +01:00
// Store previous value in old_data on failure
if ( data . compare_exchange ( old_data , new_data ) )
2020-10-08 15:13:55 +02:00
{
2020-10-31 18:27:28 +01:00
res + = 128 - lock_bits ;
2020-10-08 15:13:55 +02:00
return true ;
}
2020-10-31 18:27:28 +01:00
const u64 old_rtime = res . fetch_sub ( lock_bits ) ;
2020-10-29 23:22:28 +01:00
// TODO: disabled with this setting on, since it's dangerous to mix
if ( ! g_cfg . core . ppu_128_reservations_loop_max_length )
{
// Store old_data on failure
if ( ppu . last_faddr = = addr )
{
ppu . last_fail + + ;
}
ppu . last_faddr = addr ;
ppu . last_ftime = old_rtime & - 128 ;
2021-12-30 17:39:18 +01:00
ppu . last_ftsc = utils : : get_tsc ( ) ;
2020-10-29 23:22:28 +01:00
std : : memcpy ( & ppu . rdata [ addr & 0x78 ] , & old_data , 8 ) ;
}
2020-10-08 15:13:55 +02:00
return false ;
2020-04-07 19:29:11 +02:00
} ( ) )
2017-02-17 20:35:57 +01:00
{
2022-09-03 05:46:16 +02:00
extern atomic_t < u32 > liblv2_begin , liblv2_end ;
2022-08-18 15:10:24 +02:00
2023-08-16 08:47:45 +02:00
const u32 notify = ppu . res_notify ;
if ( notify )
{
vm : : reservation_notifier ( notify ) . notify_all ( ) ;
ppu . res_notify = 0 ;
}
// Avoid notifications from lwmutex or sys_spinlock
2022-09-03 05:46:16 +02:00
if ( ppu . cia < liblv2_begin | | ppu . cia > = liblv2_end )
2022-08-18 15:10:24 +02:00
{
2023-08-16 08:47:45 +02:00
if ( ! notify )
{
// Try to postpone the notification until the PPU is asleep, or join notifications on the same address
// This also optimizes mutexes: no notification right after the lock is acquired (which would prolong the critical section), only on unlock
ppu . res_notify = addr ;
}
else if ( ( addr ^ notify ) & - 128 )
{
res . notify_all ( ) ;
}
2022-08-18 15:10:24 +02:00
}
2020-10-29 23:22:28 +01:00
if ( addr = = ppu . last_faddr )
{
ppu . last_succ + + ;
}
ppu . last_faddr = 0 ;
2020-04-07 19:29:11 +02:00
return true ;
2018-05-21 19:25:05 +02:00
}
2017-02-17 20:35:57 +01:00
2023-08-16 08:47:45 +02:00
const u32 notify = ppu . res_notify ;
// Do not risk postponing the notification for too long (this is probably an indefinite retry loop)
// And on failure this thread has some time to do something else
if ( notify & & ( ( addr ^ notify ) & - 128 ) )
{
vm : : reservation_notifier ( notify ) . notify_all ( ) ;
ppu . res_notify = 0 ;
}
2020-04-07 19:29:11 +02:00
return false ;
2016-06-22 15:37:51 +02:00
}
2016-06-07 22:24:20 +02:00
2020-04-13 12:29:01 +02:00
extern bool ppu_stwcx ( ppu_thread & ppu , u32 addr , u32 reg_value )
{
return ppu_store_reservation < u32 > ( ppu , addr , reg_value ) ;
}
extern bool ppu_stdcx ( ppu_thread & ppu , u32 addr , u64 reg_value )
{
return ppu_store_reservation < u64 > ( ppu , addr , reg_value ) ;
}
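// A minimal sketch (not emulator code) of the guest-visible semantics these
// entry points implement, assuming ppu_lwarx is the matching load helper
// (declared elsewhere; it appears in the link table below as "__lwarx"):
//
//   extern u32 ppu_lwarx(ppu_thread& ppu, u32 addr);
//
//   // Guest "atomic add word" idiom: retry until the conditional store sticks.
//   static u32 guest_atomic_add(ppu_thread& ppu, u32 addr, u32 delta)
//   {
//       u32 old;
//       do
//       {
//           old = ppu_lwarx(ppu, addr);              // acquire reservation
//       }
//       while (!ppu_stwcx(ppu, addr, old + delta));  // false => reservation lost, retry
//       return old;
//   }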
2023-09-09 12:28:33 +02:00
struct jit_core_allocator
{
2023-09-11 11:52:10 +02:00
const s16 thread_count = g_cfg . core . llvm_threads ? std : : min < s32 > ( g_cfg . core . llvm_threads , limit ( ) ) : limit ( ) ;
2023-09-09 12:28:33 +02:00
// Initialize global semaphore with the max number of threads
2023-09-11 11:52:10 +02:00
: : semaphore < 0x7fff > sem { std : : max < s16 > ( thread_count , 1 ) } ;
2023-09-09 12:28:33 +02:00
2023-09-11 11:52:10 +02:00
static s16 limit ( )
2023-09-09 12:28:33 +02:00
{
2023-09-11 11:52:10 +02:00
return static_cast < s16 > ( std : : min < s32 > ( 0x7fff , utils : : get_thread_count ( ) ) ) ;
2023-09-09 12:28:33 +02:00
}
} ;
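// Usage note: compilation sites below (e.g. the analysis step in
// ppu_precompile and the worker threads in ppu_initialize) take one slot via
//   std::lock_guard lock(g_fxo->get<jit_core_allocator>().sem);
// to cap the number of concurrently running LLVM threads at 'thread_count'.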
2021-01-27 14:08:43 +01:00
# ifdef LLVM_AVAILABLE
namespace
{
// Compiled PPU module info
struct jit_module
{
2024-03-18 15:14:45 +01:00
void ( * symbol_resolver ) ( u8 * , u64 ) = nullptr ;
2021-01-27 14:08:43 +01:00
std : : shared_ptr < jit_compiler > pjit ;
2021-01-31 19:38:47 +01:00
bool init = false ;
2021-01-27 14:08:43 +01:00
} ;
struct jit_module_manager
{
2023-09-09 21:00:52 +02:00
struct bucket_t
{
shared_mutex mutex ;
std : : unordered_map < std : : string , jit_module > map ;
} ;
std : : array < bucket_t , 30 > buckets ;
bucket_t & get_bucket ( std : : string_view sv )
{
return buckets [ std : : hash < std : : string_view > ( ) ( sv ) % std : : size ( buckets ) ] ;
}
2021-01-27 14:08:43 +01:00
jit_module & get ( const std : : string & name )
{
2023-09-09 21:00:52 +02:00
bucket_t & bucket = get_bucket ( name ) ;
std : : lock_guard lock ( bucket . mutex ) ;
return bucket . map . emplace ( name , jit_module { } ) . first - > second ;
2021-01-27 14:08:43 +01:00
}
void remove ( const std : : string & name ) noexcept
{
2023-09-09 21:00:52 +02:00
bucket_t & bucket = get_bucket ( name ) ;
2021-01-27 14:08:43 +01:00
2023-09-09 21:00:52 +02:00
jit_module to_destroy { } ;
2021-01-27 14:08:43 +01:00
2023-09-09 21:00:52 +02:00
std : : lock_guard lock ( bucket . mutex ) ;
const auto found = bucket . map . find ( name ) ;
if ( found = = bucket . map . end ( ) ) [[unlikely]]
2021-01-27 14:08:43 +01:00
{
ppu_log . error ( " Failed to remove module %s " , name ) ;
return ;
}
2023-09-09 21:00:52 +02:00
to_destroy . pjit = std : : move ( found - > second . pjit ) ;
bucket . map . erase ( found ) ;
2021-01-27 14:08:43 +01:00
}
} ;
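// Design note: the name -> jit_module map is split into 30 hash buckets, each
// guarded by its own shared_mutex, so registration/removal of unrelated
// modules does not serialize; only names hashing to the same bucket contend.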
}
# endif
2021-01-29 11:32:19 +01:00
namespace
{
// Read-only file view starting with specified offset (for MSELF)
struct file_view : fs : : file_base
{
2023-10-29 01:46:52 +02:00
const fs : : file m_storage ;
const fs : : file & m_file ;
2021-01-29 11:32:19 +01:00
const u64 m_off ;
2023-10-29 01:46:52 +02:00
const u64 m_max_size ;
2021-01-29 11:32:19 +01:00
u64 m_pos ;
2023-10-29 01:46:52 +02:00
explicit file_view ( const fs : : file & _file , u64 offset , u64 max_size ) noexcept
: m_storage ( fs : : file ( ) )
, m_file ( _file )
2021-01-29 11:32:19 +01:00
, m_off ( offset )
2023-10-29 01:46:52 +02:00
, m_max_size ( max_size )
, m_pos ( 0 )
{
}
explicit file_view ( fs : : file & & _file , u64 offset , u64 max_size ) noexcept
: m_storage ( std : : move ( _file ) )
, m_file ( m_storage )
, m_off ( offset )
, m_max_size ( max_size )
2021-01-29 11:32:19 +01:00
, m_pos ( 0 )
{
}
~ file_view ( ) override
{
}
2023-07-11 20:40:30 +02:00
fs : : stat_t get_stat ( ) override
2021-01-29 11:32:19 +01:00
{
2023-11-28 12:04:03 +01:00
fs : : stat_t stat = m_file . get_stat ( ) ;
stat . size = std : : min < u64 > ( utils : : sub_saturate < u64 > ( stat . size , m_off ) , m_max_size ) ;
stat . is_writable = false ;
return stat ;
2021-01-29 11:32:19 +01:00
}
2021-03-05 20:05:37 +01:00
bool trunc ( u64 ) override
2021-01-29 11:32:19 +01:00
{
return false ;
}
u64 read ( void * buffer , u64 size ) override
{
2023-10-29 01:46:52 +02:00
const u64 result = file_view : : read_at ( m_pos , buffer , size ) ;
2021-01-29 11:32:19 +01:00
m_pos + = result ;
return result ;
}
2022-12-24 15:15:29 +01:00
u64 read_at ( u64 offset , void * buffer , u64 size ) override
{
2023-11-28 12:04:03 +01:00
return m_file . read_at ( offset + m_off , buffer , std : : min < u64 > ( size , utils : : sub_saturate < u64 > ( m_max_size , offset ) ) ) ;
2022-12-24 15:15:29 +01:00
}
2021-03-05 20:05:37 +01:00
u64 write ( const void * , u64 ) override
2021-01-29 11:32:19 +01:00
{
return 0 ;
}
u64 seek ( s64 offset , fs : : seek_mode whence ) override
{
const s64 new_pos =
whence = = fs : : seek_set ? offset :
whence = = fs : : seek_cur ? offset + m_pos :
whence = = fs : : seek_end ? offset + size ( ) : - 1 ;
if ( new_pos < 0 )
{
fs : : g_tls_error = fs : : error : : inval ;
return - 1 ;
}
m_pos = new_pos ;
return m_pos ;
}
u64 size ( ) override
{
2023-11-28 12:04:03 +01:00
return std : : min < u64 > ( utils : : sub_saturate < u64 > ( m_file . size ( ) , m_off ) , m_max_size ) ;
2021-01-29 11:32:19 +01:00
}
} ;
}
2023-10-29 01:46:52 +02:00
extern fs : : file make_file_view ( const fs : : file & _file , u64 offset , u64 max_size = umax )
{
fs : : file file ;
file . reset ( std : : make_unique < file_view > ( _file , offset , max_size ) ) ;
return file ;
}
extern fs : : file make_file_view ( fs : : file & & _file , u64 offset , u64 max_size = umax )
2022-07-04 15:02:17 +02:00
{
fs : : file file ;
2023-10-29 01:46:52 +02:00
file . reset ( std : : make_unique < file_view > ( std : : move ( _file ) , offset , max_size ) ) ;
2022-07-04 15:02:17 +02:00
return file ;
}
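// Usage sketch (as done in ppu_precompile below for MSELF records): expose an
// embedded file as a standalone read-only view, then decrypt it in place:
//   fs::file src{path};
//   src = make_file_view(std::move(src), offset, file_size);
//   src = decrypt_self(std::move(src));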
2024-03-30 15:20:08 +01:00
extern void ppu_finalize ( const ppu_module & info , bool force_mem_release )
2021-01-27 14:08:43 +01:00
{
2024-03-30 15:20:08 +01:00
if ( ! force_mem_release & & info . name . empty ( ) )
2021-01-27 14:08:43 +01:00
{
// Don't remove main module from memory
return ;
}
2024-03-30 15:20:08 +01:00
if ( ! force_mem_release & & Emu . GetCat ( ) = = " 1P " )
2022-11-09 21:41:46 +01:00
{
return ;
}
2021-01-27 14:08:43 +01:00
2024-03-30 15:20:08 +01:00
const bool may_be_elf = fmt : : to_lower ( info . path . substr ( std : : max < usz > ( info . path . size ( ) , 3 ) - 3 ) ) ! = " prx " ;
if ( ! may_be_elf )
{
const std : : string dev_flash = vfs : : get ( " /dev_flash/sys/external " ) ;
if ( ! force_mem_release & & info . path . starts_with ( dev_flash ) )
{
// Don't remove dev_flash prx from memory
return ;
}
}
2022-11-09 21:41:46 +01:00
// Get cache path for this executable
std : : string cache_path = fs : : get_cache_dir ( ) + " cache/ " ;
if ( ! Emu . GetTitleID ( ) . empty ( ) )
{
cache_path + = Emu . GetTitleID ( ) ;
cache_path + = ' / ' ;
2021-01-27 14:08:43 +01:00
}
2022-11-09 21:41:46 +01:00
// Add PPU hash and filename
fmt : : append ( cache_path , " ppu-%s-%s/ " , fmt : : base57 ( info . sha1 ) , info . path . substr ( info . path . find_last_of ( ' / ' ) + 1 ) ) ;
2021-01-27 14:08:43 +01:00
# ifdef LLVM_AVAILABLE
2023-08-06 20:04:48 +02:00
g_fxo - > get < jit_module_manager > ( ) . remove ( cache_path + " _ " + std : : to_string ( std : : bit_cast < usz > ( info . segs [ 0 ] . ptr ) ) ) ;
2021-01-27 14:08:43 +01:00
# endif
}
2022-07-04 15:02:17 +02:00
extern void ppu_precompile ( std : : vector < std : : string > & dir_queue , std : : vector < ppu_module * > * loaded_modules )
2021-01-29 11:32:19 +01:00
{
2021-02-23 15:09:23 +01:00
if ( g_cfg . core . ppu_decoder ! = ppu_decoder_type : : llvm )
{
return ;
}
2022-07-04 15:02:17 +02:00
if ( auto dis = g_fxo - > try_get < disable_precomp_t > ( ) ; dis & & dis - > disable )
{
return ;
}
2023-11-28 18:41:14 +01:00
std : : optional < scoped_progress_dialog > progr ( std : : in_place , " Scanning PPU Executable... " ) ;
2021-01-30 16:18:10 +01:00
// Make sure we only have one '/' at the end and remove duplicates.
for ( std : : string & dir : dir_queue )
{
while ( dir . back ( ) = = ' / ' | | dir . back ( ) = = ' \\ ' )
dir . pop_back ( ) ;
dir + = ' / ' ;
}
2023-11-28 18:41:14 +01:00
2021-01-30 14:08:22 +01:00
std : : sort ( dir_queue . begin ( ) , dir_queue . end ( ) ) ;
dir_queue . erase ( std : : unique ( dir_queue . begin ( ) , dir_queue . end ( ) ) , dir_queue . end ( ) ) ;
const std : : string firmware_sprx_path = vfs : : get ( " /dev_flash/sys/external/ " ) ;
2023-09-05 20:15:52 +02:00
struct file_info
{
std : : string path ;
u64 offset ;
u64 file_size ;
file_info ( ) noexcept = default ;
file_info ( std : : string _path , u64 offs , u64 size ) noexcept
: path ( std : : move ( _path ) )
, offset ( offs )
, file_size ( size )
{
}
} ;
std : : vector < file_info > file_queue ;
2021-01-29 11:32:19 +01:00
file_queue . reserve ( 2000 ) ;
2021-01-30 15:25:21 +01:00
// Find all .sprx files recursively
2021-01-29 11:32:19 +01:00
for ( usz i = 0 ; i < dir_queue . size ( ) ; i + + )
{
if ( Emu . IsStopped ( ) )
{
2021-01-30 14:08:22 +01:00
file_queue . clear ( ) ;
2021-01-29 11:32:19 +01:00
break ;
}
ppu_log . notice ( " Scanning directory: %s " , dir_queue [ i ] ) ;
for ( auto & & entry : fs : : dir ( dir_queue [ i ] ) )
{
if ( Emu . IsStopped ( ) )
{
2021-01-30 14:08:22 +01:00
file_queue . clear ( ) ;
2021-01-29 11:32:19 +01:00
break ;
}
if ( entry . is_directory )
{
if ( entry . name ! = " . " & & entry . name ! = " .. " )
{
dir_queue . emplace_back ( dir_queue [ i ] + entry . name + ' / ' ) ;
}
continue ;
}
2023-09-05 20:15:52 +02:00
// SCE header size
if ( entry . size < = 0x20 )
{
continue ;
}
2021-01-30 14:08:22 +01:00
std : : string upper = fmt : : to_upper ( entry . name ) ;
2022-07-04 15:02:17 +02:00
// Skip already loaded modules or HLEd ones
2022-09-13 15:08:55 +02:00
auto is_ignored = [ & ] ( s64 /*offset*/ ) - > bool
2021-01-29 11:32:19 +01:00
{
2022-07-04 15:02:17 +02:00
if ( dir_queue [ i ] ! = firmware_sprx_path )
2021-01-30 14:08:22 +01:00
{
2022-07-04 15:02:17 +02:00
return false ;
}
2021-01-30 14:08:22 +01:00
2022-07-04 15:02:17 +02:00
if ( loaded_modules )
{
if ( std : : any_of ( loaded_modules - > begin ( ) , loaded_modules - > end ( ) , [ & ] ( ppu_module * obj )
2021-01-30 14:08:22 +01:00
{
2022-07-04 15:02:17 +02:00
return obj - > name = = entry . name ;
} ) )
2021-01-30 14:08:22 +01:00
{
2022-07-04 15:02:17 +02:00
return true ;
2021-01-30 14:08:22 +01:00
}
2022-07-04 15:02:17 +02:00
}
2021-01-30 14:08:22 +01:00
2022-07-04 15:02:17 +02:00
if ( g_cfg . core . libraries_control . get_set ( ) . count ( entry . name + " :lle " ) )
{
// Force LLE
return false ;
}
else if ( g_cfg . core . libraries_control . get_set ( ) . count ( entry . name + " :hle " ) )
{
// Force HLE
return true ;
}
2021-01-30 14:08:22 +01:00
2022-07-04 15:02:17 +02:00
extern const std : : map < std : : string_view , int > g_prx_list ;
// Use list
2022-09-19 14:57:51 +02:00
return g_prx_list . count ( entry . name ) & & : : at32 ( g_prx_list , entry . name ) ! = 0 ;
2022-07-04 15:02:17 +02:00
} ;
2023-08-30 15:08:27 +02:00
// Check PRX filename
if ( upper . ends_with ( " .PRX " ) | | ( upper . ends_with ( " .SPRX " ) & & entry . name ! = " libfs_utility_init.sprx " sv ) )
2022-07-04 15:02:17 +02:00
{
if ( is_ignored ( 0 ) )
{
continue ;
2021-01-30 14:08:22 +01:00
}
2021-01-29 11:32:19 +01:00
// Get full path
2023-09-05 20:15:52 +02:00
file_queue . emplace_back ( dir_queue [ i ] + entry . name , 0 , entry . size ) ;
2021-01-30 14:08:22 +01:00
continue ;
}
2023-08-30 15:08:27 +02:00
// Check ELF filename
2023-09-02 08:01:35 +02:00
if ( ( upper . ends_with ( " .ELF " ) | | upper . ends_with ( " .SELF " ) ) & & Emu . GetBoot ( ) ! = dir_queue [ i ] + entry . name )
2021-01-30 14:08:22 +01:00
{
// Get full path
2023-09-05 20:15:52 +02:00
file_queue . emplace_back ( dir_queue [ i ] + entry . name , 0 , entry . size ) ;
2021-01-29 11:32:19 +01:00
continue ;
}
// Check .mself filename
2021-01-30 14:08:22 +01:00
if ( upper . ends_with ( " .MSELF " ) )
2021-01-29 11:32:19 +01:00
{
if ( fs : : file mself { dir_queue [ i ] + entry . name } )
{
mself_header hdr { } ;
if ( mself . read ( hdr ) & & hdr . get_count ( mself . size ( ) ) )
{
2021-01-29 19:06:49 +01:00
for ( u32 j = 0 ; j < hdr . count ; j + + )
2021-01-29 11:32:19 +01:00
{
mself_record rec { } ;
2024-02-16 08:05:14 +01:00
std : : set < u64 > offs ;
2021-01-29 11:32:19 +01:00
if ( mself . read ( rec ) & & rec . get_pos ( mself . size ( ) ) )
{
2024-02-16 08:05:14 +01:00
if ( rec . size < = 0x20 )
{
continue ;
}
if ( ! offs . emplace ( rec . off ) . second )
{
// Duplicate
continue ;
}
2024-02-15 18:18:30 +01:00
// Read characters safely
std : : string name ( sizeof ( rec . name ) , ' \0 ' ) ;
std : : memcpy ( name . data ( ) , rec . name , name . size ( ) ) ;
name = std : : string ( name . c_str ( ) ) ;
2021-01-29 11:32:19 +01:00
2021-01-30 14:08:22 +01:00
upper = fmt : : to_upper ( name ) ;
2024-02-16 08:14:47 +01:00
if ( upper . find ( " .SPRX " ) ! = umax | | upper . find ( " .PRX " ) ! = umax )
2021-01-29 11:32:19 +01:00
{
// .sprx inside .mself found
2023-09-05 20:15:52 +02:00
file_queue . emplace_back ( dir_queue [ i ] + entry . name , rec . off , rec . size ) ;
2021-01-30 14:08:22 +01:00
continue ;
}
2024-02-16 08:14:47 +01:00
if ( upper . find ( " .SELF " ) ! = umax | | upper . find ( " .ELF " ) ! = umax )
2021-01-30 14:08:22 +01:00
{
// .self inside .mself found
2023-09-05 20:15:52 +02:00
file_queue . emplace_back ( dir_queue [ i ] + entry . name , rec . off , rec . size ) ;
2021-01-30 14:08:22 +01:00
continue ;
2021-01-29 11:32:19 +01:00
}
}
else
{
ppu_log . error ( " MSELF file is possibly truncated " ) ;
break ;
}
}
}
}
}
}
}
2023-12-30 19:53:07 +01:00
g_progr_ftotal + = : : size32 ( file_queue ) ;
2023-09-05 20:15:52 +02:00
u64 total_files_size = 0 ;
for ( const file_info & info : file_queue )
{
total_files_size + = info . file_size ;
}
g_progr_ftotal_bits + = total_files_size ;
2023-11-28 18:41:14 +01:00
* progr = " Compiling PPU Modules... " ;
2021-01-30 14:08:22 +01:00
2021-01-29 11:32:19 +01:00
atomic_t < usz > fnext = 0 ;
2023-09-05 20:15:52 +02:00
lf_queue < file_info > possible_exec_file_paths ;
2021-01-29 11:32:19 +01:00
2023-09-11 14:03:39 +02:00
: : semaphore < 2 > ovl_sema ;
2024-03-30 15:20:08 +01:00
const u32 software_thread_limit = std : : min < u32 > ( g_cfg . core . llvm_threads ? g_cfg . core . llvm_threads : u32 { umax } , : : size32 ( file_queue ) ) ;
const u32 cpu_thread_limit = utils : : get_thread_count ( ) > 8u ? std : : max < u32 > ( utils : : get_thread_count ( ) , 2 ) - 1 : utils : : get_thread_count ( ) ; // One LLVM thread less
named_thread_group workers ( " SPRX Worker " , std : : min < u32 > ( software_thread_limit , cpu_thread_limit ) , [ & ]
2021-01-29 11:32:19 +01:00
{
2022-06-14 14:28:38 +02:00
# ifdef __APPLE__
pthread_jit_write_protect_np ( false ) ;
# endif
2021-01-30 16:18:10 +01:00
// Set low priority
thread_ctrl : : scoped_priority low_prio ( - 1 ) ;
2023-12-30 19:53:07 +01:00
u32 inc_fdone = 1 ;
2021-01-30 16:18:10 +01:00
2023-12-30 19:53:07 +01:00
for ( usz func_i = fnext + + ; func_i < file_queue . size ( ) ; func_i = fnext + + , g_progr_fdone + = std : : exchange ( inc_fdone , 1 ) )
2021-01-29 11:32:19 +01:00
{
2021-01-30 14:08:22 +01:00
if ( Emu . IsStopped ( ) )
{
continue ;
}
2021-01-29 11:32:19 +01:00
2023-09-05 20:15:52 +02:00
auto & [ path , offset , file_size ] = file_queue [ func_i ] ;
2021-01-29 11:32:19 +01:00
2021-01-30 14:08:22 +01:00
ppu_log . notice ( " Trying to load: %s " , path ) ;
// Load MSELF, SPRX or SELF
2021-01-29 11:32:19 +01:00
fs : : file src { path } ;
2021-01-30 14:08:22 +01:00
if ( ! src )
{
ppu_log . error ( " Failed to open '%s' (%s) " , path , fs : : g_tls_error ) ;
continue ;
}
if ( u64 off = offset )
2021-01-29 11:32:19 +01:00
{
// Adjust offset for MSELF
2024-02-15 18:18:30 +01:00
src = make_file_view ( std : : move ( src ) , offset , file_size ) ;
2021-01-29 19:06:49 +01:00
// Adjust path for MSELF too
fmt : : append ( path , " _x%x " , off ) ;
2021-01-29 11:32:19 +01:00
}
// Some files may fail to decrypt due to the lack of klic
src = decrypt_self ( std : : move ( src ) ) ;
2021-01-30 14:08:22 +01:00
if ( ! src )
{
2021-04-19 19:45:38 +02:00
ppu_log . notice ( " Failed to decrypt '%s' " , path ) ;
2021-01-30 16:18:10 +01:00
continue ;
2021-01-30 14:08:22 +01:00
}
elf_error prx_err { } , ovl_err { } ;
2021-01-29 11:32:19 +01:00
2021-02-12 12:40:55 +01:00
if ( ppu_prx_object obj = src ; ( prx_err = obj , obj = = elf_error : : ok ) )
2021-01-29 11:32:19 +01:00
{
2023-06-25 14:53:42 +02:00
if ( auto prx = ppu_load_prx ( obj , true , path , offset ) )
2021-01-29 11:32:19 +01:00
{
2021-02-12 12:40:55 +01:00
obj . clear ( ) , src . close ( ) ; // Clear decrypted file and elf object memory
2023-09-05 20:15:52 +02:00
ppu_initialize ( * prx , false , file_size ) ;
2024-03-30 15:20:08 +01:00
ppu_finalize ( * prx , true ) ;
2021-01-29 11:32:19 +01:00
continue ;
}
2021-01-30 14:08:22 +01:00
// Log error
prx_err = elf_error : : header_type ;
2021-01-29 11:32:19 +01:00
}
2021-01-30 16:18:10 +01:00
2021-02-12 12:40:55 +01:00
if ( ppu_exec_object obj = src ; ( ovl_err = obj , obj = = elf_error : : ok ) )
2021-01-30 14:08:22 +01:00
{
while ( ovl_err = = elf_error : : ok )
{
2023-09-11 14:03:39 +02:00
// Try not to process too many files at once because it seems to reduce performance
// Concurrently compiling more OVL files does not have much theoretical benefit
std : : lock_guard lock ( ovl_sema ) ;
if ( Emu . IsStopped ( ) )
{
break ;
}
2023-08-22 23:31:08 +02:00
const auto [ ovlm , error ] = ppu_load_overlay ( obj , true , path , offset ) ;
2021-01-30 14:08:22 +01:00
if ( error )
{
2023-07-14 16:57:43 +02:00
if ( error = = CELL_CANCEL + 0u )
{
// Emulation stopped
break ;
}
2021-01-30 14:08:22 +01:00
// Abort
ovl_err = elf_error : : header_type ;
break ;
}
2021-01-29 11:32:19 +01:00
2023-09-09 12:28:33 +02:00
// Participate in thread execution limitation (takes a long time)
if ( std : : lock_guard lock ( g_fxo - > get < jit_core_allocator > ( ) . sem ) ; ! ovlm - > analyse ( 0 , ovlm - > entry , ovlm - > seg0_code_end , ovlm - > applied_patches , [ ] ( )
{
return Emu . IsStopped ( ) ;
} ) )
2021-01-30 14:08:22 +01:00
{
2023-09-09 12:28:33 +02:00
// Emulation stopped
break ;
2021-01-30 14:08:22 +01:00
}
2023-09-09 12:28:33 +02:00
obj . clear ( ) , src . close ( ) ; // Clear decrypted file and elf object memory
ppu_initialize ( * ovlm , false , file_size ) ;
2024-03-30 15:20:08 +01:00
ppu_finalize ( * ovlm , true ) ;
2021-01-30 14:08:22 +01:00
break ;
}
if ( ovl_err = = elf_error : : ok )
{
continue ;
}
}
2023-06-25 14:53:42 +02:00
ppu_log . notice ( " Failed to precompile '%s' (prx: %s, ovl: %s): Attempting tratment as executable file " , path , prx_err , ovl_err ) ;
2023-09-05 20:15:52 +02:00
possible_exec_file_paths . push ( path , offset , file_size ) ;
2023-09-02 19:55:36 +02:00
inc_fdone = 0 ;
2021-01-29 11:32:19 +01:00
}
} ) ;
// Join every thread
workers . join ( ) ;
2021-01-30 14:08:22 +01:00
2023-06-25 14:53:42 +02:00
named_thread exec_worker ( " PPU Exec Worker " , [ & ]
{
if ( ! possible_exec_file_paths )
{
return ;
}
# ifdef __APPLE__
pthread_jit_write_protect_np ( false ) ;
# endif
// Set low priority
thread_ctrl : : scoped_priority low_prio ( - 1 ) ;
auto slice = possible_exec_file_paths . pop_all ( ) ;
auto main_module = std : : move ( g_fxo - > get < main_ppu_module > ( ) ) ;
for ( ; slice ; slice . pop_front ( ) , g_progr_fdone + + )
{
if ( Emu . IsStopped ( ) )
{
continue ;
}
2023-09-05 20:15:52 +02:00
const auto & [ path , _ , file_size ] = * slice ;
2023-06-25 14:53:42 +02:00
ppu_log . notice ( " Trying to load as executable: %s " , path ) ;
2023-09-09 12:28:33 +02:00
// Load SELF
2023-06-25 14:53:42 +02:00
fs : : file src { path } ;
if ( ! src )
{
ppu_log . error ( " Failed to open '%s' (%s) " , path , fs : : g_tls_error ) ;
continue ;
}
// Some files may fail to decrypt due to the lack of klic
2023-09-02 19:07:35 +02:00
src = decrypt_self ( std : : move ( src ) , nullptr , nullptr , true ) ;
2023-06-25 14:53:42 +02:00
if ( ! src )
{
ppu_log . notice ( " Failed to decrypt '%s' " , path ) ;
continue ;
}
elf_error exec_err { } ;
if ( ppu_exec_object obj = src ; ( exec_err = obj , obj = = elf_error : : ok ) )
{
while ( exec_err = = elf_error : : ok )
{
2023-06-29 07:42:21 +02:00
main_ppu_module & _main = g_fxo - > get < main_ppu_module > ( ) ;
_main = { } ;
2023-09-01 14:07:46 +02:00
auto current_cache = std : : move ( g_fxo - > get < spu_cache > ( ) ) ;
2023-06-25 14:53:42 +02:00
if ( ! ppu_load_exec ( obj , true , path ) )
{
// Abort
exec_err = elf_error : : header_type ;
break ;
}
2023-08-14 16:00:46 +02:00
if ( std : : memcmp ( main_module . sha1 , _main . sha1 , sizeof ( _main . sha1 ) ) = = 0 )
2023-08-04 19:14:52 +02:00
{
2023-09-01 14:07:46 +02:00
g_fxo - > get < spu_cache > ( ) = std : : move ( current_cache ) ;
2023-08-06 08:43:13 +02:00
break ;
2023-08-04 19:14:52 +02:00
}
2023-09-09 12:28:33 +02:00
if ( ! _main . analyse ( 0 , _main . elf_entry , _main . seg0_code_end , _main . applied_patches , [ ] ( ) { return Emu . IsStopped ( ) ; } ) )
2023-06-25 14:53:42 +02:00
{
2023-09-01 14:07:46 +02:00
g_fxo - > get < spu_cache > ( ) = std : : move ( current_cache ) ;
2023-06-25 14:53:42 +02:00
break ;
}
obj . clear ( ) , src . close ( ) ; // Clear decrypted file and elf object memory
2023-08-06 08:43:13 +02:00
_main . name = ' ' ; // Make ppu_finalize work
2023-08-14 16:00:46 +02:00
Emu . ConfigurePPUCache ( ! Emu . IsPathInsideDir ( _main . path , g_cfg_vfs . get_dev_flash ( ) ) ) ;
2023-09-05 20:15:52 +02:00
ppu_initialize ( _main , false , file_size ) ;
2023-09-01 14:07:46 +02:00
spu_cache : : initialize ( false ) ;
2024-03-30 15:20:08 +01:00
ppu_finalize ( _main , true ) ;
2023-06-25 14:53:42 +02:00
_main = { } ;
2023-09-01 14:07:46 +02:00
g_fxo - > get < spu_cache > ( ) = std : : move ( current_cache ) ;
2023-06-25 14:53:42 +02:00
break ;
}
if ( exec_err = = elf_error : : ok )
{
continue ;
}
}
ppu_log . notice ( " Failed to precompile '%s' as executable (%s) " , path , exec_err ) ;
}
g_fxo - > get < main_ppu_module > ( ) = std : : move ( main_module ) ;
2023-09-01 14:07:46 +02:00
g_fxo - > get < spu_cache > ( ) . collect_funcs_to_precompile = true ;
2023-08-06 08:43:13 +02:00
Emu . ConfigurePPUCache ( ) ;
2023-06-25 14:53:42 +02:00
} ) ;
exec_worker ( ) ;
2021-01-29 11:32:19 +01:00
}
2017-02-26 16:56:31 +01:00
extern void ppu_initialize ( )
2016-06-22 15:37:51 +02:00
{
2023-04-08 17:03:05 +02:00
if ( ! g_fxo - > is_init < main_ppu_module > ( ) )
2017-02-26 16:56:31 +01:00
{
return ;
}
2017-01-22 20:03:57 +01:00
2018-05-04 23:01:27 +02:00
if ( Emu . IsStopped ( ) )
{
return ;
}
2021-01-31 18:00:04 +01:00
2023-04-08 17:03:05 +02:00
auto & _main = g_fxo - > get < main_ppu_module > ( ) ;
2022-07-04 15:02:17 +02:00
2023-11-28 18:41:14 +01:00
std : : optional < scoped_progress_dialog > progr ( std : : in_place , " Analyzing PPU Executable... " ) ;
2023-04-08 17:03:05 +02:00
// Analyse executable
2023-09-09 12:28:33 +02:00
if ( ! _main . analyse ( 0 , _main . elf_entry , _main . seg0_code_end , _main . applied_patches , [ ] ( ) { return Emu . IsStopped ( ) ; } ) )
2023-04-08 17:03:05 +02:00
{
return ;
}
// Validate analyser results (not required)
_main . validate ( 0 ) ;
2023-11-28 18:41:14 +01:00
* progr = " Scanning PPU Modules... " ;
2019-12-04 23:17:57 +01:00
2021-01-30 14:08:22 +01:00
bool compile_main = false ;
2018-05-04 23:01:27 +02:00
2021-01-30 14:08:22 +01:00
// Check main module cache
2021-03-02 12:59:19 +01:00
if ( ! _main . segs . empty ( ) )
2019-09-26 16:06:43 +02:00
{
2021-03-02 12:59:19 +01:00
compile_main = ppu_initialize ( _main , true ) ;
2019-09-26 16:06:43 +02:00
}
2017-02-26 16:56:31 +01:00
2022-07-04 15:02:17 +02:00
std : : vector < ppu_module * > module_list ;
2017-04-22 15:00:23 +02:00
2022-07-04 15:02:17 +02:00
const std : : string firmware_sprx_path = vfs : : get ( " /dev_flash/sys/external/ " ) ;
// If empty we have no indication for firmware cache state, check everything
2023-08-05 20:40:11 +02:00
bool compile_fw = ! Emu . IsVsh ( ) ;
2022-07-04 15:02:17 +02:00
idm : : select < lv2_obj , lv2_prx > ( [ & ] ( u32 , lv2_prx & _module )
2017-04-22 15:00:23 +02:00
{
2023-01-15 21:12:54 +01:00
if ( _module . funcs . empty ( ) )
{
return ;
}
2022-07-04 15:02:17 +02:00
if ( _module . path . starts_with ( firmware_sprx_path ) )
{
// Postpone testing
compile_fw = false ;
}
module_list . emplace_back ( & _module ) ;
2017-04-22 15:00:23 +02:00
} ) ;
2022-07-04 15:02:17 +02:00
idm : : select < lv2_obj , lv2_overlay > ( [ & ] ( u32 , lv2_overlay & _module )
{
module_list . emplace_back ( & _module ) ;
} ) ;
2021-01-30 14:08:22 +01:00
// Check preloaded libraries cache
2022-07-04 15:02:17 +02:00
if ( ! compile_fw )
2021-01-30 14:08:22 +01:00
{
2022-07-04 15:02:17 +02:00
for ( auto ptr : module_list )
{
if ( ptr - > path . starts_with ( firmware_sprx_path ) )
{
compile_fw | = ppu_initialize ( * ptr , true ) ;
2022-11-05 16:14:34 +01:00
// Fixup for compatibility with old savestates
if ( Emu . DeserialManager ( ) & & ptr - > name = = " liblv2.sprx " )
{
static_cast < lv2_prx * > ( ptr ) - > state = PRX_STATE_STARTED ;
static_cast < lv2_prx * > ( ptr ) - > load_exports ( ) ;
}
2022-07-04 15:02:17 +02:00
}
}
2021-01-30 14:08:22 +01:00
}
std : : vector < std : : string > dir_queue ;
2022-05-12 07:08:36 +02:00
const std : : string mount_point = vfs : : get ( " /dev_flash/ " ) ;
2023-08-26 10:23:42 +02:00
bool dev_flash_located = ! Emu . GetCat ( ) . ends_with ( ' P ' ) & & Emu . IsPathInsideDir ( Emu . GetBoot ( ) , mount_point ) & & g_cfg . core . llvm_precompilation ;
2022-05-12 07:08:36 +02:00
if ( compile_fw | | dev_flash_located )
2021-01-30 14:08:22 +01:00
{
2022-05-12 07:08:36 +02:00
if ( dev_flash_located )
{
const std : : string eseibrd = mount_point + " /vsh/module/eseibrd.sprx " ;
2023-06-25 14:53:42 +02:00
if ( auto prx = ppu_load_prx ( ppu_prx_object { decrypt_self ( fs : : file { eseibrd } ) } , true , eseibrd , 0 ) )
2022-05-12 07:08:36 +02:00
{
// Check if cache exists for this infinitesimally small prx
dev_flash_located = ppu_initialize ( * prx , true ) ;
}
}
const std : : string firmware_sprx_path = vfs : : get ( dev_flash_located ? " /dev_flash/ " sv : " /dev_flash/sys/ " sv ) ;
2021-01-30 14:08:22 +01:00
dir_queue . emplace_back ( firmware_sprx_path ) ;
}
// Avoid compilation if main's cache exists or it is a standalone SELF with no PARAM.SFO
2023-08-26 10:23:42 +02:00
if ( compile_main & & g_cfg . core . llvm_precompilation & & ! Emu . GetTitleID ( ) . empty ( ) & & ! Emu . IsChildProcess ( ) )
2021-01-30 14:08:22 +01:00
{
2021-01-30 16:18:10 +01:00
// Try to add all related directories
const std : : set < std : : string > dirs = Emu . GetGameDirs ( ) ;
dir_queue . insert ( std : : end ( dir_queue ) , std : : begin ( dirs ) , std : : end ( dirs ) ) ;
2021-01-30 14:08:22 +01:00
}
2023-11-28 18:41:14 +01:00
progr . reset ( ) ;
2022-07-04 15:02:17 +02:00
ppu_precompile ( dir_queue , & module_list ) ;
2021-01-30 14:08:22 +01:00
if ( Emu . IsStopped ( ) )
{
return ;
}
// Initialize main module cache
2021-03-02 12:59:19 +01:00
if ( ! _main . segs . empty ( ) )
2021-01-30 14:08:22 +01:00
{
2021-03-02 12:59:19 +01:00
ppu_initialize ( _main ) ;
2021-01-30 14:08:22 +01:00
}
2017-06-22 23:52:09 +02:00
// Initialize preloaded libraries
2022-07-04 15:02:17 +02:00
for ( auto ptr : module_list )
2017-02-26 16:56:31 +01:00
{
2021-01-30 14:08:22 +01:00
if ( Emu . IsStopped ( ) )
{
return ;
}
2017-06-22 23:52:09 +02:00
ppu_initialize ( * ptr ) ;
2017-04-22 15:00:23 +02:00
}
2017-02-26 16:56:31 +01:00
}
2023-09-05 20:15:52 +02:00
bool ppu_initialize ( const ppu_module & info , bool check_only , u64 file_size )
2017-02-26 16:56:31 +01:00
{
2017-05-20 13:45:02 +02:00
if ( g_cfg . core . ppu_decoder ! = ppu_decoder_type : : llvm )
2017-02-26 16:56:31 +01:00
{
2023-07-24 11:40:01 +02:00
if ( check_only | | vm : : base ( info . segs [ 0 ] . addr ) ! = info . segs [ 0 ] . ptr )
2021-01-30 14:08:22 +01:00
{
return false ;
}
2023-06-19 17:05:50 +02:00
auto & toc_manager = g_fxo - > get < ppu_toc_manager > ( ) ;
std : : lock_guard lock ( toc_manager . mutex ) ;
auto & ppu_toc = toc_manager . toc_map ;
2017-04-22 15:00:23 +02:00
2017-02-26 16:56:31 +01:00
for ( const auto & func : info . funcs )
{
2024-03-21 14:56:31 +01:00
if ( func . size & & func . blocks . empty ( ) )
{
ppu_register_function_at ( func . addr , func . size ) ;
}
2017-04-08 22:58:00 +02:00
for ( auto & block : func . blocks )
{
2024-03-21 14:56:31 +01:00
if ( ! block . second )
{
continue ;
}
2023-08-07 20:33:36 +02:00
if ( g_fxo - > is_init < ppu_far_jumps_t > ( ) & & ! g_fxo - > get < ppu_far_jumps_t > ( ) . get_targets ( block . first , block . second ) . empty ( ) )
2023-08-07 17:33:47 +02:00
{
// Replace the block with ppu_far_jump
continue ;
}
2021-09-01 12:38:17 +02:00
ppu_register_function_at ( block . first , block . second ) ;
2017-04-08 22:58:00 +02:00
}
2023-08-07 17:33:47 +02:00
if ( g_cfg . core . ppu_debug & & func . size & & func . toc ! = umax & & ! ppu_get_far_jump ( func . addr ) )
2017-04-08 22:58:00 +02:00
{
2023-07-24 11:40:01 +02:00
ppu_toc [ func . addr ] = func . toc ;
2024-03-21 14:56:31 +01:00
write_to_ptr < ppu_intrp_func_t > ( ppu_ptr ( func . addr ) , & ppu_check_toc ) ;
2017-04-08 22:58:00 +02:00
}
2017-02-26 16:56:31 +01:00
}
2021-01-30 14:08:22 +01:00
return false ;
2017-02-26 16:56:31 +01:00
}
2017-06-24 17:36:49 +02:00
// Link table
static const std : : unordered_map < std : : string , u64 > s_link_table = [ ] ( )
2017-02-26 16:56:31 +01:00
{
2017-06-22 23:52:09 +02:00
std : : unordered_map < std : : string , u64 > link_table
2017-02-26 16:56:31 +01:00
{
2023-05-07 12:56:08 +02:00
{ " sys_game_set_system_sw_version " , reinterpret_cast < u64 > ( ppu_execute_syscall ) } ,
2019-12-02 22:31:34 +01:00
{ " __trap " , reinterpret_cast < u64 > ( & ppu_trap ) } ,
{ " __error " , reinterpret_cast < u64 > ( & ppu_error ) } ,
{ " __check " , reinterpret_cast < u64 > ( & ppu_check ) } ,
{ " __trace " , reinterpret_cast < u64 > ( & ppu_trace ) } ,
{ " __syscall " , reinterpret_cast < u64 > ( ppu_execute_syscall ) } ,
{ " __get_tb " , reinterpret_cast < u64 > ( get_timebased_time ) } ,
{ " __lwarx " , reinterpret_cast < u64 > ( ppu_lwarx ) } ,
{ " __ldarx " , reinterpret_cast < u64 > ( ppu_ldarx ) } ,
{ " __stwcx " , reinterpret_cast < u64 > ( ppu_stwcx ) } ,
{ " __stdcx " , reinterpret_cast < u64 > ( ppu_stdcx ) } ,
2020-09-25 16:29:25 +02:00
{ " __dcbz " , reinterpret_cast < u64 > ( + [ ] ( u32 addr ) { alignas ( 64 ) static constexpr u8 z [ 128 ] { } ; do_cell_atomic_128_store ( addr , z ) ; } ) } ,
2019-12-02 22:31:34 +01:00
{ " __resupdate " , reinterpret_cast < u64 > ( vm : : reservation_update ) } ,
2020-10-13 21:36:00 +02:00
{ " __resinterp " , reinterpret_cast < u64 > ( ppu_reservation_fallback ) } ,
2023-03-11 20:08:27 +01:00
{ " __escape " , reinterpret_cast < u64 > ( + ppu_escape ) } ,
2023-07-05 12:52:16 +02:00
{ " __read_maybe_mmio32 " , reinterpret_cast < u64 > ( + ppu_read_mmio_aware_u32 ) } ,
{ " __write_maybe_mmio32 " , reinterpret_cast < u64 > ( + ppu_write_mmio_aware_u32 ) } ,
2017-02-26 16:56:31 +01:00
} ;
for ( u64 index = 0 ; index < 1024 ; index + + )
{
2021-01-12 11:01:06 +01:00
if ( ppu_get_syscall ( index ) )
2017-02-26 16:56:31 +01:00
{
2020-03-02 18:17:48 +01:00
link_table . emplace ( fmt : : format ( " %s " , ppu_syscall_code ( index ) ) , reinterpret_cast < u64 > ( ppu_execute_syscall ) ) ;
link_table . emplace ( fmt : : format ( " syscall_%u " , index ) , reinterpret_cast < u64 > ( ppu_execute_syscall ) ) ;
2017-02-26 16:56:31 +01:00
}
}
2017-06-24 17:36:49 +02:00
return link_table ;
} ( ) ;
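// The PPU translator emits calls to these reserved symbol names; when a
// compiled module is linked (the table is handed to jit_compiler below),
// each unresolved name is looked up here and patched to the host helper's
// address (e.g. an emitted "__stwcx" call dispatches to ppu_stwcx above).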
2017-07-10 21:22:54 +02:00
// Get cache path for this executable
std : : string cache_path ;
2023-08-04 19:14:52 +02:00
if ( ! info . cache . empty ( ) )
2017-07-10 21:22:54 +02:00
{
2019-01-13 18:06:30 +01:00
cache_path = info . cache ;
2017-07-10 21:22:54 +02:00
}
else
{
2019-01-13 18:06:30 +01:00
// New PPU cache location
cache_path = fs : : get_cache_dir ( ) + " cache/ " ;
const std : : string dev_flash = vfs : : get ( " /dev_flash/ " ) ;
2017-07-10 21:22:54 +02:00
2020-02-17 22:43:23 +01:00
if ( ! info . path . starts_with ( dev_flash ) & & ! Emu . GetTitleID ( ) . empty ( ) & & Emu . GetCat ( ) ! = " 1P " )
2017-07-10 21:22:54 +02:00
{
2019-01-13 18:06:30 +01:00
// Add prefix for anything except dev_flash files, standalone elfs or PS1 classics
cache_path + = Emu . GetTitleID ( ) ;
cache_path + = ' / ' ;
2017-07-10 21:22:54 +02:00
}
2019-01-13 18:06:30 +01:00
// Add PPU hash and filename
fmt : : append ( cache_path , " ppu-%s-%s/ " , fmt : : base57 ( info . sha1 ) , info . path . substr ( info . path . find_last_of ( ' / ' ) + 1 ) ) ;
if ( ! fs : : create_path ( cache_path ) )
2017-07-10 21:22:54 +02:00
{
2019-01-13 18:06:30 +01:00
fmt : : throw_exception ( " Failed to create cache directory: %s (%s) " , cache_path , fs : : g_tls_error ) ;
2017-07-10 21:22:54 +02:00
}
}
2017-06-24 17:36:49 +02:00
# ifdef LLVM_AVAILABLE
2021-03-31 15:31:21 +02:00
std : : optional < scoped_progress_dialog > progr ;
2019-12-04 23:17:57 +01:00
if ( ! check_only )
{
// Initialize progress dialog
2023-11-28 18:41:14 +01:00
progr . emplace ( " Loading PPU Modules... " ) ;
2019-12-04 23:17:57 +01:00
}
2018-05-30 19:34:36 +02:00
2017-07-10 21:22:54 +02:00
// Permanently loaded compiled PPU modules (name -> data)
2023-08-06 20:04:48 +02:00
jit_module & jit_mod = g_fxo - > get < jit_module_manager > ( ) . get ( cache_path + " _ " + std : : to_string ( std : : bit_cast < usz > ( info . segs [ 0 ] . ptr ) ) ) ;
2017-07-10 21:22:54 +02:00
// Compiler instance (deferred initialization)
2020-05-19 18:09:27 +02:00
std : : shared_ptr < jit_compiler > & jit = jit_mod . pjit ;
2017-06-24 17:36:49 +02:00
2017-06-22 23:52:09 +02:00
// Split module into fragments <= 1 MiB
2020-12-18 08:39:54 +01:00
usz fpos = 0 ;
2017-02-26 16:56:31 +01:00
2017-07-01 01:08:51 +02:00
// Difference between the address encoded in the function name and its current location
2022-09-19 14:57:51 +02:00
const u32 reloc = info . relocs . empty ( ) ? 0 : : : at32 ( info . segs , 0 ) . addr ;
2016-06-07 22:24:20 +02:00
2020-03-03 20:37:29 +01:00
// Info sent to threads
std : : vector < std : : pair < std : : string , ppu_module > > workload ;
// Info to load to main JIT instance (true - compiled)
std : : vector < std : : pair < std : : string , bool > > link_workload ;
// Sync variable to acquire workloads
atomic_t < u32 > work_cv = 0 ;
2021-01-30 14:08:22 +01:00
bool compiled_new = false ;
2021-06-25 09:50:42 +02:00
bool has_mfvscr = false ;
2023-07-14 16:05:27 +02:00
const bool is_being_used_in_emulation = vm : : base ( info . segs [ 0 ] . addr ) = = info . segs [ 0 ] . ptr ;
const cpu_thread * cpu = cpu_thread : : get_current ( ) ;
2021-06-25 09:50:42 +02:00
for ( auto & func : info . funcs )
{
if ( func . size = = 0 )
{
continue ;
}
for ( const auto & [ addr , size ] : func . blocks )
{
if ( size = = 0 )
{
continue ;
}
2023-06-25 14:53:42 +02:00
auto i_ptr = ensure ( info . get_ptr < u32 > ( addr ) ) ;
for ( u32 i = addr ; i < addr + size ; i + = 4 , i_ptr + + )
2021-06-25 09:50:42 +02:00
{
2023-06-25 14:53:42 +02:00
if ( g_ppu_itype . decode ( * i_ptr ) = = ppu_itype : : MFVSCR )
2021-06-25 09:50:42 +02:00
{
ppu_log . warning ( " MFVSCR found " ) ;
has_mfvscr = true ;
break ;
}
}
if ( has_mfvscr )
{
break ;
}
}
if ( has_mfvscr )
{
break ;
}
}
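// If any reachable block reads VSCR, the whole module is compiled with SAT
// tracking (ppu_attr::has_mfvscr below, gated by ppu_set_sat_bit); the scan
// stops at the first hit.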
2023-09-05 13:20:50 +02:00
u32 total_compile = 0 ;
2021-01-31 19:38:47 +01:00
while ( ! jit_mod . init & & fpos < info . funcs . size ( ) )
2017-06-22 23:52:09 +02:00
{
2017-07-10 21:22:54 +02:00
// Initialize compiler instance
2023-07-14 16:05:27 +02:00
if ( ! jit & & is_being_used_in_emulation )
2017-12-31 13:45:12 +01:00
{
2018-01-01 08:40:57 +01:00
jit = std : : make_shared < jit_compiler > ( s_link_table , g_cfg . core . llvm_cpu ) ;
2017-12-31 13:45:12 +01:00
}
2017-07-10 21:22:54 +02:00
2017-07-01 01:08:51 +02:00
// Copy module information (TODO: optimize)
2017-10-10 15:40:46 +02:00
ppu_module part ;
part . copy_part ( info ) ;
2017-06-24 17:36:49 +02:00
part . funcs . reserve ( 16000 ) ;
2017-06-22 23:52:09 +02:00
2017-07-01 01:08:51 +02:00
// Overall block size in bytes
2020-12-18 08:39:54 +01:00
usz bsize = 0 ;
2021-01-19 18:40:15 +01:00
usz bcount = 0 ;
2017-07-01 01:08:51 +02:00
2017-06-22 23:52:09 +02:00
while ( fpos < info . funcs . size ( ) )
{
auto & func = info . funcs [ fpos ] ;
2021-01-19 18:40:15 +01:00
if ( ! func . size )
{
fpos + + ;
continue ;
}
2019-01-13 18:06:30 +01:00
if ( bsize + func . size > 100 * 1024 & & bsize )
2017-06-22 23:52:09 +02:00
{
2021-01-19 18:40:15 +01:00
if ( bcount > = 1000 )
{
break ;
}
2017-06-22 23:52:09 +02:00
}
2022-12-09 19:06:50 +01:00
if ( g_fxo - > is_init < ppu_far_jumps_t > ( ) )
2022-08-17 15:53:05 +02:00
{
2022-12-09 19:06:50 +01:00
auto targets = g_fxo - > get < ppu_far_jumps_t > ( ) . get_targets ( func . addr , func . size ) ;
for ( auto [ source , target ] : targets )
{
auto far_jump = ensure ( g_fxo - > get < ppu_far_jumps_t > ( ) . gen_jump ( source ) ) ;
if ( source = = func . addr & & jit )
{
jit - > update_global_mapping ( fmt : : format ( " __0x%x " , func . addr - reloc ) , reinterpret_cast < u64 > ( far_jump ) ) ;
}
ppu_register_function_at ( source , 4 , far_jump ) ;
}
2022-08-17 15:53:05 +02:00
2022-12-09 19:06:50 +01:00
if ( ! targets . empty ( ) )
2022-08-17 15:53:05 +02:00
{
// Replace the function with ppu_far_jump
fpos + + ;
continue ;
}
}
2021-01-19 18:40:15 +01:00
// Copy block or function entry
ppu_function & entry = part . funcs . emplace_back ( func ) ;
// Fixup some information
entry . name = fmt : : format ( " __0x%x " , entry . addr - reloc ) ;
2021-12-30 17:39:18 +01:00
if ( has_mfvscr & & g_cfg . core . ppu_set_sat_bit )
2021-06-25 09:50:42 +02:00
{
// TODO
entry . attr + = ppu_attr : : has_mfvscr ;
}
2021-01-19 18:40:15 +01:00
if ( entry . blocks . empty ( ) )
2017-06-22 23:52:09 +02:00
{
2021-01-19 18:40:15 +01:00
entry . blocks . emplace ( func . addr , func . size ) ;
2017-06-22 23:52:09 +02:00
}
2021-01-19 18:40:15 +01:00
bsize + = func . size ;
2017-06-22 23:52:09 +02:00
fpos + + ;
2021-01-19 18:40:15 +01:00
bcount + + ;
2017-06-22 23:52:09 +02:00
}
2019-01-13 18:06:30 +01:00
// Compute module hash to generate (hopefully) unique object name
std : : string obj_name ;
2017-06-24 17:36:49 +02:00
{
sha1_context ctx ;
u8 output [ 20 ] ;
sha1_starts ( & ctx ) ;
2017-06-22 23:52:09 +02:00
2020-09-25 16:29:25 +02:00
int has_dcbz = ! ! g_cfg . core . accurate_cache_line_stores ;
2017-06-24 17:36:49 +02:00
for ( const auto & func : part . funcs )
{
if ( func . size = = 0 )
{
continue ;
}
2017-06-22 23:52:09 +02:00
2017-07-01 01:08:51 +02:00
const be_t < u32 > addr = func . addr - reloc ;
2017-06-24 17:36:49 +02:00
const be_t < u32 > size = func . size ;
sha1_update ( & ctx , reinterpret_cast < const u8 * > ( & addr ) , sizeof ( addr ) ) ;
sha1_update ( & ctx , reinterpret_cast < const u8 * > ( & size ) , sizeof ( size ) ) ;
2017-06-22 23:52:09 +02:00
2017-06-24 17:36:49 +02:00
for ( const auto & block : func . blocks )
{
2017-07-01 01:08:51 +02:00
if ( block . second = = 0 | | reloc )
2017-06-24 17:36:49 +02:00
{
continue ;
}
2017-06-22 23:52:09 +02:00
2018-03-17 18:41:35 +01:00
// Find relevant relocations
auto low = std : : lower_bound ( part . relocs . cbegin ( ) , part . relocs . cend ( ) , block . first ) ;
auto high = std : : lower_bound ( low , part . relocs . cend ( ) , block . first + block . second ) ;
auto addr = block . first ;
for ( ; low ! = high ; + + low )
{
// Aligned relocation address
const u32 roff = low - > addr & ~ 3 ;
if ( roff > addr )
{
// Hash from addr to the beginning of the relocation
2023-06-25 14:53:42 +02:00
sha1_update ( & ctx , ensure ( info . get_ptr < const u8 > ( addr ) ) , roff - addr ) ;
2018-03-17 18:41:35 +01:00
}
// Hash relocation type instead
const be_t < u32 > type = low - > type ;
sha1_update ( & ctx , reinterpret_cast < const u8 * > ( & type ) , sizeof ( type ) ) ;
// Set the next addr
addr = roff + 4 ;
}
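// Rationale sketch: bytes patched by relocations depend on the load address
// and so differ between runs of the same PRX; hashing them would defeat cache
// reuse, while hashing the relocation type keeps the object name stable
// across base addresses.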
2020-09-25 16:29:25 +02:00
if ( has_dcbz = = 1 )
{
2023-06-25 14:53:42 +02:00
auto i_ptr = ensure ( info . get_ptr < u32 > ( addr ) ) ;
for ( u32 i = addr , end = block . second + block . first - 1 ; i < = end ; i + = 4 , i_ptr + + )
2020-09-25 16:29:25 +02:00
{
2023-06-25 14:53:42 +02:00
if ( g_ppu_itype . decode ( * i_ptr ) = = ppu_itype : : DCBZ )
2020-09-25 16:29:25 +02:00
{
has_dcbz = 2 ;
break ;
}
}
}
2018-03-17 18:41:35 +01:00
// Hash from addr to the end of the block
2023-06-25 14:53:42 +02:00
sha1_update ( & ctx , ensure ( info . get_ptr < const u8 > ( addr ) ) , block . second - ( addr - block . first ) ) ;
2017-06-24 17:36:49 +02:00
}
2017-07-01 01:08:51 +02:00
if ( reloc )
{
continue ;
}
2020-09-25 16:29:25 +02:00
if ( has_dcbz = = 1 )
{
2023-06-25 14:53:42 +02:00
auto i_ptr = ensure ( info . get_ptr < u32 > ( func . addr ) ) ;
for ( u32 i = func . addr , end = func . addr + func . size - 1 ; i < = end ; i + = 4 , i_ptr + + )
2020-09-25 16:29:25 +02:00
{
2023-06-25 14:53:42 +02:00
if ( g_ppu_itype . decode ( * i_ptr ) = = ppu_itype : : DCBZ )
2020-09-25 16:29:25 +02:00
{
has_dcbz = 2 ;
break ;
}
}
}
2023-06-25 14:53:42 +02:00
sha1_update ( & ctx , ensure ( info . get_ptr < const u8 > ( func . addr ) ) , func . size ) ;
2017-06-22 23:52:09 +02:00
}
2017-02-26 16:56:31 +01:00
2019-01-13 18:06:30 +01:00
if ( false )
2017-07-22 15:39:39 +02:00
{
2017-09-25 17:52:34 +02:00
const be_t < u64 > forced_upd = 3 ;
2017-07-22 15:39:39 +02:00
sha1_update ( & ctx , reinterpret_cast < const u8 * > ( & forced_upd ) , sizeof ( forced_upd ) ) ;
}
2017-06-24 17:36:49 +02:00
sha1_finish ( & ctx , output ) ;
2019-01-13 18:06:30 +01:00
// Settings: should be populated by settings which affect codegen (TODO)
enum class ppu_settings : u32
{
2023-04-08 14:21:22 +02:00
platform_bit ,
2021-12-30 17:39:18 +01:00
accurate_dfma ,
fixup_vnan ,
2022-01-15 12:30:13 +01:00
fixup_nj_denormals ,
2020-09-25 16:29:25 +02:00
accurate_cache_line_stores ,
2020-04-07 19:29:11 +02:00
reservations_128_byte ,
2021-01-19 18:40:15 +01:00
greedy_mode ,
2021-12-30 17:39:18 +01:00
accurate_sat ,
accurate_fpcc ,
accurate_vnan ,
2022-01-15 12:30:13 +01:00
accurate_nj_mode ,
2024-03-18 15:14:45 +01:00
contains_symbol_resolver ,
2019-01-13 18:06:30 +01:00
__bitset_enum_max
} ;
be_t < bs_t < ppu_settings > > settings { } ;
2023-04-08 14:21:22 +02:00
# if !defined(_WIN32) && !defined(__APPLE__)
settings + = ppu_settings : : platform_bit ;
2019-01-13 18:06:30 +01:00
# endif
2021-12-30 17:39:18 +01:00
if ( g_cfg . core . use_accurate_dfma )
settings + = ppu_settings : : accurate_dfma ;
if ( g_cfg . core . ppu_fix_vnan )
settings + = ppu_settings : : fixup_vnan ;
2022-01-15 12:30:13 +01:00
if ( g_cfg . core . ppu_llvm_nj_fixup )
settings + = ppu_settings : : fixup_nj_denormals ;
2020-09-25 16:29:25 +02:00
if ( has_dcbz = = 2 )
settings + = ppu_settings : : accurate_cache_line_stores ;
2020-10-13 21:36:00 +02:00
if ( g_cfg . core . ppu_128_reservations_loop_max_length )
2020-04-07 19:29:11 +02:00
settings + = ppu_settings : : reservations_128_byte ;
2021-01-19 18:40:15 +01:00
if ( g_cfg . core . ppu_llvm_greedy_mode )
settings + = ppu_settings : : greedy_mode ;
2021-12-30 17:39:18 +01:00
if ( has_mfvscr & & g_cfg . core . ppu_set_sat_bit )
settings + = ppu_settings : : accurate_sat ;
if ( g_cfg . core . ppu_set_fpcc )
settings + = ppu_settings : : accurate_fpcc , fmt : : throw_exception ( " FPCC Not implemented " ) ;
if ( g_cfg . core . ppu_set_vnan )
2022-01-15 12:30:13 +01:00
settings + = ppu_settings : : accurate_vnan , settings - = ppu_settings : : fixup_vnan , fmt : : throw_exception ( " VNAN Not implemented " ) ;
if ( g_cfg . core . ppu_use_nj_bit )
settings + = ppu_settings : : accurate_nj_mode , settings - = ppu_settings : : fixup_nj_denormals , fmt : : throw_exception ( " NJ Not implemented " ) ;
2024-03-18 15:14:45 +01:00
if ( fpos > = info . funcs . size ( ) )
settings + = ppu_settings : : contains_symbol_resolver ; // Avoid invalidating all modules for this purpose
2019-01-13 18:06:30 +01:00
// Write version, hash, CPU, settings
2023-05-24 20:22:18 +02:00
fmt : : append ( obj_name , " v6-kusa-%s-%s-%s.obj " , fmt : : base57 ( output , 16 ) , fmt : : base57 ( settings ) , jit_compiler : : cpu ( g_cfg . core . llvm_cpu ) ) ;
2017-06-24 17:36:49 +02:00
}
2017-06-22 23:52:09 +02:00
2023-09-10 16:39:55 +02:00
if ( cpu ? cpu - > state . all_of ( cpu_flag : : exit ) : Emu . IsStopped ( ) )
2017-02-26 16:56:31 +01:00
{
2017-06-24 17:36:49 +02:00
break ;
}
2017-06-22 23:52:09 +02:00
2021-01-30 23:04:07 +01:00
if ( ! check_only )
2017-07-01 01:08:51 +02:00
{
2023-09-05 13:20:50 +02:00
total_compile + + ;
2019-12-04 23:17:57 +01:00
2021-01-30 23:04:07 +01:00
link_workload . emplace_back ( obj_name , false ) ;
}

		// Check object file
		if (jit_compiler::check(cache_path + obj_name))
		{
			if (!jit && !check_only)
			{
				ppu_log.success("LLVM: Module exists: %s", obj_name);

				// Already compiled: revert the increase of the total amount instead of incrementing
				// "pdone", which would give a false sense of progress to the dialog and the user
				total_compile--;
			}

			continue;
		}

		if (check_only)
		{
			return true;
		}

		// Remember that something new was compiled (used in ppu_initialize(void))
		compiled_new = true;

		// Adjust information (is_compiled)
		link_workload.back().second = true;

		// Fill workload list for compilation
		workload.emplace_back(std::move(obj_name), std::move(part));
	}

	if (check_only)
	{
		return false;
	}

	// Update progress dialog
	if (total_compile)
	{
		g_progr_ptotal += total_compile;
	}

	if (g_progr_ftotal_bits && file_size)
	{
		g_progr_fknown_bits += file_size;
	}

	// Create worker threads for compilation
	if (!workload.empty())
	{
		*progr = "Compiling PPU Modules...";

		u32 thread_count = rpcs3::utils::get_max_threads();
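
		// Clamp the worker count: threads beyond the number of modules would have nothing to claim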
		if (workload.size() < thread_count)
		{
			thread_count = ::size32(workload);
		}

		struct thread_index_allocator
		{
			atomic_t<u64> index = 0;
		};

		struct thread_op
		{
			atomic_t<u32>& work_cv;
			std::vector<std::pair<std::string, ppu_module>>& workload;
			const ppu_module& main_module;
			const std::string& cache_path;
			const cpu_thread* cpu;

			std::unique_lock<decltype(jit_core_allocator::sem)> core_lock;

			thread_op(atomic_t<u32>& work_cv, std::vector<std::pair<std::string, ppu_module>>& workload
				, const cpu_thread* cpu, const ppu_module& main_module, const std::string& cache_path, decltype(jit_core_allocator::sem)& sem) noexcept
				: work_cv(work_cv)
				, workload(workload)
				, main_module(main_module)
				, cache_path(cache_path)
				, cpu(cpu)
			{
				// Save mutex
				core_lock = std::unique_lock{sem, std::defer_lock};
			}

			thread_op(const thread_op& other) noexcept
				: work_cv(other.work_cv)
				, workload(other.workload)
				, main_module(other.main_module)
				, cache_path(other.cache_path)
				, cpu(other.cpu)
			{
				if (auto mtx = other.core_lock.mutex())
				{
					// Save mutex
					core_lock = std::unique_lock{*mtx, std::defer_lock};
				}
			}

			thread_op(thread_op&& other) noexcept = default;

			void operator()()
			{
				// Set low priority
				thread_ctrl::scoped_priority low_prio(-1);

#ifdef __APPLE__
				pthread_jit_write_protect_np(false);
#endif
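
				// Claim workload entries via atomic fetch-add on "work_cv"; "g_progr_pdone++" advances
				// the progress dialog once per processed module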
				for (u32 i = work_cv++; i < workload.size(); i = work_cv++, g_progr_pdone++)
				{
					if (cpu ? cpu->state.all_of(cpu_flag::exit) : Emu.IsStopped())
					{
						continue;
					}

					// Keep allocating workload
					const auto& [obj_name, part] = std::as_const(workload)[i];

					ppu_log.warning("LLVM: Compiling module %s%s", cache_path, obj_name);

					// Use another JIT instance
					jit_compiler jit2({}, g_cfg.core.llvm_cpu, 0x1);
					ppu_initialize2(jit2, part, cache_path, obj_name, i == workload.size() - 1 ? main_module : part);

					ppu_log.success("LLVM: Compiled module %s", obj_name);
				}

				core_lock.unlock();
			}
		};
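
		// Concurrency summary: "work_cv" is the shared atomic index the workers claim modules from,
		// and each worker's "core_lock" (a deferred lock on jit_core_allocator::sem, acquired in the
		// checker lambda below and released at the end of operator()) bounds concurrent compilations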
		// Prevent watchdog thread from terminating
		g_watchdog_hold_ctr++;

		named_thread_group threads(fmt::format("PPUW.%u.", ++g_fxo->get<thread_index_allocator>().index), thread_count
			, thread_op(work_cv, workload, cpu, info, cache_path, g_fxo->get<jit_core_allocator>().sem)
			, [&](u32 /*thread_index*/, thread_op& op)
		{
			// Allocate "core"
			op.core_lock.lock();

			// Second check before creating another thread
			return work_cv < workload.size() && (cpu ? !cpu->state.all_of(cpu_flag::exit) : !Emu.IsStopped());
		});

		threads.join();

		g_watchdog_hold_ctr--;
	}

	{
		if (!is_being_used_in_emulation || (cpu ? cpu->state.all_of(cpu_flag::exit) : Emu.IsStopped()))
		{
			return compiled_new;
		}

		if (workload.size() < link_workload.size())
		{
			// Only show this message if this task is relevant (some modules only need linking)
			*progr = "Linking PPU Modules...";
		}

		for (const auto& [obj_name, is_compiled] : link_workload)
		{
			if (cpu ? cpu->state.all_of(cpu_flag::exit) : Emu.IsStopped())
			{
				break;
			}

			jit->add(cache_path + obj_name);

			if (!is_compiled)
			{
				ppu_log.success("LLVM: Loaded module %s", obj_name);
				g_progr_pdone++;
			}
		}
	}

	if (!is_being_used_in_emulation || (cpu ? cpu->state.all_of(cpu_flag::exit) : Emu.IsStopped()))
	{
		return compiled_new;
	}

	// "jit" can be null if the loop above was never entered
#ifdef __APPLE__
	pthread_jit_write_protect_np(false);
#endif

	// Try to patch all single and unregistered BLRs with the same function (TODO: Maybe generalize it into PIC code detection and patching)
	ppu_intrp_func_t BLR_func = nullptr;

	const bool is_first = jit && !jit_mod.init;
	const bool showing_only_apply_stage = !g_progr.load() && !g_progr_ptotal && !g_progr_ftotal && g_progr_ptotal.compare_and_swap_test(0, 1);

	progr = "Applying PPU Code...";

	if (is_first)
	{
		jit->fin();

		jit_mod.symbol_resolver = reinterpret_cast<void(*)(u8*, u64)>(jit->get("__resolve_symbols"));
	}
	else
	{
		ensure(jit_mod.symbol_resolver);
	}

	jit_mod.symbol_resolver(vm::g_exec_addr, info.segs[0].addr);
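
	// Assumption: the generated "__resolve_symbols" (see GetSymbolResolver in ppu_initialize2) fills
	// the executable-pointer table at vm::g_exec_addr with this module's function entries, rebased
	// onto the first segment's address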

	// Find a BLR-only function in order to copy it to all BLRs (some games need it)
	for (const auto& func : info.funcs)
	{
		if (func.size == 4 && *info.get_ptr<u32>(func.addr) == ppu_instructions::BLR())
		{
			BLR_func = ppu_read(func.addr);
			break;
		}
	}

	if (is_first)
	{
		jit_mod.init = true;
	}
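
	// Assumption about the pointer math below: exec-table entries carry auxiliary data in their upper
	// 16 bits, so "<< 16 >> 16" sign-extends the low 48 bits to recover the raw code pointer before
	// comparing it against the recompiler fallback handler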
	if (BLR_func)
	{
		auto inst_ptr = info.get_ptr<u32>(info.segs[0].addr);

		for (u32 addr = info.segs[0].addr; addr < info.segs[0].addr + info.segs[0].size; addr += 4, inst_ptr++)
		{
			if (*inst_ptr == ppu_instructions::BLR() && (reinterpret_cast<uptr>(ppu_read(addr)) << 16 >> 16) == reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc))
			{
				write_to_ptr<ppu_intrp_func_t>(ppu_ptr(addr), BLR_func);
			}
		}
	}

	if (showing_only_apply_stage)
	{
		// Done
		g_progr_pdone++;
	}

	return compiled_new;
#else
	fmt::throw_exception("LLVM is not available in this build.");
#endif
}

static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, const std::string& cache_path, const std::string& obj_name, const ppu_module& whole_module)
{
#ifdef LLVM_AVAILABLE
	using namespace llvm;

	// Create LLVM module
	std::unique_ptr<Module> _module = std::make_unique<Module>(obj_name, jit.get_context());

	// Initialize target
	_module->setTargetTriple(jit_compiler::triple1());
	_module->setDataLayout(jit.get_engine().getTargetMachine()->createDataLayout());

	// Initialize translator
	PPUTranslator translator(jit.get_context(), _module.get(), module_part, jit.get_engine());

	// Define some types
	const auto _func = FunctionType::get(translator.get_type<void>(), {
		translator.get_type<u8*>(), // Exec base
		translator.GetContextType()->getPointerTo(), // PPU context
		translator.get_type<u64>(), // Segment address (for PRX)
		translator.get_type<u8*>(), // Memory base
		translator.get_type<u64>(), // r0
		translator.get_type<u64>(), // r1
		translator.get_type<u64>(), // r2
	}, false);
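
	// All guest functions share this signature: exec base, PPU context, segment address, memory base
	// and r0-r2 arrive in registers, which together with the GHC calling convention set below keeps
	// guest-to-guest calls cheap and tail-call friendly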

	// Initialize function list
	for (const auto& func : module_part.funcs)
	{
		if (func.size)
		{
			const auto f = cast<Function>(_module->getOrInsertFunction(func.name, _func).getCallee());
			f->setCallingConv(CallingConv::GHC);
			f->addParamAttr(1, llvm::Attribute::NoAlias);
			f->addFnAttr(Attribute::NoUnwind);
		}
	}
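
	// Pre-declaring every function up front lets calls between functions of this module resolve
	// directly; NoAlias on the PPU context parameter helps alias analysis, and NoUnwind avoids
	// unwind-table overhead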

	{
		if (g_cfg.core.ppu_debug)
		{
			translator.build_interpreter();
		}

#if LLVM_VERSION_MAJOR < 17
		legacy::FunctionPassManager pm(_module.get());

		// Basic optimizations
		//pm.add(createCFGSimplificationPass());
		//pm.add(createPromoteMemoryToRegisterPass());
		pm.add(createEarlyCSEPass());
		//pm.add(createTailCallEliminationPass());
		//pm.add(createInstructionCombiningPass());
		//pm.add(createBasicAAWrapperPass());
		//pm.add(new MemoryDependenceAnalysis());
		//pm.add(createLICMPass());
		//pm.add(createLoopInstSimplifyPass());
		//pm.add(createNewGVNPass());
		//pm.add(createDeadStoreEliminationPass());
		//pm.add(createSCCPPass());
		//pm.add(createReassociatePass());
		//pm.add(createInstructionCombiningPass());
		//pm.add(createInstructionSimplifierPass());
		//pm.add(createAggressiveDCEPass());
		//pm.add(createCFGSimplificationPass());
		//pm.add(createLintPass()); // Check
#else
		// Create the analysis managers.
		// These must be declared in this order so that they are destroyed in the
		// correct order due to inter-analysis-manager references.
		LoopAnalysisManager lam;
		FunctionAnalysisManager fam;
		CGSCCAnalysisManager cgam;
		ModuleAnalysisManager mam;

		// Create the new pass manager builder.
		// Take a look at the PassBuilder constructor parameters for more
		// customization, e.g. specifying a TargetMachine or various debugging options.
		PassBuilder pb;

		// Register all the basic analyses with the managers.
		pb.registerModuleAnalyses(mam);
		pb.registerCGSCCAnalyses(cgam);
		pb.registerFunctionAnalyses(fam);
		pb.registerLoopAnalyses(lam);
		pb.crossRegisterProxies(lam, fam, cgam, mam);

		FunctionPassManager fpm;

		// Basic optimizations
		fpm.addPass(EarlyCSEPass());
#endif

		// Translate functions
		for (usz fi = 0, fmax = module_part.funcs.size(); fi < fmax; fi++)
		{
			if (Emu.IsStopped())
			{
				ppu_log.success("LLVM: Translation cancelled");
				return;
			}

			if (module_part.funcs[fi].size)
			{
				// Translate
				if (const auto func = translator.Translate(module_part.funcs[fi]))
				{
					// Run optimization passes
#if LLVM_VERSION_MAJOR < 17
					pm.run(*func);
#else
					fpm.run(*func, fam);
#endif
				}
				else
				{
					Emu.Pause();
					return;
				}
			}
		}

		// Generate the symbol resolver in one module only (it covers all functions)
		if (&whole_module != &module_part)
		{
			if (const auto func = translator.GetSymbolResolver(whole_module))
			{
				// Run optimization passes
#if LLVM_VERSION_MAJOR < 17
				pm.run(*func);
#else
				fpm.run(*func, fam);
#endif
			}
			else
			{
				Emu.Pause();
				return;
			}
		}
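
		// Note (from the caller in ppu_initialize): "whole_module" differs from "module_part" only for
		// the final workload entry, which receives the full module, so exactly one object file ends up
		// containing "__resolve_symbols"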

		//legacy::PassManager mpm;

		// Remove unused functions, structs, global variables, etc
		//mpm.add(createStripDeadPrototypesPass());
		//mpm.add(createFunctionInliningPass());
		//mpm.add(createDeadInstEliminationPass());
		//mpm.run(*module);

		std::string result;
		raw_string_ostream out(result);

		if (g_cfg.core.llvm_logs)
		{
			out << *_module; // print IR
			fs::file(cache_path + obj_name + ".log", fs::rewrite).write(out.str());
			result.clear();
		}

		if (verifyModule(*_module, &out))
		{
			out.flush();
			ppu_log.error("LLVM: Verification failed for %s:\n%s", obj_name, result);
			Emu.CallFromMainThread([]{ Emu.GracefulShutdown(false, true); });
			return;
		}

		ppu_log.notice("LLVM: %zu functions generated", _module->getFunctionList().size());
	}

	// Load or compile module
	jit.add(std::move(_module), cache_path);
#endif // LLVM_AVAILABLE
}