2020-12-05 13:08:24 +01:00
# include "stdafx.h"
2018-09-29 00:12:00 +02:00
# include "SPURecompiler.h"
2018-04-30 18:44:01 +02:00
# include "Emu/System.h"
2020-02-15 23:36:20 +01:00
# include "Emu/system_config.h"
2015-08-26 04:54:06 +02:00
# include "Emu/IdManager.h"
2021-03-23 20:32:50 +01:00
# include "Emu/Cell/timers.hpp"
2018-05-02 20:49:19 +02:00
# include "Crypto/sha1.h"
2019-05-11 18:21:07 +02:00
# include "Utilities/JIT.h"
2015-08-26 04:54:06 +02:00
# include "SPUThread.h"
2018-04-09 16:45:37 +02:00
# include "SPUAnalyser.h"
2018-04-30 18:44:01 +02:00
# include "SPUInterpreter.h"
# include <thread>
2021-07-27 08:27:03 +02:00
# include <unordered_set>
2015-08-26 04:54:06 +02:00
2020-12-13 14:34:45 +01:00
# include "util/v128.hpp"
2021-12-30 17:39:18 +01:00
# include "util/simd.hpp"
2020-12-18 10:55:54 +01:00
# include "util/sysinfo.hpp"
2020-12-13 14:34:45 +01:00
2021-12-30 17:39:18 +01:00
const extern spu_decoder < spu_itype > g_spu_itype ;
const extern spu_decoder < spu_iname > g_spu_iname ;
const extern spu_decoder < spu_iflag > g_spu_iflag ;
2019-07-15 15:16:30 +02:00
2019-05-05 15:28:41 +02:00
# ifdef LLVM_AVAILABLE
# include "Emu/CPU/CPUTranslator.h"
2019-11-30 00:11:28 +01:00
# ifdef _MSC_VER
# pragma warning(push, 0)
# else
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wall"
# pragma GCC diagnostic ignored "-Wextra"
# pragma GCC diagnostic ignored "-Wold-style-cast"
2021-03-05 20:05:37 +01:00
# pragma GCC diagnostic ignored "-Wunused-parameter"
2021-03-08 21:41:23 +01:00
# pragma GCC diagnostic ignored "-Wstrict-aliasing"
2021-03-30 17:31:46 +02:00
# pragma GCC diagnostic ignored "-Weffc++"
2021-04-07 23:52:18 +02:00
# pragma GCC diagnostic ignored "-Wmissing-noreturn"
2019-11-30 00:11:28 +01:00
# endif
2023-03-10 23:57:21 +01:00
# if LLVM_VERSION_MAJOR < 17
2019-05-05 15:28:41 +02:00
# include "llvm/ADT/Triple.h"
2023-03-10 23:57:21 +01:00
# endif
2023-09-25 17:04:48 +02:00
# include "llvm/TargetParser/Host.h"
2019-05-05 15:28:41 +02:00
# include "llvm/IR/LegacyPassManager.h"
# include "llvm/IR/Verifier.h"
2019-05-17 22:54:47 +02:00
# include "llvm/IR/InlineAsm.h"
2019-05-05 15:28:41 +02:00
# include "llvm/Transforms/Utils/BasicBlockUtils.h"
# include "llvm/Transforms/Scalar.h"
2023-04-15 13:37:07 +02:00
# include "llvm/Analysis/PostDominators.h"
2023-04-19 12:02:10 +02:00
# include "llvm/Analysis/AliasAnalysis.h"
# include "llvm/ADT/PostOrderIterator.h"
2019-11-30 00:11:28 +01:00
# ifdef _MSC_VER
# pragma warning(pop)
# else
# pragma GCC diagnostic pop
# endif
2019-05-05 15:28:41 +02:00
class spu_llvm_recompiler : public spu_recompiler_base , public cpu_translator
{
// JIT Instance
jit_compiler m_jit { { } , jit_compiler : : cpu ( g_cfg . core . llvm_cpu ) } ;
// Interpreter table size power
const u8 m_interp_magn ;
// Constant opcode bits
u32 m_op_const_mask = - 1 ;
// Current function chunk entry point
u32 m_entry ;
// Main entry point offset
u32 m_base ;
2019-05-12 02:22:14 +02:00
// Module name
std : : string m_hash ;
2019-11-17 20:07:08 +01:00
// Patchpoint unique id
u32 m_pp_id = 0 ;
2020-04-04 20:33:46 +02:00
// Next opcode
u32 m_next_op = 0 ;
2019-05-05 15:28:41 +02:00
// Current function (chunk)
llvm : : Function * m_function ;
llvm : : Value * m_thread ;
llvm : : Value * m_lsptr ;
llvm : : Value * m_interp_op ;
llvm : : Value * m_interp_pc ;
llvm : : Value * m_interp_table ;
llvm : : Value * m_interp_7f0 ;
llvm : : Value * m_interp_regs ;
// Helpers
llvm : : Value * m_base_pc ;
llvm : : Value * m_interp_pc_next ;
llvm : : BasicBlock * m_interp_bblock ;
// i8*, contains constant vm::g_base_addr value
llvm : : Value * m_memptr ;
2018-07-25 15:39:03 +02:00
2018-06-10 14:46:01 +02:00
// Pointers to registers in the thread context
std : : array < llvm : : Value * , s_reg_max > m_reg_addr ;
// Global variable (function table)
llvm : : GlobalVariable * m_function_table { } ;
2019-03-25 19:31:16 +01:00
// Helpers (interpreter)
llvm : : GlobalVariable * m_scale_float_to { } ;
llvm : : GlobalVariable * m_scale_to_float { } ;
2019-05-05 15:28:41 +02:00
// Function for check_state execution
llvm : : Function * m_test_state { } ;
2019-05-11 19:48:47 +02:00
// Chunk for external tail call (dispatch)
llvm : : Function * m_dispatch { } ;
2019-03-13 18:57:42 +01:00
llvm : : MDNode * m_md_unlikely ;
llvm : : MDNode * m_md_likely ;
2018-06-10 14:46:01 +02:00
struct block_info
{
2019-05-05 15:28:41 +02:00
// Pointer to the analyser
spu_recompiler_base : : block_info * bb { } ;
2018-06-10 14:46:01 +02:00
// Current block's entry block
llvm : : BasicBlock * block ;
// Final block (for PHI nodes, set after completion)
llvm : : BasicBlock * block_end { } ;
2023-04-15 13:37:07 +02:00
// Additional blocks for sinking instructions after block_end:
std : : unordered_map < u32 , llvm : : BasicBlock * , value_hash < u32 , 2 > > block_edges ;
2018-06-10 14:46:01 +02:00
// Current register values
std : : array < llvm : : Value * , s_reg_max > reg { } ;
// PHI nodes created for this block (if any)
std : : array < llvm : : PHINode * , s_reg_max > phi { } ;
// Store instructions
std : : array < llvm : : StoreInst * , s_reg_max > store { } ;
2023-07-21 17:12:57 +02:00
// Store reordering/elimination protection
std : : array < usz , s_reg_max > store_context_last_id = fill_array < usz > ( 0 ) ; // Protects against illegal forward ordering
std : : array < usz , s_reg_max > store_context_first_id = fill_array < usz > ( usz { umax } ) ; // Protects against illegal past store elimination (backwards ordering is not implemented)
std : : array < usz , s_reg_max > store_context_ctr = fill_array < usz > ( 1 ) ; // Store barrier cointer
2023-12-29 07:30:15 +01:00
bool does_gpr_barrier_proceed_last_store ( u32 i ) const noexcept
{
const usz counter = store_context_ctr [ i ] ;
return counter ! = 1 & & counter > store_context_last_id [ i ] ;
}
bool does_gpr_barrier_preceed_first_store ( u32 i ) const noexcept
{
const usz counter = store_context_ctr [ i ] ;
2023-12-30 17:40:37 +01:00
const usz first_id = store_context_first_id [ i ] ;
return counter ! = 1 & & first_id ! = umax & & counter < first_id ;
2023-12-29 07:30:15 +01:00
}
2018-06-10 14:46:01 +02:00
} ;
2019-05-05 15:28:41 +02:00
struct function_info
2018-07-22 18:09:25 +02:00
{
2019-05-05 15:28:41 +02:00
// Standard callable chunk
llvm : : Function * chunk { } ;
2018-07-22 18:09:25 +02:00
2019-05-05 15:28:41 +02:00
// Callable function
llvm : : Function * fn { } ;
2018-07-22 18:09:25 +02:00
2019-05-05 15:28:41 +02:00
// Registers possibly loaded in the entry block
std : : array < llvm : : Value * , s_reg_max > load { } ;
2018-07-22 18:09:25 +02:00
} ;
2018-06-10 14:46:01 +02:00
// Current block
block_info * m_block ;
2019-05-05 15:28:41 +02:00
// Current function or chunk
function_info * m_finfo ;
2018-07-22 18:09:25 +02:00
2018-06-10 14:46:01 +02:00
// All blocks in the current function chunk
std : : unordered_map < u32 , block_info , value_hash < u32 , 2 > > m_blocks ;
// Block list for processing
std : : vector < u32 > m_block_queue ;
// All function chunks in current SPU compile unit
2019-05-05 15:28:41 +02:00
std : : unordered_map < u32 , function_info , value_hash < u32 , 2 > > m_functions ;
2018-06-10 14:46:01 +02:00
// Function chunk list for processing
std : : vector < u32 > m_function_queue ;
// Add or get the function chunk
2019-05-05 15:28:41 +02:00
function_info * add_function ( u32 addr )
2018-06-10 14:46:01 +02:00
{
2019-05-05 15:28:41 +02:00
// Enqueue if necessary
const auto empl = m_functions . try_emplace ( addr ) ;
if ( ! empl . second )
{
return & empl . first - > second ;
}
// Chunk function type
2019-05-11 19:48:47 +02:00
// 0. Result (tail call target)
2019-05-05 15:28:41 +02:00
// 1. Thread context
// 2. Local storage pointer
// 3.
2019-10-15 16:43:33 +02:00
#if 0
2019-05-11 19:48:47 +02:00
const auto chunk_type = get_ftype < u8 * , u8 * , u8 * , u32 > ( ) ;
# else
2019-05-05 15:28:41 +02:00
const auto chunk_type = get_ftype < void , u8 * , u8 * , u32 > ( ) ;
2019-05-11 19:48:47 +02:00
# endif
2019-05-05 15:28:41 +02:00
2018-06-10 14:46:01 +02:00
// Get function chunk name
2022-01-23 13:22:00 +01:00
const std : : string name = fmt : : format ( " __spu-cx%05x-%s " , addr , fmt : : base57 ( be_t < u64 > { m_hash_start } ) ) ;
2019-05-05 15:28:41 +02:00
llvm : : Function * result = llvm : : cast < llvm : : Function > ( m_module - > getOrInsertFunction ( name , chunk_type ) . getCallee ( ) ) ;
2018-06-10 14:46:01 +02:00
// Set parameters
result - > setLinkage ( llvm : : GlobalValue : : InternalLinkage ) ;
2023-03-10 23:57:21 +01:00
result - > addParamAttr ( 0 , llvm : : Attribute : : NoAlias ) ;
result - > addParamAttr ( 1 , llvm : : Attribute : : NoAlias ) ;
2019-10-15 16:43:33 +02:00
# if 1
2019-05-05 15:28:41 +02:00
result - > setCallingConv ( llvm : : CallingConv : : GHC ) ;
2019-05-11 01:35:05 +02:00
# endif
2018-06-10 14:46:01 +02:00
2019-05-05 15:28:41 +02:00
empl . first - > second . chunk = result ;
2018-07-22 18:09:25 +02:00
2019-05-05 15:28:41 +02:00
if ( g_cfg . core . spu_block_size = = spu_block_size_type : : giga )
2018-06-10 14:46:01 +02:00
{
2019-05-05 15:28:41 +02:00
// Find good real function
const auto ffound = m_funcs . find ( addr ) ;
2018-07-22 18:09:25 +02:00
2019-05-05 15:28:41 +02:00
if ( ffound ! = m_funcs . end ( ) & & ffound - > second . good )
2018-07-22 18:09:25 +02:00
{
2019-05-05 15:28:41 +02:00
// Real function type (not equal to chunk type)
2019-05-15 15:18:13 +02:00
// 4. $SP
// 5. $3
const auto func_type = get_ftype < u32 [ 4 ] , u8 * , u8 * , u32 , u32 [ 4 ] , u32 [ 4 ] > ( ) ;
2019-05-05 15:28:41 +02:00
2022-01-23 13:22:00 +01:00
const std : : string fname = fmt : : format ( " __spu-fx%05x-%s " , addr , fmt : : base57 ( be_t < u64 > { m_hash_start } ) ) ;
2019-05-05 15:28:41 +02:00
llvm : : Function * fn = llvm : : cast < llvm : : Function > ( m_module - > getOrInsertFunction ( fname , func_type ) . getCallee ( ) ) ;
2018-07-22 18:09:25 +02:00
2019-05-05 15:28:41 +02:00
fn - > setLinkage ( llvm : : GlobalValue : : InternalLinkage ) ;
2023-03-10 23:57:21 +01:00
fn - > addParamAttr ( 0 , llvm : : Attribute : : NoAlias ) ;
fn - > addParamAttr ( 1 , llvm : : Attribute : : NoAlias ) ;
2019-10-15 16:43:33 +02:00
# if 1
2019-05-05 15:28:41 +02:00
fn - > setCallingConv ( llvm : : CallingConv : : GHC ) ;
2019-05-11 01:35:05 +02:00
# endif
2019-05-05 15:28:41 +02:00
empl . first - > second . fn = fn ;
}
}
// Enqueue
m_function_queue . push_back ( addr ) ;
return & empl . first - > second ;
}
// Create tail call to the function chunk (non-tail calls are just out of question).
// callee: the chunk to jump to; when null, either the dispatcher is used
//         (verification disabled) or a branch patchpoint is generated.
// base_pc: base PC to pass along; defaults to m_base_pc when null.
void tail_chunk(llvm::FunctionCallee callee, llvm::Value* base_pc = nullptr)
{
	if (!callee && !g_cfg.core.spu_verification)
	{
		// Disable patchpoints if verification is disabled
		callee = m_dispatch;
	}
	else if (!callee)
	{
		// Create branch patchpoint if chunk == nullptr
		ensure(m_finfo && (!m_finfo->fn || m_function == m_finfo->chunk));

		// Register under a unique linkable name
		const std::string ppname = fmt::format("%s-pp-%u", m_hash, m_pp_id++);
		m_engine->updateGlobalMapping(ppname, reinterpret_cast<u64>(m_spurt->make_branch_patchpoint()));

		// Create function with not exactly correct type
		const auto ppfunc = llvm::cast<llvm::Function>(m_module->getOrInsertFunction(ppname, m_finfo->chunk->getFunctionType()).getCallee());
		ppfunc->setCallingConv(m_finfo->chunk->getCallingConv());

		if (m_finfo->chunk->getReturnType() != get_type<void>())
		{
			// Non-void chunk: return the patchpoint function itself as the tail call target
			m_ir->CreateRet(ppfunc);
			return;
		}

		callee = ppfunc;
		// Patchpoint entry ignores base PC; pass zero
		base_pc = m_ir->getInt32(0);
	}

	ensure(callee);
	auto call = m_ir->CreateCall(callee, {m_thread, m_lsptr, base_pc ? base_pc : m_base_pc});
	// NOTE(review): when m_finfo is set, the calling convention/return type are
	// taken from the current chunk rather than the callee — presumably they
	// always match; confirm against chunk creation in add_function.
	auto func = m_finfo ? m_finfo->chunk : llvm::dyn_cast<llvm::Function>(callee.getCallee());

	call->setCallingConv(func->getCallingConv());
	call->setTailCall();

	// Emit the terminator matching the chunk's return type
	if (func->getReturnType() == get_type<void>())
	{
		m_ir->CreateRetVoid();
	}
	else
	{
		m_ir->CreateRet(call);
	}
}
// Call the real function
void call_function ( llvm : : Function * fn , bool tail = false )
{
llvm : : Value * lr { } ;
llvm : : Value * sp { } ;
2019-05-15 15:18:13 +02:00
llvm : : Value * r3 { } ;
2019-05-05 15:28:41 +02:00
if ( ! m_finfo - > fn & & ! m_block )
{
2023-03-10 23:57:21 +01:00
lr = m_ir - > CreateLoad ( get_type < u32 > ( ) , spu_ptr < u32 > ( & spu_thread : : gpr , + s_reg_lr , & v128 : : _u32 , 3 ) ) ;
sp = m_ir - > CreateLoad ( get_type < u32 [ 4 ] > ( ) , spu_ptr < u32 [ 4 ] > ( & spu_thread : : gpr , + s_reg_sp ) ) ;
r3 = m_ir - > CreateLoad ( get_type < u32 [ 4 ] > ( ) , spu_ptr < u32 [ 4 ] > ( & spu_thread : : gpr , 3 ) ) ;
2019-05-05 15:28:41 +02:00
}
else
{
lr = m_ir - > CreateExtractElement ( get_reg_fixed < u32 [ 4 ] > ( s_reg_lr ) . value , 3 ) ;
2019-05-15 14:42:03 +02:00
sp = get_reg_fixed < u32 [ 4 ] > ( s_reg_sp ) . value ;
2019-05-15 15:18:13 +02:00
r3 = get_reg_fixed < u32 [ 4 ] > ( 3 ) . value ;
2019-05-05 15:28:41 +02:00
}
2020-12-09 08:47:45 +01:00
const auto _call = m_ir - > CreateCall ( ensure ( fn ) , { m_thread , m_lsptr , m_base_pc , sp , r3 } ) ;
2019-05-05 15:28:41 +02:00
2019-05-11 01:35:05 +02:00
_call - > setCallingConv ( fn - > getCallingConv ( ) ) ;
2019-05-05 15:28:41 +02:00
// Tail call using loaded LR value (gateway from a chunk)
if ( ! m_finfo - > fn )
{
lr = m_ir - > CreateAnd ( lr , 0x3fffc ) ;
m_ir - > CreateStore ( lr , spu_ptr < u32 > ( & spu_thread : : pc ) ) ;
2019-05-15 15:18:13 +02:00
m_ir - > CreateStore ( _call , spu_ptr < u32 [ 4 ] > ( & spu_thread : : gpr , 3 ) ) ;
2019-05-05 15:28:41 +02:00
m_ir - > CreateBr ( add_block_indirect ( { } , value < u32 > ( lr ) ) ) ;
}
else if ( tail )
{
_call - > setTailCall ( ) ;
m_ir - > CreateRet ( _call ) ;
}
else
{
// TODO: initialize $LR with a constant
for ( u32 i = 0 ; i < s_reg_max ; i + + )
{
if ( i ! = s_reg_lr & & i ! = s_reg_sp & & ( i < s_reg_80 | | i > s_reg_127 ) )
2018-07-22 18:09:25 +02:00
{
2023-03-10 23:57:21 +01:00
m_block - > reg [ i ] = m_ir - > CreateLoad ( get_reg_type ( i ) , init_reg_fixed ( i ) ) ;
2018-07-22 18:09:25 +02:00
}
}
2019-05-05 15:28:41 +02:00
2019-05-15 15:18:13 +02:00
// Set result
m_block - > reg [ 3 ] = _call ;
2018-06-10 14:46:01 +02:00
}
2019-05-05 15:28:41 +02:00
}
2018-06-10 14:46:01 +02:00
2019-05-05 15:28:41 +02:00
// Emit return from the real function
void ret_function ( )
{
2019-05-15 15:18:13 +02:00
m_ir - > CreateRet ( get_reg_fixed < u32 [ 4 ] > ( 3 ) . value ) ;
2018-06-10 14:46:01 +02:00
}
void set_function ( llvm : : Function * func )
{
m_function = func ;
2023-03-10 23:57:21 +01:00
m_thread = func - > getArg ( 0 ) ;
m_lsptr = func - > getArg ( 1 ) ;
m_base_pc = func - > getArg ( 2 ) ;
2018-06-10 14:46:01 +02:00
m_reg_addr . fill ( nullptr ) ;
m_block = nullptr ;
2018-07-22 18:09:25 +02:00
m_finfo = nullptr ;
2018-06-10 14:46:01 +02:00
m_blocks . clear ( ) ;
m_block_queue . clear ( ) ;
m_ir - > SetInsertPoint ( llvm : : BasicBlock : : Create ( m_context , " " , m_function ) ) ;
2023-03-10 23:57:21 +01:00
m_memptr = m_ir - > CreateLoad ( get_type < u8 * > ( ) , spu_ptr < u8 * > ( & spu_thread : : memory_base_addr ) ) ;
2018-06-10 14:46:01 +02:00
}
// Add block with current block as a predecessor.
// target: SPU address of the block; absolute: target is an absolute address
// (only meaningful when it leaves the current chunk). Returns the basic block
// to branch to (may be a freshly created trampoline).
llvm::BasicBlock* add_block(u32 target, bool absolute = false)
{
	// Check the predecessor
	const bool pred_found = m_block_info[target / 4] && m_preds[target].find_first_of(m_pos) + 1;

	if (m_blocks.empty())
	{
		// Special case: first block, proceed normally
		if (auto fn = std::exchange(m_finfo->fn, nullptr))
		{
			// Create a gateway (tail call from the chunk into the real function)
			call_function(fn, true);

			// Switch emission to the real function itself
			m_finfo->fn = fn;
			m_function = fn;
			m_thread = fn->getArg(0);
			m_lsptr = fn->getArg(1);
			m_base_pc = fn->getArg(2);

			m_ir->SetInsertPoint(llvm::BasicBlock::Create(m_context, "", fn));
			m_memptr = m_ir->CreateLoad(get_type<u8*>(), spu_ptr<u8*>(&spu_thread::memory_base_addr));

			// Load registers at the entry chunk
			for (u32 i = 0; i < s_reg_max; i++)
			{
				if (i >= s_reg_80 && i <= s_reg_127)
				{
					// TODO
					//m_finfo->load[i] = llvm::UndefValue::get(get_reg_type(i));
				}

				m_finfo->load[i] = m_ir->CreateLoad(get_reg_type(i), init_reg_fixed(i));
			}

			// Load $SP
			m_finfo->load[s_reg_sp] = fn->getArg(3);

			// Load first args
			m_finfo->load[3] = fn->getArg(4);
		}
	}
	else if (m_block_info[target / 4] && m_entry_info[target / 4] && !(pred_found && m_entry == target) && (!m_finfo->fn || !m_ret_info[target / 4]))
	{
		// Generate a tail call to the function chunk
		const auto cblock = m_ir->GetInsertBlock();
		const auto result = llvm::BasicBlock::Create(m_context, "", m_function);
		m_ir->SetInsertPoint(result);
		const auto pfinfo = add_function(target);

		if (absolute)
		{
			ensure(!m_finfo->fn);

			// Guard: only take the direct path if the base PC matches this module
			const auto next = llvm::BasicBlock::Create(m_context, "", m_function);
			const auto fail = llvm::BasicBlock::Create(m_context, "", m_function);
			m_ir->CreateCondBr(m_ir->CreateICmpEQ(m_base_pc, m_ir->getInt32(m_base)), next, fail);
			m_ir->SetInsertPoint(fail);
			m_ir->CreateStore(m_ir->getInt32(target), spu_ptr<u32>(&spu_thread::pc));
			tail_chunk(nullptr);
			m_ir->SetInsertPoint(next);
		}

		if (pfinfo->fn)
		{
			// Tail call to the real function
			call_function(pfinfo->fn, true);

			if (!result->getTerminator())
				ret_function();
		}
		else
		{
			// Just a boring tail call to another chunk
			update_pc(target);
			tail_chunk(pfinfo->chunk);
		}

		m_ir->SetInsertPoint(cblock);
		return result;
	}
	else if (!pred_found || !m_block_info[target / 4])
	{
		if (m_block_info[target / 4])
		{
			spu_log.error("[%s] [0x%x] Predecessor not found for target 0x%x (chunk=0x%x, entry=0x%x, size=%u)", m_hash, m_pos, target, m_entry, m_function_queue[0], m_size / 4);
		}

		// Unknown or unreachable target: emit a trampoline to the dispatcher/patchpoint
		const auto cblock = m_ir->GetInsertBlock();
		const auto result = llvm::BasicBlock::Create(m_context, "", m_function);
		m_ir->SetInsertPoint(result);

		if (absolute)
		{
			ensure(!m_finfo->fn);

			m_ir->CreateStore(m_ir->getInt32(target), spu_ptr<u32>(&spu_thread::pc));
		}
		else
		{
			update_pc(target);
		}

		tail_chunk(nullptr);
		m_ir->SetInsertPoint(cblock);
		return result;
	}

	// Absolute targets must have been handled by one of the branches above
	ensure(!absolute);

	auto& result = m_blocks[target].block;

	if (!result)
	{
		result = llvm::BasicBlock::Create(m_context, fmt::format("b-0x%x", target), m_function);

		// Add the block to the queue
		m_block_queue.push_back(target);
	}
	else if (m_block && m_blocks[target].block_end)
	{
		// Connect PHI nodes if necessary
		for (u32 i = 0; i < s_reg_max; i++)
		{
			if (const auto phi = m_blocks[target].phi[i])
			{
				// Keep the xfloat (f64[4]) representation when the PHI already uses it
				const auto typ = phi->getType() == get_type<f64[4]>() ? get_type<f64[4]>() : get_reg_type(i);
				phi->addIncoming(get_reg_fixed(i, typ), m_block->block_end);
			}
		}
	}

	return result;
}
2018-05-02 20:49:19 +02:00
2018-07-25 15:39:03 +02:00
// Compute an i8* pointer to base + offset (T only documents the pointee at call sites)
template <typename T = u8>
llvm::Value* _ptr(llvm::Value* base, u32 offset)
{
	return m_ir->CreateGEP(get_type<u8>(), base, m_ir->getInt64(offset));
}

// Pointer to a field of the thread context
template <typename T, typename... Args>
llvm::Value* spu_ptr(Args... offset_args)
{
	return _ptr<T>(m_thread, ::offset32(offset_args...));
}

// Pointer to a field of the thread context with an additional runtime offset
template <typename T, typename... Args>
llvm::Value* spu_ptr(value_t<u64> add, Args... offset_args)
{
	const auto off = m_ir->CreateGEP(get_type<u8>(), m_thread, m_ir->getInt64(::offset32(offset_args...)));

	// Fixed: the previous CreateAdd applied integer addition to a pointer value
	// ('off' is a GEP result) — the LLVM 'add' instruction requires integer
	// operands, so that built invalid IR. Apply the dynamic part as a second
	// byte-wise GEP instead.
	return m_ir->CreateGEP(get_type<u8>(), off, add.value);
}
2018-05-02 20:49:19 +02:00
2018-07-25 15:39:03 +02:00
// Return default register type
llvm::Type* get_reg_type(u32 index)
{
	// All GPRs are u32[4] vectors
	if (index < 128)
	{
		return get_type<u32[4]>();
	}

	switch (index)
	{
	case s_reg_mfc_eal:
	case s_reg_mfc_lsa:
		return get_type<u32>();
	case s_reg_mfc_tag:
		return get_type<u8>();
	case s_reg_mfc_size:
		return get_type<u16>();
	default:
		fmt::throw_exception("get_reg_type(%u): invalid register index", index);
	}
}

// Return byte offset of the register within the spu_thread context
u32 get_reg_offset(u32 index)
{
	if (index < 128)
	{
		return ::offset32(&spu_thread::gpr, index);
	}

	switch (index)
	{
	case s_reg_mfc_eal: return ::offset32(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::eal);
	case s_reg_mfc_lsa: return ::offset32(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::lsa);
	case s_reg_mfc_tag: return ::offset32(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::tag);
	case s_reg_mfc_size: return ::offset32(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::size);
	default:
		fmt::throw_exception("get_reg_offset(%u): invalid register index", index);
	}
}
2019-03-25 19:31:16 +01:00
// Get (and cache) the pointer to a register inside the thread context
llvm::Value* init_reg_fixed(u32 index)
{
	// Without a block context, compute the pointer inline
	if (!m_block)
	{
		return _ptr<u8>(m_thread, get_reg_offset(index));
	}

	auto& cached = ::at32(m_reg_addr, index);

	if (!cached)
	{
		// Save and restore current insert point if necessary
		const auto prev_block = m_ir->GetInsertBlock();

		// Emit register pointer at the beginning of the function chunk
		m_ir->SetInsertPoint(m_function->getEntryBlock().getTerminator());
		cached = _ptr<u8>(m_thread, get_reg_offset(index));
		m_ir->SetInsertPoint(prev_block);
	}

	return cached;
}
2019-03-25 19:31:16 +01:00
// Get pointer to the vector register (interpreter only)
template < typename T , uint I >
2021-03-05 20:05:37 +01:00
llvm : : Value * init_vr ( const bf_t < u32 , I , 7 > & )
2019-03-25 19:31:16 +01:00
{
if ( ! m_interp_magn )
{
m_interp_7f0 = m_ir - > getInt32 ( 0x7f0 ) ;
m_interp_regs = _ptr ( m_thread , get_reg_offset ( 0 ) ) ;
}
// Extract reg index
const auto isl = I > = 4 ? m_interp_op : m_ir - > CreateShl ( m_interp_op , u64 { 4 - I } ) ;
const auto isr = I < = 4 ? m_interp_op : m_ir - > CreateLShr ( m_interp_op , u64 { I - 4 } ) ;
const auto idx = m_ir - > CreateAnd ( I > 4 ? isr : isl , m_interp_7f0 ) ;
// Pointer to the register
2023-04-05 13:35:06 +02:00
return m_ir - > CreateGEP ( get_type < u8 > ( ) , m_interp_regs , m_ir - > CreateZExt ( idx , get_type < u64 > ( ) ) ) ;
2019-03-25 19:31:16 +01:00
}
2018-07-27 12:00:05 +02:00
llvm::Value* double_as_uint64(llvm::Value* val)
{
	// Raw bit reinterpretation f64[4] -> u64[4]
	return bitcast<u64[4]>(val);
}

llvm::Value* uint64_as_double(llvm::Value* val)
{
	// Raw bit reinterpretation u64[4] -> f64[4]
	return bitcast<f64[4]>(val);
}

// Repack xfloat values kept in double form back into 32-bit form
llvm::Value* double_to_xfloat(llvm::Value* val)
{
	ensure(val && val->getType() == get_type<f64[4]>());

	const auto bits = double_as_uint64(val);
	const auto sign = m_ir->CreateAnd(m_ir->CreateLShr(bits, 32), 0x80000000);
	const auto body = m_ir->CreateXor(m_ir->CreateLShr(bits, 29), 0x40000000);
	const auto comb = m_ir->CreateOr(m_ir->CreateAnd(body, 0x7fffffff), sign);

	// Zero maps to zero; anything else takes the repacked form
	return m_ir->CreateTrunc(m_ir->CreateSelect(m_ir->CreateIsNotNull(bits), comb, splat<u64[4]>(0).eval(m_ir)), get_type<u32[4]>());
}

// Unpack 32-bit xfloat values into their double representation
llvm::Value* xfloat_to_double(llvm::Value* val)
{
	ensure(val && val->getType() == get_type<u32[4]>());

	const auto wide = m_ir->CreateZExt(val, get_type<u64[4]>());
	const auto sign = m_ir->CreateShl(m_ir->CreateAnd(wide, 0x80000000), 32);
	const auto body = m_ir->CreateAnd(wide, 0x7fffffff);
	const auto rebiased = m_ir->CreateShl(m_ir->CreateAdd(body, splat<u64[4]>(0x1c0000000).eval(m_ir)), 29);

	// Values not exceeding the mantissa mask (zero exponent field) become zero
	const auto packed = m_ir->CreateSelect(m_ir->CreateICmpSGT(body, splat<u64[4]>(0x7fffff).eval(m_ir)), rebiased, splat<u64[4]>(0).eval(m_ir));
	const auto full = m_ir->CreateOr(sign, packed);
	return uint64_as_double(full);
}

// Clamp double values to ±Smax, flush values smaller than ±Smin to positive zero
llvm::Value* xfloat_in_double(llvm::Value* val)
{
	ensure(val && val->getType() == get_type<f64[4]>());

	const auto smax = uint64_as_double(splat<u64[4]>(0x47ffffffe0000000).eval(m_ir));
	const auto smin = uint64_as_double(splat<u64[4]>(0x3810000000000000).eval(m_ir));

	const auto bits = double_as_uint64(val);
	const auto sign = m_ir->CreateAnd(bits, 0x8000000000000000);
	const auto mag = uint64_as_double(m_ir->CreateAnd(bits, 0x7fffffffe0000000));
	const auto below_max = m_ir->CreateFCmpOLT(mag, smax);
	const auto below_min = m_ir->CreateFCmpOLT(mag, smin);
	const auto clamped = double_as_uint64(m_ir->CreateSelect(below_max, mag, smax));

	return m_ir->CreateSelect(below_min, fsplat<f64[4]>(0.).eval(m_ir), uint64_as_double(m_ir->CreateOr(clamped, sign)));
}

// Expand 32-bit mask for xfloat values to 64-bit, 29 least significant bits are always zero
llvm::Value* conv_xfloat_mask(llvm::Value* val)
{
	const auto wide = m_ir->CreateZExt(val, get_type<u64[4]>());
	const auto sign = m_ir->CreateShl(m_ir->CreateAnd(wide, 0x80000000), 32);
	const auto rest = m_ir->CreateLShr(m_ir->CreateAShr(m_ir->CreateShl(wide, 33), 4), 1);
	return m_ir->CreateOr(sign, rest);
}
2019-03-25 19:31:16 +01:00
llvm : : Value * get_reg_raw ( u32 index )
2019-05-05 15:28:41 +02:00
{
if ( ! m_block | | index > = m_block - > reg . size ( ) )
{
return nullptr ;
}
2018-07-27 12:00:05 +02:00
2019-05-05 15:28:41 +02:00
return m_block - > reg [ index ] ;
}
2019-05-02 12:36:56 +02:00
2019-05-05 15:28:41 +02:00
llvm : : Value * get_reg_fixed ( u32 index , llvm : : Type * type )
{
llvm : : Value * dummy { } ;
2018-07-27 12:00:05 +02:00
2022-09-19 14:57:51 +02:00
auto & reg = * ( m_block ? & : : at32 ( m_block - > reg , index ) : & dummy ) ;
2018-07-27 12:00:05 +02:00
2019-05-05 15:28:41 +02:00
if ( ! reg )
{
// Load register value if necessary
2023-03-10 23:57:21 +01:00
reg = m_finfo & & m_finfo - > load [ index ] ? m_finfo - > load [ index ] : m_ir - > CreateLoad ( get_reg_type ( index ) , init_reg_fixed ( index ) ) ;
2019-05-05 15:28:41 +02:00
}
2018-07-27 12:00:05 +02:00
2019-05-05 15:28:41 +02:00
if ( reg - > getType ( ) = = get_type < f64 [ 4 ] > ( ) )
{
if ( type = = reg - > getType ( ) )
2018-07-27 12:00:05 +02:00
{
2019-05-05 15:28:41 +02:00
return reg ;
2018-07-27 12:00:05 +02:00
}
2019-05-05 15:28:41 +02:00
return bitcast ( double_to_xfloat ( reg ) , type ) ;
2018-07-27 12:00:05 +02:00
}
2019-05-05 15:28:41 +02:00
if ( type = = get_type < f64 [ 4 ] > ( ) )
2018-07-05 14:26:33 +02:00
{
2019-05-05 15:28:41 +02:00
return xfloat_to_double ( bitcast < u32 [ 4 ] > ( reg ) ) ;
2018-07-05 14:26:33 +02:00
}
2019-05-05 15:28:41 +02:00
return bitcast ( reg , type ) ;
2018-07-05 14:26:33 +02:00
}
template < typename T = u32 [ 4 ] >
2019-03-25 19:31:16 +01:00
value_t < T > get_reg_fixed ( u32 index )
2018-07-05 14:26:33 +02:00
{
2018-05-02 20:49:19 +02:00
value_t < T > r ;
2019-03-25 19:31:16 +01:00
r . value = get_reg_fixed ( index , get_type < T > ( ) ) ;
return r ;
}
// Get a vector register; falls back to the interpreter load path when the
// register field bits are not statically known.
template <typename T = u32[4], uint I>
value_t<T> get_vr(const bf_t<u32, I, 7>& index)
{
	value_t<T> result;

	if ((m_op_const_mask & index.data_mask()) == index.data_mask())
	{
		// Register index is constant: use the fixed register path
		result.value = get_reg_fixed(index, get_type<T>());
		return result;
	}

	// Update const mask if necessary
	if (I >= (32u - m_interp_magn))
	{
		m_op_const_mask |= index.data_mask();
	}

	// Load reg
	if (get_type<T>() == get_type<f64[4]>())
	{
		result.value = xfloat_to_double(m_ir->CreateLoad(get_type<u32[4]>(), init_vr<u32[4]>(index)));
	}
	else
	{
		result.value = m_ir->CreateLoad(get_type<T>(), init_vr<T>(index));
	}

	return result;
}
2019-04-25 19:18:27 +02:00
template < typename U , uint I >
auto get_vr_as ( U & & , const bf_t < u32 , I , 7 > & index )
{
return get_vr < typename llvm_expr_t < U > : : type > ( index ) ;
}
2019-04-24 16:48:35 +02:00
template < typename T = u32 [ 4 ] , typename . . . Args >
std : : tuple < std : : conditional_t < false , Args , value_t < T > > . . . > get_vrs ( const Args & . . . args )
{
return { get_vr < T > ( args ) . . . } ;
}
2019-04-24 19:09:58 +02:00
template < typename T = u32 [ 4 ] , uint I >
llvm_match_t < T > match_vr ( const bf_t < u32 , I , 7 > & index )
{
llvm_match_t < T > r ;
if ( m_block )
{
2022-09-19 14:57:51 +02:00
auto v = : : at32 ( m_block - > reg , index ) ;
2019-04-24 19:09:58 +02:00
if ( v & & v - > getType ( ) = = get_type < T > ( ) )
{
r . value = v ;
return r ;
}
}
return r ;
}
2019-04-25 19:18:27 +02:00
template < typename U , uint I >
auto match_vr_as ( U & & , const bf_t < u32 , I , 7 > & index )
{
return match_vr < typename llvm_expr_t < U > : : type > ( index ) ;
}
// Try each of the given types in order; invoke pred on the first type the register
// matches and return its result, or false if nothing matched.
// Note: match_vr<Types>(index) appears twice per type (condition and argument) — it is a
// cache lookup with no visible side effects, so the repeated evaluation is harmless.
template <typename... Types, uint I, typename F>
bool match_vr(const bf_t<u32, I, 7>& index, F&& pred)
{
	return ((match_vr<Types>(index) ? pred(match_vr<Types>(index), match<Types>()) : false) || ...);
}
2019-04-24 19:09:58 +02:00
template < typename T = u32 [ 4 ] , typename . . . Args >
std : : tuple < std : : conditional_t < false , Args , llvm_match_t < T > > . . . > match_vrs ( const Args & . . . args )
{
return { match_vr < T > ( args ) . . . } ;
}
// Extract scalar value from the preferred slot.
// The extracted lane depends on element size (byte -> 12, halfword -> 6, word -> 3,
// doubleword -> 1), which corresponds to the SPU preferred slot in the emulated
// register layout.
template <typename T>
auto get_scalar(value_t<T> value)
{
	using e_type = std::remove_extent_t<T>;

	static_assert(sizeof(T) == 16 || std::is_same_v<f64[4], T>, "Unknown vector type");

	// If the vector is a recognizable splat, take the splatted scalar directly
	// and skip the lane extraction
	if (auto [ok, v] = match_expr(value, vsplat<T>(match<e_type>())); ok)
	{
		return eval(v);
	}

	if constexpr (sizeof(e_type) == 1)
	{
		return eval(extract(value, 12));
	}
	else if constexpr (sizeof(e_type) == 2)
	{
		return eval(extract(value, 6));
	}
	else if constexpr (sizeof(e_type) == 4 || sizeof(T) == 32)
	{
		// 32-bit elements; also the f64[4] xfloat form (sizeof(T) == 32) uses lane 3
		return eval(extract(value, 3));
	}
	else
	{
		// 64-bit elements
		return eval(extract(value, 1));
	}
}
2021-01-17 13:13:28 +01:00
// Splat scalar value from the preferred slot
template < typename T >
auto splat_scalar ( T & & arg )
{
using VT = std : : remove_extent_t < typename std : : decay_t < T > : : type > ;
if constexpr ( sizeof ( VT ) = = 1 )
{
return zshuffle ( std : : forward < T > ( arg ) , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 ) ;
}
else if constexpr ( sizeof ( VT ) = = 2 )
{
return zshuffle ( std : : forward < T > ( arg ) , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 ) ;
}
else if constexpr ( sizeof ( VT ) = = 4 )
{
return zshuffle ( std : : forward < T > ( arg ) , 3 , 3 , 3 , 3 ) ;
}
else if constexpr ( sizeof ( VT ) = = 8 )
{
return zshuffle ( std : : forward < T > ( arg ) , 1 , 1 ) ;
}
else
{
static_assert ( sizeof ( VT ) = = 16 ) ;
return std : : forward < T > ( arg ) ;
}
}
// Store a value into a compile-time-known GPR slot, maintaining dead-store tracking.
// fixup = clamp xfloat (f64[4]) values through xfloat_in_double before storing.
void set_reg_fixed(u32 index, llvm::Value* value, bool fixup = true)
{
	// Null fallback so '_store' below can alias either the block's store slot or this dummy
	llvm::StoreInst* dummy{};

	// Check that the analyser marked this instruction as the modifier of this register
	ensure(!m_block || m_regmod[m_pos / 4] == index);

	// Test for special case
	const bool is_xfloat = value->getType() == get_type<f64[4]>();

	// Clamp value if necessary
	const auto saved_value = is_xfloat && fixup ? xfloat_in_double(value) : value;

	// Set register value
	if (m_block)
	{
#ifndef _WIN32
		if (g_cfg.core.spu_debug)
			value->setName(fmt::format("result_0x%05x", m_pos));
#endif

		::at32(m_block->reg, index) = saved_value;
	}

	// Get register location
	const auto addr = init_reg_fixed(index);

	auto& _store = *(m_block ? &m_block->store[index] : &dummy);

	// Erase previous dead store instruction if necessary
	// (_store can only be non-null when m_block exists, since dummy starts null)
	if (_store)
	{
		if (m_block->store_context_last_id[index] == m_block->store_context_ctr[index])
		{
			// Erase store if it is not preserved by ensure_gpr_stores()
			_store->eraseFromParent();
		}
	}

	if (m_block)
	{
		// Keep the store's location in history of gpr preservations
		m_block->store_context_last_id[index] = m_block->store_context_ctr[index];
		m_block->store_context_first_id[index] = std::min<usz>(m_block->store_context_first_id[index], m_block->store_context_ctr[index]);
	}

	if (m_finfo && m_finfo->fn)
	{
		if (index <= 3 || (index >= s_reg_80 && index <= s_reg_127))
		{
			// Don't save some registers in true functions
			return;
		}
	}

	// Write register to the context
	_store = m_ir->CreateStore(is_xfloat ? double_to_xfloat(saved_value) : m_ir->CreateBitCast(value, get_reg_type(index)), addr);
}
// Evaluate an expression and store the result into a vector register.
// vr_assume: optional known-bits provider — currently accepted but unused (see below).
// fixup: clamp xfloat (f64[4]) results before storing.
template <typename T, uint I>
void set_vr(const bf_t<u32, I, 7>& index, T expr, std::function<llvm::KnownBits()> vr_assume = nullptr, bool fixup = true)
{
	// Process expression
	const auto value = expr.eval(m_ir);

	// Test for special case
	const bool is_xfloat = value->getType() == get_type<f64[4]>();

	if ((m_op_const_mask & index.data_mask()) != index.data_mask())
	{
		// Update const mask if necessary
		if (I >= (32u - m_interp_magn))
		{
			m_op_const_mask |= index.data_mask();
		}

		// Clamp value if necessary
		const auto saved_value = is_xfloat && fixup ? xfloat_in_double(value) : value;

		// Store value
		m_ir->CreateStore(is_xfloat ? double_to_xfloat(saved_value) : m_ir->CreateBitCast(value, get_type<u32[4]>()), init_vr<u32[4]>(index));
		return;
	}

	if (vr_assume)
	{
		// NOTE(review): body intentionally empty? vr_assume is never consumed here — confirm
	}

	set_reg_fixed(index, value, fixup);
}
// Extract an unsigned immediate field of the opcode as a (possibly splatted) value.
// mask: apply AND with the field mask (skipped automatically when truncation covers it).
template <typename T = u32[4], uint I, uint N>
value_t<T> get_imm(const bf_t<u32, I, N>& imm, bool mask = true)
{
	if ((m_op_const_mask & imm.data_mask()) != imm.data_mask())
	{
		// Update const mask if necessary
		if (I >= (32u - m_interp_magn))
		{
			m_op_const_mask |= imm.data_mask();
		}

		// Extract unsigned immediate (skip AND if mask == false or truncated anyway)
		value_t<T> r;
		r.value = m_interp_op;
		r.value = I == 0 ? r.value : m_ir->CreateLShr(r.value, u64{I});
		r.value = !mask || N >= r.esize ? r.value : m_ir->CreateAnd(r.value, imm.data_mask() >> I);

		if constexpr (r.esize != 32)
		{
			// Match the target element width
			r.value = m_ir->CreateZExtOrTrunc(r.value, get_type<T>()->getScalarType());
		}

		if (r.is_vector)
		{
			// Broadcast the scalar across all lanes
			r.value = m_ir->CreateVectorSplat(r.is_vector, r.value);
		}

		return r;
	}

	// Field is compile-time constant: splat it directly
	return eval(splat<T>(imm));
}
// Extract a signed (sign-extended) immediate field of the opcode as a (possibly splatted) value.
template <typename T = u32[4], uint I, uint N>
value_t<T> get_imm(const bf_t<s32, I, N>& imm)
{
	if ((m_op_const_mask & imm.data_mask()) != imm.data_mask())
	{
		// Update const mask if necessary
		if (I >= (32u - m_interp_magn))
		{
			m_op_const_mask |= imm.data_mask();
		}

		// Extract signed immediate (skip sign ext if truncated anyway)
		value_t<T> r;
		r.value = m_interp_op;
		// Shift the field to the top bits, then arithmetic-shift down to sign-extend
		r.value = I + N == 32 || N >= r.esize ? r.value : m_ir->CreateShl(r.value, u64{32u - I - N});
		r.value = N == 32 || N >= r.esize ? r.value : m_ir->CreateAShr(r.value, u64{32u - N});
		r.value = I == 0 || N < r.esize ? r.value : m_ir->CreateLShr(r.value, u64{I});

		if constexpr (r.esize != 32)
		{
			// Match the target element width
			r.value = m_ir->CreateSExtOrTrunc(r.value, get_type<T>()->getScalarType());
		}

		if (r.is_vector)
		{
			// Broadcast the scalar across all lanes
			r.value = m_ir->CreateVectorSplat(r.is_vector, r.value);
		}

		return r;
	}

	// Field is compile-time constant: splat it directly
	return eval(splat<T>(imm));
}
2019-05-05 15:28:41 +02:00
// Get PC for given instruction address
llvm : : Value * get_pc ( u32 addr )
2018-05-02 20:49:19 +02:00
{
2019-05-05 15:28:41 +02:00
return m_ir - > CreateAdd ( m_base_pc , m_ir - > getInt32 ( addr - m_base ) ) ;
}
// Update PC for current or explicitly specified instruction address
void update_pc(u32 target = -1)
{
	// target == -1 (all-ones) means "use the current position"
	const u32 addr = target + 1 ? target : m_pos;
	const auto pc_value = m_ir->CreateAnd(get_pc(addr), 0x3fffc);
	m_ir->CreateStore(pc_value, spu_ptr<u32>(&spu_thread::pc))->setVolatile(true);
}
2018-06-10 14:46:01 +02:00
// Call cpu_thread::check_state if necessary and return or continue (full check)
2023-07-21 17:12:57 +02:00
void check_state ( u32 addr , bool may_be_unsafe_for_savestate = true )
2018-06-10 14:46:01 +02:00
{
2018-10-11 00:17:19 +02:00
const auto pstate = spu_ptr < u32 > ( & spu_thread : : state ) ;
2018-06-10 14:46:01 +02:00
const auto _body = llvm : : BasicBlock : : Create ( m_context , " " , m_function ) ;
const auto check = llvm : : BasicBlock : : Create ( m_context , " " , m_function ) ;
2023-03-10 23:57:21 +01:00
m_ir - > CreateCondBr ( m_ir - > CreateICmpEQ ( m_ir - > CreateLoad ( get_type < u32 > ( ) , pstate , true ) , m_ir - > getInt32 ( 0 ) ) , _body , check , m_md_likely ) ;
2018-06-10 14:46:01 +02:00
m_ir - > SetInsertPoint ( check ) ;
2019-05-05 15:28:41 +02:00
update_pc ( addr ) ;
2023-07-21 17:12:57 +02:00
if ( may_be_unsafe_for_savestate & & std : : none_of ( std : : begin ( m_block - > phi ) , std : : end ( m_block - > phi ) , FN ( ! ! x ) ) )
{
may_be_unsafe_for_savestate = false ;
}
if ( may_be_unsafe_for_savestate )
{
m_ir - > CreateStore ( m_ir - > getInt8 ( 1 ) , spu_ptr < u8 > ( & spu_thread : : unsavable ) ) - > setVolatile ( true ) ;
}
2023-04-16 18:36:54 +02:00
m_ir - > CreateCall ( m_test_state , { m_thread } ) ;
2023-07-21 17:12:57 +02:00
if ( may_be_unsafe_for_savestate )
{
m_ir - > CreateStore ( m_ir - > getInt8 ( 0 ) , spu_ptr < u8 > ( & spu_thread : : unsavable ) ) - > setVolatile ( true ) ;
}
2019-04-25 12:43:03 +02:00
m_ir - > CreateBr ( _body ) ;
2018-06-10 14:46:01 +02:00
m_ir - > SetInsertPoint ( _body ) ;
}
2018-05-02 20:49:19 +02:00
public :
// interp_magn: number of low opcode bits only known at runtime (0 = full recompilation mode)
spu_llvm_recompiler(u8 interp_magn = 0)
	: spu_recompiler_base()
	, cpu_translator(nullptr, false)
	, m_interp_magn(interp_magn)
{
}
2018-05-04 23:01:27 +02:00
virtual void init ( ) override
2018-05-02 20:49:19 +02:00
{
// Initialize if necessary
if ( ! m_spurt )
{
2021-03-02 12:59:19 +01:00
m_spurt = & g_fxo - > get < spu_runtime > ( ) ;
2019-05-05 15:28:41 +02:00
cpu_translator : : initialize ( m_jit . get_context ( ) , m_jit . get_engine ( ) ) ;
2019-03-13 18:57:42 +01:00
const auto md_name = llvm : : MDString : : get ( m_context , " branch_weights " ) ;
const auto md_low = llvm : : ValueAsMetadata : : get ( llvm : : ConstantInt : : get ( GetType < u32 > ( ) , 1 ) ) ;
const auto md_high = llvm : : ValueAsMetadata : : get ( llvm : : ConstantInt : : get ( GetType < u32 > ( ) , 999 ) ) ;
// Metadata for branch weights
m_md_likely = llvm : : MDTuple : : get ( m_context , { md_name , md_high , md_low } ) ;
m_md_unlikely = llvm : : MDTuple : : get ( m_context , { md_name , md_low , md_high } ) ;
2018-05-02 20:49:19 +02:00
}
2018-05-04 23:01:27 +02:00
}
2019-11-23 17:30:54 +01:00
virtual spu_function_t compile ( spu_program & & _func ) override
2018-05-02 20:49:19 +02:00
{
2019-11-23 17:30:54 +01:00
if ( _func . data . empty ( ) & & m_interp_magn )
2019-03-25 19:31:16 +01:00
{
return compile_interpreter ( ) ;
}
2019-11-23 17:30:54 +01:00
const u32 start0 = _func . entry_point ;
2019-10-27 10:57:02 +01:00
2019-10-25 23:52:56 +02:00
const auto add_loc = m_spurt - > add_empty ( std : : move ( _func ) ) ;
2019-01-21 19:04:32 +01:00
2019-10-25 23:52:56 +02:00
if ( ! add_loc )
2018-05-02 20:49:19 +02:00
{
2019-05-01 13:56:41 +02:00
return nullptr ;
}
2019-11-23 17:30:54 +01:00
const spu_program & func = add_loc - > data ;
2019-10-25 23:52:56 +02:00
2019-11-23 17:30:54 +01:00
if ( func . entry_point ! = start0 )
2019-10-27 10:57:02 +01:00
{
// Wait for the duplicate
while ( ! add_loc - > compiled )
{
add_loc - > compiled . wait ( nullptr ) ;
}
return add_loc - > compiled ;
}
2019-05-12 22:17:45 +02:00
std : : string log ;
2021-03-02 12:59:19 +01:00
if ( auto & cache = g_fxo - > get < spu_cache > ( ) ; cache & & g_cfg . core . spu_cache & & ! add_loc - > cached . exchange ( 1 ) )
2019-05-01 13:56:41 +02:00
{
2021-03-02 12:59:19 +01:00
cache . add ( func ) ;
2018-05-02 20:49:19 +02:00
}
{
sha1_context ctx ;
u8 output [ 20 ] ;
sha1_starts ( & ctx ) ;
2019-11-23 17:30:54 +01:00
sha1_update ( & ctx , reinterpret_cast < const u8 * > ( func . data . data ( ) ) , func . data . size ( ) * 4 ) ;
2018-05-02 20:49:19 +02:00
sha1_finish ( & ctx , output ) ;
2019-05-12 02:22:14 +02:00
m_hash . clear ( ) ;
2022-01-23 13:22:00 +01:00
fmt : : append ( m_hash , " __spu-0x%05x-%s " , func . entry_point , fmt : : base57 ( output ) ) ;
2019-10-14 19:41:31 +02:00
be_t < u64 > hash_start ;
std : : memcpy ( & hash_start , output , sizeof ( hash_start ) ) ;
m_hash_start = hash_start ;
2018-05-02 20:49:19 +02:00
}
2020-02-01 09:36:09 +01:00
spu_log . notice ( " Building function 0x%x... (size %u, %s) " , func . entry_point , func . data . size ( ) , m_hash ) ;
2018-05-02 20:49:19 +02:00
2019-11-23 17:30:54 +01:00
m_pos = func . lower_bound ;
m_base = func . entry_point ;
m_size = : : size32 ( func . data ) * 4 ;
2019-05-14 17:55:10 +02:00
const u32 start = m_pos ;
2018-06-10 14:46:01 +02:00
const u32 end = start + m_size ;
2018-05-02 20:49:19 +02:00
2019-11-17 20:07:08 +01:00
m_pp_id = 0 ;
2019-10-25 23:52:56 +02:00
if ( g_cfg . core . spu_debug & & ! add_loc - > logged . exchange ( 1 ) )
2018-05-02 20:49:19 +02:00
{
2019-10-25 23:52:56 +02:00
this - > dump ( func , log ) ;
2019-03-18 21:01:16 +01:00
fs : : file ( m_spurt - > get_cache_path ( ) + " spu.log " , fs : : write + fs : : append ) . write ( log ) ;
2018-08-03 14:34:51 +02:00
}
2018-06-10 14:46:01 +02:00
using namespace llvm ;
2019-11-17 20:09:23 +01:00
m_engine - > clearAllGlobalMappings ( ) ;
2018-06-10 14:46:01 +02:00
// Create LLVM module
2020-05-06 17:18:30 +02:00
std : : unique_ptr < Module > _module = std : : make_unique < Module > ( m_hash + " .obj " , m_context ) ;
2023-04-08 14:21:22 +02:00
_module - > setTargetTriple ( jit_compiler : : triple2 ( ) ) ;
2020-05-06 17:18:30 +02:00
_module - > setDataLayout ( m_jit . get_engine ( ) . getTargetMachine ( ) - > createDataLayout ( ) ) ;
m_module = _module . get ( ) ;
2018-05-02 20:49:19 +02:00
// Initialize IR Builder
2018-06-10 14:46:01 +02:00
IRBuilder < > irb ( m_context ) ;
2018-05-02 20:49:19 +02:00
m_ir = & irb ;
2018-06-10 14:46:01 +02:00
// Add entry function (contains only state/code check)
2019-05-12 02:22:14 +02:00
const auto main_func = llvm : : cast < llvm : : Function > ( m_module - > getOrInsertFunction ( m_hash , get_ftype < void , u8 * , u8 * , u64 > ( ) ) . getCallee ( ) ) ;
2023-03-10 23:57:21 +01:00
const auto main_arg2 = main_func - > getArg ( 2 ) ;
2019-05-11 18:21:07 +02:00
main_func - > setCallingConv ( CallingConv : : GHC ) ;
2018-06-10 14:46:01 +02:00
set_function ( main_func ) ;
2018-05-02 20:49:19 +02:00
2018-06-10 14:46:01 +02:00
// Start compilation
2018-05-02 20:49:19 +02:00
const auto label_test = BasicBlock : : Create ( m_context , " " , m_function ) ;
const auto label_diff = BasicBlock : : Create ( m_context , " " , m_function ) ;
const auto label_body = BasicBlock : : Create ( m_context , " " , m_function ) ;
2018-06-10 14:46:01 +02:00
const auto label_stop = BasicBlock : : Create ( m_context , " " , m_function ) ;
2018-05-02 20:49:19 +02:00
2019-05-05 15:28:41 +02:00
// Load PC, which will be the actual value of 'm_base'
2023-03-10 23:57:21 +01:00
m_base_pc = m_ir - > CreateLoad ( get_type < u32 > ( ) , spu_ptr < u32 > ( & spu_thread : : pc ) ) ;
2019-05-05 15:28:41 +02:00
2018-05-02 20:49:19 +02:00
// Emit state check
2018-10-11 00:17:19 +02:00
const auto pstate = spu_ptr < u32 > ( & spu_thread : : state ) ;
2023-04-08 14:21:22 +02:00
m_ir - > CreateCondBr ( m_ir - > CreateICmpNE ( m_ir - > CreateLoad ( get_type < u32 > ( ) , pstate ) , m_ir - > getInt32 ( 0 ) ) , label_stop , label_test , m_md_unlikely ) ;
2018-05-02 20:49:19 +02:00
// Emit code check
2018-06-10 14:46:01 +02:00
u32 check_iterations = 0 ;
2018-05-02 20:49:19 +02:00
m_ir - > SetInsertPoint ( label_test ) ;
2019-10-14 19:41:31 +02:00
// Set block hash for profiling (if enabled)
if ( g_cfg . core . spu_prof & & g_cfg . core . spu_verification )
2023-04-08 14:21:22 +02:00
m_ir - > CreateStore ( m_ir - > getInt64 ( ( m_hash_start & - 65536 ) ) , spu_ptr < u64 > ( & spu_thread : : block_hash ) ) ;
2019-10-14 19:41:31 +02:00
2018-06-04 01:13:53 +02:00
if ( ! g_cfg . core . spu_verification )
2018-05-02 20:49:19 +02:00
{
2018-06-04 01:13:53 +02:00
// Disable check (unsafe)
m_ir - > CreateBr ( label_body ) ;
2018-05-02 20:49:19 +02:00
}
2019-11-23 17:30:54 +01:00
else if ( func . data . size ( ) = = 1 )
2018-05-02 20:49:19 +02:00
{
2023-04-05 13:35:06 +02:00
const auto pu32 = m_ir - > CreateGEP ( get_type < u8 > ( ) , m_lsptr , m_base_pc ) ;
2023-03-10 23:57:21 +01:00
const auto cond = m_ir - > CreateICmpNE ( m_ir - > CreateLoad ( get_type < u32 > ( ) , pu32 ) , m_ir - > getInt32 ( func . data [ 0 ] ) ) ;
2019-03-13 18:57:42 +01:00
m_ir - > CreateCondBr ( cond , label_diff , label_body , m_md_unlikely ) ;
2018-05-02 20:49:19 +02:00
}
2019-11-23 17:30:54 +01:00
else if ( func . data . size ( ) = = 2 )
2018-05-02 20:49:19 +02:00
{
2023-04-05 13:35:06 +02:00
const auto pu64 = m_ir - > CreateGEP ( get_type < u8 > ( ) , m_lsptr , m_base_pc ) ;
2023-03-10 23:57:21 +01:00
const auto cond = m_ir - > CreateICmpNE ( m_ir - > CreateLoad ( get_type < u64 > ( ) , pu64 ) , m_ir - > getInt64 ( static_cast < u64 > ( func . data [ 1 ] ) < < 32 | func . data [ 0 ] ) ) ;
2019-03-13 18:57:42 +01:00
m_ir - > CreateCondBr ( cond , label_diff , label_body , m_md_unlikely ) ;
2018-05-02 20:49:19 +02:00
}
else
{
2019-05-05 15:28:41 +02:00
u32 starta = start ;
// Skip holes at the beginning (giga only)
for ( u32 j = start ; j < end ; j + = 4 )
{
2019-11-23 17:30:54 +01:00
if ( ! func . data [ ( j - start ) / 4 ] )
2019-05-05 15:28:41 +02:00
{
starta + = 4 ;
}
else
{
break ;
}
}
2021-04-12 08:26:48 +02:00
u32 stride ;
u32 elements ;
u32 dwords ;
if ( m_use_avx512 & & g_cfg . core . full_width_avx512 )
{
stride = 64 ;
elements = 16 ;
dwords = 8 ;
}
2021-11-05 22:05:04 +01:00
else if ( m_use_avx )
2021-04-12 08:26:48 +02:00
{
stride = 32 ;
elements = 8 ;
dwords = 4 ;
}
2021-11-05 22:05:04 +01:00
else
2021-04-12 08:26:48 +02:00
{
stride = 16 ;
elements = 4 ;
dwords = 2 ;
}
2019-05-05 15:28:41 +02:00
// Get actual pc corresponding to the found beginning of the data
llvm : : Value * starta_pc = m_ir - > CreateAnd ( get_pc ( starta ) , 0x3fffc ) ;
2023-03-10 23:57:21 +01:00
llvm : : Value * data_addr = m_ir - > CreateGEP ( get_type < u8 > ( ) , m_lsptr , starta_pc ) ;
2018-05-02 20:49:19 +02:00
llvm : : Value * acc = nullptr ;
2021-04-12 08:26:48 +02:00
for ( u32 j = starta ; j < end ; j + = stride )
2018-05-02 20:49:19 +02:00
{
2021-04-12 08:26:48 +02:00
int indices [ 16 ] ;
2018-05-02 20:49:19 +02:00
bool holes = false ;
bool data = false ;
2021-04-12 08:26:48 +02:00
for ( u32 i = 0 ; i < elements ; i + + )
2018-05-02 20:49:19 +02:00
{
const u32 k = j + i * 4 ;
2019-11-23 17:30:54 +01:00
if ( k < start | | k > = end | | ! func . data [ ( k - start ) / 4 ] )
2018-05-02 20:49:19 +02:00
{
2021-04-12 08:26:48 +02:00
indices [ i ] = elements ;
2018-05-02 20:49:19 +02:00
holes = true ;
}
else
{
indices [ i ] = i ;
data = true ;
}
}
if ( ! data )
{
2019-05-05 15:28:41 +02:00
// Skip full-sized holes
2018-05-02 20:49:19 +02:00
continue ;
}
2021-04-12 08:26:48 +02:00
llvm : : Value * vls = nullptr ;
2019-05-05 15:28:41 +02:00
// Load unaligned code block from LS
2021-04-12 08:26:48 +02:00
if ( m_use_avx512 & & g_cfg . core . full_width_avx512 )
{
2023-03-10 23:57:21 +01:00
vls = m_ir - > CreateAlignedLoad ( get_type < u32 [ 16 ] > ( ) , _ptr < u32 [ 16 ] > ( data_addr , j - starta ) , llvm : : MaybeAlign { 4 } ) ;
2021-04-12 08:26:48 +02:00
}
2021-11-05 22:05:04 +01:00
else if ( m_use_avx )
2021-04-12 08:26:48 +02:00
{
2023-03-10 23:57:21 +01:00
vls = m_ir - > CreateAlignedLoad ( get_type < u32 [ 8 ] > ( ) , _ptr < u32 [ 8 ] > ( data_addr , j - starta ) , llvm : : MaybeAlign { 4 } ) ;
2021-04-12 08:26:48 +02:00
}
else
{
2023-03-10 23:57:21 +01:00
vls = m_ir - > CreateAlignedLoad ( get_type < u32 [ 4 ] > ( ) , _ptr < u32 [ 4 ] > ( data_addr , j - starta ) , llvm : : MaybeAlign { 4 } ) ;
2021-04-12 08:26:48 +02:00
}
2018-05-02 20:49:19 +02:00
// Mask if necessary
if ( holes )
{
2023-03-10 23:57:21 +01:00
vls = m_ir - > CreateShuffleVector ( vls , ConstantAggregateZero : : get ( vls - > getType ( ) ) , llvm : : ArrayRef ( indices , elements ) ) ;
2018-05-02 20:49:19 +02:00
}
// Perform bitwise comparison and accumulate
2021-04-12 08:26:48 +02:00
u32 words [ 16 ] ;
2018-05-02 20:49:19 +02:00
2021-04-12 08:26:48 +02:00
for ( u32 i = 0 ; i < elements ; i + + )
2018-05-02 20:49:19 +02:00
{
const u32 k = j + i * 4 ;
2019-11-23 17:30:54 +01:00
words [ i ] = k > = start & & k < end ? func . data [ ( k - start ) / 4 ] : 0 ;
2018-05-02 20:49:19 +02:00
}
2023-03-10 23:57:21 +01:00
vls = m_ir - > CreateXor ( vls , ConstantDataVector : : get ( m_context , llvm : : ArrayRef ( words , elements ) ) ) ;
2018-05-02 20:49:19 +02:00
acc = acc ? m_ir - > CreateOr ( acc , vls ) : vls ;
2018-06-10 14:46:01 +02:00
check_iterations + + ;
2018-05-02 20:49:19 +02:00
}
// Pattern for PTEST
2021-04-12 08:26:48 +02:00
if ( m_use_avx512 & & g_cfg . core . full_width_avx512 )
{
acc = m_ir - > CreateBitCast ( acc , get_type < u64 [ 8 ] > ( ) ) ;
}
2021-11-05 22:05:04 +01:00
else if ( m_use_avx )
2021-04-12 08:26:48 +02:00
{
acc = m_ir - > CreateBitCast ( acc , get_type < u64 [ 4 ] > ( ) ) ;
}
else
{
acc = m_ir - > CreateBitCast ( acc , get_type < u64 [ 2 ] > ( ) ) ;
}
2018-05-02 20:49:19 +02:00
llvm : : Value * elem = m_ir - > CreateExtractElement ( acc , u64 { 0 } ) ;
2021-04-12 08:26:48 +02:00
for ( u32 i = 1 ; i < dwords ; i + + )
{
elem = m_ir - > CreateOr ( elem , m_ir - > CreateExtractElement ( acc , i ) ) ;
}
2018-05-02 20:49:19 +02:00
// Compare result with zero
const auto cond = m_ir - > CreateICmpNE ( elem , m_ir - > getInt64 ( 0 ) ) ;
2019-03-13 18:57:42 +01:00
m_ir - > CreateCondBr ( cond , label_diff , label_body , m_md_unlikely ) ;
2018-05-02 20:49:19 +02:00
}
2018-06-10 14:46:01 +02:00
// Increase block counter with statistics
2018-05-02 20:49:19 +02:00
m_ir - > SetInsertPoint ( label_body ) ;
2018-10-11 00:17:19 +02:00
const auto pbcount = spu_ptr < u64 > ( & spu_thread : : block_counter ) ;
2023-03-10 23:57:21 +01:00
m_ir - > CreateStore ( m_ir - > CreateAdd ( m_ir - > CreateLoad ( get_type < u64 > ( ) , pbcount ) , m_ir - > getInt64 ( check_iterations ) ) , pbcount ) ;
2018-06-10 14:46:01 +02:00
2019-05-11 18:21:07 +02:00
// Call the entry function chunk
const auto entry_chunk = add_function ( m_pos ) ;
2019-05-11 19:48:47 +02:00
const auto entry_call = m_ir - > CreateCall ( entry_chunk - > chunk , { m_thread , m_lsptr , m_base_pc } ) ;
entry_call - > setCallingConv ( entry_chunk - > chunk - > getCallingConv ( ) ) ;
2019-10-03 18:57:32 +02:00
const auto dispatcher = llvm : : cast < llvm : : Function > ( m_module - > getOrInsertFunction ( " spu_dispatcher " , main_func - > getType ( ) ) . getCallee ( ) ) ;
2019-11-17 20:09:23 +01:00
m_engine - > updateGlobalMapping ( " spu_dispatcher " , reinterpret_cast < u64 > ( spu_runtime : : tr_all ) ) ;
2019-10-15 16:43:33 +02:00
dispatcher - > setCallingConv ( main_func - > getCallingConv ( ) ) ;
2019-05-11 19:48:47 +02:00
// Proceed to the next code
if ( entry_chunk - > chunk - > getReturnType ( ) ! = get_type < void > ( ) )
{
2023-04-05 13:35:06 +02:00
const auto next_call = m_ir - > CreateCall ( main_func - > getFunctionType ( ) , entry_call , { m_thread , m_lsptr , m_ir - > getInt64 ( 0 ) } ) ;
2019-05-11 19:48:47 +02:00
next_call - > setCallingConv ( main_func - > getCallingConv ( ) ) ;
next_call - > setTailCall ( ) ;
}
else
{
entry_call - > setTailCall ( ) ;
}
m_ir - > CreateRetVoid ( ) ;
2019-05-11 12:57:34 +02:00
2018-06-10 14:46:01 +02:00
m_ir - > SetInsertPoint ( label_stop ) ;
2023-03-11 20:08:27 +01:00
call ( " spu_escape " , spu_runtime : : g_escape , m_thread ) - > setTailCall ( ) ;
2018-06-10 14:46:01 +02:00
m_ir - > CreateRetVoid ( ) ;
m_ir - > SetInsertPoint ( label_diff ) ;
if ( g_cfg . core . spu_verification )
{
2018-10-11 00:17:19 +02:00
const auto pbfail = spu_ptr < u64 > ( & spu_thread : : block_failure ) ;
2023-03-10 23:57:21 +01:00
m_ir - > CreateStore ( m_ir - > CreateAdd ( m_ir - > CreateLoad ( get_type < u64 > ( ) , pbfail ) , m_ir - > getInt64 ( 1 ) ) , pbfail ) ;
2019-05-11 19:48:47 +02:00
const auto dispci = call ( " spu_dispatch " , spu_runtime : : tr_dispatch , m_thread , m_lsptr , main_arg2 ) ;
2019-05-11 18:21:07 +02:00
dispci - > setCallingConv ( CallingConv : : GHC ) ;
dispci - > setTailCall ( ) ;
2019-05-05 15:28:41 +02:00
m_ir - > CreateRetVoid ( ) ;
2018-06-10 14:46:01 +02:00
}
else
{
m_ir - > CreateUnreachable ( ) ;
}
2022-01-23 13:22:00 +01:00
m_dispatch = cast < Function > ( _module - > getOrInsertFunction ( " __spu-null " , entry_chunk - > chunk - > getFunctionType ( ) ) . getCallee ( ) ) ;
2019-05-11 19:48:47 +02:00
m_dispatch - > setLinkage ( llvm : : GlobalValue : : InternalLinkage ) ;
m_dispatch - > setCallingConv ( entry_chunk - > chunk - > getCallingConv ( ) ) ;
set_function ( m_dispatch ) ;
if ( entry_chunk - > chunk - > getReturnType ( ) = = get_type < void > ( ) )
{
2023-04-05 13:35:06 +02:00
const auto next_call = m_ir - > CreateCall ( main_func - > getFunctionType ( ) , dispatcher , { m_thread , m_lsptr , m_ir - > getInt64 ( 0 ) } ) ;
2019-05-11 19:48:47 +02:00
next_call - > setCallingConv ( main_func - > getCallingConv ( ) ) ;
next_call - > setTailCall ( ) ;
m_ir - > CreateRetVoid ( ) ;
}
else
{
2023-04-05 13:35:06 +02:00
m_ir - > CreateRet ( dispatcher ) ;
2019-05-11 19:48:47 +02:00
}
2019-05-05 15:28:41 +02:00
// Function that executes check_state and escapes if necessary
m_test_state = llvm : : cast < llvm : : Function > ( m_module - > getOrInsertFunction ( " spu_test_state " , get_ftype < void , u8 * > ( ) ) . getCallee ( ) ) ;
m_test_state - > setLinkage ( GlobalValue : : InternalLinkage ) ;
2022-06-14 14:28:38 +02:00
# ifdef ARCH_ARM64
// LLVM doesn't support PreserveAll on arm64.
2022-07-10 08:38:48 +02:00
m_test_state - > setCallingConv ( CallingConv : : PreserveMost ) ;
2022-06-14 14:28:38 +02:00
# else
2019-05-05 15:28:41 +02:00
m_test_state - > setCallingConv ( CallingConv : : PreserveAll ) ;
2022-06-14 14:28:38 +02:00
# endif
2019-05-05 15:28:41 +02:00
m_ir - > SetInsertPoint ( BasicBlock : : Create ( m_context , " " , m_test_state ) ) ;
const auto escape_yes = BasicBlock : : Create ( m_context , " " , m_test_state ) ;
const auto escape_no = BasicBlock : : Create ( m_context , " " , m_test_state ) ;
2023-03-10 23:57:21 +01:00
m_ir - > CreateCondBr ( call ( " spu_exec_check_state " , & exec_check_state , m_test_state - > getArg ( 0 ) ) , escape_yes , escape_no ) ;
2019-05-05 15:28:41 +02:00
m_ir - > SetInsertPoint ( escape_yes ) ;
2023-03-10 23:57:21 +01:00
call ( " spu_escape " , spu_runtime : : g_escape , m_test_state - > getArg ( 0 ) ) ;
2019-05-05 15:28:41 +02:00
m_ir - > CreateRetVoid ( ) ;
m_ir - > SetInsertPoint ( escape_no ) ;
m_ir - > CreateRetVoid ( ) ;
2018-06-10 14:46:01 +02:00
// Create function table (uninitialized)
2019-05-05 15:28:41 +02:00
m_function_table = new llvm : : GlobalVariable ( * m_module , llvm : : ArrayType : : get ( entry_chunk - > chunk - > getType ( ) , m_size / 4 ) , true , llvm : : GlobalValue : : InternalLinkage , nullptr ) ;
2018-05-02 20:49:19 +02:00
2018-06-10 14:46:01 +02:00
// Create function chunks
2020-12-18 08:39:54 +01:00
for ( usz fi = 0 ; fi < m_function_queue . size ( ) ; fi + + )
2018-05-02 20:49:19 +02:00
{
2018-06-10 14:46:01 +02:00
// Initialize function info
m_entry = m_function_queue [ fi ] ;
2019-05-05 15:28:41 +02:00
set_function ( m_functions [ m_entry ] . chunk ) ;
2019-10-14 19:41:31 +02:00
// Set block hash for profiling (if enabled)
if ( g_cfg . core . spu_prof )
2023-04-08 14:21:22 +02:00
m_ir - > CreateStore ( m_ir - > getInt64 ( ( m_hash_start & - 65536 ) | ( m_entry > > 2 ) ) , spu_ptr < u64 > ( & spu_thread : : block_hash ) ) ;
2019-10-14 19:41:31 +02:00
2018-07-22 18:09:25 +02:00
m_finfo = & m_functions [ m_entry ] ;
2018-06-10 14:46:01 +02:00
m_ir - > CreateBr ( add_block ( m_entry ) ) ;
2018-05-02 20:49:19 +02:00
2018-06-10 14:46:01 +02:00
// Emit instructions for basic blocks
2020-12-18 08:39:54 +01:00
for ( usz bi = 0 ; bi < m_block_queue . size ( ) ; bi + + )
2018-05-02 20:49:19 +02:00
{
2018-06-10 14:46:01 +02:00
// Initialize basic block info
const u32 baddr = m_block_queue [ bi ] ;
m_block = & m_blocks [ baddr ] ;
m_ir - > SetInsertPoint ( m_block - > block ) ;
2022-09-19 14:57:51 +02:00
auto & bb = : : at32 ( m_bbs , baddr ) ;
2019-04-30 23:06:42 +02:00
bool need_check = false ;
2019-05-05 15:28:41 +02:00
m_block - > bb = & bb ;
2018-06-10 14:46:01 +02:00
2020-02-26 21:13:54 +01:00
if ( ! bb . preds . empty ( ) )
2018-06-10 14:46:01 +02:00
{
// Initialize registers and build PHI nodes if necessary
for ( u32 i = 0 ; i < s_reg_max ; i + + )
{
2019-05-05 15:28:41 +02:00
const u32 src = m_finfo - > fn ? bb . reg_origin_abs [ i ] : bb . reg_origin [ i ] ;
2019-04-30 23:06:42 +02:00
2019-05-05 15:28:41 +02:00
if ( src > 0x40000 )
2018-06-10 14:46:01 +02:00
{
2019-05-05 15:28:41 +02:00
// Use the xfloat hint to create 256-bit (4x double) PHI
2023-07-23 09:09:24 +02:00
llvm : : Type * type = g_cfg . core . spu_xfloat_accuracy = = xfloat_accuracy : : accurate & & bb . reg_maybe_xf [ i ] ? get_type < f64 [ 4 ] > ( ) : get_reg_type ( i ) ;
2019-05-05 15:28:41 +02:00
const auto _phi = m_ir - > CreatePHI ( type , : : size32 ( bb . preds ) , fmt : : format ( " phi0x%05x_r%u " , baddr , i ) ) ;
2018-06-10 14:46:01 +02:00
m_block - > phi [ i ] = _phi ;
m_block - > reg [ i ] = _phi ;
2019-04-30 23:06:42 +02:00
for ( u32 pred : bb . preds )
2018-06-10 14:46:01 +02:00
{
const auto bfound = m_blocks . find ( pred ) ;
2018-05-02 20:49:19 +02:00
2018-06-10 14:46:01 +02:00
if ( bfound ! = m_blocks . end ( ) & & bfound - > second . block_end )
{
auto & value = bfound - > second . reg [ i ] ;
2018-05-02 20:49:19 +02:00
2018-06-10 14:46:01 +02:00
if ( ! value | | value - > getType ( ) ! = _phi - > getType ( ) )
{
2019-03-25 19:31:16 +01:00
const auto regptr = init_reg_fixed ( i ) ;
2018-06-10 14:46:01 +02:00
const auto cblock = m_ir - > GetInsertBlock ( ) ;
m_ir - > SetInsertPoint ( bfound - > second . block_end - > getTerminator ( ) ) ;
2018-05-02 20:49:19 +02:00
2018-06-10 14:46:01 +02:00
if ( ! value )
{
// Value hasn't been loaded yet
2023-03-10 23:57:21 +01:00
value = m_finfo & & m_finfo - > load [ i ] ? m_finfo - > load [ i ] : m_ir - > CreateLoad ( get_reg_type ( i ) , regptr ) ;
2018-06-10 14:46:01 +02:00
}
2018-07-22 18:09:25 +02:00
2019-05-05 15:28:41 +02:00
if ( value - > getType ( ) = = get_type < f64 [ 4 ] > ( ) & & type ! = get_type < f64 [ 4 ] > ( ) )
2018-07-27 12:00:05 +02:00
{
value = double_to_xfloat ( value ) ;
}
2019-05-05 15:28:41 +02:00
else if ( value - > getType ( ) ! = get_type < f64 [ 4 ] > ( ) & & type = = get_type < f64 [ 4 ] > ( ) )
2018-07-05 14:26:33 +02:00
{
2019-05-05 15:28:41 +02:00
value = xfloat_to_double ( bitcast < u32 [ 4 ] > ( value ) ) ;
2018-07-05 14:26:33 +02:00
}
else
{
2019-05-05 15:28:41 +02:00
value = bitcast ( value , _phi - > getType ( ) ) ;
2018-07-05 14:26:33 +02:00
}
2018-05-02 20:49:19 +02:00
2018-06-10 14:46:01 +02:00
m_ir - > SetInsertPoint ( cblock ) ;
2018-06-03 17:07:39 +02:00
2020-12-09 08:47:45 +01:00
ensure ( bfound - > second . block_end - > getTerminator ( ) ) ;
2018-06-10 14:46:01 +02:00
}
2018-06-03 17:07:39 +02:00
2018-06-10 14:46:01 +02:00
_phi - > addIncoming ( value , bfound - > second . block_end ) ;
}
}
2018-06-03 17:07:39 +02:00
2018-06-10 14:46:01 +02:00
if ( baddr = = m_entry )
{
// Load value at the function chunk's entry block if necessary
2019-03-25 19:31:16 +01:00
const auto regptr = init_reg_fixed ( i ) ;
2018-06-10 14:46:01 +02:00
const auto cblock = m_ir - > GetInsertBlock ( ) ;
m_ir - > SetInsertPoint ( m_function - > getEntryBlock ( ) . getTerminator ( ) ) ;
2023-03-10 23:57:21 +01:00
const auto value = m_finfo & & m_finfo - > load [ i ] ? m_finfo - > load [ i ] : m_ir - > CreateLoad ( get_reg_type ( i ) , regptr ) ;
2018-06-10 14:46:01 +02:00
m_ir - > SetInsertPoint ( cblock ) ;
_phi - > addIncoming ( value , & m_function - > getEntryBlock ( ) ) ;
}
}
2019-04-30 23:06:42 +02:00
else if ( src < 0x40000 )
2018-06-10 14:46:01 +02:00
{
2019-04-30 23:06:42 +02:00
// Passthrough register value
2018-06-10 14:46:01 +02:00
const auto bfound = m_blocks . find ( src ) ;
if ( bfound ! = m_blocks . end ( ) )
{
m_block - > reg [ i ] = bfound - > second . reg [ i ] ;
}
2019-04-30 23:06:42 +02:00
else
{
2020-02-01 09:36:09 +01:00
spu_log . error ( " [0x%05x] Value not found ($%u from 0x%05x) " , baddr , i , src ) ;
2019-04-30 23:06:42 +02:00
}
2018-06-10 14:46:01 +02:00
}
2019-05-05 15:28:41 +02:00
else
2018-07-22 18:09:25 +02:00
{
2019-05-05 15:28:41 +02:00
m_block - > reg [ i ] = m_finfo - > load [ i ] ;
2018-07-22 18:09:25 +02:00
}
2018-06-10 14:46:01 +02:00
}
// Emit state check if necessary (TODO: more conditions)
2019-04-30 23:06:42 +02:00
for ( u32 pred : bb . preds )
2018-06-03 17:07:39 +02:00
{
2019-04-06 22:57:37 +02:00
if ( pred > = baddr )
2018-06-03 17:07:39 +02:00
{
// If this block is a target of a backward branch (possibly loop), emit a check
2019-04-30 23:06:42 +02:00
need_check = true ;
2018-06-03 17:07:39 +02:00
break ;
}
}
}
2018-11-09 13:25:10 +01:00
// State check at the beginning of the chunk
2019-04-30 23:06:42 +02:00
if ( need_check | | ( bi = = 0 & & g_cfg . core . spu_block_size ! = spu_block_size_type : : safe ) )
2018-11-09 13:25:10 +01:00
{
check_state ( baddr ) ;
}
2018-06-10 14:46:01 +02:00
// Emit instructions
for ( m_pos = baddr ; m_pos > = start & & m_pos < end & & ! m_ir - > GetInsertBlock ( ) - > getTerminator ( ) ; m_pos + = 4 )
2018-06-03 17:07:39 +02:00
{
2018-06-10 14:46:01 +02:00
if ( m_pos ! = baddr & & m_block_info [ m_pos / 4 ] )
{
break ;
}
2019-11-23 17:30:54 +01:00
const u32 op = std : : bit_cast < be_t < u32 > > ( func . data [ ( m_pos - start ) / 4 ] ) ;
2018-06-10 14:46:01 +02:00
if ( ! op )
{
2020-03-27 12:20:37 +01:00
spu_log . error ( " [%s] Unexpected fallthrough to 0x%x (chunk=0x%x, entry=0x%x) " , m_hash , m_pos , m_entry , m_function_queue [ 0 ] ) ;
2018-06-10 14:46:01 +02:00
break ;
}
2020-04-04 20:33:46 +02:00
// Set variable for set_link()
if ( m_pos + 4 > = end )
m_next_op = 0 ;
else
m_next_op = func . data [ ( m_pos - start ) / 4 + 1 ] ;
2018-06-10 14:46:01 +02:00
// Execute recompiler function (TODO)
2020-03-24 09:40:22 +01:00
( this - > * decode ( op ) ) ( { op } ) ;
2018-06-03 17:07:39 +02:00
}
2018-05-02 20:49:19 +02:00
2018-06-10 14:46:01 +02:00
// Finalize block with fallthrough if necessary
if ( ! m_ir - > GetInsertBlock ( ) - > getTerminator ( ) )
{
const u32 target = m_pos = = baddr ? baddr : m_pos & 0x3fffc ;
if ( m_pos ! = baddr )
{
m_pos - = 4 ;
if ( target > = start & & target < end )
{
const auto tfound = m_targets . find ( m_pos ) ;
2019-05-05 15:28:41 +02:00
if ( tfound = = m_targets . end ( ) | | tfound - > second . find_first_of ( target ) + 1 = = 0 )
2018-06-10 14:46:01 +02:00
{
2020-03-27 12:20:37 +01:00
spu_log . error ( " [%s] Unregistered fallthrough to 0x%x (chunk=0x%x, entry=0x%x) " , m_hash , target , m_entry , m_function_queue [ 0 ] ) ;
2018-06-10 14:46:01 +02:00
}
}
}
m_block - > block_end = m_ir - > GetInsertBlock ( ) ;
m_ir - > CreateBr ( add_block ( target ) ) ;
}
2018-05-02 20:49:19 +02:00
2020-12-09 08:47:45 +01:00
ensure ( m_block - > block_end ) ;
2018-05-02 20:49:19 +02:00
}
2023-04-15 13:37:07 +02:00
// Work on register stores.
2023-04-19 12:02:10 +02:00
// 1. Remove stores which are overwritten later.
2023-04-15 13:37:07 +02:00
// 2. Sink stores to post-dominating blocks.
llvm : : PostDominatorTree pdt ( * m_function ) ;
llvm : : DominatorTree dt ( * m_function ) ;
2023-04-19 12:02:10 +02:00
// Post-order indices
std : : unordered_map < llvm : : BasicBlock * , usz > pois ;
{
usz i = 0 ;
for ( auto * bb : llvm : : post_order ( m_function ) )
pois [ bb ] = i + + ;
}
2023-07-21 17:12:57 +02:00
// Basic block to block_info
std : : unordered_map < llvm : : BasicBlock * , block_info * > bb_to_info ;
2023-04-15 13:37:07 +02:00
std : : vector < block_info * > block_q ;
block_q . reserve ( m_blocks . size ( ) ) ;
for ( auto & [ a , b ] : m_blocks )
{
block_q . emplace_back ( & b ) ;
2023-07-21 17:12:57 +02:00
bb_to_info [ b . block ] = & b ;
2023-04-15 13:37:07 +02:00
}
2023-04-19 12:02:10 +02:00
for ( usz bi = 0 ; bi < block_q . size ( ) ; )
2023-04-15 13:37:07 +02:00
{
2023-04-19 12:02:10 +02:00
auto bqbi = block_q [ bi + + ] ;
2023-04-15 13:37:07 +02:00
// TODO: process all registers up to s_reg_max
for ( u32 i = 0 ; i < 128 ; i + + )
{
2023-07-21 17:12:57 +02:00
// Check if the store is beyond the last barrier
2023-12-29 07:30:15 +01:00
if ( auto & bs = bqbi - > store [ i ] ; bs & & ! bqbi - > does_gpr_barrier_proceed_last_store ( i ) )
2023-04-15 13:37:07 +02:00
{
for ( auto & [ a , b ] : m_blocks )
{
2023-07-21 17:12:57 +02:00
// Check if the store occurs before any barrier in the block
if ( b . store [ i ] & & b . store [ i ] ! = bs & & b . store_context_first_id [ i ] = = 1 )
2023-04-15 13:37:07 +02:00
{
if ( pdt . dominates ( b . store [ i ] , bs ) )
{
bs - > eraseFromParent ( ) ;
bs = nullptr ;
2023-04-19 12:02:10 +02:00
break ;
}
}
}
if ( ! bs )
continue ;
// Set of store instructions which overwrite bs
std : : vector < llvm : : BasicBlock * > killers ;
for ( auto & [ a , b ] : m_blocks )
{
const auto si = b . store [ i ] ;
2023-04-15 13:37:07 +02:00
2023-04-19 12:02:10 +02:00
if ( si & & si ! = bs )
{
if ( pois [ bs - > getParent ( ) ] > pois [ si - > getParent ( ) ] )
{
killers . emplace_back ( si - > getParent ( ) ) ;
}
else
{
// Reset: store is not the first in the set
killers . clear ( ) ;
2023-04-15 13:37:07 +02:00
break ;
}
}
}
2023-04-19 12:02:10 +02:00
if ( killers . empty ( ) )
continue ;
// Find nearest common post-dominator
llvm : : BasicBlock * common_pdom = killers [ 0 ] ;
for ( auto * bbb : llvm : : drop_begin ( killers ) )
{
if ( ! common_pdom )
break ;
common_pdom = pdt . findNearestCommonDominator ( common_pdom , bbb ) ;
}
// Shortcut
if ( ! pdt . dominates ( common_pdom , bs - > getParent ( ) ) )
common_pdom = nullptr ;
// Look for possibly-dead store in CFG starting from the exit nodes
llvm : : SetVector < llvm : : BasicBlock * > work_list ;
2023-07-21 17:12:57 +02:00
std : : unordered_map < llvm : : BasicBlock * , bool > worked_on ;
2023-12-28 20:14:58 +01:00
if ( ! common_pdom | | std : : count ( killers . begin ( ) , killers . end ( ) , common_pdom ) = = 0 )
2023-04-19 12:02:10 +02:00
{
if ( common_pdom )
{
// Shortcut
work_list . insert ( common_pdom ) ;
2023-07-21 17:12:57 +02:00
worked_on [ common_pdom ] = true ;
2023-04-19 12:02:10 +02:00
}
else
{
// Check all exits
for ( auto * r : pdt . roots ( ) )
2023-07-21 17:12:57 +02:00
{
worked_on [ r ] = true ;
2023-04-19 12:02:10 +02:00
work_list . insert ( r ) ;
2023-07-21 17:12:57 +02:00
}
2023-04-19 12:02:10 +02:00
}
}
2023-12-28 20:14:58 +01:00
// bool flag indicates the presence of a memory barrier before the killer store
2023-07-21 17:12:57 +02:00
std : : vector < std : : pair < llvm : : BasicBlock * , bool > > work2_list ;
2023-04-19 12:02:10 +02:00
for ( usz wi = 0 ; wi < work_list . size ( ) ; wi + + )
{
auto * cur = work_list [ wi ] ;
if ( std : : count ( killers . begin ( ) , killers . end ( ) , cur ) )
2023-07-21 17:12:57 +02:00
{
2023-12-29 07:30:15 +01:00
work2_list . emplace_back ( cur , bb_to_info [ cur ] & & bb_to_info [ cur ] - > does_gpr_barrier_preceed_first_store ( i ) ) ;
2023-04-19 12:02:10 +02:00
continue ;
2023-07-21 17:12:57 +02:00
}
2023-04-19 12:02:10 +02:00
if ( cur = = bs - > getParent ( ) )
{
// Reset: store is not dead
killers . clear ( ) ;
break ;
}
for ( auto * p : llvm : : predecessors ( cur ) )
2023-07-21 17:12:57 +02:00
{
if ( ! worked_on [ p ] )
{
worked_on [ p ] = true ;
work_list . insert ( p ) ;
}
}
}
if ( killers . empty ( ) )
continue ;
worked_on . clear ( ) ;
for ( usz wi = 0 ; wi < work2_list . size ( ) ; wi + + )
{
worked_on [ work2_list [ wi ] . first ] = true ;
}
2023-12-28 20:14:58 +01:00
// Need to treat tails differently: do not require checking barrier (checked before in a suitable manner)
const usz work_list_tail_blocks_max_index = work2_list . size ( ) ;
2023-07-21 17:12:57 +02:00
for ( usz wi = 0 ; wi < work2_list . size ( ) ; wi + + )
{
auto [ cur , found_user ] = work2_list [ wi ] ;
2023-12-29 07:30:15 +01:00
ensure ( cur ! = bs - > getParent ( ) ) ;
2023-07-21 17:12:57 +02:00
2023-12-28 20:14:58 +01:00
if ( ! found_user & & wi > = work_list_tail_blocks_max_index )
2023-07-21 17:12:57 +02:00
{
2023-12-28 20:14:58 +01:00
if ( auto info = bb_to_info [ cur ] )
{
if ( info - > store_context_ctr [ i ] ! = 1 )
{
found_user = true ;
}
}
2023-07-21 17:12:57 +02:00
}
for ( auto * p : llvm : : predecessors ( cur ) )
{
2023-12-29 07:30:15 +01:00
if ( p = = bs - > getParent ( ) )
{
if ( found_user )
{
// Reset: store is being used and preserved by ensure_gpr_stores()
killers . clear ( ) ;
break ;
}
continue ;
}
2023-07-21 17:12:57 +02:00
if ( ! worked_on [ p ] )
{
worked_on [ p ] = true ;
work2_list . push_back ( std : : make_pair ( p , found_user ) ) ;
}
// Enqueue a second iteration for found_user=true if only found with found_user=false
else if ( found_user & & ! std : : find_if ( work2_list . rbegin ( ) , work2_list . rend ( ) , [ & ] ( auto & it ) { return it . first = = p ; } ) - > second )
{
work2_list . push_back ( std : : make_pair ( p , true ) ) ;
}
}
2023-12-29 07:30:15 +01:00
if ( killers . empty ( ) )
{
break ;
}
2023-04-19 12:02:10 +02:00
}
// Finally erase the dead store
if ( ! killers . empty ( ) )
{
bs - > eraseFromParent ( ) ;
bs = nullptr ;
// Run the loop from the start
bi = 0 ;
}
2023-04-15 13:37:07 +02:00
}
2023-04-19 12:02:10 +02:00
}
}
2023-04-15 13:37:07 +02:00
2023-04-19 12:02:10 +02:00
block_q . clear ( ) ;
for ( auto & [ a , b ] : m_blocks )
{
block_q . emplace_back ( & b ) ;
}
for ( usz bi = 0 ; bi < block_q . size ( ) ; bi + + )
{
for ( u32 i = 0 ; i < 128 ; i + + )
{
2023-04-15 13:37:07 +02:00
// If store isn't erased, try to sink it
2023-12-29 07:30:15 +01:00
if ( auto & bs = block_q [ bi ] - > store [ i ] ; bs & & block_q [ bi ] - > bb - > targets . size ( ) > 1 & & ! block_q [ bi ] - > does_gpr_barrier_proceed_last_store ( i ) )
2023-04-15 13:37:07 +02:00
{
std : : map < u32 , block_info * , std : : greater < > > sucs ;
for ( u32 tj : block_q [ bi ] - > bb - > targets )
{
auto b2it = m_blocks . find ( tj ) ;
if ( b2it ! = m_blocks . end ( ) )
{
sucs . emplace ( tj , & b2it - > second ) ;
}
}
for ( auto [ a2 , b2 ] : sucs )
{
2023-04-20 12:59:52 +02:00
if ( b2 ! = block_q [ bi ] )
2023-04-15 13:37:07 +02:00
{
2023-07-21 17:12:57 +02:00
auto ins = b2 - > block - > getFirstNonPHI ( ) ;
2023-04-15 13:37:07 +02:00
if ( b2 - > bb - > preds . size ( ) = = 1 )
{
2023-04-20 12:59:52 +02:00
if ( ! dt . dominates ( bs - > getOperand ( 0 ) , ins ) )
continue ;
if ( ! pdt . dominates ( ins , bs ) )
continue ;
2023-04-15 13:37:07 +02:00
m_ir - > SetInsertPoint ( ins ) ;
auto si = llvm : : cast < StoreInst > ( m_ir - > Insert ( bs - > clone ( ) ) ) ;
if ( b2 - > store [ i ] = = nullptr )
{
b2 - > store [ i ] = si ;
2023-07-21 17:12:57 +02:00
b2 - > store_context_last_id [ i ] = 0 ;
2023-04-15 13:37:07 +02:00
if ( ! std : : count ( block_q . begin ( ) + bi , block_q . end ( ) , b2 ) )
{
// Sunk store can be checked again
block_q . push_back ( b2 ) ;
}
}
}
else
{
// Initialize additional block between two basic blocks
auto & edge = block_q [ bi ] - > block_edges [ a2 ] ;
if ( ! edge )
{
2023-04-20 12:59:52 +02:00
const auto succ_range = llvm : : successors ( block_q [ bi ] - > block_end ) ;
auto succ = b2 - > block ;
2023-04-22 01:37:51 +02:00
llvm : : SmallSetVector < llvm : : BasicBlock * , 32 > succ_q ;
succ_q . insert ( b2 - > block ) ;
2023-04-20 12:59:52 +02:00
2023-04-22 01:37:51 +02:00
for ( usz j = 0 ; j < 32 & & j < succ_q . size ( ) ; j + + )
2023-04-20 12:59:52 +02:00
{
if ( ! llvm : : count ( succ_range , ( succ = succ_q [ j ] ) ) )
{
for ( auto pred : llvm : : predecessors ( succ ) )
{
2023-04-22 01:37:51 +02:00
succ_q . insert ( pred ) ;
2023-04-20 12:59:52 +02:00
}
}
else
{
break ;
}
}
if ( ! llvm : : count ( succ_range , succ ) )
{
// TODO: figure this out
spu_log . notice ( " [%s] Failed successor to 0x%05x " , fmt : : base57 ( be_t < u64 > { m_hash_start } ) , a2 ) ;
continue ;
}
edge = llvm : : SplitEdge ( block_q [ bi ] - > block_end , succ ) ;
2023-04-15 13:37:07 +02:00
pdt . recalculate ( * m_function ) ;
dt . recalculate ( * m_function ) ;
}
ins = edge - > getTerminator ( ) ;
2023-04-20 12:59:52 +02:00
if ( ! dt . dominates ( bs - > getOperand ( 0 ) , ins ) )
continue ;
2023-04-15 13:37:07 +02:00
if ( ! pdt . dominates ( ins , bs ) )
continue ;
m_ir - > SetInsertPoint ( ins ) ;
m_ir - > Insert ( bs - > clone ( ) ) ;
}
bs - > eraseFromParent ( ) ;
bs = nullptr ;
pdt . recalculate ( * m_function ) ;
dt . recalculate ( * m_function ) ;
break ;
}
}
}
}
}
2018-05-02 20:49:19 +02:00
}
2018-06-10 14:46:01 +02:00
// Create function table if necessary
if ( m_function_table - > getNumUses ( ) )
2018-05-02 20:49:19 +02:00
{
2018-06-10 14:46:01 +02:00
std : : vector < llvm : : Constant * > chunks ;
chunks . reserve ( m_size / 4 ) ;
2018-05-02 20:49:19 +02:00
2018-06-10 14:46:01 +02:00
for ( u32 i = start ; i < end ; i + = 4 )
{
const auto found = m_functions . find ( i ) ;
2018-06-04 01:13:53 +02:00
2018-06-10 14:46:01 +02:00
if ( found = = m_functions . end ( ) )
{
2019-11-03 11:36:33 +01:00
if ( false & & g_cfg . core . spu_verification )
2019-10-26 23:16:44 +02:00
{
const std : : string ppname = fmt : : format ( " %s-chunkpp-0x%05x " , m_hash , i ) ;
2019-11-17 20:09:23 +01:00
m_engine - > updateGlobalMapping ( ppname , reinterpret_cast < u64 > ( m_spurt - > make_branch_patchpoint ( i / 4 ) ) ) ;
2019-10-26 23:16:44 +02:00
const auto ppfunc = llvm : : cast < llvm : : Function > ( m_module - > getOrInsertFunction ( ppname , m_finfo - > chunk - > getFunctionType ( ) ) . getCallee ( ) ) ;
ppfunc - > setCallingConv ( m_finfo - > chunk - > getCallingConv ( ) ) ;
chunks . push_back ( ppfunc ) ;
continue ;
}
2019-05-11 19:48:47 +02:00
chunks . push_back ( m_dispatch ) ;
2018-07-22 18:09:25 +02:00
continue ;
2018-06-10 14:46:01 +02:00
}
2018-07-22 18:09:25 +02:00
2019-05-05 15:28:41 +02:00
chunks . push_back ( found - > second . chunk ) ;
2018-06-10 14:46:01 +02:00
}
2019-05-05 15:28:41 +02:00
m_function_table - > setInitializer ( llvm : : ConstantArray : : get ( llvm : : ArrayType : : get ( entry_chunk - > chunk - > getType ( ) , m_size / 4 ) , chunks ) ) ;
2018-06-04 01:13:53 +02:00
}
else
{
2018-06-10 14:46:01 +02:00
m_function_table - > eraseFromParent ( ) ;
}
// Initialize pass manager
2020-05-06 17:18:30 +02:00
legacy : : FunctionPassManager pm ( _module . get ( ) ) ;
2018-06-10 14:46:01 +02:00
// Basic optimizations
pm . add ( createEarlyCSEPass ( ) ) ;
pm . add ( createCFGSimplificationPass ( ) ) ;
2020-02-03 09:15:26 +01:00
//pm.add(createNewGVNPass());
2023-09-25 17:04:48 +02:00
# if LLVM_VERSION_MAJOR < 17
2018-06-10 14:46:01 +02:00
pm . add ( createDeadStoreEliminationPass ( ) ) ;
2023-09-25 17:04:48 +02:00
# endif
2019-04-25 12:43:03 +02:00
pm . add ( createLICMPass ( ) ) ;
2023-09-25 17:04:48 +02:00
# if LLVM_VERSION_MAJOR < 17
2018-07-27 12:00:05 +02:00
pm . add ( createAggressiveDCEPass ( ) ) ;
2023-09-25 17:04:48 +02:00
# else
pm . add ( createDeadCodeEliminationPass ( ) ) ;
# endif
2018-06-10 14:46:01 +02:00
//pm.add(createLintPass()); // Check
2021-09-03 11:21:05 +02:00
for ( auto & f : * m_module )
{
replace_intrinsics ( f ) ;
}
2018-06-10 14:46:01 +02:00
for ( const auto & func : m_functions )
{
2019-05-05 15:28:41 +02:00
const auto f = func . second . fn ? func . second . fn : func . second . chunk ;
2019-04-25 12:43:03 +02:00
pm . run ( * f ) ;
2018-06-04 01:13:53 +02:00
}
2018-05-02 20:49:19 +02:00
2018-06-10 14:46:01 +02:00
// Clear context (TODO)
m_blocks . clear ( ) ;
m_block_queue . clear ( ) ;
m_functions . clear ( ) ;
m_function_queue . clear ( ) ;
m_function_table = nullptr ;
2018-05-02 20:49:19 +02:00
raw_string_ostream out ( log ) ;
if ( g_cfg . core . spu_debug )
{
2019-11-23 17:30:54 +01:00
fmt : : append ( log , " LLVM IR at 0x%x: \n " , func . entry_point ) ;
2020-05-06 17:18:30 +02:00
out < < * _module ; // print IR
2018-05-02 20:49:19 +02:00
out < < " \n \n " ;
}
2020-05-06 17:18:30 +02:00
if ( verifyModule ( * _module , & out ) )
2018-05-02 20:49:19 +02:00
{
out . flush ( ) ;
2020-02-01 09:36:09 +01:00
spu_log . error ( " LLVM: Verification failed at 0x%x: \n %s " , func . entry_point , log ) ;
2018-06-10 14:46:01 +02:00
if ( g_cfg . core . spu_debug )
{
2019-05-12 22:17:45 +02:00
fs : : file ( m_spurt - > get_cache_path ( ) + " spu-ir.log " , fs : : write + fs : : append ) . write ( log ) ;
2018-06-10 14:46:01 +02:00
}
2020-12-09 16:04:52 +01:00
fmt : : throw_exception ( " Compilation failed " ) ;
2018-05-02 20:49:19 +02:00
}
2022-07-10 08:38:48 +02:00
# if defined(__APPLE__)
pthread_jit_write_protect_np ( false ) ;
# endif
2018-05-02 20:49:19 +02:00
if ( g_cfg . core . spu_debug )
{
// Testing only
2020-05-06 17:18:30 +02:00
m_jit . add ( std : : move ( _module ) , m_spurt - > get_cache_path ( ) + " llvm/ " ) ;
2018-05-02 20:49:19 +02:00
}
else
{
2020-05-06 17:18:30 +02:00
m_jit . add ( std : : move ( _module ) ) ;
2018-05-02 20:49:19 +02:00
}
2019-01-21 19:04:32 +01:00
m_jit . fin ( ) ;
2018-05-02 20:49:19 +02:00
// Register function pointer
2019-01-21 19:04:32 +01:00
const spu_function_t fn = reinterpret_cast < spu_function_t > ( m_jit . get_engine ( ) . getPointerToFunction ( main_func ) ) ;
2018-05-02 20:49:19 +02:00
2019-10-25 23:52:56 +02:00
// Install unconditionally, possibly replacing existing one from spu_fast
add_loc - > compiled = fn ;
// Rebuild trampoline if necessary
2019-11-23 17:30:54 +01:00
if ( ! m_spurt - > rebuild_ubertrampoline ( func . data [ 0 ] ) )
2019-03-18 21:01:16 +01:00
{
2019-05-01 13:56:41 +02:00
return nullptr ;
2019-03-18 21:01:16 +01:00
}
2018-05-02 20:49:19 +02:00
2019-10-25 23:52:56 +02:00
add_loc - > compiled . notify_all ( ) ;
2018-05-02 20:49:19 +02:00
if ( g_cfg . core . spu_debug )
{
out . flush ( ) ;
2023-07-27 18:15:32 +02:00
fs : : write_file ( m_spurt - > get_cache_path ( ) + " spu-ir.log " , fs : : create + fs : : write + fs : : append , log ) ;
2019-03-18 21:01:16 +01:00
}
2022-07-10 08:38:48 +02:00
# if defined(__APPLE__)
pthread_jit_write_protect_np ( true ) ;
# endif
# if defined(ARCH_ARM64)
// Flush all cache lines after potentially writing executable code
asm ( " ISB " ) ;
asm ( " DSB ISH " ) ;
# endif
2021-03-02 12:59:19 +01:00
if ( g_fxo - > get < spu_cache > ( ) . operator bool ( ) )
2019-05-17 22:54:47 +02:00
{
2020-02-01 09:36:09 +01:00
spu_log . success ( " New block compiled successfully " ) ;
2019-05-17 22:54:47 +02:00
}
2019-05-01 13:56:41 +02:00
return fn ;
2018-05-02 20:49:19 +02:00
}
2019-03-25 19:31:16 +01:00
// Debug helper: verify recompiled code against the interpreter.
// Called twice around each recompiled instruction: first with after=false to
// snapshot the register file and run the reference interpreter, then with
// after=true to compare the recompiled result against the interpreter's.
static void interp_check(spu_thread* _spu, bool after)
{
	// Per-thread snapshot of all 128 SPU GPRs
	static thread_local std::array<v128, 128> s_gpr;

	if (after)
	{
		// Compare the recompiled register file with the interpreter's result
		for (u32 i = 0; i < s_gpr.size(); ++i)
		{
			if (_spu->gpr[i] != s_gpr[i])
			{
				spu_log.fatal("Register mismatch: $%u\n%s\n%s", i, _spu->gpr[i], s_gpr[i]);
				_spu->state += cpu_flag::dbg_pause;
			}
		}

		return;
	}

	// Snapshot the register state before executing
	s_gpr = _spu->gpr;

	// Fetch the big-endian opcode at the current PC and run it through the interpreter
	const u32 op = *reinterpret_cast<const be_t<u32>*>(_spu->_ptr<u8>(0) + _spu->pc);

	if (!g_fxo->get<spu_interpreter_rt>().decode(op)(*_spu, {op}))
		spu_log.fatal("Bad instruction");

	// Exchange the interpreter's results with the pre-execution snapshot,
	// leaving the thread in its original state for the recompiled code to run
	for (u32 i = 0; i < s_gpr.size(); ++i)
		std::swap(_spu->gpr[i], s_gpr[i]);
}
2019-05-01 13:56:41 +02:00
spu_function_t compile_interpreter ( )
2019-03-25 19:31:16 +01:00
{
using namespace llvm ;
2019-11-17 20:09:23 +01:00
m_engine - > clearAllGlobalMappings ( ) ;
2019-03-25 19:31:16 +01:00
// Create LLVM module
2020-05-06 17:18:30 +02:00
std : : unique_ptr < Module > _module = std : : make_unique < Module > ( " spu_interpreter.obj " , m_context ) ;
2023-04-08 14:21:22 +02:00
_module - > setTargetTriple ( jit_compiler : : triple2 ( ) ) ;
2020-05-06 17:18:30 +02:00
_module - > setDataLayout ( m_jit . get_engine ( ) . getTargetMachine ( ) - > createDataLayout ( ) ) ;
m_module = _module . get ( ) ;
2019-03-25 19:31:16 +01:00
// Initialize IR Builder
IRBuilder < > irb ( m_context ) ;
m_ir = & irb ;
// Create interpreter table
const auto if_type = get_ftype < void , u8 * , u8 * , u32 , u32 , u8 * , u32 , u8 * > ( ) ;
2019-06-28 09:16:14 +02:00
m_function_table = new GlobalVariable ( * m_module , ArrayType : : get ( if_type - > getPointerTo ( ) , 1ull < < m_interp_magn ) , true , GlobalValue : : InternalLinkage , nullptr ) ;
2019-03-25 19:31:16 +01:00
// Add return function
2020-05-06 17:18:30 +02:00
const auto ret_func = cast < Function > ( _module - > getOrInsertFunction ( " spu_ret " , if_type ) . getCallee ( ) ) ;
2019-03-25 19:31:16 +01:00
ret_func - > setCallingConv ( CallingConv : : GHC ) ;
ret_func - > setLinkage ( GlobalValue : : InternalLinkage ) ;
m_ir - > SetInsertPoint ( BasicBlock : : Create ( m_context , " " , ret_func ) ) ;
2023-03-10 23:57:21 +01:00
m_thread = ret_func - > getArg ( 1 ) ;
m_interp_pc = ret_func - > getArg ( 2 ) ;
2019-03-25 19:31:16 +01:00
m_ir - > CreateRetVoid ( ) ;
// Add entry function, serves as a trampoline
2019-03-29 14:35:00 +01:00
const auto main_func = llvm : : cast < Function > ( m_module - > getOrInsertFunction ( " spu_interpreter " , get_ftype < void , u8 * , u8 * , u8 * > ( ) ) . getCallee ( ) ) ;
2019-10-16 13:14:45 +02:00
# ifdef _WIN32
main_func - > setCallingConv ( CallingConv : : Win64 ) ;
# endif
2019-03-25 19:31:16 +01:00
set_function ( main_func ) ;
// Load pc and opcode
2023-03-10 23:57:21 +01:00
m_interp_pc = m_ir - > CreateLoad ( get_type < u32 > ( ) , spu_ptr < u32 > ( & spu_thread : : pc ) ) ;
2023-04-05 13:35:06 +02:00
m_interp_op = m_ir - > CreateLoad ( get_type < u32 > ( ) , m_ir - > CreateGEP ( get_type < u8 > ( ) , m_lsptr , m_ir - > CreateZExt ( m_interp_pc , get_type < u64 > ( ) ) ) ) ;
2019-03-25 19:31:16 +01:00
m_interp_op = m_ir - > CreateCall ( get_intrinsic < u32 > ( Intrinsic : : bswap ) , { m_interp_op } ) ;
// Pinned constant, address of interpreter table
2023-04-05 13:35:06 +02:00
m_interp_table = m_ir - > CreateGEP ( m_function_table - > getValueType ( ) , m_function_table , { m_ir - > getInt64 ( 0 ) , m_ir - > getInt64 ( 0 ) } ) ;
2019-03-25 19:31:16 +01:00
// Pinned constant, mask for shifted register index
m_interp_7f0 = m_ir - > getInt32 ( 0x7f0 ) ;
// Pinned constant, address of first register
m_interp_regs = _ptr ( m_thread , get_reg_offset ( 0 ) ) ;
2019-05-05 15:28:41 +02:00
// Save host thread's stack pointer
const auto native_sp = spu_ptr < u64 > ( & spu_thread : : saved_native_sp ) ;
2022-06-14 14:28:38 +02:00
# if defined(ARCH_X64)
2019-05-05 15:28:41 +02:00
const auto rsp_name = MetadataAsValue : : get ( m_context , MDNode : : get ( m_context , { MDString : : get ( m_context , " rsp " ) } ) ) ;
2022-06-14 14:28:38 +02:00
# elif defined(ARCH_ARM64)
const auto rsp_name = MetadataAsValue : : get ( m_context , MDNode : : get ( m_context , { MDString : : get ( m_context , " sp " ) } ) ) ;
# endif
2019-05-05 15:28:41 +02:00
m_ir - > CreateStore ( m_ir - > CreateCall ( get_intrinsic < u64 > ( Intrinsic : : read_register ) , { rsp_name } ) , native_sp ) ;
2019-03-25 19:31:16 +01:00
// Decode (shift) and load function pointer
2023-04-05 13:35:06 +02:00
const auto first = m_ir - > CreateLoad ( if_type - > getPointerTo ( ) , m_ir - > CreateGEP ( if_type - > getPointerTo ( ) , m_interp_table , m_ir - > CreateLShr ( m_interp_op , 32u - m_interp_magn ) ) ) ;
const auto call0 = m_ir - > CreateCall ( if_type , first , { m_lsptr , m_thread , m_interp_pc , m_interp_op , m_interp_table , m_interp_7f0 , m_interp_regs } ) ;
2019-03-25 19:31:16 +01:00
call0 - > setCallingConv ( CallingConv : : GHC ) ;
m_ir - > CreateRetVoid ( ) ;
// Create helper globals
{
std : : vector < llvm : : Constant * > float_to ;
std : : vector < llvm : : Constant * > to_float ;
float_to . reserve ( 256 ) ;
to_float . reserve ( 256 ) ;
for ( int i = 0 ; i < 256 ; + + i )
{
float_to . push_back ( ConstantFP : : get ( get_type < f32 > ( ) , std : : exp2 ( 173 - i ) ) ) ;
to_float . push_back ( ConstantFP : : get ( get_type < f32 > ( ) , std : : exp2 ( i - 155 ) ) ) ;
}
const auto atype = ArrayType : : get ( get_type < f32 > ( ) , 256 ) ;
m_scale_float_to = new GlobalVariable ( * m_module , atype , true , GlobalValue : : InternalLinkage , ConstantArray : : get ( atype , float_to ) ) ;
m_scale_to_float = new GlobalVariable ( * m_module , atype , true , GlobalValue : : InternalLinkage , ConstantArray : : get ( atype , to_float ) ) ;
}
// Fill interpreter table
2019-05-17 22:54:47 +02:00
std : : array < llvm : : Function * , 256 > ifuncs { } ;
2019-03-25 19:31:16 +01:00
std : : vector < llvm : : Constant * > iptrs ;
2019-06-28 09:16:14 +02:00
iptrs . reserve ( 1ull < < m_interp_magn ) ;
2019-03-25 19:31:16 +01:00
m_block = nullptr ;
2019-05-17 22:54:47 +02:00
auto last_itype = spu_itype : : type { 255 } ;
2019-03-25 19:31:16 +01:00
for ( u32 i = 0 ; i < 1u < < m_interp_magn ; )
{
// Fake opcode
2019-05-05 15:28:41 +02:00
const u32 op = i < < ( 32u - m_interp_magn ) ;
2019-03-25 19:31:16 +01:00
// Instruction type
2021-12-30 17:39:18 +01:00
const auto itype = g_spu_itype . decode ( op ) ;
2019-03-25 19:31:16 +01:00
// Function name
2021-12-30 17:39:18 +01:00
std : : string fname = fmt : : format ( " spu_%s " , g_spu_iname . decode ( op ) ) ;
2019-03-25 19:31:16 +01:00
if ( last_itype ! = itype )
{
// Trigger automatic information collection (probing)
m_op_const_mask = 0 ;
}
else
{
// Inject const mask into function name
2019-05-05 15:28:41 +02:00
fmt : : append ( fname , " _%X " , ( i & ( m_op_const_mask > > ( 32u - m_interp_magn ) ) ) | ( 1u < < m_interp_magn ) ) ;
2019-03-25 19:31:16 +01:00
}
// Decode instruction name, access function
2020-05-06 17:18:30 +02:00
const auto f = cast < Function > ( _module - > getOrInsertFunction ( fname , if_type ) . getCallee ( ) ) ;
2019-03-25 19:31:16 +01:00
// Build if necessary
if ( f - > empty ( ) )
{
2019-05-17 22:54:47 +02:00
if ( last_itype ! = itype )
{
2023-09-06 05:53:10 +02:00
ifuncs [ static_cast < usz > ( itype ) ] = f ;
2019-05-17 22:54:47 +02:00
}
2019-03-25 19:31:16 +01:00
f - > setCallingConv ( CallingConv : : GHC ) ;
m_function = f ;
2023-03-10 23:57:21 +01:00
m_lsptr = f - > getArg ( 0 ) ;
m_thread = f - > getArg ( 1 ) ;
m_interp_pc = f - > getArg ( 2 ) ;
m_interp_op = f - > getArg ( 3 ) ;
m_interp_table = f - > getArg ( 4 ) ;
m_interp_7f0 = f - > getArg ( 5 ) ;
m_interp_regs = f - > getArg ( 6 ) ;
2019-03-25 19:31:16 +01:00
m_ir - > SetInsertPoint ( BasicBlock : : Create ( m_context , " " , f ) ) ;
2023-03-10 23:57:21 +01:00
m_memptr = m_ir - > CreateLoad ( get_type < u8 * > ( ) , spu_ptr < u8 * > ( & spu_thread : : memory_base_addr ) ) ;
2019-03-25 19:31:16 +01:00
switch ( itype )
{
case spu_itype : : UNK :
case spu_itype : : DFCEQ :
case spu_itype : : DFCMEQ :
case spu_itype : : DFCGT :
2019-03-29 14:49:19 +01:00
case spu_itype : : DFCMGT :
2019-03-25 19:31:16 +01:00
case spu_itype : : DFTSV :
case spu_itype : : STOP :
case spu_itype : : STOPD :
case spu_itype : : RDCH :
case spu_itype : : WRCH :
{
// Invalid or abortable instruction. Save current address.
m_ir - > CreateStore ( m_interp_pc , spu_ptr < u32 > ( & spu_thread : : pc ) ) ;
[[fallthrough]] ;
}
default :
{
break ;
}
}
{
m_interp_bblock = nullptr ;
// Next instruction (no wraparound at the end of LS)
m_interp_pc_next = m_ir - > CreateAdd ( m_interp_pc , m_ir - > getInt32 ( 4 ) ) ;
bool check = false ;
if ( itype = = spu_itype : : WRCH | |
itype = = spu_itype : : RDCH | |
itype = = spu_itype : : RCHCNT | |
itype = = spu_itype : : STOP | |
itype = = spu_itype : : STOPD | |
itype & spu_itype : : floating | |
itype & spu_itype : : branch )
{
check = false ;
}
if ( itype & spu_itype : : branch )
{
// Instruction changes pc - change order.
2020-03-24 09:40:22 +01:00
( this - > * decode ( op ) ) ( { op } ) ;
2019-03-25 19:31:16 +01:00
if ( m_interp_bblock )
{
m_ir - > SetInsertPoint ( m_interp_bblock ) ;
m_interp_bblock = nullptr ;
}
}
if ( ! m_ir - > GetInsertBlock ( ) - > getTerminator ( ) )
{
if ( check )
{
m_ir - > CreateStore ( m_interp_pc , spu_ptr < u32 > ( & spu_thread : : pc ) ) ;
}
// Decode next instruction.
const auto next_pc = itype & spu_itype : : branch ? m_interp_pc : m_interp_pc_next ;
2023-04-05 13:35:06 +02:00
const auto be32_op = m_ir - > CreateLoad ( get_type < u32 > ( ) , m_ir - > CreateGEP ( get_type < u8 > ( ) , m_lsptr , m_ir - > CreateZExt ( next_pc , get_type < u64 > ( ) ) ) ) ;
2019-03-25 19:31:16 +01:00
const auto next_op = m_ir - > CreateCall ( get_intrinsic < u32 > ( Intrinsic : : bswap ) , { be32_op } ) ;
2023-04-05 13:35:06 +02:00
const auto next_if = m_ir - > CreateLoad ( if_type - > getPointerTo ( ) , m_ir - > CreateGEP ( if_type - > getPointerTo ( ) , m_interp_table , m_ir - > CreateLShr ( next_op , 32u - m_interp_magn ) ) ) ;
2019-03-25 19:31:16 +01:00
llvm : : cast < LoadInst > ( next_if ) - > setVolatile ( true ) ;
if ( ! ( itype & spu_itype : : branch ) )
{
if ( check )
{
2019-05-05 15:28:41 +02:00
call ( " spu_interp_check " , & interp_check , m_thread , m_ir - > getFalse ( ) ) ;
2019-03-25 19:31:16 +01:00
}
// Normal instruction.
2020-03-24 09:40:22 +01:00
( this - > * decode ( op ) ) ( { op } ) ;
2019-03-25 19:31:16 +01:00
if ( check & & ! m_ir - > GetInsertBlock ( ) - > getTerminator ( ) )
{
2019-05-05 15:28:41 +02:00
call ( " spu_interp_check " , & interp_check , m_thread , m_ir - > getTrue ( ) ) ;
2019-03-25 19:31:16 +01:00
}
m_interp_pc = m_interp_pc_next ;
}
2019-05-17 22:54:47 +02:00
if ( last_itype ! = itype )
{
// Reset to discard dead code
llvm : : cast < LoadInst > ( next_if ) - > setVolatile ( false ) ;
if ( itype & spu_itype : : branch )
{
const auto _stop = BasicBlock : : Create ( m_context , " " , f ) ;
const auto _next = BasicBlock : : Create ( m_context , " " , f ) ;
2023-03-10 23:57:21 +01:00
m_ir - > CreateCondBr ( m_ir - > CreateIsNotNull ( m_ir - > CreateLoad ( get_type < u32 > ( ) , spu_ptr < u32 > ( & spu_thread : : state ) ) ) , _stop , _next , m_md_unlikely ) ;
2019-05-17 22:54:47 +02:00
m_ir - > SetInsertPoint ( _stop ) ;
m_ir - > CreateStore ( m_interp_pc , spu_ptr < u32 > ( & spu_thread : : pc ) ) ;
const auto escape_yes = BasicBlock : : Create ( m_context , " " , f ) ;
const auto escape_no = BasicBlock : : Create ( m_context , " " , f ) ;
m_ir - > CreateCondBr ( call ( " spu_exec_check_state " , & exec_check_state , m_thread ) , escape_yes , escape_no ) ;
m_ir - > SetInsertPoint ( escape_yes ) ;
call ( " spu_escape " , spu_runtime : : g_escape , m_thread ) ;
m_ir - > CreateBr ( _next ) ;
m_ir - > SetInsertPoint ( escape_no ) ;
m_ir - > CreateBr ( _next ) ;
m_ir - > SetInsertPoint ( _next ) ;
}
2023-04-05 13:35:06 +02:00
llvm : : Value * fret = m_interp_table ;
2019-05-17 22:54:47 +02:00
if ( itype = = spu_itype : : WRCH | |
itype = = spu_itype : : RDCH | |
itype = = spu_itype : : RCHCNT | |
itype = = spu_itype : : STOP | |
itype = = spu_itype : : STOPD | |
itype = = spu_itype : : UNK | |
itype = = spu_itype : : DFCMEQ | |
itype = = spu_itype : : DFCMGT | |
itype = = spu_itype : : DFCGT | |
itype = = spu_itype : : DFCEQ | |
itype = = spu_itype : : DFTSV )
{
m_interp_7f0 = m_ir - > getInt32 ( 0x7f0 ) ;
m_interp_regs = _ptr ( m_thread , get_reg_offset ( 0 ) ) ;
fret = ret_func ;
}
else if ( ! ( itype & spu_itype : : branch ) )
{
// Hack: inline ret instruction before final jmp; this is not reliable.
2022-06-14 14:28:38 +02:00
# ifdef ARCH_X64
2019-05-17 22:54:47 +02:00
m_ir - > CreateCall ( InlineAsm : : get ( get_ftype < void > ( ) , " ret " , " " , true , false , InlineAsm : : AD_Intel ) ) ;
2022-06-14 14:28:38 +02:00
# else
m_ir - > CreateCall ( InlineAsm : : get ( get_ftype < void > ( ) , " ret " , " " , true , false ) ) ;
# endif
2019-05-17 22:54:47 +02:00
fret = ret_func ;
}
const auto arg3 = UndefValue : : get ( get_type < u32 > ( ) ) ;
2023-04-05 13:35:06 +02:00
const auto _ret = m_ir - > CreateCall ( if_type , fret , { m_lsptr , m_thread , m_interp_pc , arg3 , m_interp_table , m_interp_7f0 , m_interp_regs } ) ;
2019-05-17 22:54:47 +02:00
_ret - > setCallingConv ( CallingConv : : GHC ) ;
_ret - > setTailCall ( ) ;
m_ir - > CreateRetVoid ( ) ;
}
2019-03-25 19:31:16 +01:00
if ( ! m_ir - > GetInsertBlock ( ) - > getTerminator ( ) )
{
// Call next instruction.
const auto _stop = BasicBlock : : Create ( m_context , " " , f ) ;
const auto _next = BasicBlock : : Create ( m_context , " " , f ) ;
2023-03-10 23:57:21 +01:00
m_ir - > CreateCondBr ( m_ir - > CreateIsNotNull ( m_ir - > CreateLoad ( get_type < u32 > ( ) , spu_ptr < u32 > ( & spu_thread : : state ) ) ) , _stop , _next , m_md_unlikely ) ;
2019-03-25 19:31:16 +01:00
m_ir - > SetInsertPoint ( _next ) ;
if ( itype = = spu_itype : : WRCH | |
itype = = spu_itype : : RDCH | |
itype = = spu_itype : : RCHCNT | |
itype = = spu_itype : : STOP | |
itype = = spu_itype : : STOPD )
{
m_interp_7f0 = m_ir - > getInt32 ( 0x7f0 ) ;
m_interp_regs = _ptr ( m_thread , get_reg_offset ( 0 ) ) ;
}
2023-04-05 13:35:06 +02:00
const auto ncall = m_ir - > CreateCall ( if_type , next_if , { m_lsptr , m_thread , m_interp_pc , next_op , m_interp_table , m_interp_7f0 , m_interp_regs } ) ;
2019-03-25 19:31:16 +01:00
ncall - > setCallingConv ( CallingConv : : GHC ) ;
ncall - > setTailCall ( ) ;
m_ir - > CreateRetVoid ( ) ;
m_ir - > SetInsertPoint ( _stop ) ;
2023-04-08 14:21:22 +02:00
m_ir - > CreateStore ( m_interp_pc , spu_ptr < u32 > ( & spu_thread : : pc ) ) ;
2023-03-11 20:08:27 +01:00
call ( " spu_escape " , spu_runtime : : g_escape , m_thread ) - > setTailCall ( ) ;
2019-03-25 19:31:16 +01:00
m_ir - > CreateRetVoid ( ) ;
}
}
}
}
2019-05-17 22:54:47 +02:00
if ( last_itype ! = itype & & g_cfg . core . spu_decoder ! = spu_decoder_type : : llvm )
2019-03-25 19:31:16 +01:00
{
// Repeat after probing
last_itype = itype ;
}
else
{
// Add to the table
iptrs . push_back ( f ) ;
i + + ;
}
}
2019-06-28 09:16:14 +02:00
m_function_table - > setInitializer ( ConstantArray : : get ( ArrayType : : get ( if_type - > getPointerTo ( ) , 1ull < < m_interp_magn ) , iptrs ) ) ;
2019-03-25 19:31:16 +01:00
m_function_table = nullptr ;
// Initialize pass manager
2020-05-06 17:18:30 +02:00
legacy : : FunctionPassManager pm ( _module . get ( ) ) ;
2019-03-25 19:31:16 +01:00
// Basic optimizations
pm . add ( createEarlyCSEPass ( ) ) ;
pm . add ( createCFGSimplificationPass ( ) ) ;
2023-09-25 17:04:48 +02:00
# if LLVM_VERSION_MAJOR < 17
2019-03-25 19:31:16 +01:00
pm . add ( createDeadStoreEliminationPass ( ) ) ;
pm . add ( createAggressiveDCEPass ( ) ) ;
2023-09-25 17:04:48 +02:00
# else
pm . add ( createDeadCodeEliminationPass ( ) ) ;
# endif
2019-03-25 19:31:16 +01:00
//pm.add(createLintPass());
2021-09-03 11:21:05 +02:00
for ( auto & f : * _module )
{
replace_intrinsics ( f ) ;
//pm.run(f);
}
2019-03-25 19:31:16 +01:00
std : : string log ;
raw_string_ostream out ( log ) ;
if ( g_cfg . core . spu_debug )
{
fmt : : append ( log , " LLVM IR (interpreter): \n " ) ;
2020-05-06 17:18:30 +02:00
out < < * _module ; // print IR
2019-03-25 19:31:16 +01:00
out < < " \n \n " ;
}
2020-05-06 17:18:30 +02:00
if ( verifyModule ( * _module , & out ) )
2019-03-25 19:31:16 +01:00
{
out . flush ( ) ;
2020-02-01 09:36:09 +01:00
spu_log . error ( " LLVM: Verification failed: \n %s " , log ) ;
2019-03-25 19:31:16 +01:00
if ( g_cfg . core . spu_debug )
{
2023-07-27 18:15:32 +02:00
fs : : write_file ( m_spurt - > get_cache_path ( ) + " spu-ir.log " , fs : : create + fs : : write + fs : : append , log ) ;
2019-03-25 19:31:16 +01:00
}
2020-12-09 16:04:52 +01:00
fmt : : throw_exception ( " Compilation failed " ) ;
2019-03-25 19:31:16 +01:00
}
if ( g_cfg . core . spu_debug )
{
// Testing only
2020-05-06 17:18:30 +02:00
m_jit . add ( std : : move ( _module ) , m_spurt - > get_cache_path ( ) + " llvm/ " ) ;
2019-03-25 19:31:16 +01:00
}
else
{
2020-05-06 17:18:30 +02:00
m_jit . add ( std : : move ( _module ) ) ;
2019-03-25 19:31:16 +01:00
}
m_jit . fin ( ) ;
// Register interpreter entry point
spu_runtime : : g_interpreter = reinterpret_cast < spu_function_t > ( m_jit . get_engine ( ) . getPointerToFunction ( main_func ) ) ;
2019-05-17 22:54:47 +02:00
for ( u32 i = 0 ; i < spu_runtime : : g_interpreter_table . size ( ) ; i + + )
{
// Fill exported interpreter table
spu_runtime : : g_interpreter_table [ i ] = ifuncs [ i ] ? reinterpret_cast < u64 > ( m_jit . get_engine ( ) . getPointerToFunction ( ifuncs [ i ] ) ) : 0 ;
}
2019-03-25 19:31:16 +01:00
if ( ! spu_runtime : : g_interpreter )
{
2019-05-01 13:56:41 +02:00
return nullptr ;
2019-03-25 19:31:16 +01:00
}
if ( g_cfg . core . spu_debug )
{
out . flush ( ) ;
2023-07-27 18:15:32 +02:00
fs : : write_file ( m_spurt - > get_cache_path ( ) + " spu-ir.log " , fs : : create + fs : : write + fs : : append , log ) ;
2019-03-25 19:31:16 +01:00
}
2019-05-01 13:56:41 +02:00
return spu_runtime : : g_interpreter ;
2019-03-25 19:31:16 +01:00
}
2018-10-11 00:17:19 +02:00
static bool exec_check_state ( spu_thread * _spu )
2018-06-03 17:07:39 +02:00
{
return _spu - > check_state ( ) ;
}
2021-12-30 17:39:18 +01:00
template < spu_intrp_func_t F >
2018-10-11 00:17:19 +02:00
static void exec_fall ( spu_thread * _spu , spu_opcode_t op )
2018-05-02 20:49:19 +02:00
{
if ( F ( * _spu , op ) )
{
_spu - > pc + = 4 ;
}
}
2021-12-30 17:39:18 +01:00
template < spu_intrp_func_t F >
2018-05-02 20:49:19 +02:00
void fall ( spu_opcode_t op )
{
2021-12-30 17:39:18 +01:00
std : : string name = fmt : : format ( " spu_%s " , g_spu_iname . decode ( op . opcode ) ) ;
2019-05-05 15:28:41 +02:00
2019-03-25 19:31:16 +01:00
if ( m_interp_magn )
{
2019-05-05 15:28:41 +02:00
call ( name , F , m_thread , m_interp_op ) ;
2019-03-25 19:31:16 +01:00
return ;
}
2018-05-02 20:49:19 +02:00
update_pc ( ) ;
2019-05-05 15:28:41 +02:00
call ( name , & exec_fall < F > , m_thread , m_ir - > getInt32 ( op . opcode ) ) ;
2018-05-02 20:49:19 +02:00
}
2021-04-07 23:52:18 +02:00
// Thunk for undecodable opcodes: aborts SPU execution with a diagnostic.
// The spu_thread argument is unused but keeps the common thunk signature.
[[noreturn]] static void exec_unk(spu_thread*, u32 op)
{
	fmt::throw_exception("Unknown/Illegal instruction (0x%08x)", op);
}
// Translate an unknown/illegal instruction: always ends in exec_unk throwing.
void UNK(spu_opcode_t op_unk)
{
	const auto opc = m_ir->getInt32(op_unk.opcode);

	if (m_interp_magn)
	{
		// Interpreter: store the current PC so the error report points at it.
		m_ir->CreateStore(m_interp_pc, spu_ptr<u32>(&spu_thread::pc));
		call("spu_unknown", &exec_unk, m_thread, opc);
		return;
	}

	// Recompiler: close the current block and raise the error at runtime.
	m_block->block_end = m_ir->GetInsertBlock();
	update_pc();
	call("spu_unknown", &exec_unk, m_thread, opc);
}
2019-06-06 20:32:35 +02:00
static void exec_stop ( spu_thread * _spu , u32 code )
2018-05-02 20:49:19 +02:00
{
2022-07-06 12:11:23 +02:00
if ( ! _spu - > stop_and_signal ( code ) | | _spu - > state & cpu_flag : : again )
2019-06-06 20:32:35 +02:00
{
spu_runtime : : g_escape ( _spu ) ;
}
2022-07-06 12:11:23 +02:00
if ( _spu - > test_stopped ( ) )
{
_spu - > pc + = 4 ;
spu_runtime : : g_escape ( _spu ) ;
}
2018-05-02 20:49:19 +02:00
}
// Translate STOP: emit the syscall with the 14-bit stop code.
void STOP(spu_opcode_t op) //
{
	// Interpreter mode: mask the stop code out of the runtime opcode value.
	if (m_interp_magn)
	{
		const auto code = m_ir->CreateAnd(m_interp_op, m_ir->getInt32(0x3fff));
		call("spu_syscall", &exec_stop, m_thread, code);
		return;
	}

	update_pc();
	call("spu_syscall", &exec_stop, m_thread, m_ir->getInt32(op.opcode & 0x3fff));

	// In safe block-size mode a STOP terminates the chunk:
	// continue at pc + 4 through the dispatcher.
	if (g_cfg.core.spu_block_size == spu_block_size_type::safe)
	{
		m_block->block_end = m_ir->GetInsertBlock();
		update_pc(m_pos + 4);
		ensure_gpr_stores();
		tail_chunk(m_dispatch);
		return;
	}
}
2021-03-05 20:05:37 +01:00
// Translate STOPD: behaves like STOP with the fixed code 0x3fff.
void STOPD(spu_opcode_t) //
{
	if (!m_interp_magn)
	{
		STOP(spu_opcode_t{0x3fff});
		return;
	}

	call("spu_syscall", &exec_stop, m_thread, m_ir->getInt32(0x3fff));
}
2019-06-06 20:32:35 +02:00
// Thunk: blocking channel read. Escapes to the dispatcher on failure
// or when the operation must be re-executed (cpu_flag::again).
static u32 exec_rdch(spu_thread* spu, u32 ch)
{
	const s64 value = spu->get_ch_value(ch);

	// A negative result signals failure.
	if (value < 0 || spu->state & cpu_flag::again)
	{
		spu_runtime::g_escape(spu);
	}

	// The state-check result is deliberately discarded here.
	static_cast<void>(spu->test_stopped());

	return static_cast<u32>(value & 0xffffffff);
}
2019-06-06 20:32:35 +02:00
// Thunk: read from the inbound mailbox channel (SPU_RdInMbox).
// Currently just forwards to the generic channel read.
static u32 exec_read_in_mbox(spu_thread* _spu)
{
	// TODO
	return exec_rdch(_spu, SPU_RdInMbox);
}
2018-10-11 00:17:19 +02:00
// Thunk: read the SPU decrementer (SPU_RdDec).
static u32 exec_read_dec(spu_thread* spu)
{
	const u32 value = spu->read_dec().first;

	// Loop-detection heuristic: a large remaining count suggests the SPU is
	// polling the decrementer in a busy loop, so yield the host thread.
	if (value > 1500 && g_cfg.core.spu_loop_detection)
	{
		spu->state += cpu_flag::wait;
		std::this_thread::yield();
		static_cast<void>(spu->test_stopped());
	}

	return value;
}
2019-06-06 20:32:35 +02:00
// Thunk: read the event status channel (SPU_RdEventStat).
// Currently just forwards to the generic channel read.
static u32 exec_read_events(spu_thread* _spu)
{
	// TODO
	return exec_rdch(_spu, SPU_RdEventStat);
}
2023-07-21 17:12:57 +02:00
// Pin all pending GPR stores: bump every per-register store counter so
// earlier stores can neither be reordered past this point nor eliminated.
void ensure_gpr_stores()
{
	if (!m_block)
	{
		return;
	}

	for (auto& counter : m_block->store_context_ctr)
	{
		counter++;
	}
}
2018-06-30 01:19:12 +02:00
// Emit a fast-path channel read: atomically take the 64-bit channel word at
// offset 'off' inside spu_thread, clearing it to zero. If the sign bit was
// set (data was present), the low 32 bits are the value; otherwise fall back
// to the blocking exec_rdch call. Returns a u32 llvm::Value (PHI of both paths).
llvm::Value* get_rdch(spu_opcode_t op, u32 off, bool atomic)
{
	const auto ptr = _ptr<u64>(m_thread, off);
	llvm::Value* val0;

	if (atomic)
	{
		// Single atomic exchange with zero (acquire ordering).
		const auto val = m_ir->CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, ptr, m_ir->getInt64(0), llvm::MaybeAlign{8}, llvm::AtomicOrdering::Acquire);
		val0 = val;
	}
	else
	{
		// Non-atomic variant: acquire-load then release-store zero.
		const auto val = m_ir->CreateLoad(get_type<u64>(), ptr);
		val->setAtomic(llvm::AtomicOrdering::Acquire);
		m_ir->CreateStore(m_ir->getInt64(0), ptr)->setAtomic(llvm::AtomicOrdering::Release);
		val0 = val;
	}

	const auto _cur = m_ir->GetInsertBlock();
	const auto done = llvm::BasicBlock::Create(m_context, "", m_function);
	const auto wait = llvm::BasicBlock::Create(m_context, "", m_function);
	// Sign bit set => the channel word carried a value (fast path taken).
	const auto cond = m_ir->CreateICmpSLT(val0, m_ir->getInt64(0));
	val0 = m_ir->CreateTrunc(val0, get_type<u32>());
	m_ir->CreateCondBr(cond, done, wait);
	m_ir->SetInsertPoint(wait);
	// Slow path: commit PC and perform the blocking read.
	update_pc();
	const auto val1 = call("spu_read_channel", &exec_rdch, m_thread, m_ir->getInt32(op.ra));
	m_ir->CreateBr(done);
	m_ir->SetInsertPoint(done);
	// Merge fast-path and slow-path results.
	const auto rval = m_ir->CreatePHI(get_type<u32>(), 2);
	rval->addIncoming(val0, _cur);
	rval->addIncoming(val1, wait);
	return rval;
}
// Translate RDCH (read channel). The result is placed in element 3 of the
// destination register; the other elements are zeroed.
void RDCH(spu_opcode_t op) //
{
	value_t<u32> res;

	// Interpreter mode: always go through the generic blocking read.
	if (m_interp_magn)
	{
		res.value = call("spu_read_channel", &exec_rdch, m_thread, get_imm<u32>(op.ra).value);
		set_vr(op.rt, insert(splat<u32[4]>(0), 3, res));
		return;
	}

	switch (op.ra)
	{
	case SPU_RdSRR0:
	{
		// Plain load of the saved restart register.
		res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::srr0));
		break;
	}
	case SPU_RdInMbox:
	{
		// Potentially blocking: commit PC and pin GPR stores first.
		update_pc();
		ensure_gpr_stores();
		res.value = call("spu_read_in_mbox", &exec_read_in_mbox, m_thread);
		break;
	}
	case MFC_RdTagStat:
	{
		res.value = get_rdch(op, ::offset32(&spu_thread::ch_tag_stat), false);
		break;
	}
	case MFC_RdTagMask:
	{
		res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::ch_tag_mask));
		break;
	}
	case SPU_RdSigNotify1:
	{
		// SNR channels use the atomic (exchange) fast path.
		update_pc();
		ensure_gpr_stores();
		res.value = get_rdch(op, ::offset32(&spu_thread::ch_snr1), true);
		break;
	}
	case SPU_RdSigNotify2:
	{
		update_pc();
		ensure_gpr_stores();
		res.value = get_rdch(op, ::offset32(&spu_thread::ch_snr2), true);
		break;
	}
	case MFC_RdAtomicStat:
	{
		res.value = get_rdch(op, ::offset32(&spu_thread::ch_atomic_stat), false);
		break;
	}
	case MFC_RdListStallStat:
	{
		res.value = get_rdch(op, ::offset32(&spu_thread::ch_stall_stat), false);
		break;
	}
	case SPU_RdDec:
	{
		// Inline fast path (x86 rdtsc): compute the elapsed ticks since the
		// decrementer was started, scaled from the TSC frequency to the
		// 80 MHz timebase, and subtract them from the latched value.
		// Only valid when no loop detection and no clock scaling is active.
		if (utils::get_tsc_freq() && !(g_cfg.core.spu_loop_detection) && (g_cfg.core.clocks_scale == 100))
		{
			const auto timestamp = m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::ch_dec_start_timestamp));
			const auto dec_value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::ch_dec_value));
			const auto tsc = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_rdtsc));
			// Split the conversion into quotient and remainder parts to avoid
			// overflowing the 64-bit multiplication.
			const auto tscx = m_ir->CreateMul(m_ir->CreateUDiv(tsc, m_ir->getInt64(utils::get_tsc_freq())), m_ir->getInt64(80000000));
			const auto tscm = m_ir->CreateUDiv(m_ir->CreateMul(m_ir->CreateURem(tsc, m_ir->getInt64(utils::get_tsc_freq())), m_ir->getInt64(80000000)), m_ir->getInt64(utils::get_tsc_freq()));
			const auto tsctb = m_ir->CreateAdd(tscx, tscm);
			// If the decrementer is frozen, no time is subtracted.
			const auto frz = m_ir->CreateLoad(get_type<u8>(), spu_ptr<u8>(&spu_thread::is_dec_frozen));
			const auto frzev = m_ir->CreateICmpEQ(frz, m_ir->getInt8(0));
			const auto delta = m_ir->CreateTrunc(m_ir->CreateSub(tsctb, timestamp), get_type<u32>());
			const auto deltax = m_ir->CreateSelect(frzev, delta, m_ir->getInt32(0));
			res.value = m_ir->CreateSub(dec_value, deltax);
			break;
		}

		res.value = call("spu_read_decrementer", &exec_read_dec, m_thread);
		break;
	}
	case SPU_RdEventMask:
	{
		// The event mask lives in the upper 32 bits of ch_events.
		const auto value = m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::ch_events));
		value->setAtomic(llvm::AtomicOrdering::Acquire);
		res.value = m_ir->CreateTrunc(m_ir->CreateLShr(value, 32), get_type<u32>());
		break;
	}
	case SPU_RdEventStat:
	{
		update_pc();

		if (g_cfg.savestate.compatible_mode)
		{
			ensure_gpr_stores();
		}
		else
		{
			// Mark the thread unsavable for the duration of the blocking read.
			m_ir->CreateStore(m_ir->getInt8(1), spu_ptr<u8>(&spu_thread::unsavable));
		}

		res.value = call("spu_read_events", &exec_read_events, m_thread);

		if (!g_cfg.savestate.compatible_mode)
		{
			m_ir->CreateStore(m_ir->getInt8(0), spu_ptr<u8>(&spu_thread::unsavable));
		}

		break;
	}
	case SPU_RdMachStat:
	{
		// Bit 0: interrupts enabled; bit 1: taken from thread_type.
		res.value = m_ir->CreateZExt(m_ir->CreateLoad(get_type<u8>(), spu_ptr<u8>(&spu_thread::interrupts_enabled)), get_type<u32>());
		res.value = m_ir->CreateOr(res.value, m_ir->CreateAnd(m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::thread_type)), m_ir->getInt32(2)));
		break;
	}
	default:
	{
		// Generic blocking read for all remaining channels.
		update_pc();
		ensure_gpr_stores();
		res.value = call("spu_read_channel", &exec_rdch, m_thread, m_ir->getInt32(op.ra));
		break;
	}
	}

	set_vr(op.rt, insert(splat<u32[4]>(0), 3, res));
}
2018-10-11 00:17:19 +02:00
// Thunk: query the channel count (RCHCNT) for channels without an inline path.
static u32 exec_rchcnt(spu_thread* _spu, u32 ch)
{
	return _spu->get_ch_count(ch);
}
2020-08-27 22:36:54 +02:00
// Thunk: recompute pending events under the given mask and return their count.
static u32 exec_get_events(spu_thread* _spu, u32 mask)
{
	return _spu->get_events(mask).count;
}
// Emit an inline channel-count read: acquire-load the 64-bit channel word at
// offset 'off' inside spu_thread, extract the count bit, and XOR with 'inv'
// (callers pass a non-zero mask for write channels, flipping the result).
llvm::Value* get_rchcnt(u32 off, u64 inv = 0)
{
	const auto raw = m_ir->CreateLoad(get_type<u64>(), _ptr<u64>(m_thread, off));
	raw->setAtomic(llvm::AtomicOrdering::Acquire);

	// Shift the count bit down, apply the inversion mask, truncate to u32.
	const auto count = m_ir->CreateLShr(raw, spu_channel::off_count);
	return m_ir->CreateTrunc(m_ir->CreateXor(count, u64{inv}), get_type<u32>());
}
2018-05-02 20:49:19 +02:00
// Translate RCHCNT (read channel count). The count is placed in element 3 of
// the destination register; the other elements are zeroed.
void RCHCNT(spu_opcode_t op) //
{
	value_t<u32> res;

	// Interpreter mode: always go through the generic count query.
	if (m_interp_magn)
	{
		res.value = call("spu_read_channel_count", &exec_rchcnt, m_thread, get_imm<u32>(op.ra).value);
		set_vr(op.rt, insert(splat<u32[4]>(0), 3, res));
		return;
	}

	switch (op.ra)
	{
	case SPU_WrOutMbox:
	{
		// Write channels: the occupancy bit is inverted to report free slots.
		res.value = get_rchcnt(::offset32(&spu_thread::ch_out_mbox), true);
		break;
	}
	case SPU_WrOutIntrMbox:
	{
		res.value = get_rchcnt(::offset32(&spu_thread::ch_out_intr_mbox), true);
		break;
	}
	case MFC_RdTagStat:
	{
		res.value = get_rchcnt(::offset32(&spu_thread::ch_tag_stat));
		break;
	}
	case MFC_RdListStallStat:
	{
		res.value = get_rchcnt(::offset32(&spu_thread::ch_stall_stat));
		break;
	}
	case SPU_RdSigNotify1:
	{
		res.value = get_rchcnt(::offset32(&spu_thread::ch_snr1));
		break;
	}
	case SPU_RdSigNotify2:
	{
		res.value = get_rchcnt(::offset32(&spu_thread::ch_snr2));
		break;
	}
	case MFC_RdAtomicStat:
	{
		res.value = get_rchcnt(::offset32(&spu_thread::ch_atomic_stat));
		break;
	}
	case MFC_WrTagUpdate:
	{
		// Tag-update writes are always accepted immediately.
		res.value = m_ir->getInt32(1);
		break;
	}
	case MFC_Cmd:
	{
		// Free MFC queue slots = 16 - current queue size.
		res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::mfc_size));
		res.value = m_ir->CreateSub(m_ir->getInt32(16), res.value);
		break;
	}
	case SPU_RdInMbox:
	{
		// The mailbox entry count is stored in bits [10:8] of ch_in_mbox.
		const auto value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::ch_in_mbox));
		value->setAtomic(llvm::AtomicOrdering::Acquire);
		res.value = value;
		res.value = m_ir->CreateLShr(res.value, 8);
		res.value = m_ir->CreateAnd(res.value, 7);
		break;
	}
	case SPU_RdEventStat:
	{
		// Event count must be recomputed against the current mask
		// (the mask lives in the upper 32 bits of ch_events).
		const auto mask = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::ch_events)), 32), get_type<u32>());
		res.value = call("spu_get_events", &exec_get_events, m_thread, mask);
		break;
	}
	// Channels with a constant count of 1:
	case SPU_WrEventMask:
	case SPU_WrEventAck:
	case SPU_WrDec:
	case SPU_RdDec:
	case SPU_RdEventMask:
	case SPU_RdMachStat:
	case SPU_WrSRR0:
	case SPU_RdSRR0:
	case SPU_Set_Bkmk_Tag:
	case SPU_PM_Start_Ev:
	case SPU_PM_Stop_Ev:
	case MFC_RdTagMask:
	case MFC_LSA:
	case MFC_EAH:
	case MFC_EAL:
	case MFC_Size:
	case MFC_TagID:
	case MFC_WrTagMask:
	case MFC_WrListStallAck:
	{
		res.value = m_ir->getInt32(1);
		break;
	}
	default:
	{
		// Generic runtime query for anything else.
		res.value = call("spu_read_channel_count", &exec_rchcnt, m_thread, m_ir->getInt32(op.ra));
		break;
	}
	}

	set_vr(op.rt, insert(splat<u32[4]>(0), 3, res));
}
2019-06-06 20:32:35 +02:00
static void exec_wrch ( spu_thread * _spu , u32 ch , u32 value )
2018-05-02 20:49:19 +02:00
{
2022-07-06 12:11:23 +02:00
if ( ! _spu - > set_ch_value ( ch , value ) | | _spu - > state & cpu_flag : : again )
2019-06-06 20:32:35 +02:00
{
spu_runtime : : g_escape ( _spu ) ;
}
2018-05-02 20:49:19 +02:00
2022-07-06 12:11:23 +02:00
static_cast < void > ( _spu - > test_stopped ( ) ) ;
2018-07-25 15:39:03 +02:00
}
2019-01-07 07:13:17 +01:00
static void exec_list_unstall ( spu_thread * _spu , u32 tag )
{
for ( u32 i = 0 ; i < _spu - > mfc_size ; i + + )
{
if ( _spu - > mfc_queue [ i ] . tag = = ( tag | 0x80 ) )
{
_spu - > mfc_queue [ i ] . tag & = 0x7f ;
}
}
2019-06-06 20:32:35 +02:00
_spu - > do_mfc ( ) ;
2019-01-07 07:13:17 +01:00
}
2019-06-06 20:32:35 +02:00
static void exec_mfc_cmd ( spu_thread * _spu )
2018-07-25 15:39:03 +02:00
{
2022-07-06 12:11:23 +02:00
if ( ! _spu - > process_mfc_cmd ( ) | | _spu - > state & cpu_flag : : again )
2019-06-06 20:32:35 +02:00
{
spu_runtime : : g_escape ( _spu ) ;
}
2022-07-06 12:11:23 +02:00
static_cast < void > ( _spu - > test_stopped ( ) ) ;
2018-07-25 15:39:03 +02:00
}
2018-05-02 20:49:19 +02:00
void WRCH ( spu_opcode_t op ) //
{
2019-04-23 14:07:04 +02:00
const auto val = eval ( extract ( get_vr ( op . rt ) , 3 ) ) ;
2018-07-25 15:39:03 +02:00
2019-03-25 19:31:16 +01:00
if ( m_interp_magn )
{
2019-06-06 20:32:35 +02:00
call ( " spu_write_channel " , & exec_wrch , m_thread , get_imm < u32 > ( op . ra ) . value , val . value ) ;
2019-03-25 19:31:16 +01:00
return ;
}
2018-07-25 15:39:03 +02:00
switch ( op . ra )
{
case SPU_WrSRR0 :
{
2019-10-01 09:06:34 +02:00
m_ir - > CreateStore ( eval ( val & 0x3fffc ) . value , spu_ptr < u32 > ( & spu_thread : : srr0 ) ) ;
2018-07-25 15:39:03 +02:00
return ;
}
case SPU_WrOutIntrMbox :
{
// TODO
break ;
}
case SPU_WrOutMbox :
{
// TODO
break ;
}
case MFC_WrTagMask :
{
// TODO
2018-10-11 00:17:19 +02:00
m_ir - > CreateStore ( val . value , spu_ptr < u32 > ( & spu_thread : : ch_tag_mask ) ) ;
2020-06-26 18:10:28 +02:00
const auto next = llvm : : BasicBlock : : Create ( m_context , " " , m_function ) ;
const auto _mfc = llvm : : BasicBlock : : Create ( m_context , " " , m_function ) ;
2023-03-10 23:57:21 +01:00
m_ir - > CreateCondBr ( m_ir - > CreateICmpNE ( m_ir - > CreateLoad ( get_type < u32 > ( ) , spu_ptr < u32 > ( & spu_thread : : ch_tag_upd ) ) , m_ir - > getInt32 ( MFC_TAG_UPDATE_IMMEDIATE ) ) , _mfc , next ) ;
2020-06-26 18:10:28 +02:00
m_ir - > SetInsertPoint ( _mfc ) ;
2021-05-29 12:39:48 +02:00
update_pc ( ) ;
2020-06-26 18:10:28 +02:00
call ( " spu_write_channel " , & exec_wrch , m_thread , m_ir - > getInt32 ( op . ra ) , val . value ) ;
m_ir - > CreateBr ( next ) ;
m_ir - > SetInsertPoint ( next ) ;
2018-07-25 15:39:03 +02:00
return ;
}
case MFC_WrTagUpdate :
{
2020-07-02 17:57:36 +02:00
if ( true )
2018-07-25 15:39:03 +02:00
{
2023-03-10 23:57:21 +01:00
const auto tag_mask = m_ir - > CreateLoad ( get_type < u32 > ( ) , spu_ptr < u32 > ( & spu_thread : : ch_tag_mask ) ) ;
const auto mfc_fence = m_ir - > CreateLoad ( get_type < u32 > ( ) , spu_ptr < u32 > ( & spu_thread : : mfc_fence ) ) ;
2018-07-25 15:39:03 +02:00
const auto completed = m_ir - > CreateAnd ( tag_mask , m_ir - > CreateNot ( mfc_fence ) ) ;
2018-10-11 00:17:19 +02:00
const auto upd_ptr = spu_ptr < u32 > ( & spu_thread : : ch_tag_upd ) ;
const auto stat_ptr = spu_ptr < u64 > ( & spu_thread : : ch_tag_stat ) ;
2021-05-22 09:35:15 +02:00
const auto stat_val = m_ir - > CreateOr ( m_ir - > CreateZExt ( completed , get_type < u64 > ( ) ) , s64 { smin } ) ;
2018-07-25 15:39:03 +02:00
2020-07-02 17:57:36 +02:00
const auto next = llvm : : BasicBlock : : Create ( m_context , " " , m_function ) ;
const auto next0 = llvm : : BasicBlock : : Create ( m_context , " " , m_function ) ;
const auto imm = llvm : : BasicBlock : : Create ( m_context , " " , m_function ) ;
const auto any = llvm : : BasicBlock : : Create ( m_context , " " , m_function ) ;
const auto fail = llvm : : BasicBlock : : Create ( m_context , " " , m_function ) ;
const auto update = llvm : : BasicBlock : : Create ( m_context , " " , m_function ) ;
2018-07-25 15:39:03 +02:00
2020-07-02 17:57:36 +02:00
m_ir - > CreateCondBr ( m_ir - > CreateICmpEQ ( val . value , m_ir - > getInt32 ( MFC_TAG_UPDATE_IMMEDIATE ) ) , imm , next0 ) ;
m_ir - > SetInsertPoint ( imm ) ;
m_ir - > CreateStore ( val . value , upd_ptr ) ;
m_ir - > CreateStore ( stat_val , stat_ptr ) ;
m_ir - > CreateBr ( next ) ;
m_ir - > SetInsertPoint ( next0 ) ;
m_ir - > CreateCondBr ( m_ir - > CreateICmpULE ( val . value , m_ir - > getInt32 ( MFC_TAG_UPDATE_ALL ) ) , any , fail , m_md_likely ) ;
// Illegal update, access violate with special address
m_ir - > SetInsertPoint ( fail ) ;
const auto ptr = _ptr < u32 > ( m_memptr , 0xffdead04 ) ;
2023-04-08 14:21:22 +02:00
m_ir - > CreateStore ( m_ir - > getInt32 ( " TAG \0 " _u32 ) , ptr ) ;
2020-07-02 17:57:36 +02:00
m_ir - > CreateBr ( next ) ;
m_ir - > SetInsertPoint ( any ) ;
const auto cond = m_ir - > CreateSelect ( m_ir - > CreateICmpEQ ( val . value , m_ir - > getInt32 ( MFC_TAG_UPDATE_ANY ) )
, m_ir - > CreateICmpNE ( completed , m_ir - > getInt32 ( 0 ) ) , m_ir - > CreateICmpEQ ( completed , tag_mask ) ) ;
m_ir - > CreateStore ( m_ir - > CreateSelect ( cond , m_ir - > getInt32 ( MFC_TAG_UPDATE_IMMEDIATE ) , val . value ) , upd_ptr ) ;
m_ir - > CreateCondBr ( cond , update , next , m_md_likely ) ;
m_ir - > SetInsertPoint ( update ) ;
m_ir - > CreateStore ( stat_val , stat_ptr ) ;
m_ir - > CreateBr ( next ) ;
m_ir - > SetInsertPoint ( next ) ;
return ;
}
2018-07-25 15:39:03 +02:00
}
case MFC_LSA :
{
2019-03-25 19:31:16 +01:00
set_reg_fixed ( s_reg_mfc_lsa , val . value ) ;
2018-07-25 15:39:03 +02:00
return ;
}
case MFC_EAH :
{
if ( auto ci = llvm : : dyn_cast < llvm : : ConstantInt > ( val . value ) )
{
if ( ci - > getZExtValue ( ) = = 0 )
{
return ;
}
}
2020-02-01 09:36:09 +01:00
spu_log . warning ( " [0x%x] MFC_EAH: $%u is not a zero constant " , m_pos , + op . rt ) ;
2018-10-11 00:17:19 +02:00
//m_ir->CreateStore(val.value, spu_ptr<u32>(&spu_thread::ch_mfc_cmd, &spu_mfc_cmd::eah));
2018-07-25 15:39:03 +02:00
return ;
}
case MFC_EAL :
{
2019-03-25 19:31:16 +01:00
set_reg_fixed ( s_reg_mfc_eal , val . value ) ;
2018-07-25 15:39:03 +02:00
return ;
}
case MFC_Size :
{
2023-07-09 11:41:48 +02:00
set_reg_fixed ( s_reg_mfc_size , trunc < u16 > ( val ) . eval ( m_ir ) ) ;
2018-07-25 15:39:03 +02:00
return ;
}
case MFC_TagID :
{
2019-04-18 16:18:46 +02:00
set_reg_fixed ( s_reg_mfc_tag , trunc < u8 > ( val & 0x1f ) . eval ( m_ir ) ) ;
2018-07-25 15:39:03 +02:00
return ;
}
case MFC_Cmd :
{
// Prevent store elimination (TODO)
2023-07-21 17:12:57 +02:00
m_block - > store_context_ctr [ s_reg_mfc_eal ] + + ;
m_block - > store_context_ctr [ s_reg_mfc_lsa ] + + ;
m_block - > store_context_ctr [ s_reg_mfc_tag ] + + ;
m_block - > store_context_ctr [ s_reg_mfc_size ] + + ;
2018-07-25 15:39:03 +02:00
2019-04-18 16:18:46 +02:00
if ( auto ci = llvm : : dyn_cast < llvm : : ConstantInt > ( trunc < u8 > ( val ) . eval ( m_ir ) ) )
2018-07-25 15:39:03 +02:00
{
2022-09-20 09:20:24 +02:00
if ( g_cfg . core . mfc_debug )
2020-09-02 23:58:29 +02:00
{
break ;
}
2022-09-20 09:20:24 +02:00
bool must_use_cpp_functions = ! ! g_cfg . core . spu_accurate_dma ;
2022-09-01 04:23:05 +02:00
if ( u64 cmdh = ci - > getZExtValue ( ) & ~ ( MFC_BARRIER_MASK | MFC_FENCE_MASK | MFC_RESULT_MASK ) ; g_cfg . core . rsx_fifo_accuracy | | g_cfg . video . strict_rendering_mode | | ! g_use_rtm )
2020-05-09 20:00:55 +02:00
{
// TODO: don't require TSX (current implementation is TSX-only)
if ( cmdh = = MFC_PUT_CMD | | cmdh = = MFC_SNDSIG_CMD )
{
2022-09-20 09:20:24 +02:00
must_use_cpp_functions = true ;
2020-05-09 20:00:55 +02:00
}
}
2020-05-27 17:53:09 +02:00
2020-05-14 14:34:14 +02:00
const auto eal = get_reg_fixed < u32 > ( s_reg_mfc_eal ) ;
const auto lsa = get_reg_fixed < u32 > ( s_reg_mfc_lsa ) ;
const auto tag = get_reg_fixed < u8 > ( s_reg_mfc_tag ) ;
const auto size = get_reg_fixed < u16 > ( s_reg_mfc_size ) ;
const auto mask = m_ir - > CreateShl ( m_ir - > getInt32 ( 1 ) , zext < u32 > ( tag ) . eval ( m_ir ) ) ;
const auto exec = llvm : : BasicBlock : : Create ( m_context , " " , m_function ) ;
const auto fail = llvm : : BasicBlock : : Create ( m_context , " " , m_function ) ;
const auto next = llvm : : BasicBlock : : Create ( m_context , " " , m_function ) ;
const auto pf = spu_ptr < u32 > ( & spu_thread : : mfc_fence ) ;
const auto pb = spu_ptr < u32 > ( & spu_thread : : mfc_barrier ) ;
2018-07-25 15:39:03 +02:00
switch ( u64 cmd = ci - > getZExtValue ( ) )
{
2020-05-16 19:03:27 +02:00
case MFC_SDCRT_CMD :
case MFC_SDCRTST_CMD :
{
return ;
}
2018-07-25 15:39:03 +02:00
case MFC_PUTL_CMD :
case MFC_PUTLB_CMD :
case MFC_PUTLF_CMD :
case MFC_PUTRL_CMD :
case MFC_PUTRLB_CMD :
case MFC_PUTRLF_CMD :
case MFC_GETL_CMD :
case MFC_GETLB_CMD :
case MFC_GETLF_CMD :
2022-07-06 12:11:23 +02:00
{
2023-07-21 17:12:57 +02:00
ensure_gpr_stores ( ) ;
2022-07-06 12:11:23 +02:00
[[fallthrough]] ;
}
2020-05-16 19:03:27 +02:00
case MFC_SDCRZ_CMD :
2022-07-06 12:11:23 +02:00
case MFC_GETLLAR_CMD :
case MFC_PUTLLC_CMD :
case MFC_PUTLLUC_CMD :
case MFC_PUTQLLUC_CMD :
2018-07-25 15:39:03 +02:00
{
// TODO
m_ir - > CreateBr ( next ) ;
m_ir - > SetInsertPoint ( exec ) ;
m_ir - > CreateUnreachable ( ) ;
m_ir - > SetInsertPoint ( fail ) ;
m_ir - > CreateUnreachable ( ) ;
m_ir - > SetInsertPoint ( next ) ;
2018-10-11 00:17:19 +02:00
m_ir - > CreateStore ( ci , spu_ptr < u8 > ( & spu_thread : : ch_mfc_cmd , & spu_mfc_cmd : : cmd ) ) ;
2019-05-14 17:55:10 +02:00
update_pc ( ) ;
2019-05-05 15:28:41 +02:00
call ( " spu_exec_mfc_cmd " , & exec_mfc_cmd , m_thread ) ;
2018-07-25 15:39:03 +02:00
return ;
}
case MFC_SNDSIG_CMD :
case MFC_SNDSIGB_CMD :
case MFC_SNDSIGF_CMD :
case MFC_PUT_CMD :
case MFC_PUTB_CMD :
case MFC_PUTF_CMD :
case MFC_PUTR_CMD :
case MFC_PUTRB_CMD :
case MFC_PUTRF_CMD :
case MFC_GET_CMD :
case MFC_GETB_CMD :
case MFC_GETF_CMD :
{
// Try to obtain constant size
u64 csize = - 1 ;
if ( auto ci = llvm : : dyn_cast < llvm : : ConstantInt > ( size . value ) )
{
csize = ci - > getZExtValue ( ) ;
}
2019-01-15 16:31:21 +01:00
if ( cmd > = MFC_SNDSIG_CMD & & csize ! = 4 )
2018-07-25 15:39:03 +02:00
{
2019-01-15 16:31:21 +01:00
csize = - 1 ;
2018-07-25 15:39:03 +02:00
}
2023-03-10 23:57:21 +01:00
llvm : : Value * src = m_ir - > CreateGEP ( get_type < u8 > ( ) , m_lsptr , zext < u64 > ( lsa ) . eval ( m_ir ) ) ;
llvm : : Value * dst = m_ir - > CreateGEP ( get_type < u8 > ( ) , m_memptr , zext < u64 > ( eal ) . eval ( m_ir ) ) ;
2018-07-25 15:39:03 +02:00
if ( cmd & MFC_GET_CMD )
{
std : : swap ( src , dst ) ;
}
2023-03-10 23:57:21 +01:00
llvm : : Value * barrier = m_ir - > CreateLoad ( get_type < u32 > ( ) , pb ) ;
2018-07-25 15:39:03 +02:00
if ( cmd & ( MFC_BARRIER_MASK | MFC_FENCE_MASK ) )
{
2023-03-10 23:57:21 +01:00
barrier = m_ir - > CreateOr ( barrier , m_ir - > CreateLoad ( get_type < u32 > ( ) , pf ) ) ;
2018-07-25 15:39:03 +02:00
}
const auto cond = m_ir - > CreateIsNull ( m_ir - > CreateAnd ( mask , barrier ) ) ;
2019-03-13 18:57:42 +01:00
m_ir - > CreateCondBr ( cond , exec , fail , m_md_likely ) ;
2018-07-25 15:39:03 +02:00
m_ir - > SetInsertPoint ( exec ) ;
2019-03-13 18:52:34 +01:00
const auto copy = llvm : : BasicBlock : : Create ( m_context , " " , m_function ) ;
2021-02-26 10:20:25 +01:00
// Always use interpreter function for MFC debug option
2022-09-20 09:20:24 +02:00
if ( ! must_use_cpp_functions )
{
const auto mmio = llvm : : BasicBlock : : Create ( m_context , " " , m_function ) ;
m_ir - > CreateCondBr ( m_ir - > CreateICmpUGE ( eal . value , m_ir - > getInt32 ( 0xe0000000 ) ) , mmio , copy , m_md_unlikely ) ;
m_ir - > SetInsertPoint ( mmio ) ;
}
2019-03-13 18:52:34 +01:00
m_ir - > CreateStore ( ci , spu_ptr < u8 > ( & spu_thread : : ch_mfc_cmd , & spu_mfc_cmd : : cmd ) ) ;
2019-05-05 15:28:41 +02:00
call ( " spu_exec_mfc_cmd " , & exec_mfc_cmd , m_thread ) ;
2019-03-13 18:52:34 +01:00
m_ir - > CreateBr ( next ) ;
m_ir - > SetInsertPoint ( copy ) ;
2023-03-10 23:57:21 +01:00
llvm : : Type * vtype = get_type < u8 [ 16 ] > ( ) ;
2018-07-25 15:39:03 +02:00
switch ( csize )
{
case 0 :
2021-05-22 09:35:15 +02:00
case umax :
2018-07-25 15:39:03 +02:00
{
break ;
}
case 1 :
{
2023-03-10 23:57:21 +01:00
vtype = get_type < u8 > ( ) ;
2018-07-25 15:39:03 +02:00
break ;
}
case 2 :
{
2023-03-10 23:57:21 +01:00
vtype = get_type < u16 > ( ) ;
2018-07-25 15:39:03 +02:00
break ;
}
case 4 :
{
2023-03-10 23:57:21 +01:00
vtype = get_type < u32 > ( ) ;
2018-07-25 15:39:03 +02:00
break ;
}
case 8 :
{
2023-03-10 23:57:21 +01:00
vtype = get_type < u64 > ( ) ;
2018-07-25 15:39:03 +02:00
break ;
}
default :
{
if ( csize % 16 | | csize > 0x4000 )
{
2020-02-01 09:36:09 +01:00
spu_log . error ( " [0x%x] MFC_Cmd: invalid size %u " , m_pos , csize ) ;
2018-07-25 15:39:03 +02:00
}
}
}
2022-05-23 23:19:00 +02:00
// Check if the LS address is constant and 256 bit aligned
u64 clsa = umax ;
if ( auto ci = llvm : : dyn_cast < llvm : : ConstantInt > ( lsa . value ) )
{
clsa = ci - > getZExtValue ( ) ;
}
u32 stride = 16 ;
if ( m_use_avx & & csize > = 32 & & ! ( clsa % 32 ) )
{
2023-03-10 23:57:21 +01:00
vtype = get_type < u8 [ 32 ] > ( ) ;
2022-05-23 23:19:00 +02:00
stride = 32 ;
}
2018-07-25 15:39:03 +02:00
if ( csize > 0 & & csize < = 16 )
{
// Generate single copy operation
2023-04-05 13:35:06 +02:00
m_ir - > CreateStore ( m_ir - > CreateLoad ( vtype , src ) , dst ) ;
2018-07-25 15:39:03 +02:00
}
2022-05-23 23:19:00 +02:00
else if ( csize < = stride * 16 & & ! ( csize % 32 ) )
2018-07-25 15:39:03 +02:00
{
// Generate fixed sequence of copy operations
2022-05-23 23:19:00 +02:00
for ( u32 i = 0 ; i < csize ; i + = stride )
2018-07-25 15:39:03 +02:00
{
2023-03-10 23:57:21 +01:00
const auto _src = m_ir - > CreateGEP ( get_type < u8 > ( ) , src , m_ir - > getInt32 ( i ) ) ;
const auto _dst = m_ir - > CreateGEP ( get_type < u8 > ( ) , dst , m_ir - > getInt32 ( i ) ) ;
2022-05-23 23:19:00 +02:00
if ( csize - i < stride )
{
2023-04-05 13:35:06 +02:00
m_ir - > CreateStore ( m_ir - > CreateLoad ( get_type < u8 [ 16 ] > ( ) , _src ) , _dst ) ;
2022-05-23 23:19:00 +02:00
}
else
{
2023-04-05 13:35:06 +02:00
m_ir - > CreateAlignedStore ( m_ir - > CreateAlignedLoad ( vtype , _src , llvm : : MaybeAlign { 16 } ) , _dst , llvm : : MaybeAlign { 16 } ) ;
2022-05-23 23:19:00 +02:00
}
2018-07-25 15:39:03 +02:00
}
}
2020-05-09 20:00:55 +02:00
else if ( csize )
2018-07-25 15:39:03 +02:00
{
// TODO
2019-10-15 16:43:33 +02:00
auto spu_memcpy = [ ] ( u8 * dst , const u8 * src , u32 size )
{
std : : memcpy ( dst , src , size ) ;
} ;
call ( " spu_memcpy " , + spu_memcpy , dst , src , zext < u32 > ( size ) . eval ( m_ir ) ) ;
2018-07-25 15:39:03 +02:00
}
2020-10-29 03:35:09 +01:00
// Disable certain thing
m_ir - > CreateStore ( m_ir - > getInt32 ( 0 ) , spu_ptr < u32 > ( & spu_thread : : last_faddr ) ) ;
2018-07-25 15:39:03 +02:00
m_ir - > CreateBr ( next ) ;
break ;
}
case MFC_BARRIER_CMD :
case MFC_EIEIO_CMD :
case MFC_SYNC_CMD :
{
2023-03-10 23:57:21 +01:00
const auto cond = m_ir - > CreateIsNull ( m_ir - > CreateLoad ( get_type < u32 > ( ) , spu_ptr < u32 > ( & spu_thread : : mfc_size ) ) ) ;
2019-03-13 18:57:42 +01:00
m_ir - > CreateCondBr ( cond , exec , fail , m_md_likely ) ;
2018-07-25 15:39:03 +02:00
m_ir - > SetInsertPoint ( exec ) ;
m_ir - > CreateFence ( llvm : : AtomicOrdering : : SequentiallyConsistent ) ;
m_ir - > CreateBr ( next ) ;
break ;
}
default :
{
// TODO
2020-02-01 09:36:09 +01:00
spu_log . error ( " [0x%x] MFC_Cmd: unknown command (0x%x) " , m_pos , cmd ) ;
2018-07-25 15:39:03 +02:00
m_ir - > CreateBr ( next ) ;
m_ir - > SetInsertPoint ( exec ) ;
m_ir - > CreateUnreachable ( ) ;
break ;
}
}
// Fallback: enqueue the command
m_ir - > SetInsertPoint ( fail ) ;
// Get MFC slot, redirect to invalid memory address
2023-03-10 23:57:21 +01:00
const auto slot = m_ir - > CreateLoad ( get_type < u32 > ( ) , spu_ptr < u32 > ( & spu_thread : : mfc_size ) ) ;
2018-10-11 00:17:19 +02:00
const auto off0 = m_ir - > CreateAdd ( m_ir - > CreateMul ( slot , m_ir - > getInt32 ( sizeof ( spu_mfc_cmd ) ) ) , m_ir - > getInt32 ( : : offset32 ( & spu_thread : : mfc_queue ) ) ) ;
2023-03-10 23:57:21 +01:00
const auto ptr0 = m_ir - > CreateGEP ( get_type < u8 > ( ) , m_thread , m_ir - > CreateZExt ( off0 , get_type < u64 > ( ) ) ) ;
const auto ptr1 = m_ir - > CreateGEP ( get_type < u8 > ( ) , m_memptr , m_ir - > getInt64 ( 0xffdeadf0 ) ) ;
2018-07-25 15:39:03 +02:00
const auto pmfc = m_ir - > CreateSelect ( m_ir - > CreateICmpULT ( slot , m_ir - > getInt32 ( 16 ) ) , ptr0 , ptr1 ) ;
m_ir - > CreateStore ( ci , _ptr < u8 > ( pmfc , : : offset32 ( & spu_mfc_cmd : : cmd ) ) ) ;
switch ( u64 cmd = ci - > getZExtValue ( ) )
{
case MFC_GETLLAR_CMD :
case MFC_PUTLLC_CMD :
case MFC_PUTLLUC_CMD :
case MFC_PUTQLLUC_CMD :
{
break ;
}
case MFC_PUTL_CMD :
case MFC_PUTLB_CMD :
case MFC_PUTLF_CMD :
case MFC_PUTRL_CMD :
case MFC_PUTRLB_CMD :
case MFC_PUTRLF_CMD :
case MFC_GETL_CMD :
case MFC_GETLB_CMD :
case MFC_GETLF_CMD :
{
break ;
}
2020-05-16 19:03:27 +02:00
case MFC_SDCRZ_CMD :
{
break ;
}
2018-07-25 15:39:03 +02:00
case MFC_SNDSIG_CMD :
case MFC_SNDSIGB_CMD :
case MFC_SNDSIGF_CMD :
case MFC_PUT_CMD :
case MFC_PUTB_CMD :
case MFC_PUTF_CMD :
case MFC_PUTR_CMD :
case MFC_PUTRB_CMD :
case MFC_PUTRF_CMD :
case MFC_GET_CMD :
case MFC_GETB_CMD :
case MFC_GETF_CMD :
{
m_ir - > CreateStore ( tag . value , _ptr < u8 > ( pmfc , : : offset32 ( & spu_mfc_cmd : : tag ) ) ) ;
m_ir - > CreateStore ( size . value , _ptr < u16 > ( pmfc , : : offset32 ( & spu_mfc_cmd : : size ) ) ) ;
m_ir - > CreateStore ( lsa . value , _ptr < u32 > ( pmfc , : : offset32 ( & spu_mfc_cmd : : lsa ) ) ) ;
m_ir - > CreateStore ( eal . value , _ptr < u32 > ( pmfc , : : offset32 ( & spu_mfc_cmd : : eal ) ) ) ;
2023-03-10 23:57:21 +01:00
m_ir - > CreateStore ( m_ir - > CreateOr ( m_ir - > CreateLoad ( get_type < u32 > ( ) , pf ) , mask ) , pf ) ;
2019-09-21 12:41:22 +02:00
if ( cmd & MFC_BARRIER_MASK )
2023-03-10 23:57:21 +01:00
m_ir - > CreateStore ( m_ir - > CreateOr ( m_ir - > CreateLoad ( get_type < u32 > ( ) , pb ) , mask ) , pb ) ;
2018-07-25 15:39:03 +02:00
break ;
}
case MFC_BARRIER_CMD :
case MFC_EIEIO_CMD :
case MFC_SYNC_CMD :
{
m_ir - > CreateStore ( m_ir - > getInt32 ( - 1 ) , pb ) ;
2023-03-10 23:57:21 +01:00
m_ir - > CreateStore ( m_ir - > CreateOr ( m_ir - > CreateLoad ( get_type < u32 > ( ) , pf ) , mask ) , pf ) ;
2018-07-25 15:39:03 +02:00
break ;
}
default :
{
m_ir - > CreateUnreachable ( ) ;
break ;
}
}
2018-10-11 00:17:19 +02:00
m_ir - > CreateStore ( m_ir - > CreateAdd ( slot , m_ir - > getInt32 ( 1 ) ) , spu_ptr < u32 > ( & spu_thread : : mfc_size ) ) ;
2018-07-25 15:39:03 +02:00
m_ir - > CreateBr ( next ) ;
m_ir - > SetInsertPoint ( next ) ;
return ;
}
// Fallback to unoptimized WRCH implementation (TODO)
2020-02-01 09:36:09 +01:00
spu_log . warning ( " [0x%x] MFC_Cmd: $%u is not a constant " , m_pos , + op . rt ) ;
2018-07-25 15:39:03 +02:00
break ;
}
case MFC_WrListStallAck :
{
const auto mask = eval ( splat < u32 > ( 1 ) < < ( val & 0x1f ) ) ;
2018-10-11 00:17:19 +02:00
const auto _ptr = spu_ptr < u32 > ( & spu_thread : : ch_stall_mask ) ;
2023-03-10 23:57:21 +01:00
const auto _old = m_ir - > CreateLoad ( get_type < u32 > ( ) , _ptr ) ;
2018-07-25 15:39:03 +02:00
const auto _new = m_ir - > CreateAnd ( _old , m_ir - > CreateNot ( mask . value ) ) ;
m_ir - > CreateStore ( _new , _ptr ) ;
const auto next = llvm : : BasicBlock : : Create ( m_context , " " , m_function ) ;
const auto _mfc = llvm : : BasicBlock : : Create ( m_context , " " , m_function ) ;
m_ir - > CreateCondBr ( m_ir - > CreateICmpNE ( _old , _new ) , _mfc , next ) ;
m_ir - > SetInsertPoint ( _mfc ) ;
2023-07-21 17:12:57 +02:00
ensure_gpr_stores ( ) ;
2021-05-29 12:39:48 +02:00
update_pc ( ) ;
2019-05-05 15:28:41 +02:00
call ( " spu_list_unstall " , & exec_list_unstall , m_thread , eval ( val & 0x1f ) . value ) ;
2018-07-25 15:39:03 +02:00
m_ir - > CreateBr ( next ) ;
m_ir - > SetInsertPoint ( next ) ;
return ;
}
case SPU_WrDec :
{
2020-08-27 22:36:54 +02:00
call ( " spu_get_events " , & exec_get_events , m_thread , m_ir - > getInt32 ( SPU_EVENT_TM ) ) ;
2023-08-12 05:33:15 +02:00
if ( utils : : get_tsc_freq ( ) & & ! ( g_cfg . core . spu_loop_detection ) & & ( g_cfg . core . clocks_scale = = 100 ) )
{
const auto tsc = m_ir - > CreateCall ( get_intrinsic ( llvm : : Intrinsic : : x86_rdtsc ) ) ;
const auto tscx = m_ir - > CreateMul ( m_ir - > CreateUDiv ( tsc , m_ir - > getInt64 ( utils : : get_tsc_freq ( ) ) ) , m_ir - > getInt64 ( 80000000 ) ) ;
const auto tscm = m_ir - > CreateUDiv ( m_ir - > CreateMul ( m_ir - > CreateURem ( tsc , m_ir - > getInt64 ( utils : : get_tsc_freq ( ) ) ) , m_ir - > getInt64 ( 80000000 ) ) , m_ir - > getInt64 ( utils : : get_tsc_freq ( ) ) ) ;
const auto tsctb = m_ir - > CreateAdd ( tscx , tscm ) ;
m_ir - > CreateStore ( tsctb , spu_ptr < u64 > ( & spu_thread : : ch_dec_start_timestamp ) ) ;
}
else
{
m_ir - > CreateStore ( call ( " get_timebased_time " , & get_timebased_time ) , spu_ptr < u64 > ( & spu_thread : : ch_dec_start_timestamp ) ) ;
}
2018-10-11 00:17:19 +02:00
m_ir - > CreateStore ( val . value , spu_ptr < u32 > ( & spu_thread : : ch_dec_value ) ) ;
2022-05-13 13:50:21 +02:00
m_ir - > CreateStore ( m_ir - > getInt8 ( 0 ) , spu_ptr < u8 > ( & spu_thread : : is_dec_frozen ) ) ;
2018-07-25 15:39:03 +02:00
return ;
}
2020-09-19 18:40:47 +02:00
case SPU_Set_Bkmk_Tag :
case SPU_PM_Start_Ev :
case SPU_PM_Stop_Ev :
2018-07-25 15:39:03 +02:00
{
return ;
}
2021-04-09 21:12:47 +02:00
default : break ;
2018-07-25 15:39:03 +02:00
}
2018-05-02 20:49:19 +02:00
update_pc ( ) ;
2023-07-21 17:12:57 +02:00
ensure_gpr_stores ( ) ;
2019-06-06 20:32:35 +02:00
call ( " spu_write_channel " , & exec_wrch , m_thread , m_ir - > getInt32 ( op . ra ) , val . value ) ;
2018-05-02 20:49:19 +02:00
}
2021-03-05 20:05:37 +01:00
void LNOP(spu_opcode_t) //
{
	// No-operation (load/store pipeline); nothing to emit.
}
2021-03-05 20:05:37 +01:00
void NOP(spu_opcode_t) //
{
	// No-operation (execute pipeline); nothing to emit.
}
2021-03-05 20:05:37 +01:00
void SYNC(spu_opcode_t) //
{
	// This instruction must be used following a store instruction that modifies the instruction stream.
	m_ir->CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);

	if (g_cfg.core.spu_block_size == spu_block_size_type::safe && !m_interp_magn)
	{
		// Safe block mode: end the chunk here and re-enter through the
		// dispatcher so freshly written code is picked up.
		m_block->block_end = m_ir->GetInsertBlock();
		update_pc(m_pos + 4);
		tail_chunk(m_dispatch);
	}
}
2021-03-05 20:05:37 +01:00
void DSYNC(spu_opcode_t) //
{
	// Forces all earlier load, store, and channel instructions to complete before proceeding.
	m_ir->CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
}
void MFSPR(spu_opcode_t op) //
{
	// Check SPUInterpreter for notes: SPRs read as zero here.
	set_vr(op.rt, splat<u32[4]>(0));
}
2021-03-05 20:05:37 +01:00
void MTSPR(spu_opcode_t) //
{
	// Check SPUInterpreter for notes: SPR writes are discarded.
}
2019-11-23 17:31:42 +01:00
template < typename TA , typename TB >
2021-05-13 22:14:27 +02:00
auto mpyh ( TA & & a , TB & & b )
2019-11-23 17:31:42 +01:00
{
2021-05-13 22:14:27 +02:00
return bitcast < u32 [ 4 ] > ( bitcast < u16 [ 8 ] > ( ( std : : forward < TA > ( a ) > > 16 ) ) * bitcast < u16 [ 8 ] > ( std : : forward < TB > ( b ) ) ) < < 16 ;
2019-11-23 17:31:42 +01:00
}
template <typename T1, typename T2>
auto mpyu(T1&& a, T2&& b)
{
	// Unsigned halfword multiply pattern: clear the upper 16 bits of both
	// operands (via shift-up/shift-down) and multiply.
	return (std::forward<T1>(a) << 16 >> 16) * (std::forward<T2>(b) << 16 >> 16);
}
2019-03-25 19:31:16 +01:00
void SF(spu_opcode_t op)
{
	// "Subtract from": note the reversed operand order (rt = rb - ra).
	const auto va = get_vr(op.ra);
	const auto vb = get_vr(op.rb);
	set_vr(op.rt, vb - va);
}
2019-03-25 19:31:16 +01:00
void OR(spu_opcode_t op)
{
	// Bitwise OR of the two source registers.
	const auto va = get_vr(op.ra);
	const auto vb = get_vr(op.rb);
	set_vr(op.rt, va | vb);
}
void BG(spu_opcode_t op)
{
	// Borrow generate: per 32-bit element, rt = (ra <= rb) ? 1 : 0.
	const auto [va, vb] = get_vrs<u32[4]>(op.ra, op.rb);
	set_vr(op.rt, zext<u32[4]>(va <= vb));
}
2019-03-25 19:31:16 +01:00
void SFH(spu_opcode_t op)
{
	// Halfword "subtract from": rt = rb - ra per 16-bit element.
	const auto va = get_vr<u16[8]>(op.ra);
	const auto vb = get_vr<u16[8]>(op.rb);
	set_vr(op.rt, vb - va);
}
2019-03-25 19:31:16 +01:00
void NOR(spu_opcode_t op)
{
	// Bitwise NOR: complement of the OR of both sources.
	const auto va = get_vr(op.ra);
	const auto vb = get_vr(op.rb);
	set_vr(op.rt, ~(va | vb));
}
void ABSDB(spu_opcode_t op)
{
	// Absolute difference of bytes.
	const auto [va, vb] = get_vrs<u8[16]>(op.ra, op.rb);
	set_vr(op.rt, absd(va, vb));
}
void ROT(spu_opcode_t op)
{
	// Rotate left per 32-bit element.
	const auto [va, vb] = get_vrs<u32[4]>(op.ra, op.rb);
	set_vr(op.rt, rol(va, vb));
}
void ROTM(spu_opcode_t op)
{
	// Logical shift right by (0 - rb) per 32-bit element.
	const auto [va, vb] = get_vrs<u32[4]>(op.ra, op.rb);

	auto shift = eval(-vb);

	// If rb is itself a negation, reuse the pre-negated value directly.
	if (auto [ok, inner] = match_expr(vb, -match<u32[4]>()); ok)
	{
		shift = eval(inner);
	}

	// When bit 5 of the count is provably zero, a plain masked shift suffices.
	if (auto kb = get_known_bits(shift); !!(kb.Zero & 32))
	{
		set_vr(op.rt, va >> (shift & 31));
		return;
	}

	// Otherwise use the helper producing 0 for counts of 32..63.
	set_vr(op.rt, inf_lshr(va, shift & 63));
}
void ROTMA(spu_opcode_t op)
{
	// Arithmetic shift right by (0 - rb) per 32-bit element.
	const auto [va, vb] = get_vrs<s32[4]>(op.ra, op.rb);

	auto shift = eval(-vb);

	// If rb is itself a negation, reuse the pre-negated value directly.
	if (auto [ok, inner] = match_expr(vb, -match<s32[4]>()); ok)
	{
		shift = eval(inner);
	}

	// When bit 5 of the count is provably zero, a plain masked shift suffices.
	if (auto kb = get_known_bits(shift); !!(kb.Zero & 32))
	{
		set_vr(op.rt, va >> (shift & 31));
		return;
	}

	// Otherwise saturate: counts of 32..63 replicate the sign bit.
	set_vr(op.rt, inf_ashr(va, shift & 63));
}
void SHL(spu_opcode_t op)
{
	// Shift left per 32-bit element.
	const auto [va, vb] = get_vrs<u32[4]>(op.ra, op.rb);

	// When bit 5 of the count is provably zero, a plain masked shift suffices.
	if (auto kb = get_known_bits(vb); !!(kb.Zero & 32))
	{
		set_vr(op.rt, va << (vb & 31));
		return;
	}

	// Otherwise use the helper producing 0 for counts of 32..63.
	set_vr(op.rt, inf_shl(va, vb & 63));
}
void ROTH(spu_opcode_t op)
{
	// Rotate left per 16-bit element.
	const auto [va, vb] = get_vrs<u16[8]>(op.ra, op.rb);
	set_vr(op.rt, rol(va, vb));
}
void ROTHM(spu_opcode_t op)
{
	// Logical shift right by (0 - rb) per 16-bit element.
	const auto [va, vb] = get_vrs<u16[8]>(op.ra, op.rb);

	auto shift = eval(-vb);

	// If rb is itself a negation, reuse the pre-negated value directly.
	if (auto [ok, inner] = match_expr(vb, -match<u16[8]>()); ok)
	{
		shift = eval(inner);
	}

	// When bit 4 of the count is provably zero, a plain masked shift suffices.
	if (auto kb = get_known_bits(shift); !!(kb.Zero & 16))
	{
		set_vr(op.rt, va >> (shift & 15));
		return;
	}

	// Otherwise use the helper producing 0 for counts of 16..31.
	set_vr(op.rt, inf_lshr(va, shift & 31));
}
void ROTMAH(spu_opcode_t op)
{
	// Arithmetic shift right by (0 - rb) per 16-bit element.
	const auto [va, vb] = get_vrs<s16[8]>(op.ra, op.rb);

	auto shift = eval(-vb);

	// If rb is itself a negation, reuse the pre-negated value directly.
	if (auto [ok, inner] = match_expr(vb, -match<s16[8]>()); ok)
	{
		shift = eval(inner);
	}

	// When bit 4 of the count is provably zero, a plain masked shift suffices.
	if (auto kb = get_known_bits(shift); !!(kb.Zero & 16))
	{
		set_vr(op.rt, va >> (shift & 15));
		return;
	}

	// Otherwise saturate: counts of 16..31 replicate the sign bit.
	set_vr(op.rt, inf_ashr(va, shift & 31));
}
void SHLH(spu_opcode_t op)
{
	// Shift left per 16-bit element.
	const auto [va, vb] = get_vrs<u16[8]>(op.ra, op.rb);

	// When bit 4 of the count is provably zero, a plain masked shift suffices.
	if (auto kb = get_known_bits(vb); !!(kb.Zero & 16))
	{
		set_vr(op.rt, va << (vb & 15));
		return;
	}

	// Otherwise use the helper producing 0 for counts of 16..31.
	set_vr(op.rt, inf_shl(va, vb & 31));
}
void ROTI(spu_opcode_t op)
{
	// Rotate left per 32-bit element by an immediate count.
	const auto va = get_vr<u32[4]>(op.ra);
	const auto imm = get_imm<u32[4]>(op.i7, false);
	set_vr(op.rt, rol(va, imm));
}
void ROTMI(spu_opcode_t op)
{
	// Logical shift right per 32-bit element by the negated immediate.
	const auto va = get_vr<u32[4]>(op.ra);
	const auto imm = get_imm<u32[4]>(op.i7, false);
	set_vr(op.rt, inf_lshr(va, -imm & 63));
}
void ROTMAI(spu_opcode_t op)
{
	// Arithmetic shift right per 32-bit element by the negated immediate.
	const auto va = get_vr<s32[4]>(op.ra);
	const auto imm = get_imm<s32[4]>(op.i7, false);
	set_vr(op.rt, inf_ashr(va, -imm & 63));
}
void SHLI(spu_opcode_t op)
{
	// Shift left per 32-bit element by an immediate count.
	const auto va = get_vr<u32[4]>(op.ra);
	const auto imm = get_imm<u32[4]>(op.i7, false);
	set_vr(op.rt, inf_shl(va, imm & 63));
}
void ROTHI(spu_opcode_t op)
{
	// Rotate left per 16-bit element by an immediate count.
	const auto va = get_vr<u16[8]>(op.ra);
	const auto imm = get_imm<u16[8]>(op.i7, false);
	set_vr(op.rt, rol(va, imm));
}
void ROTHMI(spu_opcode_t op)
{
	// Logical shift right per 16-bit element by the negated immediate.
	const auto va = get_vr<u16[8]>(op.ra);
	const auto imm = get_imm<u16[8]>(op.i7, false);
	set_vr(op.rt, inf_lshr(va, -imm & 31));
}
void ROTMAHI(spu_opcode_t op)
{
	// Arithmetic shift right per 16-bit element by the negated immediate.
	const auto va = get_vr<s16[8]>(op.ra);
	const auto imm = get_imm<s16[8]>(op.i7, false);
	set_vr(op.rt, inf_ashr(va, -imm & 31));
}
void SHLHI(spu_opcode_t op)
{
	// Shift left per 16-bit element by an immediate count.
	const auto va = get_vr<u16[8]>(op.ra);
	const auto imm = get_imm<u16[8]>(op.i7, false);
	set_vr(op.rt, inf_shl(va, imm & 31));
}
2019-03-25 19:31:16 +01:00
void A(spu_opcode_t op)
{
	// Recognize the mpyh(a,b) + mpyh(b,a) + mpyu(a,b) idiom and fuse it into
	// a single 32-bit multiplication.
	if (auto [va, vb] = match_vrs<u32[4]>(op.ra, op.rb); va && vb)
	{
		static const auto MP = match<u32[4]>();

		if (auto [ok, a0, b0, b1, a1] = match_expr(va, mpyh(MP, MP) + mpyh(MP, MP)); ok)
		{
			if (auto [ok2, a2, b2] = match_expr(vb, mpyu(MP, MP)); ok2 && a2.eq(a0, a1) && b2.eq(b0, b1))
			{
				// 32-bit multiplication
				spu_log.notice("mpy32 in %s at 0x%05x", m_hash, m_pos);
				set_vr(op.rt, a0 * b0);
				return;
			}
		}
	}

	// Plain element-wise addition otherwise.
	set_vr(op.rt, get_vr(op.ra) + get_vr(op.rb));
}
2019-03-25 19:31:16 +01:00
void AND(spu_opcode_t op)
{
	// If either operand already has a known element type, perform the AND in
	// that type to avoid redundant bitcasts.
	if (match_vr<u8[16], u16[8], u64[2]>(op.ra, [&](auto va, auto /*MP1*/)
	{
		if (auto vb = match_vr_as(va, op.rb))
		{
			set_vr(op.rt, va & vb);
			return true;
		}

		return match_vr<u8[16], u16[8], u64[2]>(op.rb, [&](auto /*vb*/, auto /*MP2*/)
		{
			set_vr(op.rt, va & get_vr_as(va, op.rb));
			return true;
		});
	}))
	{
		return;
	}

	// Generic bitwise AND.
	set_vr(op.rt, get_vr(op.ra) & get_vr(op.rb));
}
void CG(spu_opcode_t op)
{
	// Carry generate: per 32-bit element, 1 when (a + b) wraps around.
	const auto [va, vb] = get_vrs<u32[4]>(op.ra, op.rb);
	set_vr(op.rt, zext<u32[4]>(va + vb < va));
}
2019-03-25 19:31:16 +01:00
void AH(spu_opcode_t op)
{
	// Halfword addition.
	const auto va = get_vr<u16[8]>(op.ra);
	const auto vb = get_vr<u16[8]>(op.rb);
	set_vr(op.rt, va + vb);
}
2019-03-25 19:31:16 +01:00
void NAND(spu_opcode_t op)
{
	// Bitwise NAND: complement of the AND of both sources.
	const auto va = get_vr(op.ra);
	const auto vb = get_vr(op.rb);
	set_vr(op.rt, ~(va & vb));
}
void AVGB(spu_opcode_t op)
{
	// Rounded average of bytes.
	const auto va = get_vr<u8[16]>(op.ra);
	const auto vb = get_vr<u8[16]>(op.rb);
	set_vr(op.rt, avg(va, vb));
}
void GB(spu_opcode_t op)
{
	// GFNI trick to extract a selected bit from bytes: with the first operand
	// of gf2p8affineqb treated as a constant matrix containing a single set
	// bit, the instruction extracts that bit from each byte of the second
	// operand.
	if (m_use_gfni)
	{
		const auto va = get_vr<u8[16]>(op.ra);
		const auto packed = zshuffle(va, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 12, 8, 4, 0);
		set_vr(op.rt, gf2p8affineqb(build<u8[16]>(0x0, 0x0, 0x0, 0x0, 0x01, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x01, 0x0, 0x0, 0x0), packed, 0x0));
		return;
	}

	// Generic path: gather bit 0 of each word into a 4-bit mask, placed in
	// the preferred (last) element of rt.
	const auto va = get_vr<s32[4]>(op.ra);
	const auto bits = zext<u32>(bitcast<i4>(trunc<bool[4]>(va)));
	set_vr(op.rt, insert(splat<u32[4]>(0), 3, eval(bits)));
}
void GBH(spu_opcode_t op)
{
	// See GB: GFNI bit-extraction trick, operating on the low byte of each
	// halfword.
	if (m_use_gfni)
	{
		const auto va = get_vr<u8[16]>(op.ra);
		const auto packed = zshuffle(va, 16, 16, 16, 16, 16, 16, 16, 16, 14, 12, 10, 8, 6, 4, 2, 0);
		set_vr(op.rt, gf2p8affineqb(build<u8[16]>(0x0, 0x0, 0x0, 0x0, 0x01, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x01, 0x0, 0x0, 0x0), packed, 0x0));
		return;
	}

	// Generic path: gather bit 0 of each halfword into an 8-bit mask.
	const auto va = get_vr<s16[8]>(op.ra);
	const auto bits = zext<u32>(bitcast<u8>(trunc<bool[8]>(va)));
	set_vr(op.rt, insert(splat<u32[4]>(0), 3, eval(bits)));
}
void GBB(spu_opcode_t op)
{
	const auto va = get_vr<u8[16]>(op.ra);

	// See GB: GFNI bit-extraction trick, applied to every byte.
	if (m_use_gfni)
	{
		const auto packed = zshuffle(va, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
		const auto bits = gf2p8affineqb(build<u8[16]>(0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x01, 0x01, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0), packed, 0x0);
		set_vr(op.rt, zshuffle(bits, 16, 16, 16, 16, 16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10));
		return;
	}

	// Generic path: gather bit 0 of each byte into a 16-bit mask.
	const auto bits = zext<u32>(bitcast<u16>(trunc<bool[16]>(va)));
	set_vr(op.rt, insert(splat<u32[4]>(0), 3, eval(bits)));
}
void FSM(spu_opcode_t op)
{
	// FSM directly following a comparison: the source is already a sign-
	// extended bool vector, so broadcasting the preferred element suffices.
	if (match_vr<s8[16], s16[8], s32[4], s64[2]>(op.ra, [&](auto c, auto MP)
	{
		using VT = typename decltype(MP)::type;

		if (auto [ok, x] = match_expr(c, sext<VT>(match<bool[std::extent_v<VT>]>())); ok)
		{
			set_vr(op.rt, (splat_scalar(c)));
			return true;
		}

		return false;
	}))
	{
		return;
	}

	// Generic path: expand the low 4 bits of the preferred word to masks.
	const auto word = extract(get_vr(op.ra), 3);
	const auto flags = bitcast<bool[4]>(trunc<i4>(word));
	set_vr(op.rt, sext<s32[4]>(flags));
}
void FSMH(spu_opcode_t op)
{
	// Expand the low 8 bits of the preferred word to halfword masks.
	const auto word = extract(get_vr(op.ra), 3);
	const auto flags = bitcast<bool[8]>(trunc<u8>(word));
	set_vr(op.rt, sext<s16[8]>(flags));
}
void FSMB(spu_opcode_t op)
{
	// Expand the low 16 bits of the preferred word to byte masks.
	const auto word = extract(get_vr(op.ra), 3);
	const auto flags = bitcast<bool[16]>(trunc<u16>(word));
	set_vr(op.rt, sext<s8[16]>(flags));
}
2020-07-13 00:21:23 +02:00
template <typename TA>
static auto byteswap(TA&& a)
{
	// Reverse all 16 bytes of the vector (full endian swap).
	return zshuffle(std::forward<TA>(a), 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
}
2018-05-02 20:49:19 +02:00
void ROTQBYBI(spu_opcode_t op)
{
	const auto va = get_vr<u8[16]>(op.ra);

	// Data with swapped endian coming straight from a load instruction:
	// fold the byteswap into the shuffle indices.
	if (auto [ok, swapped] = match_expr(va, byteswap(match<u8[16]>())); ok)
	{
		const auto base = build<u8[16]>(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
		const auto indices = base + (splat_scalar(get_vr<u8[16]>(op.rb)) >> 3);

		if (m_use_avx512_icl)
		{
			set_vr(op.rt, vpermb(swapped, indices));
			return;
		}

		set_vr(op.rt, pshufb(swapped, (indices & 0xf)));
		return;
	}

	const auto base = build<u8[16]>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
	const auto indices = base - (splat_scalar(get_vr<u8[16]>(op.rb)) >> 3);

	if (m_use_avx512_icl)
	{
		set_vr(op.rt, vpermb(va, indices));
		return;
	}

	set_vr(op.rt, pshufb(va, (indices & 0xf)));
}
void ROTQMBYBI(spu_opcode_t op)
{
	const auto va = get_vr<u8[16]>(op.ra);
	const auto vb = get_vr<s32[4]>(op.rb);

	// Byte shift count is -(b >> 3); fold the common "const - x" pattern
	// (with const == 7 per element) to avoid the extra negation.
	auto neg = eval(-(vb >> 3));

	if (auto [ok, v0, v1] = match_expr(vb, match<s32[4]>() - match<s32[4]>()); ok)
	{
		if (auto [ok1, data] = get_const_vector(v0.value, m_pos); ok1)
		{
			if (data == v128::from32p(7))
			{
				neg = eval(v1 >> 3);
			}
		}
	}

	const auto negx = eval(bitcast<u8[16]>(neg) & 0x1f);

	// Data with swapped endian from a load instruction.
	if (auto [ok, swapped] = match_expr(va, byteswap(match<u8[16]>())); ok)
	{
		const auto base = build<u8[16]>(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
		const auto indices = base - splat_scalar(negx);
		set_vr(op.rt, pshufb(swapped, indices));
		return;
	}

	const auto base = build<u8[16]>(112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127);
	const auto indices = base + splat_scalar(negx);
	set_vr(op.rt, pshufb(va, indices));
}
void SHLQBYBI(spu_opcode_t op)
{
	const auto va = get_vr<u8[16]>(op.ra);
	const auto vb = get_vr<u8[16]>(op.rb);

	// Data with swapped endian from a load instruction.
	if (auto [ok, swapped] = match_expr(va, byteswap(match<u8[16]>())); ok)
	{
		const auto base = build<u8[16]>(127, 126, 125, 124, 123, 122, 121, 120, 119, 118, 117, 116, 115, 114, 113, 112);
		const auto indices = base + (splat_scalar(vb) >> 3);
		set_vr(op.rt, pshufb(swapped, indices));
		return;
	}

	const auto base = build<u8[16]>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
	const auto indices = base - (splat_scalar(vb) >> 3);
	set_vr(op.rt, pshufb(va, indices));
}
2019-04-25 19:18:27 +02:00
template <typename RT, typename T>
auto spu_get_insertion_shuffle_mask(T&& index)
{
	// Identity big-endian shuffle mask (0x1f..0x10) with the insertion
	// marker (0x00010203.. byte pattern) placed at the requested element.
	const auto base = bitcast<RT>(build<u8[16]>(0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10));
	using e_type = std::remove_extent_t<RT>;
	const auto marker = splat<e_type>(static_cast<e_type>(sizeof(e_type) == 8 ? 0x01020304050607ull : 0x010203ull));
	return insert(base, std::forward<T>(index), marker);
}
2018-05-02 20:49:19 +02:00
// Generate Controls for Byte Insertion (register form): mask selects byte (ra + rb) & 0xf.
void CBX(spu_opcode_t op)
{
	if (m_finfo && m_finfo->fn && op.ra == s_reg_sp)
	{
		// Optimization with aligned stack assumption. Strange because SPU code could use CBD instead, but encountered in wild.
		set_vr(op.rt, spu_get_insertion_shuffle_mask<u8[16]>(~get_scalar(get_vr(op.rb)) & 0xf));
		return;
	}

	// ~s & 0xf converts the big-endian byte address into the little-endian element index
	const auto s = get_scalar(get_vr(op.ra)) + get_scalar(get_vr(op.rb));
	set_vr(op.rt, spu_get_insertion_shuffle_mask<u8[16]>(~s & 0xf));
}
// Generate Controls for Halfword Insertion (register form).
void CHX(spu_opcode_t op)
{
	if (m_finfo && m_finfo->fn && op.ra == s_reg_sp)
	{
		// See CBX.
		set_vr(op.rt, spu_get_insertion_shuffle_mask<u16[8]>(~get_scalar(get_vr(op.rb)) >> 1 & 0x7));
		return;
	}

	const auto s = get_scalar(get_vr(op.ra)) + get_scalar(get_vr(op.rb));
	set_vr(op.rt, spu_get_insertion_shuffle_mask<u16[8]>(~s >> 1 & 0x7));
}
// Generate Controls for Word Insertion (register form).
void CWX(spu_opcode_t op)
{
	if (m_finfo && m_finfo->fn && op.ra == s_reg_sp)
	{
		// See CBX.
		set_vr(op.rt, spu_get_insertion_shuffle_mask<u32[4]>(~get_scalar(get_vr(op.rb)) >> 2 & 0x3));
		return;
	}

	const auto s = get_scalar(get_vr(op.ra)) + get_scalar(get_vr(op.rb));
	set_vr(op.rt, spu_get_insertion_shuffle_mask<u32[4]>(~s >> 2 & 0x3));
}
// Generate Controls for Doubleword Insertion (register form).
void CDX(spu_opcode_t op)
{
	if (m_finfo && m_finfo->fn && op.ra == s_reg_sp)
	{
		// See CBX.
		set_vr(op.rt, spu_get_insertion_shuffle_mask<u64[2]>(~get_scalar(get_vr(op.rb)) >> 3 & 0x1));
		return;
	}

	const auto s = get_scalar(get_vr(op.ra)) + get_scalar(get_vr(op.rb));
	set_vr(op.rt, spu_get_insertion_shuffle_mask<u64[2]>(~s >> 3 & 0x1));
}
// Rotate Quadword by Bits: rotate left by 0..7 bits (count from rb).
void ROTQBI(spu_opcode_t op)
{
	const auto a = get_vr(op.ra);
	const auto b = splat_scalar(get_vr(op.rb) & 0x7);
	// Funnel shift: the word-rotated copy of a supplies the bits that wrap around
	set_vr(op.rt, fshl(a, zshuffle(a, 3, 0, 1, 2), b));
}
// Rotate and Mask Quadword by Bits: logical right shift by 0..7 bits; rb holds the negated count.
void ROTQMBI(spu_opcode_t op)
{
	const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);
	auto minusb = eval(-b);

	// If rb was itself produced by a negation, reuse the original value instead of negating twice
	if (auto [ok, x] = match_expr(b, -match<u32[4]>()); ok)
	{
		minusb = eval(x);
	}

	const auto bx = splat_scalar(minusb) & 0x7;
	// Funnel shift right; the shifted-up copy (with a zero lane shuffled in) provides zero fill
	set_vr(op.rt, fshr(zshuffle(a, 1, 2, 3, 4), a, bx));
}
// Shift Left Quadword by Bits: shift left by 0..7 bits, zero fill (count from rb).
void SHLQBI(spu_opcode_t op)
{
	const auto a = get_vr(op.ra);
	const auto b = splat_scalar(get_vr(op.rb) & 0x7);
	// Same as ROTQBI but a zero lane (index 4 in zshuffle) is shifted in instead of wrapping
	set_vr(op.rt, fshl(a, zshuffle(a, 4, 0, 1, 2), b));
}
2022-09-04 21:10:04 +02:00
#if defined(ARCH_X64)
	// Interpreter-style fallback for ROTQBY on x86 without SSSE3 (no pshufb available):
	// duplicate the value and load 16 bytes at the rotated offset.
	static __m128i exec_rotqby(__m128i a, u8 b)
	{
		alignas(32) const __m128i buf[2]{a, a};
		return _mm_loadu_si128(reinterpret_cast<const __m128i*>(reinterpret_cast<const u8*>(buf) + (16 - (b & 0xf))));
	}
#elif defined(ARCH_ARM64)
	// No helper needed: ROTQBY only calls exec_rotqby on x86 when SSSE3 is unavailable
#else
#error "Unimplemented"
#endif
2020-04-04 14:34:13 +02:00
2018-05-02 20:49:19 +02:00
// Rotate Quadword by Bytes: rotate left by 0..15 bytes (count from rb's preferred slot).
void ROTQBY(spu_opcode_t op)
{
	const auto a = get_vr<u8[16]>(op.ra);
	const auto b = get_vr<u8[16]>(op.rb);

#if defined(ARCH_X64)
	if (!m_use_ssse3)
	{
		// No pshufb available: call the native helper (byte 12 = preferred-slot low byte)
		value_t<u8[16]> r;
		r.value = call<u8[16]>("spu_rotqby", &exec_rotqby, a.value, eval(extract(b, 12)).value);
		set_vr(op.rt, r);
		return;
	}
#endif

	// Data with swapped endian from a load instruction: shuffle the pre-swap source directly
	if (auto [ok, as] = match_expr(a, byteswap(match<u8[16]>())); ok)
	{
		const auto sc = build<u8[16]>(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
		const auto sh = eval(sc + splat_scalar(b));

		// vpermb indexes mod table size, so no explicit & 0xf is needed; pshufb needs the mask
		// to avoid unintended zero-fill when indices exceed 15
		if (m_use_avx512_icl)
		{
			set_vr(op.rt, vpermb(as, sh));
			return;
		}

		set_vr(op.rt, pshufb(as, (sh & 0xf)));
		return;
	}

	const auto sc = build<u8[16]>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
	const auto sh = eval(sc - splat_scalar(b));

	if (m_use_avx512_icl)
	{
		set_vr(op.rt, vpermb(a, sh));
		return;
	}

	set_vr(op.rt, pshufb(a, (sh & 0xf)));
}
// Rotate and Mask Quadword by Bytes: logical right shift by bytes; rb holds the negated count.
void ROTQMBY(spu_opcode_t op)
{
	const auto a = get_vr<u8[16]>(op.ra);
	const auto b = get_vr<u32[4]>(op.rb);
	auto minusb = eval(-b);

	// If rb was itself produced by a negation, reuse the original value instead of negating twice
	if (auto [ok, x] = match_expr(b, -match<u32[4]>()); ok)
	{
		minusb = eval(x);
	}

	const auto minusbx = bitcast<u8[16]>(minusb);

	// Data with swapped endian from a load instruction: shuffle the pre-swap source directly
	if (auto [ok, as] = match_expr(a, byteswap(match<u8[16]>())); ok)
	{
		const auto sc = build<u8[16]>(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
		const auto sh = sc - (splat_scalar(minusbx) & 0x1f);
		set_vr(op.rt, pshufb(as, sh));
		return;
	}

	// 112..127 base: adding the shift pushes discarded positions past 127, setting the index MSB
	// so pshufb zero-fills them
	const auto sc = build<u8[16]>(112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127);
	const auto sh = sc + (splat_scalar(minusbx) & 0x1f);
	set_vr(op.rt, pshufb(a, sh));
}
// Shift Left Quadword by Bytes: shift left by 0..31 bytes (count from rb), zero fill.
void SHLQBY(spu_opcode_t op)
{
	const auto a = get_vr<u8[16]>(op.ra);
	const auto b = get_vr<u8[16]>(op.rb);

	// Data with swapped endian from a load instruction: shuffle the pre-swap source directly
	if (auto [ok, as] = match_expr(a, byteswap(match<u8[16]>())); ok)
	{
		const auto sc = build<u8[16]>(127, 126, 125, 124, 123, 122, 121, 120, 119, 118, 117, 116, 115, 114, 113, 112);
		const auto sh = sc + (splat_scalar(b) & 0x1f);
		set_vr(op.rt, pshufb(as, sh));
		return;
	}

	// Identity indices minus the shift; underflowed indices get the MSB set -> pshufb zero fill
	const auto sc = build<u8[16]>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
	const auto sh = sc - (splat_scalar(b) & 0x1f);
	set_vr(op.rt, pshufb(a, sh));
}
2021-10-26 10:56:47 +02:00
// Deferred intrinsic wrapper for ORX; lowered by the handler registered in ORX().
template <typename T>
static llvm_calli<u32[4], T> orx(T&& a)
{
	return {"spu_orx", {std::forward<T>(a)}};
}
2018-05-02 20:49:19 +02:00
// OR across Word elements: OR of all four words of ra placed in the preferred slot, rest zeroed.
void ORX(spu_opcode_t op)
{
	register_intrinsic("spu_orx", [&](llvm::CallInst* ci)
	{
		const auto a = value<u32[4]>(ci->getOperand(0));
		// Log2 reduction: OR pairwise-swapped halves twice, then keep only one element
		const auto x = zshuffle(a, 2, 3, 0, 1) | a;
		const auto y = zshuffle(x, 1, 0, 3, 2) | x;
		// Index 4 selects zero; the reduced value lands in the preferred-slot element
		return zshuffle(y, 4, 4, 4, 3);
	});

	set_vr(op.rt, orx(get_vr(op.ra)));
}
// Generate Controls for Byte Insertion (immediate form).
void CBD(spu_opcode_t op)
{
	if (m_finfo && m_finfo->fn && op.ra == s_reg_sp)
	{
		// Known constant with aligned stack assumption (optimization).
		set_vr(op.rt, spu_get_insertion_shuffle_mask<u8[16]>(~get_imm<u32>(op.i7) & 0xf));
		return;
	}

	const auto a = get_scalar(get_vr(op.ra)) + get_imm<u32>(op.i7);
	set_vr(op.rt, spu_get_insertion_shuffle_mask<u8[16]>(~a & 0xf));
}
// Generate Controls for Halfword Insertion (immediate form).
void CHD(spu_opcode_t op)
{
	if (m_finfo && m_finfo->fn && op.ra == s_reg_sp)
	{
		// See CBD.
		set_vr(op.rt, spu_get_insertion_shuffle_mask<u16[8]>(~get_imm<u32>(op.i7) >> 1 & 0x7));
		return;
	}

	const auto a = get_scalar(get_vr(op.ra)) + get_imm<u32>(op.i7);
	set_vr(op.rt, spu_get_insertion_shuffle_mask<u16[8]>(~a >> 1 & 0x7));
}
// Generate Controls for Word Insertion (immediate form).
void CWD(spu_opcode_t op)
{
	if (m_finfo && m_finfo->fn && op.ra == s_reg_sp)
	{
		// See CBD.
		set_vr(op.rt, spu_get_insertion_shuffle_mask<u32[4]>(~get_imm<u32>(op.i7) >> 2 & 0x3));
		return;
	}

	const auto a = get_scalar(get_vr(op.ra)) + get_imm<u32>(op.i7);
	set_vr(op.rt, spu_get_insertion_shuffle_mask<u32[4]>(~a >> 2 & 0x3));
}
// Generate Controls for Doubleword Insertion (immediate form).
void CDD(spu_opcode_t op)
{
	if (m_finfo && m_finfo->fn && op.ra == s_reg_sp)
	{
		// See CBD.
		set_vr(op.rt, spu_get_insertion_shuffle_mask<u64[2]>(~get_imm<u32>(op.i7) >> 3 & 0x1));
		return;
	}

	const auto a = get_scalar(get_vr(op.ra)) + get_imm<u32>(op.i7);
	set_vr(op.rt, spu_get_insertion_shuffle_mask<u64[2]>(~a >> 3 & 0x1));
}
// Rotate Quadword by Bits Immediate: rotate left by 0..7 bits.
void ROTQBII(spu_opcode_t op)
{
	const auto a = get_vr(op.ra);
	const auto b = eval(get_imm(op.i7, false) & 0x7);
	// Funnel shift: the word-rotated copy of a supplies the wrap-around bits
	set_vr(op.rt, fshl(a, zshuffle(a, 3, 0, 1, 2), b));
}
// Rotate and Mask Quadword by Bits Immediate: logical right shift by 0..7 bits (negated imm).
void ROTQMBII(spu_opcode_t op)
{
	const auto a = get_vr(op.ra);
	const auto b = eval(-get_imm(op.i7, false) & 0x7);
	// Funnel shift right with a zero lane shuffled into the high word for zero fill
	set_vr(op.rt, fshr(zshuffle(a, 1, 2, 3, 4), a, b));
}
// Shift Left Quadword by Bits Immediate: shift left by 0..7 bits, zero fill.
void SHLQBII(spu_opcode_t op)
{
	const auto a = get_vr(op.ra);
	const auto b = eval(get_imm(op.i7, false) & 0x7);
	// Same as ROTQBII but a zero lane (index 4) is shifted in instead of wrapping
	set_vr(op.rt, fshl(a, zshuffle(a, 4, 0, 1, 2), b));
}
// Rotate Quadword by Bytes Immediate: rotate left by 0..15 bytes.
void ROTQBYI(spu_opcode_t op)
{
	const auto a = get_vr<u8[16]>(op.ra);
	const auto sc = build<u8[16]>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
	// & 0xf wraps the indices mod 16, turning the byte shift into a rotation
	const auto sh = (sc - get_imm<u8[16]>(op.i7, false)) & 0xf;
	set_vr(op.rt, pshufb(a, sh));
}
// Rotate and Mask Quadword by Bytes Immediate: logical right shift by bytes (negated imm).
void ROTQMBYI(spu_opcode_t op)
{
	const auto a = get_vr<u8[16]>(op.ra);
	// 112..127 base: discarded positions exceed 127, setting the index MSB -> pshufb zero fill
	const auto sc = build<u8[16]>(112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127);
	const auto sh = sc + (-get_imm<u8[16]>(op.i7, false) & 0x1f);
	set_vr(op.rt, pshufb(a, sh));
}
// Shift Left Quadword by Bytes Immediate: shift left by 0..31 bytes, zero fill.
void SHLQBYI(spu_opcode_t op)
{
	// Zero shift is a plain register copy; forward the raw value so expression matching
	// elsewhere still sees the original producer
	if (get_reg_raw(op.ra) && !op.i7) return set_reg_fixed(op.rt, get_reg_raw(op.ra), false); // For expressions matching

	const auto a = get_vr<u8[16]>(op.ra);
	const auto sc = build<u8[16]>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
	// Underflowed indices get the MSB set -> pshufb zero fill
	const auto sh = sc - (get_imm<u8[16]>(op.i7, false) & 0x1f);
	set_vr(op.rt, pshufb(a, sh));
}
// Compare Greater Than Word (signed): all-ones per word where ra > rb, else zero.
void CGT(spu_opcode_t op)
{
	const auto lhs = get_vr<s32[4]>(op.ra);
	const auto rhs = get_vr<s32[4]>(op.rb);
	set_vr(op.rt, sext<s32[4]>(lhs > rhs));
}
2019-03-25 19:31:16 +01:00
// Bitwise XOR of ra and rb.
void XOR(spu_opcode_t op)
{
	const auto lhs = get_vr(op.ra);
	const auto rhs = get_vr(op.rb);
	set_vr(op.rt, lhs ^ rhs);
}
// Compare Greater Than Halfword (signed): all-ones per halfword where ra > rb.
void CGTH(spu_opcode_t op)
{
	const auto lhs = get_vr<s16[8]>(op.ra);
	const auto rhs = get_vr<s16[8]>(op.rb);
	set_vr(op.rt, sext<s16[8]>(lhs > rhs));
}
2019-03-25 19:31:16 +01:00
// Equivalence: bitwise NOT of (ra XOR rb).
void EQV(spu_opcode_t op)
{
	const auto lhs = get_vr(op.ra);
	const auto rhs = get_vr(op.rb);
	set_vr(op.rt, ~(lhs ^ rhs));
}
// Compare Greater Than Byte (signed): all-ones per byte where ra > rb.
void CGTB(spu_opcode_t op)
{
	const auto lhs = get_vr<s8[16]>(op.ra);
	const auto rhs = get_vr<s8[16]>(op.rb);
	set_vr(op.rt, sext<s8[16]>(lhs > rhs));
}
// Sum Bytes into Halfwords: each result word holds the byte-sum of the corresponding
// rb word (low halfword) and ra word (high halfword).
void SUMB(spu_opcode_t op)
{
	if (m_use_avx512)
	{
		// VDBPSADBW against zero computes per-quadword byte sums directly
		const auto [a, b] = get_vrs<u8[16]>(op.ra, op.rb);
		const auto zeroes = splat<u8[16]>(0);

		if (op.ra == op.rb && !m_interp_magn)
		{
			// Same source register: one instruction produces both halves
			set_vr(op.rt, vdbpsadbw(a, zeroes, 0));
			return;
		}

		const auto ax = vdbpsadbw(a, zeroes, 0);
		const auto bx = vdbpsadbw(b, zeroes, 0);
		set_vr(op.rt, shuffle2(ax, bx, 0, 9, 2, 11, 4, 13, 6, 15));
		return;
	}

	if (m_use_vnni)
	{
		// VPDPBUSD with an all-ones multiplier accumulates the four bytes of each word
		const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);
		const auto zeroes = splat<u32[4]>(0);
		const auto ones = splat<u32[4]>(0x01010101);
		const auto ax = bitcast<u16[8]>(vpdpbusd(zeroes, a, ones));
		const auto bx = bitcast<u16[8]>(vpdpbusd(zeroes, b, ones));
		set_vr(op.rt, shuffle2(ax, bx, 0, 8, 2, 10, 4, 12, 6, 14));
		return;
	}

	// Generic path: sum byte pairs within halfwords, then interleave and add the halves
	const auto [a, b] = get_vrs<u16[8]>(op.ra, op.rb);
	const auto ahs = eval((a >> 8) + (a & 0xff));
	const auto bhs = eval((b >> 8) + (b & 0xff));
	const auto lsh = shuffle2(ahs, bhs, 0, 9, 2, 11, 4, 13, 6, 15);
	const auto hsh = shuffle2(ahs, bhs, 1, 8, 3, 10, 5, 12, 7, 14);
	set_vr(op.rt, lsh + hsh);
}
// Count Leading Zeros per word.
void CLZ(spu_opcode_t op)
{
	const auto src = get_vr(op.ra);
	set_vr(op.rt, ctlz(src));
}
// Extend Sign Word to Doubleword: sign-extend the low word of each doubleword.
void XSWD(spu_opcode_t op)
{
	const auto src = get_vr<s64[2]>(op.ra);
	set_vr(op.rt, src << 32 >> 32);
}
// Extend Sign Halfword to Word: sign-extend the low halfword of each word.
void XSHW(spu_opcode_t op)
{
	const auto src = get_vr<s32[4]>(op.ra);
	set_vr(op.rt, src << 16 >> 16);
}
// Count Ones in Bytes: population count per byte.
void CNTB(spu_opcode_t op)
{
	const auto src = get_vr<u8[16]>(op.ra);
	set_vr(op.rt, ctpop(src));
}
// Extend Sign Byte to Halfword: sign-extend the low byte of each halfword.
void XSBH(spu_opcode_t op)
{
	const auto src = get_vr<s16[8]>(op.ra);
	set_vr(op.rt, src << 8 >> 8);
}
// Compare Logical Greater Than Word (unsigned): all-ones per word where ra > rb.
void CLGT(spu_opcode_t op)
{
	const auto lhs = get_vr(op.ra);
	const auto rhs = get_vr(op.rb);
	set_vr(op.rt, sext<s32[4]>(lhs > rhs));
}
2019-03-25 19:31:16 +01:00
// AND with Complement: ra AND NOT rb.
void ANDC(spu_opcode_t op)
{
	const auto lhs = get_vr(op.ra);
	const auto rhs = get_vr(op.rb);
	set_vr(op.rt, lhs & ~rhs);
}
// Compare Logical Greater Than Halfword (unsigned): all-ones per halfword where ra > rb.
void CLGTH(spu_opcode_t op)
{
	const auto lhs = get_vr<u16[8]>(op.ra);
	const auto rhs = get_vr<u16[8]>(op.rb);
	set_vr(op.rt, sext<s16[8]>(lhs > rhs));
}
2019-03-25 19:31:16 +01:00
// OR with Complement: ra OR NOT rb.
void ORC(spu_opcode_t op)
{
	const auto lhs = get_vr(op.ra);
	const auto rhs = get_vr(op.rb);
	set_vr(op.rt, lhs | ~rhs);
}
// Compare Logical Greater Than Byte (unsigned): all-ones per byte where ra > rb.
void CLGTB(spu_opcode_t op)
{
	const auto lhs = get_vr<u8[16]>(op.ra);
	const auto rhs = get_vr<u8[16]>(op.rb);
	set_vr(op.rt, sext<s8[16]>(lhs > rhs));
}
// Compare Equal Word: all-ones per word where ra == rb.
void CEQ(spu_opcode_t op)
{
	const auto lhs = get_vr(op.ra);
	const auto rhs = get_vr(op.rb);
	set_vr(op.rt, sext<s32[4]>(lhs == rhs));
}
// Multiply High High Unsigned: product of the high halfwords of each word.
void MPYHHU(spu_opcode_t op)
{
	const auto hi_a = get_vr(op.ra) >> 16;
	const auto hi_b = get_vr(op.rb) >> 16;
	set_vr(op.rt, hi_a * hi_b);
}
// Add Extended: ra + rb + carry (LSB of each word of rt).
void ADDX(spu_opcode_t op)
{
	const auto carry = get_vr(op.rt) & 1;
	set_vr(op.rt, llvm_sum{get_vr(op.ra), get_vr(op.rb), carry});
}
// Subtract From Extended: rb - ra - (1 - borrow), borrow taken from the LSB of rt.
void SFX(spu_opcode_t op)
{
	const auto not_borrow = ~get_vr(op.rt) & 1;
	set_vr(op.rt, get_vr(op.rb) - get_vr(op.ra) - not_borrow);
}
// Carry Generate Extended: rt = carry-out of (ra + rb + carry-in) per word, where the
// incoming carry is the LSB of rt.
void CGX(spu_opcode_t op)
{
	const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);
	// x = all-ones where the incoming carry bit is set, else zero
	const auto x = (get_vr<s32[4]>(op.rt) << 31) >> 31;
	const auto s = eval(a + b);
	// Carry out when a+b wrapped (s < a), or when s == 0xffffffff and the carry-in tips it over
	set_vr(op.rt, noncast<u32[4]>(sext<s32[4]>(s < a) | (sext<s32[4]>(s == noncast<u32[4]>(x)) & x)) >> 31);
}
// Borrow Generate Extended: rt = 1 per word where (rb - ra - (1 - borrow-in)) does not
// underflow; borrow-in is the LSB of rt.
void BGX(spu_opcode_t op)
{
	const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);
	// c: sign bit holds the incoming borrow state
	const auto c = get_vr<s32[4]>(op.rt) << 31;
	// Result is 1 when b > a, or when a == b and the incoming borrow bit is set
	set_vr(op.rt, noncast<u32[4]>(sext<s32[4]>(b > a) | (sext<s32[4]>(a == b) & c)) >> 31);
}
// Multiply High High and Add: signed product of high halfwords, accumulated into rt.
void MPYHHA(spu_opcode_t op)
{
	const auto hi_a = get_vr<s32[4]>(op.ra) >> 16;
	const auto hi_b = get_vr<s32[4]>(op.rb) >> 16;
	set_vr(op.rt, hi_a * hi_b + get_vr<s32[4]>(op.rt));
}
// Multiply High High Unsigned and Add: unsigned product of high halfwords, accumulated into rt.
void MPYHHAU(spu_opcode_t op)
{
	const auto hi_a = get_vr(op.ra) >> 16;
	const auto hi_b = get_vr(op.rb) >> 16;
	set_vr(op.rt, hi_a * hi_b + get_vr(op.rt));
}
// Multiply: signed product of the (sign-extended) low halfwords of each word.
void MPY(spu_opcode_t op)
{
	const auto lo_a = get_vr<s32[4]>(op.ra) << 16 >> 16;
	const auto lo_b = get_vr<s32[4]>(op.rb) << 16 >> 16;
	set_vr(op.rt, lo_a * lo_b);
}
// Multiply High: delegates to the mpyh() helper.
void MPYH(spu_opcode_t op)
{
	const auto lhs = get_vr(op.ra);
	const auto rhs = get_vr(op.rb);
	set_vr(op.rt, mpyh(lhs, rhs));
}
// Multiply High High: signed product of the high halfwords of each word.
void MPYHH(spu_opcode_t op)
{
	const auto hi_a = get_vr<s32[4]>(op.ra) >> 16;
	const auto hi_b = get_vr<s32[4]>(op.rb) >> 16;
	set_vr(op.rt, hi_a * hi_b);
}
// Multiply and Shift Right: signed low-halfword product, arithmetically shifted right by 16.
void MPYS(spu_opcode_t op)
{
	const auto lo_a = get_vr<s32[4]>(op.ra) << 16 >> 16;
	const auto lo_b = get_vr<s32[4]>(op.rb) << 16 >> 16;
	set_vr(op.rt, lo_a * lo_b >> 16);
}
// Compare Equal Halfword: all-ones per halfword where ra == rb.
void CEQH(spu_opcode_t op)
{
	const auto lhs = get_vr<u16[8]>(op.ra);
	const auto rhs = get_vr<u16[8]>(op.rb);
	set_vr(op.rt, sext<s16[8]>(lhs == rhs));
}
// Multiply Unsigned: delegates to the mpyu() helper.
void MPYU(spu_opcode_t op)
{
	const auto lhs = get_vr(op.ra);
	const auto rhs = get_vr(op.rb);
	set_vr(op.rt, mpyu(lhs, rhs));
}
// Compare Equal Byte: all-ones per byte where ra == rb.
void CEQB(spu_opcode_t op)
{
	const auto lhs = get_vr<u8[16]>(op.ra);
	const auto rhs = get_vr<u8[16]>(op.rb);
	set_vr(op.rt, sext<s8[16]>(lhs == rhs));
}
// Form Select Mask for Bytes Immediate: each of the 16 immediate bits expands to a full byte.
void FSMBI(spu_opcode_t op)
{
	set_vr(op.rt, sext<s8[16]>(bitcast<bool[16]>(get_imm<u16>(op.i16))));
}
// Immediate Load Word: sign-extended 16-bit immediate replicated to every word.
void IL(spu_opcode_t op)
{
	const auto imm = get_imm<s32[4]>(op.si16);
	set_vr(op.rt, imm);
}
// Immediate Load Halfword Upper: immediate placed in the high halfword of every word.
void ILHU(spu_opcode_t op)
{
	const auto imm = get_imm<u32[4]>(op.i16);
	set_vr(op.rt, imm << 16);
}
// Immediate Load Halfword: immediate replicated to every halfword.
void ILH(spu_opcode_t op)
{
	const auto imm = get_imm<u16[8]>(op.i16);
	set_vr(op.rt, imm);
}
// Immediate OR Halfword Lower: OR the immediate into the existing rt value.
void IOHL(spu_opcode_t op)
{
	const auto prev = get_vr(op.rt);
	set_vr(op.rt, prev | get_imm(op.i16));
}
// OR Word Immediate.
void ORI(spu_opcode_t op)
{
	// OR with zero is a register copy; forward the raw value so expression matching
	// elsewhere still sees the original producer
	if (get_reg_raw(op.ra) && !op.si10) return set_reg_fixed(op.rt, get_reg_raw(op.ra), false); // For expressions matching

	set_vr(op.rt, get_vr<s32[4]>(op.ra) | get_imm<s32[4]>(op.si10));
}
// OR Halfword Immediate.
void ORHI(spu_opcode_t op)
{
	// OR with zero is a register copy (see ORI)
	if (get_reg_raw(op.ra) && !op.si10) return set_reg_fixed(op.rt, get_reg_raw(op.ra), false);

	set_vr(op.rt, get_vr<s16[8]>(op.ra) | get_imm<s16[8]>(op.si10));
}
// OR Byte Immediate.
void ORBI(spu_opcode_t op)
{
	// OR with zero is a register copy (see ORI)
	if (get_reg_raw(op.ra) && !op.si10) return set_reg_fixed(op.rt, get_reg_raw(op.ra), false);

	set_vr(op.rt, get_vr<s8[16]>(op.ra) | get_imm<s8[16]>(op.si10));
}
// Subtract From Word Immediate: imm - ra per word.
void SFI(spu_opcode_t op)
{
	const auto imm = get_imm<s32[4]>(op.si10);
	set_vr(op.rt, imm - get_vr<s32[4]>(op.ra));
}
// Subtract From Halfword Immediate: imm - ra per halfword.
void SFHI(spu_opcode_t op)
{
	const auto imm = get_imm<s16[8]>(op.si10);
	set_vr(op.rt, imm - get_vr<s16[8]>(op.ra));
}
// AND Word Immediate.
void ANDI(spu_opcode_t op)
{
	// AND with all-ones is a register copy; forward the raw value (see ORI)
	if (get_reg_raw(op.ra) && op.si10 == -1) return set_reg_fixed(op.rt, get_reg_raw(op.ra), false);

	set_vr(op.rt, get_vr<s32[4]>(op.ra) & get_imm<s32[4]>(op.si10));
}
// AND Halfword Immediate.
void ANDHI(spu_opcode_t op)
{
	// AND with all-ones is a register copy (see ANDI)
	if (get_reg_raw(op.ra) && op.si10 == -1) return set_reg_fixed(op.rt, get_reg_raw(op.ra), false);

	set_vr(op.rt, get_vr<s16[8]>(op.ra) & get_imm<s16[8]>(op.si10));
}
// AND Byte Immediate.
void ANDBI(spu_opcode_t op)
{
	// AND with all-ones (as a byte) is a register copy (see ANDI)
	if (get_reg_raw(op.ra) && static_cast<s8>(op.si10) == -1) return set_reg_fixed(op.rt, get_reg_raw(op.ra), false);

	set_vr(op.rt, get_vr<s8[16]>(op.ra) & get_imm<s8[16]>(op.si10));
}
// Add Word Immediate.
void AI(spu_opcode_t op)
{
	// Adding zero is a register copy (see ORI)
	if (get_reg_raw(op.ra) && !op.si10) return set_reg_fixed(op.rt, get_reg_raw(op.ra), false);

	set_vr(op.rt, get_vr<s32[4]>(op.ra) + get_imm<s32[4]>(op.si10));
}
// Add Halfword Immediate.
void AHI(spu_opcode_t op)
{
	// Adding zero is a register copy (see ORI)
	if (get_reg_raw(op.ra) && !op.si10) return set_reg_fixed(op.rt, get_reg_raw(op.ra), false);

	set_vr(op.rt, get_vr<s16[8]>(op.ra) + get_imm<s16[8]>(op.si10));
}
// XOR Word Immediate.
void XORI(spu_opcode_t op)
{
	// XOR with zero is a register copy (see ORI)
	if (get_reg_raw(op.ra) && !op.si10) return set_reg_fixed(op.rt, get_reg_raw(op.ra), false);

	set_vr(op.rt, get_vr<s32[4]>(op.ra) ^ get_imm<s32[4]>(op.si10));
}
// XOR Halfword Immediate.
void XORHI(spu_opcode_t op)
{
	// XOR with zero is a register copy (see ORI)
	if (get_reg_raw(op.ra) && !op.si10) return set_reg_fixed(op.rt, get_reg_raw(op.ra), false);

	set_vr(op.rt, get_vr<s16[8]>(op.ra) ^ get_imm<s16[8]>(op.si10));
}
// XOR Byte Immediate.
void XORBI(spu_opcode_t op)
{
	// XOR with zero is a register copy (see ORI)
	if (get_reg_raw(op.ra) && !op.si10) return set_reg_fixed(op.rt, get_reg_raw(op.ra), false);

	set_vr(op.rt, get_vr<s8[16]>(op.ra) ^ get_imm<s8[16]>(op.si10));
}
// Compare Greater Than Word Immediate (signed).
void CGTI(spu_opcode_t op)
{
	const auto lhs = get_vr<s32[4]>(op.ra);
	set_vr(op.rt, sext<s32[4]>(lhs > get_imm<s32[4]>(op.si10)));
}
// Compare Greater Than Halfword Immediate (signed).
void CGTHI(spu_opcode_t op)
{
	const auto lhs = get_vr<s16[8]>(op.ra);
	set_vr(op.rt, sext<s16[8]>(lhs > get_imm<s16[8]>(op.si10)));
}
// Compare Greater Than Byte Immediate (signed).
void CGTBI(spu_opcode_t op)
{
	const auto lhs = get_vr<s8[16]>(op.ra);
	set_vr(op.rt, sext<s8[16]>(lhs > get_imm<s8[16]>(op.si10)));
}
// Compare Logical Greater Than Word Immediate (unsigned).
void CLGTI(spu_opcode_t op)
{
	const auto lhs = get_vr(op.ra);
	set_vr(op.rt, sext<s32[4]>(lhs > get_imm(op.si10)));
}
// Compare Logical Greater Than Halfword Immediate (unsigned).
void CLGTHI(spu_opcode_t op)
{
	const auto lhs = get_vr<u16[8]>(op.ra);
	set_vr(op.rt, sext<s16[8]>(lhs > get_imm<u16[8]>(op.si10)));
}
// Compare Logical Greater Than Byte Immediate (unsigned).
void CLGTBI(spu_opcode_t op)
{
	const auto lhs = get_vr<u8[16]>(op.ra);
	set_vr(op.rt, sext<s8[16]>(lhs > get_imm<u8[16]>(op.si10)));
}
// Multiply Immediate: signed low-halfword of ra times the sign-extended immediate.
void MPYI(spu_opcode_t op)
{
	const auto lo_a = get_vr<s32[4]>(op.ra) << 16 >> 16;
	set_vr(op.rt, lo_a * get_imm<s32[4]>(op.si10));
}
// Multiply Unsigned Immediate: low halfword of ra times the low 16 bits of the immediate.
void MPYUI(spu_opcode_t op)
{
	const auto lo_a = get_vr(op.ra) << 16 >> 16;
	set_vr(op.rt, lo_a * (get_imm(op.si10) & 0xffff));
}
// Compare Equal Word Immediate.
void CEQI(spu_opcode_t op)
{
	const auto lhs = get_vr(op.ra);
	set_vr(op.rt, sext<s32[4]>(lhs == get_imm(op.si10)));
}
// Compare Equal Halfword Immediate.
void CEQHI(spu_opcode_t op)
{
	const auto lhs = get_vr<u16[8]>(op.ra);
	set_vr(op.rt, sext<s16[8]>(lhs == get_imm<u16[8]>(op.si10)));
}
// Compare Equal Byte Immediate.
void CEQBI(spu_opcode_t op)
{
	const auto lhs = get_vr<u8[16]>(op.ra);
	set_vr(op.rt, sext<s8[16]>(lhs == get_imm<u8[16]>(op.si10)));
}
// Immediate Load Address: 18-bit immediate replicated to every word (zero-extended).
void ILA(spu_opcode_t op)
{
	const auto imm = get_imm(op.i18);
	set_vr(op.rt, imm);
}
// Select Bits: rt4 = (rb & rc) | (ra & ~rc), with several pattern-based lowerings that
// replace the bitwise blend with a real vector select when the mask allows it.
void SELB(spu_opcode_t op)
{
	if (match_vr<s8[16], s16[8], s32[4], s64[2]>(op.rc, [&](auto c, auto MP)
	{
		using VT = typename decltype(MP)::type;

		// If the control mask comes from a comparison instruction, replace SELB with select
		if (auto [ok, x] = match_expr(c, sext<VT>(match<bool[std::extent_v<VT>]>())); ok)
		{
			if constexpr (std::extent_v<VT> == 2) // u64[2]
			{
				// Try to select floats as floats if a OR b is typed as f64[2]
				if (auto [a, b] = match_vrs<f64[2]>(op.ra, op.rb); a || b)
				{
					set_vr(op.rt4, select(x, get_vr<f64[2]>(op.rb), get_vr<f64[2]>(op.ra)));
					return true;
				}
			}

			if constexpr (std::extent_v<VT> == 4) // u32[4]
			{
				// Match division (adjusted) (TODO): recognize the Newton-Raphson style
				// divide-and-correct sequence and collapse it back into a plain division
				if (auto a = match_vr<f32[4]>(op.ra))
				{
					static const auto MT = match<f32[4]>();

					if (auto [div_ok, diva, divb] = match_expr(a, MT / MT); div_ok)
					{
						if (auto b = match_vr<s32[4]>(op.rb))
						{
							if (auto [add1_ok] = match_expr(b, bitcast<s32[4]>(a) + splat<s32[4]>(1)); add1_ok)
							{
								if (auto [fm_ok, a1, b1] = match_expr(x, bitcast<s32[4]>(fm(MT, MT)) > splat<s32[4]>(-1)); fm_ok)
								{
									if (auto [fnma_ok] = match_expr(a1, fnms(divb, bitcast<f32[4]>(b), diva)); fnma_ok)
									{
										// Correction factor must be +/-1.0 (or the sign-of-diva form below)
										if (fabs(b1).eval(m_ir) == fsplat<f32[4]>(1.0).eval(m_ir))
										{
											set_vr(op.rt4, diva / divb);
											return true;
										}

										if (auto [sel_ok] = match_expr(b1, bitcast<f32[4]>((bitcast<u32[4]>(diva) & 0x80000000) | 0x3f800000)); sel_ok)
										{
											set_vr(op.rt4, diva / divb);
											return true;
										}
									}
								}
							}
						}
					}
				}

				// Try to select floats as floats if a OR b is typed as f64[4] / f32[4]
				if (auto [a, b] = match_vrs<f64[4]>(op.ra, op.rb); a || b)
				{
					set_vr(op.rt4, select(x, get_vr<f64[4]>(op.rb), get_vr<f64[4]>(op.ra)));
					return true;
				}

				if (auto [a, b] = match_vrs<f32[4]>(op.ra, op.rb); a || b)
				{
					set_vr(op.rt4, select(x, get_vr<f32[4]>(op.rb), get_vr<f32[4]>(op.ra)));
					return true;
				}
			}

			if (auto [ok, y] = match_expr(x, bitcast<bool[std::extent_v<VT>]>(match<get_int_vt<std::extent_v<VT>>>())); ok)
			{
				// Don't ruin FSMB/FSM/FSMH instructions
				return false;
			}

			set_vr(op.rt4, select(x, get_vr<VT>(op.rb), get_vr<VT>(op.ra)));
			return true;
		}

		return false;
	}))
	{
		return;
	}

	const auto c = get_vr(op.rc);

	// Check if the constant mask doesn't require bit granularity
	if (auto [ok, mask] = get_const_vector(c.value, m_pos); ok)
	{
		// Each element must be all-zeros or all-ones for the wider select to be equivalent
		bool sel_32 = true;
		for (u32 i = 0; i < 4; i++)
		{
			if (mask._u32[i] && mask._u32[i] != 0xFFFFFFFF)
			{
				sel_32 = false;
				break;
			}
		}

		if (sel_32)
		{
			if (auto [a, b] = match_vrs<f64[4]>(op.ra, op.rb); a || b)
			{
				set_vr(op.rt4, select(noncast<s32[4]>(c) != 0, get_vr<f64[4]>(op.rb), get_vr<f64[4]>(op.ra)));
				return;
			}
			else if (auto [a, b] = match_vrs<f32[4]>(op.ra, op.rb); a || b)
			{
				set_vr(op.rt4, select(noncast<s32[4]>(c) != 0, get_vr<f32[4]>(op.rb), get_vr<f32[4]>(op.ra)));
				return;
			}

			set_vr(op.rt4, select(noncast<s32[4]>(c) != 0, get_vr<u32[4]>(op.rb), get_vr<u32[4]>(op.ra)));
			return;
		}

		bool sel_16 = true;
		for (u32 i = 0; i < 8; i++)
		{
			if (mask._u16[i] && mask._u16[i] != 0xFFFF)
			{
				sel_16 = false;
				break;
			}
		}

		if (sel_16)
		{
			set_vr(op.rt4, select(bitcast<s16[8]>(c) != 0, get_vr<u16[8]>(op.rb), get_vr<u16[8]>(op.ra)));
			return;
		}

		bool sel_8 = true;
		for (u32 i = 0; i < 16; i++)
		{
			if (mask._u8[i] && mask._u8[i] != 0xFF)
			{
				sel_8 = false;
				break;
			}
		}

		if (sel_8)
		{
			set_vr(op.rt4, select(bitcast<s8[16]>(c) != 0, get_vr<u8[16]>(op.rb), get_vr<u8[16]>(op.ra)));
			return;
		}
	}

	const auto op1 = get_reg_raw(op.rb);
	const auto op2 = get_reg_raw(op.ra);

	if ((op1 && op1->getType() == get_type<f64[4]>()) || (op2 && op2->getType() == get_type<f64[4]>()))
	{
		// Optimization: keep xfloat values in doubles even if the mask is unpredictable (hard way)
		const auto c = get_vr<u32[4]>(op.rc);
		const auto b = get_vr<f64[4]>(op.rb);
		const auto a = get_vr<f64[4]>(op.ra);
		const auto m = conv_xfloat_mask(c.value);
		const auto x = m_ir->CreateAnd(double_as_uint64(b.value), m);
		const auto y = m_ir->CreateAnd(double_as_uint64(a.value), m_ir->CreateNot(m));
		set_reg_fixed(op.rt4, uint64_as_double(m_ir->CreateOr(x, y)));
		return;
	}

	// Generic bitwise blend
	set_vr(op.rt4, (get_vr(op.rb) & c) | (get_vr(op.ra) & ~c));
}
2019-03-25 19:31:16 +01:00
void SHUFB(spu_opcode_t op) //
{
	// Shuffle bytes: each result byte of rt4 is selected from the ra:rb byte pool
	// by the corresponding control byte of rc; control bytes with the top bits set
	// (0x80+/0xC0+/0xE0+) produce the fixed constants 0x00/0xFF/0x80 instead.

	// If the mask register holds a typed value produced by a constant generation
	// instruction, lower SHUFB to a single scalar insert of an RA element into RB.
	if (match_vr<u8[16], u16[8], u32[4], u64[2]>(op.rc, [&](auto c, auto MP)
	{
		using VT = typename decltype(MP)::type;

		// If the mask comes from a constant generation instruction, replace SHUFB with insert
		if (auto [ok, i] = match_expr(c, spu_get_insertion_shuffle_mask<VT>(match<u32>())); ok)
		{
			set_vr(op.rt4, insert(get_vr<VT>(op.rb), i, get_scalar(get_vr<VT>(op.ra))));
			return true;
		}

		return false;
	}))
	{
		return;
	}

	const auto c = get_vr<u8[16]>(op.rc);

	if (auto [ok, mask] = get_const_vector(c.value, m_pos); ok)
	{
		// Optimization: SHUFB with constant mask
		if (((mask._u64[0] | mask._u64[1]) & 0xe0e0e0e0e0e0e0e0) == 0)
		{
			// Trivial insert or constant shuffle (TODO)
			// Each entry is a full 128-bit mask (i1 = high qword, i0 = low qword)
			// that moves exactly one element of RA into RB: `type` selects the
			// element width, `extract_from`/`insert_to` are element indices.
			static constexpr struct mask_info
			{
				u64 i1;
				u64 i0;
				decltype(&cpu_translator::get_type<void>) type;
				u64 extract_from;
				u64 insert_to;
			} s_masks[30]
			{
				{ 0x0311121314151617, 0x18191a1b1c1d1e1f, &cpu_translator::get_type<u8[16]>, 12, 15 },
				{ 0x1003121314151617, 0x18191a1b1c1d1e1f, &cpu_translator::get_type<u8[16]>, 12, 14 },
				{ 0x1011031314151617, 0x18191a1b1c1d1e1f, &cpu_translator::get_type<u8[16]>, 12, 13 },
				{ 0x1011120314151617, 0x18191a1b1c1d1e1f, &cpu_translator::get_type<u8[16]>, 12, 12 },
				{ 0x1011121303151617, 0x18191a1b1c1d1e1f, &cpu_translator::get_type<u8[16]>, 12, 11 },
				{ 0x1011121314031617, 0x18191a1b1c1d1e1f, &cpu_translator::get_type<u8[16]>, 12, 10 },
				{ 0x1011121314150317, 0x18191a1b1c1d1e1f, &cpu_translator::get_type<u8[16]>, 12, 9 },
				{ 0x1011121314151603, 0x18191a1b1c1d1e1f, &cpu_translator::get_type<u8[16]>, 12, 8 },
				{ 0x1011121314151617, 0x03191a1b1c1d1e1f, &cpu_translator::get_type<u8[16]>, 12, 7 },
				{ 0x1011121314151617, 0x18031a1b1c1d1e1f, &cpu_translator::get_type<u8[16]>, 12, 6 },
				{ 0x1011121314151617, 0x1819031b1c1d1e1f, &cpu_translator::get_type<u8[16]>, 12, 5 },
				{ 0x1011121314151617, 0x18191a031c1d1e1f, &cpu_translator::get_type<u8[16]>, 12, 4 },
				{ 0x1011121314151617, 0x18191a1b031d1e1f, &cpu_translator::get_type<u8[16]>, 12, 3 },
				{ 0x1011121314151617, 0x18191a1b1c031e1f, &cpu_translator::get_type<u8[16]>, 12, 2 },
				{ 0x1011121314151617, 0x18191a1b1c1d031f, &cpu_translator::get_type<u8[16]>, 12, 1 },
				{ 0x1011121314151617, 0x18191a1b1c1d1e03, &cpu_translator::get_type<u8[16]>, 12, 0 },
				{ 0x0203121314151617, 0x18191a1b1c1d1e1f, &cpu_translator::get_type<u16[8]>, 6, 7 },
				{ 0x1011020314151617, 0x18191a1b1c1d1e1f, &cpu_translator::get_type<u16[8]>, 6, 6 },
				{ 0x1011121302031617, 0x18191a1b1c1d1e1f, &cpu_translator::get_type<u16[8]>, 6, 5 },
				{ 0x1011121314150203, 0x18191a1b1c1d1e1f, &cpu_translator::get_type<u16[8]>, 6, 4 },
				{ 0x1011121314151617, 0x02031a1b1c1d1e1f, &cpu_translator::get_type<u16[8]>, 6, 3 },
				{ 0x1011121314151617, 0x181902031c1d1e1f, &cpu_translator::get_type<u16[8]>, 6, 2 },
				{ 0x1011121314151617, 0x18191a1b02031e1f, &cpu_translator::get_type<u16[8]>, 6, 1 },
				{ 0x1011121314151617, 0x18191a1b1c1d0203, &cpu_translator::get_type<u16[8]>, 6, 0 },
				{ 0x0001020314151617, 0x18191a1b1c1d1e1f, &cpu_translator::get_type<u32[4]>, 3, 3 },
				{ 0x1011121300010203, 0x18191a1b1c1d1e1f, &cpu_translator::get_type<u32[4]>, 3, 2 },
				{ 0x1011121314151617, 0x000102031c1d1e1f, &cpu_translator::get_type<u32[4]>, 3, 1 },
				{ 0x1011121314151617, 0x18191a1b00010203, &cpu_translator::get_type<u32[4]>, 3, 0 },
				{ 0x0001020304050607, 0x18191a1b1c1d1e1f, &cpu_translator::get_type<u64[2]>, 1, 1 },
				// Fixed: i1 must be the identity pattern 0x1011121314151617 for a u64
				// insert into slot 0; the previous value (0x1011121303151617) was a
				// stray u8-granularity mask, so the real u64-insert mask never matched
				// and a byte-mix mask would have been mis-lowered as a u64 insert.
				{ 0x1011121314151617, 0x0001020304050607, &cpu_translator::get_type<u64[2]>, 1, 0 },
			};

			// Check important constants from CWD-like constant generation instructions
			for (const auto& cm : s_masks)
			{
				if (mask._u64[0] == cm.i0 && mask._u64[1] == cm.i1)
				{
					const auto t = (this->*cm.type)();
					const auto a = get_reg_fixed(op.ra, t);
					const auto b = get_reg_fixed(op.rb, t);
					const auto e = m_ir->CreateExtractElement(a, cm.extract_from);
					set_reg_fixed(op.rt4, m_ir->CreateInsertElement(b, e, cm.insert_to));
					return;
				}
			}
		}

		// Adjusted shuffle mask (inverted: SPU byte numbering is big-endian w.r.t. LLVM lanes)
		v128 smask = ~mask & v128::from8p(op.ra == op.rb ? 0xf : 0x1f);

		// Blend mask for encoded constants (0xe0.. -> 0x80 constant, 0xc0.. -> 0xff constant)
		v128 bmask{};

		for (u32 i = 0; i < 16; i++)
		{
			if (mask._bytes[i] >= 0xe0)
				bmask._bytes[i] = 0x80;
			else if (mask._bytes[i] >= 0xc0)
				bmask._bytes[i] = 0xff;
		}

		const auto a = get_vr<u8[16]>(op.ra);
		const auto b = get_vr<u8[16]>(op.rb);
		const auto c = make_const_vector(smask, get_type<u8[16]>());
		const auto d = make_const_vector(bmask, get_type<u8[16]>());

		llvm::Value* r = d;

		// Only emit the shuffle if at least one control byte actually selects data
		if ((~mask._u64[0] | ~mask._u64[1]) & 0x8080808080808080) [[likely]]
		{
			r = m_ir->CreateShuffleVector(b.value, op.ra == op.rb ? b.value : a.value, m_ir->CreateZExt(c, get_type<u32[16]>()));

			// Blend in the fixed constants where the control byte had the top bit set
			if ((mask._u64[0] | mask._u64[1]) & 0x8080808080808080)
			{
				r = m_ir->CreateSelect(m_ir->CreateICmpSLT(make_const_vector(mask, get_type<u8[16]>()), llvm::ConstantInt::get(get_type<u8[16]>(), 0)), d, r);
			}
		}

		set_reg_fixed(op.rt4, r);
		return;
	}

	// Check whether shuffle mask doesn't contain fixed value selectors
	// (bit 7 known zero in every control byte means pure permutation)
	bool perm_only = false;

	if (auto k = get_known_bits(c); !!(k.Zero & 0x80))
	{
		perm_only = true;
	}

	const auto a = get_vr<u8[16]>(op.ra);
	const auto b = get_vr<u8[16]>(op.rb);

	// Data with swapped endian from a load instruction
	if (auto [ok, as] = match_expr(a, byteswap(match<u8[16]>())); ok)
	{
		if (auto [ok, bs] = match_expr(b, byteswap(match<u8[16]>())); ok)
		{
			// Undo endian swapping, and rely on pshufb/vperm2b to re-reverse endianness
			if (m_use_avx512_icl && (op.ra != op.rb))
			{
				if (perm_only)
				{
					set_vr(op.rt4, vperm2b(as, bs, c));
					return;
				}

				// Compute the fixed-constant lanes via a GF(2^8) affine transform of the control bytes
				const auto m = gf2p8affineqb(c, build<u8[16]>(0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20), 0x7f);
				const auto mm = select(noncast<s8[16]>(m) >= 0, splat<u8[16]>(0), m);
				const auto ab = vperm2b(as, bs, c);
				set_vr(op.rt4, select(noncast<s8[16]>(c) >= 0, ab, mm));
				return;
			}

			// Constant lanes derived from the control byte's high nibble (0xC->0xff, 0xE/0xF->0x80)
			const auto x = pshufb(build<u8[16]>(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x80, 0x80), (c >> 4));
			const auto ax = pshufb(as, c);
			const auto bx = pshufb(bs, c);

			if (perm_only)
				set_vr(op.rt4, select_by_bit4(c, ax, bx));
			else
				set_vr(op.rt4, select_by_bit4(c, ax, bx) | x);
			return;
		}

		if (auto [ok, data] = get_const_vector(b.value, m_pos); ok)
		{
			// RB is a broadcast constant: any lane selects the same byte, so no swap needed
			if (data == v128::from8p(data._u8[0]))
			{
				if (m_use_avx512_icl)
				{
					if (perm_only)
					{
						set_vr(op.rt4, vperm2b256to128(as, b, c));
						return;
					}

					const auto m = gf2p8affineqb(c, build<u8[16]>(0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20), 0x7f);
					const auto mm = select(noncast<s8[16]>(m) >= 0, splat<u8[16]>(0), m);
					const auto ab = vperm2b256to128(as, b, c);
					set_vr(op.rt4, select(noncast<s8[16]>(c) >= 0, ab, mm));
					return;
				}

				// See above
				const auto x = pshufb(build<u8[16]>(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x80, 0x80), (c >> 4));
				const auto ax = pshufb(as, c);

				if (perm_only)
					set_vr(op.rt4, select_by_bit4(c, ax, b));
				else
					set_vr(op.rt4, select_by_bit4(c, ax, b) | x);
				return;
			}
		}
	}

	if (auto [ok, bs] = match_expr(b, byteswap(match<u8[16]>())); ok)
	{
		if (auto [ok, data] = get_const_vector(a.value, m_pos); ok)
		{
			// RA is a broadcast constant (mirror of the case above)
			if (data == v128::from8p(data._u8[0]))
			{
				// See above
				const auto x = pshufb(build<u8[16]>(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x80, 0x80), (c >> 4));
				const auto bx = pshufb(bs, c);

				if (perm_only)
					set_vr(op.rt4, select_by_bit4(c, a, bx));
				else
					set_vr(op.rt4, select_by_bit4(c, a, bx) | x);
				return;
			}
		}
	}

	// Generic AVX-512 ICL path: a single vperm2b replaces the two-pshufb blend
	if (m_use_avx512_icl && (op.ra != op.rb || m_interp_magn))
	{
		if (auto [ok, data] = get_const_vector(b.value, m_pos); ok)
		{
			if (data == v128::from8p(data._u8[0]))
			{
				if (perm_only)
				{
					set_vr(op.rt4, vperm2b256to128(a, b, eval(c ^ 0xf)));
					return;
				}

				const auto m = gf2p8affineqb(c, build<u8[16]>(0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20), 0x7f);
				const auto mm = select(noncast<s8[16]>(m) >= 0, splat<u8[16]>(0), m);
				const auto ab = vperm2b256to128(a, b, eval(c ^ 0xf));
				set_vr(op.rt4, select(noncast<s8[16]>(c) >= 0, ab, mm));
				return;
			}
		}

		if (auto [ok, data] = get_const_vector(a.value, m_pos); ok)
		{
			if (data == v128::from8p(data._u8[0]))
			{
				if (perm_only)
				{
					set_vr(op.rt4, vperm2b256to128(b, a, eval(c ^ 0x1f)));
					return;
				}

				const auto m = gf2p8affineqb(c, build<u8[16]>(0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20), 0x7f);
				const auto mm = select(noncast<s8[16]>(m) >= 0, splat<u8[16]>(0), m);
				const auto ab = vperm2b256to128(b, a, eval(c ^ 0x1f));
				set_vr(op.rt4, select(noncast<s8[16]>(c) >= 0, ab, mm));
				return;
			}
		}

		if (perm_only)
		{
			set_vr(op.rt4, vperm2b(a, b, eval(c ^ 0xf)));
			return;
		}

		const auto m = gf2p8affineqb(c, build<u8[16]>(0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20), 0x7f);
		const auto mm = select(noncast<s8[16]>(m) >= 0, splat<u8[16]>(0), m);
		const auto cr = eval(c ^ 0xf);
		const auto ab = vperm2b(a, b, cr);
		set_vr(op.rt4, select(noncast<s8[16]>(c) >= 0, ab, mm));
		return;
	}

	// Fallback: two pshufbs blended by bit 4 of the control byte, constants OR'ed in
	const auto x = pshufb(build<u8[16]>(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x80, 0x80), (c >> 4));
	const auto cr = eval(c ^ 0xf);
	const auto ax = pshufb(a, cr);
	const auto bx = pshufb(b, cr);

	if (perm_only)
		set_vr(op.rt4, select_by_bit4(cr, ax, bx));
	else
		set_vr(op.rt4, select_by_bit4(cr, ax, bx) | x);
}
void MPYA(spu_opcode_t op)
{
	// Multiply the sign-extended low halfwords of RA and RB, then add RC per word lane.
	const auto lhs = eval(get_vr<s32[4]>(op.ra) << 16 >> 16); // sign-extend low 16 bits
	const auto rhs = eval(get_vr<s32[4]>(op.rb) << 16 >> 16);
	const auto acc = get_vr<s32[4]>(op.rc);
	set_vr(op.rt4, lhs * rhs + acc);
}
void FSCRRD(spu_opcode_t op) //
{
	// Hack: FPSCR is not modelled by this recompiler, so reading it always
	// produces an all-zero vector.
	set_vr(op.rt, splat<u32[4]>(0));
}
2021-03-05 20:05:37 +01:00
void FSCRWR(spu_opcode_t /*op*/) //
{
	// Hack: FPSCR is not modelled by this recompiler, so writes to it are
	// silently discarded (no IR is emitted).
}
void DFCGT(spu_opcode_t op) //
{
	// Not translated here; defer to the generic unknown-instruction handler.
	return UNK(op);
}
void DFCEQ(spu_opcode_t op) //
{
	// Not translated here; defer to the generic unknown-instruction handler.
	return UNK(op);
}
void DFCMGT(spu_opcode_t op) //
{
	// Not translated here; defer to the generic unknown-instruction handler.
	return UNK(op);
}
void DFCMEQ(spu_opcode_t op) //
{
	// Not translated here; defer to the generic unknown-instruction handler.
	return UNK(op);
}
void DFTSV(spu_opcode_t op) //
{
	// Not translated here; defer to the generic unknown-instruction handler.
	return UNK(op);
}
2019-03-25 19:31:16 +01:00
void DFA(spu_opcode_t op)
{
	// Double-precision add of the two f64 lanes: rt = ra + rb.
	const auto [a, b] = get_vrs<f64[2]>(op.ra, op.rb);
	set_vr(op.rt, a + b);
}
2019-03-25 19:31:16 +01:00
void DFS(spu_opcode_t op)
{
	// Double-precision subtract of the two f64 lanes: rt = ra - rb.
	const auto [a, b] = get_vrs<f64[2]>(op.ra, op.rb);
	set_vr(op.rt, a - b);
}
2019-03-25 19:31:16 +01:00
void DFM(spu_opcode_t op)
{
	// Double-precision multiply of the two f64 lanes: rt = ra * rb.
	const auto [a, b] = get_vrs<f64[2]>(op.ra, op.rb);
	set_vr(op.rt, a * b);
}
2019-03-25 19:31:16 +01:00
void DFMA(spu_opcode_t op)
{
	// Double-precision multiply-add: rt = ra * rb + rt.
	const auto [a, b, c] = get_vrs<f64[2]>(op.ra, op.rb, op.rt);

	if (!g_cfg.core.use_accurate_dfma)
	{
		// Separate multiply then add
		set_vr(op.rt, a * b + c);
	}
	else
	{
		// Single fused multiply-add
		set_vr(op.rt, fmuladd(a, b, c, true));
	}
}
2019-03-25 19:31:16 +01:00
void DFMS(spu_opcode_t op)
{
	// Double-precision multiply-subtract: rt = ra * rb - rt.
	const auto [a, b, c] = get_vrs<f64[2]>(op.ra, op.rb, op.rt);

	if (!g_cfg.core.use_accurate_dfma)
	{
		// Separate multiply then subtract
		set_vr(op.rt, a * b - c);
	}
	else
	{
		// Single fused multiply-add with negated addend
		set_vr(op.rt, fmuladd(a, b, -c, true));
	}
}
2019-03-25 19:31:16 +01:00
void DFNMS(spu_opcode_t op)
{
	// Double-precision negative multiply-subtract: rt = rt - ra * rb.
	const auto [a, b, c] = get_vrs<f64[2]>(op.ra, op.rb, op.rt);

	if (!g_cfg.core.use_accurate_dfma)
	{
		// Separate multiply then subtract from the accumulator
		set_vr(op.rt, c - (a * b));
	}
	else
	{
		// Fused form with the first factor negated
		set_vr(op.rt, fmuladd(-a, b, c, true));
	}
}
2019-03-25 19:31:16 +01:00
void DFNMA(spu_opcode_t op)
{
	// Double-precision negative multiply-add: rt = -(ra * rb + rt).
	const auto [a, b, c] = get_vrs<f64[2]>(op.ra, op.rb, op.rt);

	if (!g_cfg.core.use_accurate_dfma)
	{
		// Separate multiply-add, then negate
		set_vr(op.rt, -(a * b + c));
	}
	else
	{
		// Negated fused multiply-add
		set_vr(op.rt, -fmuladd(a, b, c, true));
	}
}
2020-07-04 05:49:30 +02:00
bool is_input_positive(value_t<f32[4]> a)
{
	// A value that is provably x * x (the same operand multiplied by itself)
	// cannot have a negative sign in any lane.
	const auto [ok, v0, v1] = match_expr(a, match<f32[4]>() * match<f32[4]>());
	return ok && v0.eq(v1);
}
2019-10-26 22:51:38 +02:00
// clamping helpers
value_t<f32[4]> clamp_positive_smax(value_t<f32[4]> v)
{
	// Clamp each lane to at most +SMAX (0x7f7fffff) using a signed integer min
	// on the raw float bits.
	const auto bits = eval(bitcast<s32[4]>(v));
	const auto capped = eval(min(bits, splat<s32[4]>(0x7f7fffff)));
	return eval(bitcast<f32[4]>(capped));
}
value_t<f32[4]> clamp_negative_smax(value_t<f32[4]> v)
{
	// Inputs known to be non-negative need no clamping on the negative side.
	if (is_input_positive(v))
	{
		return v;
	}

	// Clamp to -SMAX (0xff7fffff) via unsigned min on the raw bits:
	// as unsigned integers, negative floats sort above all positive ones.
	const auto bits = eval(bitcast<u32[4]>(v));
	return eval(bitcast<f32[4]>(min(bits, splat<u32[4]>(0xff7fffff))));
}
value_t<f32[4]> clamp_smax(value_t<f32[4]> v)
{
	// Clamp each lane's magnitude to +-SMAX, keeping the sign.
	if (!m_use_avx512)
	{
		return eval(clamp_positive_smax(clamp_negative_smax(v)));
	}

	if (is_input_positive(v))
	{
		return eval(clamp_positive_smax(v));
	}

	if (auto [ok, data] = get_const_vector(v.value, m_pos); ok)
	{
		// Avoid pessimization when the input is a compile-time constant
		return eval(clamp_positive_smax(clamp_negative_smax(v)));
	}

	// Single-instruction clamp on AVX-512 capable hosts
	return eval(vrangeps(v, fsplat<f32[4]>(std::bit_cast<f32, u32>(0x7f7fffff)), 0x2, 0xff));
}
// FMA favouring zeros
value_t<f32[4]> xmuladd(value_t<f32[4]> a, value_t<f32[4]> b, value_t<f32[4]> c)
{
	// Zero each factor in lanes where the other factor is zero, so that
	// 0 * anything contributes exactly 0 to the fused add.
	const auto nz_a = eval(sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.))));
	const auto nz_b = eval(sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.))));
	const auto fa = eval(bitcast<f32[4]>(bitcast<s32[4]>(a) & nz_b));
	const auto fb = eval(bitcast<f32[4]>(bitcast<s32[4]>(b) & nz_a));
	return eval(fmuladd(fa, fb, c));
}
2020-05-12 18:57:03 +02:00
// Checks for postive and negative zero, or Denormal (treated as zero)
2020-08-05 17:03:21 +02:00
// If sign is +-1 check equality againts all sign bits
bool is_spu_float_zero ( v128 a , int sign = 0 )
2020-05-12 18:57:03 +02:00
{
for ( u32 i = 0 ; i < 4 ; i + + )
2020-05-27 17:53:09 +02:00
{
2020-05-12 18:57:03 +02:00
const u32 exponent = a . _u32 [ i ] & 0x7f800000u ;
2020-08-05 17:03:21 +02:00
if ( exponent | | ( sign & & ( sign > = 0 ) ! = ( a . _s32 [ i ] > = 0 ) ) )
2020-05-12 18:57:03 +02:00
{
// Normalized number
return false ;
}
}
return true ;
}
2021-09-07 17:35:00 +02:00
// Build a deferred call to the "spu_frest" intrinsic (lowered in FREST below).
template <typename T>
static llvm_calli<f32[4], T> frest(T&& a)
{
	return {"spu_frest", {std::forward<T>(a)}};
}
2019-03-25 19:31:16 +01:00
// FREST: floating reciprocal estimate. Lowering depends on the configured
// xfloat accuracy mode.
void FREST(spu_opcode_t op)
{
	// TODO
	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
	{
		const auto a = get_vr<f32[4]>(op.ra);
		// Lanes whose magnitude exceeds 0x7e7fffff would overflow the reciprocal
		const auto mask_ov = sext<s32[4]>(bitcast<s32[4]>(fabs(a)) > splat<s32[4]>(0x7e7fffff));
		// Lanes equal to zero: produce a saturated (shifted all-ones) pattern instead
		const auto mask_de = eval(noncast<u32[4]>(sext<s32[4]>(fcmp_ord(a == fsplat<f32[4]>(0.)))) >> 1);
		set_vr(op.rt, (bitcast<s32[4]>(fsplat<f32[4]>(1.0) / a) & ~mask_ov) | noncast<s32[4]>(mask_de));
		return;
	}

	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
	{
		register_intrinsic("spu_frest", [&](llvm::CallInst* ci)
		{
			const auto a = value<f32[4]>(ci->getOperand(0));
			// Gives accuracy penalty, frest result is within one newton-raphson iteration for accuracy
			const auto approx_result = fsplat<f32[4]>(0.999875069f) / a;
			// Zeroes the last 11 bits of the mantissa so FI calculations end up correct if needed
			return bitcast<f32[4]>(bitcast<u32[4]>(approx_result) & splat<u32[4]>(0xFFFFF800));
		});
	}
	else
	{
		register_intrinsic("spu_frest", [&](llvm::CallInst* ci)
		{
			const auto a = value<f32[4]>(ci->getOperand(0));
			// Fast but this makes the result vary per cpu
			return fre(a);
		});
	}

	set_vr(op.rt, frest(get_vr<f32[4]>(op.ra)));
}
// Build a deferred call to the "spu_frsqest" intrinsic (lowered in FRSQEST below).
template <typename T>
static llvm_calli<f32[4], T> frsqest(T&& a)
{
	return {"spu_frsqest", {std::forward<T>(a)}};
}
2019-03-25 19:31:16 +01:00
// FRSQEST: floating reciprocal square root estimate (of |ra|). Lowering
// depends on the configured xfloat accuracy mode.
void FRSQEST(spu_opcode_t op)
{
	// TODO
	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
	{
		// Full-precision 1/sqrt(|a|) computed in doubles
		set_vr(op.rt, fsplat<f64[4]>(1.0) / fsqrt(fabs(get_vr<f64[4]>(op.ra))));
		return;
	}

	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
	{
		register_intrinsic("spu_frsqest", [&](llvm::CallInst* ci)
		{
			const auto a = value<f32[4]>(ci->getOperand(0));
			// Gives accuracy penalty, frsqest result is within one newton-raphson iteration for accuracy
			const auto approx_result = fsplat<f32[4]>(0.999763668f) / fsqrt(fabs(a));
			// Zeroes the last 11 bits of the mantissa so FI calculations end up correct if needed
			return bitcast<f32[4]>(bitcast<u32[4]>(approx_result) & splat<u32[4]>(0xFFFFF800));
		});
	}
	else
	{
		register_intrinsic("spu_frsqest", [&](llvm::CallInst* ci)
		{
			const auto a = value<f32[4]>(ci->getOperand(0));
			// Fast but this makes the result vary per cpu
			return frsqe(fabs(a));
		});
	}

	set_vr(op.rt, frsqest(get_vr<f32[4]>(op.ra)));
}
// Build a deferred call to the "spu_fcgt" intrinsic (lowered in FCGT below).
template <typename T, typename U>
static llvm_calli<s32[4], T, U> fcgt(T&& a, U&& b)
{
	return {"spu_fcgt", {std::forward<T>(a), std::forward<U>(b)}};
}
2019-03-25 19:31:16 +01:00
// FCGT: floating compare greater than, producing an all-ones/all-zero mask
// per word lane.
void FCGT(spu_opcode_t op)
{
	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
	{
		// Accurate mode: compare in double precision
		set_vr(op.rt, sext<s32[4]>(fcmp_ord(get_vr<f64[4]>(op.ra) > get_vr<f64[4]>(op.rb))));
		return;
	}

	register_intrinsic("spu_fcgt", [&](llvm::CallInst* ci)
	{
		const auto a = value<f32[4]>(ci->getOperand(0));
		const auto b = value<f32[4]>(ci->getOperand(1));

		const value_t<f32[4]> ab[2]{a, b};

		// Per-operand flags: whether a signed-integer compare of the raw bits is
		// safe, and whether the operand is known to contain no zero/denormal lanes
		std::bitset<2> safe_int_compare(0);
		std::bitset<2> safe_nonzero_compare(0);

		for (u32 i = 0; i < 2; i++)
		{
			if (auto [ok, data] = get_const_vector(ab[i].value, m_pos, __LINE__ + i); ok)
			{
				safe_int_compare.set(i);
				safe_nonzero_compare.set(i);

				for (u32 j = 0; j < 4; j++)
				{
					const u32 value = data._u32[j];
					const u8 exponent = static_cast<u8>(value >> 23);

					if (value >= 0x7f7fffffu || !exponent)
					{
						// Positive or negative zero, Denormal (treated as zero), Negative constant, or Normalized number with exponent +127
						// Cannot use signed integer compare safely
						// Note: Technically this optimization is accurate for any positive value, but due to the fact that
						// we don't produce "extended range" values the same way as real hardware, it's not safe to apply
						// this optimization for values outside of the range of x86 floating point hardware.
						safe_int_compare.reset(i);
						if (!exponent) safe_nonzero_compare.reset(i);
					}
				}
			}
		}

		if (safe_int_compare.any())
		{
			// Raw signed-integer compare is equivalent for the proven-safe constants
			return eval(sext<s32[4]>(bitcast<s32[4]>(a) > bitcast<s32[4]>(b)));
		}

		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate || g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::relaxed)
		{
			const auto ai = eval(bitcast<s32[4]>(a));
			const auto bi = eval(bitcast<s32[4]>(b));

			if (!safe_nonzero_compare.any())
			{
				// Emulate via integer ordering, with an inequality check to fold +0/-0
				return eval(sext<s32[4]>(fcmp_uno(a != b) & select((ai & bi) >= 0, ai > bi, ai < bi)));
			}
			else
			{
				return eval(sext<s32[4]>(select((ai & bi) >= 0, ai > bi, ai < bi)));
			}
		}
		else
		{
			// Plain ordered float compare
			return eval(sext<s32[4]>(fcmp_ord(a > b)));
		}
	});

	set_vr(op.rt, fcgt(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb)));
}
// Build a deferred call to the "spu_fcmgt" intrinsic (lowered in FCMGT below).
template <typename T, typename U>
static llvm_calli<s32[4], T, U> fcmgt(T&& a, U&& b)
{
	return {"spu_fcmgt", {std::forward<T>(a), std::forward<U>(b)}};
}
2019-03-25 19:31:16 +01:00
// FCMGT: floating compare magnitude greater than (|ra| > |rb|), producing an
// all-ones/all-zero mask per word lane.
void FCMGT(spu_opcode_t op)
{
	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
	{
		// Accurate mode: compare absolute values in double precision
		set_vr(op.rt, sext<s32[4]>(fcmp_ord(fabs(get_vr<f64[4]>(op.ra)) > fabs(get_vr<f64[4]>(op.rb)))));
		return;
	}

	register_intrinsic("spu_fcmgt", [&](llvm::CallInst* ci)
	{
		const auto a = value<f32[4]>(ci->getOperand(0));
		const auto b = value<f32[4]>(ci->getOperand(1));

		const value_t<f32[4]> ab[2]{a, b};

		// Whether a plain integer compare of the magnitudes is safe (constant operand
		// with no zero/denormal/huge-exponent lanes; see the note in FCGT)
		std::bitset<2> safe_int_compare(0);

		for (u32 i = 0; i < 2; i++)
		{
			if (auto [ok, data] = get_const_vector(ab[i].value, m_pos, __LINE__ + i); ok)
			{
				safe_int_compare.set(i);

				for (u32 j = 0; j < 4; j++)
				{
					const u32 value = data._u32[j];
					const u8 exponent = static_cast<u8>(value >> 23);

					if ((value & 0x7fffffffu) >= 0x7f7fffffu || !exponent)
					{
						// See above
						safe_int_compare.reset(i);
					}
				}
			}
		}

		const auto ma = eval(fabs(a));
		const auto mb = eval(fabs(b));

		const auto mai = eval(bitcast<s32[4]>(ma));
		const auto mbi = eval(bitcast<s32[4]>(mb));

		if (safe_int_compare.any())
		{
			// Magnitudes are non-negative, so signed integer ordering matches float ordering
			return eval(sext<s32[4]>(mai > mbi));
		}

		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
		{
			// Integer ordering gated by an unordered inequality (folds +0/-0 and NaN-ish bit patterns)
			return eval(sext<s32[4]>(fcmp_uno(ma > mb) & (mai > mbi)));
		}
		else
		{
			return eval(sext<s32[4]>(fcmp_ord(ma > mb)));
		}
	});

	set_vr(op.rt, fcmgt(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb)));
}
// Build a deferred call to the "spu_fa" intrinsic (lowered in FA below).
template <typename T, typename U>
static llvm_calli<f32[4], T, U> fa(T&& a, U&& b)
{
	return {"spu_fa", {std::forward<T>(a), std::forward<U>(b)}};
}
2019-03-25 19:31:16 +01:00
void FA(spu_opcode_t op)
{
	// Floating add. Accurate xfloat mode widens to f64 arithmetic.
	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
	{
		set_vr(op.rt, get_vr<f64[4]>(op.ra) + get_vr<f64[4]>(op.rb));
		return;
	}

	// Otherwise lower through the "spu_fa" intrinsic as a plain f32 add.
	register_intrinsic("spu_fa", [&](llvm::CallInst* ci)
	{
		const auto lhs = value<f32[4]>(ci->getOperand(0));
		const auto rhs = value<f32[4]>(ci->getOperand(1));
		return lhs + rhs;
	});

	set_vr(op.rt, fa(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb)));
}
// Build a deferred call to the "spu_fs" intrinsic (lowered in FS below).
template <typename T, typename U>
static llvm_calli<f32[4], T, U> fs(T&& a, U&& b)
{
	return {"spu_fs", {std::forward<T>(a), std::forward<U>(b)}};
}
2019-03-25 19:31:16 +01:00
void FS(spu_opcode_t op)
{
	// Floating subtract. Accurate xfloat mode widens to f64 arithmetic.
	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
	{
		set_vr(op.rt, get_vr<f64[4]>(op.ra) - get_vr<f64[4]>(op.rb));
		return;
	}

	register_intrinsic("spu_fs", [&](llvm::CallInst* ci)
	{
		const auto a = value<f32[4]>(ci->getOperand(0));
		const auto b = value<f32[4]>(ci->getOperand(1));

		if (g_cfg.core.spu_xfloat_accuracy != xfloat_accuracy::approximate)
		{
			return eval(a - b);
		}

		// Approximate mode clamps the subtrahend's magnitude (for #4478)
		const auto bc = clamp_smax(b);
		return eval(a - bc);
	});

	set_vr(op.rt, fs(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb)));
}
// Build a deferred call to the "spu_fm" intrinsic (lowered in FM below).
template <typename T, typename U>
static llvm_calli<f32[4], T, U> fm(T&& a, U&& b)
{
	return {"spu_fm", {std::forward<T>(a), std::forward<U>(b)}};
}
2019-03-25 19:31:16 +01:00
// Floating Multiply: rt = ra * rb, elementwise single-precision.
void FM(spu_opcode_t op)
{
	// Accurate xfloat mode: multiply on the f64[4] representation.
	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
	{
		set_vr(op.rt, get_vr<f64[4]>(op.ra) * get_vr<f64[4]>(op.rb));
		return;
	}

	register_intrinsic("spu_fm", [&](llvm::CallInst* ci)
	{
		const auto a = value<f32[4]>(ci->getOperand(0));
		const auto b = value<f32[4]>(ci->getOperand(1));

		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
		{
			// Squaring the same SSA value: skip the zero-masking below.
			if (a.value == b.value)
			{
				return eval(a * b);
			}

			// Force the product to zero when either operand is zero; fcmp_uno
			// makes the mask also cover NaN operands (unordered compare).
			const auto ma = sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.)));
			const auto mb = sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.)));
			return eval(bitcast<f32[4]>(bitcast<s32[4]>(a * b) & ma & mb));
		}
		else
		{
			return eval(a * b);
		}
	});

	const auto [a, b] = get_vrs<f32[4]>(op.ra, op.rb);

	// Canonicalize ra == rb to fm(a, a) so later pattern matching (e.g. in FMA)
	// sees the squaring form; skipped in interpreter mode.
	if (op.ra == op.rb && !m_interp_magn)
	{
		set_vr(op.rt, fm(a, a));
		return;
	}

	set_vr(op.rt, fm(a, b));
}
// Builds a deferred "spu_fesd" (extend single to double) intrinsic call expression.
template <typename T>
static llvm_calli<f64[2], T> fesd(T&& a)
{
	return {"spu_fesd", {std::forward<T>(a)}};
}
2019-03-25 19:31:16 +01:00
// Floating Extend Single to Double: odd word slots (1, 3) of ra become f64[2].
void FESD(spu_opcode_t op)
{
	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
	{
		// Register already held as f64[4]; pick slots 1 and 3.
		const auto r = zshuffle(get_vr<f64[4]>(op.ra), 1, 3);
		const auto d = bitcast<s64[2]>(r);
		const auto a = eval(d & 0x7fffffffffffffff); // magnitude bits
		const auto s = eval(d & 0x8000000000000000); // sign bit
		// Magnitude exactly 0x47f0... -> signed Inf; above it -> quiet NaN.
		// (0x47f0... is the double image of the extended-float boundary value.)
		const auto i = select(a == 0x47f0000000000000, eval(s | 0x7ff0000000000000), d);
		const auto n = select(a > 0x47f0000000000000, splat<s64[2]>(0x7ff8000000000000), i);
		set_vr(op.rt, bitcast<f64[2]>(n));
		return;
	}

	register_intrinsic("spu_fesd", [&](llvm::CallInst* ci)
	{
		const auto a = value<f32[4]>(ci->getOperand(0));

		// Plain f32 -> f64 conversion of the odd slots.
		return fpcast<f64[2]>(zshuffle(a, 1, 3));
	});

	set_vr(op.rt, fesd(get_vr<f32[4]>(op.ra)));
}
// Builds a deferred "spu_frds" (round double to single) intrinsic call expression.
template <typename T>
static llvm_calli<f32[4], T> frds(T&& a)
{
	return {"spu_frds", {std::forward<T>(a)}};
}
2019-03-25 19:31:16 +01:00
// Floating Round Double to Single: f64[2] -> odd word slots of rt.
void FRDS(spu_opcode_t op)
{
	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
	{
		const auto r = get_vr<f64[2]>(op.ra);
		const auto d = bitcast<s64[2]>(r);
		const auto a = eval(d & 0x7fffffffffffffff); // magnitude bits
		const auto s = eval(d & 0x8000000000000000); // sign bit
		// Clamp magnitudes above the extended-float limit to the signed limit;
		// map NaN magnitudes (> Inf pattern) to the 0x47f8... constant.
		const auto i = select(a > 0x47f0000000000000, eval(s | 0x47f0000000000000), d);
		const auto n = select(a > 0x7ff0000000000000, splat<s64[2]>(0x47f8000000000000), i);
		// Flush magnitudes below 0x3810... to signed zero (denormal range in xfloat).
		const auto z = select(a < 0x3810000000000000, s, n);
		// Spread results into slots 1 and 3 (even slots become zero via zshuffle).
		set_vr(op.rt, zshuffle(bitcast<f64[2]>(z), 2, 0, 3, 1), nullptr, false);
		return;
	}

	register_intrinsic("spu_frds", [&](llvm::CallInst* ci)
	{
		const auto a = value<f64[2]>(ci->getOperand(0));

		return zshuffle(fpcast<f32[2]>(a), 2, 0, 3, 1);
	});

	set_vr(op.rt, frds(get_vr<f64[2]>(op.ra)));
}
// Builds a deferred "spu_fceq" (floating compare equal) intrinsic call expression.
template <typename T, typename U>
static llvm_calli<s32[4], T, U> fceq(T&& a, U&& b)
{
	return {"spu_fceq", {std::forward<T>(a), std::forward<U>(b)}};
}
2019-03-25 19:31:16 +01:00
// Floating Compare Equal: rt = (ra == rb) ? ~0 : 0 per element.
void FCEQ(spu_opcode_t op)
{
	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
	{
		set_vr(op.rt, sext<s32[4]>(fcmp_ord(get_vr<f64[4]>(op.ra) == get_vr<f64[4]>(op.rb))));
		return;
	}

	register_intrinsic("spu_fceq", [&](llvm::CallInst* ci)
	{
		const auto a = value<f32[4]>(ci->getOperand(0));
		const auto b = value<f32[4]>(ci->getOperand(1));

		const value_t<f32[4]> ab[2]{a, b};

		// For constant operands, decide whether a plain float compare or a plain
		// integer (bitwise) compare is exact for every lane.
		std::bitset<2> safe_float_compare(0);
		std::bitset<2> safe_int_compare(0);

		for (u32 i = 0; i < 2; i++)
		{
			if (auto [ok, data] = get_const_vector(ab[i].value, m_pos, __LINE__ + i); ok)
			{
				safe_float_compare.set(i);
				safe_int_compare.set(i);

				for (u32 j = 0; j < 4; j++)
				{
					const u32 value = data._u32[j];
					const u8 exponent = static_cast<u8>(value >> 23);

					// unsafe if nan
					if (exponent == 255)
					{
						safe_float_compare.reset(i);
					}

					// unsafe if denormal or 0
					if (!exponent)
					{
						safe_int_compare.reset(i);
					}
				}
			}
		}

		// One safe constant side is enough: the chosen compare is exact.
		if (safe_float_compare.any())
		{
			return eval(sext<s32[4]>(fcmp_ord(a == b)));
		}

		if (safe_int_compare.any())
		{
			return eval(sext<s32[4]>(bitcast<s32[4]>(a) == bitcast<s32[4]>(b)));
		}

		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
		{
			// Accept either float equality or bitwise equality; the bitwise half
			// covers patterns (NaN payloads, denormals) the float compare rejects.
			return eval(sext<s32[4]>(fcmp_ord(a == b)) | sext<s32[4]>(bitcast<s32[4]>(a) == bitcast<s32[4]>(b)));
		}
		else
		{
			return eval(sext<s32[4]>(fcmp_ord(a == b)));
		}
	});

	set_vr(op.rt, fceq(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb)));
}
// Builds a deferred "spu_fcmeq" (compare equal of magnitudes) intrinsic call expression.
template <typename T, typename U>
static llvm_calli<s32[4], T, U> fcmeq(T&& a, U&& b)
{
	return {"spu_fcmeq", {std::forward<T>(a), std::forward<U>(b)}};
}
2019-03-25 19:31:16 +01:00
// Floating Compare Magnitude Equal: rt = (|ra| == |rb|) ? ~0 : 0 per element.
void FCMEQ(spu_opcode_t op)
{
	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
	{
		set_vr(op.rt, sext<s32[4]>(fcmp_ord(fabs(get_vr<f64[4]>(op.ra)) == fabs(get_vr<f64[4]>(op.rb)))));
		return;
	}

	register_intrinsic("spu_fcmeq", [&](llvm::CallInst* ci)
	{
		const auto a = value<f32[4]>(ci->getOperand(0));
		const auto b = value<f32[4]>(ci->getOperand(1));

		const value_t<f32[4]> ab[2]{a, b};

		// Same constant analysis as FCEQ: find out whether a float compare or a
		// bitwise compare is exact for every lane of a constant operand.
		std::bitset<2> safe_float_compare(0);
		std::bitset<2> safe_int_compare(0);

		for (u32 i = 0; i < 2; i++)
		{
			if (auto [ok, data] = get_const_vector(ab[i].value, m_pos, __LINE__ + i); ok)
			{
				safe_float_compare.set(i);
				safe_int_compare.set(i);

				for (u32 j = 0; j < 4; j++)
				{
					const u32 value = data._u32[j];
					const u8 exponent = static_cast<u8>(value >> 23);

					// unsafe if nan
					if (exponent == 255)
					{
						safe_float_compare.reset(i);
					}

					// unsafe if denormal or 0
					if (!exponent)
					{
						safe_int_compare.reset(i);
					}
				}
			}
		}

		// Compare magnitudes: fabs also clears the sign bit for the bitwise path.
		const auto fa = eval(fabs(a));
		const auto fb = eval(fabs(b));

		if (safe_float_compare.any())
		{
			return eval(sext<s32[4]>(fcmp_ord(fa == fb)));
		}

		if (safe_int_compare.any())
		{
			return eval(sext<s32[4]>(bitcast<s32[4]>(fa) == bitcast<s32[4]>(fb)));
		}

		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
		{
			// Accept either float or bitwise magnitude equality (see FCEQ).
			return eval(sext<s32[4]>(fcmp_ord(fa == fb)) | sext<s32[4]>(bitcast<s32[4]>(fa) == bitcast<s32[4]>(fb)));
		}
		else
		{
			return eval(sext<s32[4]>(fcmp_ord(fa == fb)));
		}
	});

	set_vr(op.rt, fcmeq(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb)));
}
2019-12-20 19:24:57 +01:00
// Emits a * b + c for f32[4] with SPU-friendly behavior: uses native FMA when
// available, otherwise widens to f64 so the single rounding step is preserved.
value_t<f32[4]> fma32x4(value_t<f32[4]> a, value_t<f32[4]> b, value_t<f32[4]> c)
{
	// Optimization: Emit only a floating multiply if the addend is zero
	// This is odd since SPU code could just use the FM instruction, but it seems common enough
	if (auto [ok, data] = get_const_vector(c.value, m_pos); ok)
	{
		if (is_spu_float_zero(data, -1))
		{
			return eval(a * b);
		}

		// Without FMA, adding +0 keeps the sign behavior of the full expression.
		if (!m_use_fma && is_spu_float_zero(data, +1))
		{
			return eval(a * b + fsplat<f32[4]>(0.f));
		}
	}

	// Check whether both multiplicands are constant +0 (first pass) or both
	// constant -0 (second pass); then the product contributes nothing.
	if ([&]()
	{
		if (auto [ok, data] = get_const_vector(a.value, m_pos); ok)
		{
			if (!is_spu_float_zero(data, +1))
			{
				return false;
			}

			if (auto [ok0, data0] = get_const_vector(b.value, m_pos); ok0)
			{
				if (is_spu_float_zero(data0, +1))
				{
					return true;
				}
			}
		}

		if (auto [ok, data] = get_const_vector(a.value, m_pos); ok)
		{
			if (!is_spu_float_zero(data, -1))
			{
				return false;
			}

			if (auto [ok0, data0] = get_const_vector(b.value, m_pos); ok0)
			{
				if (is_spu_float_zero(data0, -1))
				{
					return true;
				}
			}
		}

		return false;
	}())
	{
		// Just return the added value if both a and b is +0 or -0 (+0 and -0 aren't allowed alone)
		return c;
	}

	if (m_use_fma)
	{
		// Native fused multiply-add (single rounding).
		return eval(fmuladd(a, b, c, true));
	}

	// Convert to doubles: f64 holds any f32 product exactly, so multiply+add in
	// f64 followed by one narrowing round matches fused behavior.
	const auto xa = fpcast<f64[4]>(a);
	const auto xb = fpcast<f64[4]>(b);
	const auto xc = fpcast<f64[4]>(c);
	const auto xr = fmuladd(xa, xb, xc, false);
	return eval(fpcast<f32[4]>(xr));
}
// Builds a deferred "spu_fnms" (negative multiply-subtract: c - a*b) intrinsic call.
template <typename T, typename U, typename V>
static llvm_calli<f32[4], T, U, V> fnms(T&& a, U&& b, V&& c)
{
	return {"spu_fnms", {std::forward<T>(a), std::forward<U>(b), std::forward<V>(c)}};
}
2019-03-25 19:31:16 +01:00
// Floating Negative Multiply and Subtract: rt4 = rc - ra * rb.
void FNMS(spu_opcode_t op)
{
	// See FMA.
	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
	{
		const auto [a, b, c] = get_vrs<f64[4]>(op.ra, op.rb, op.rc);
		set_vr(op.rt4, fmuladd(-a, b, c));
		return;
	}

	register_intrinsic("spu_fnms", [&](llvm::CallInst* ci)
	{
		const auto a = value<f32[4]>(ci->getOperand(0));
		const auto b = value<f32[4]>(ci->getOperand(1));
		const auto c = value<f32[4]>(ci->getOperand(2));

		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate || g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::relaxed)
		{
			// Clamp both multiplicands away from extreme magnitudes first.
			return fma32x4(eval(-clamp_smax(a)), clamp_smax(b), c);
		}
		else
		{
			return fma32x4(eval(-a), b, c);
		}
	});

	set_vr(op.rt4, fnms(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb), get_vr<f32[4]>(op.rc)));
}
// Builds a deferred "spu_fma" (multiply-add: a*b + c) intrinsic call expression.
template <typename T, typename U, typename V>
static llvm_calli<f32[4], T, U, V> fma(T&& a, U&& b, V&& c)
{
	return {"spu_fma", {std::forward<T>(a), std::forward<U>(b), std::forward<V>(c)}};
}
2019-03-25 19:31:16 +01:00
// Floating Multiply and Add: rt4 = ra * rb + rc. Also recognizes the guest's
// Newton-Raphson refinement sequences and replaces them with native sqrt/div.
void FMA(spu_opcode_t op)
{
	// Hardware FMA produces the same result as multiple + add on the limited double range (xfloat).
	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
	{
		const auto [a, b, c] = get_vrs<f64[4]>(op.ra, op.rb, op.rc);
		set_vr(op.rt4, fmuladd(a, b, c));
		return;
	}

	register_intrinsic("spu_fma", [&](llvm::CallInst* ci)
	{
		const auto a = value<f32[4]>(ci->getOperand(0));
		const auto b = value<f32[4]>(ci->getOperand(1));
		const auto c = value<f32[4]>(ci->getOperand(2));

		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
		{
			// Zero out each multiplicand when the other is zero; fcmp_uno makes
			// the masks also cover NaN lanes (unordered compare).
			const auto ma = sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.)));
			const auto mb = sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.)));
			const auto ca = bitcast<f32[4]>(bitcast<s32[4]>(a) & mb);
			const auto cb = bitcast<f32[4]>(bitcast<s32[4]>(b) & ma);
			return fma32x4(eval(ca), eval(cb), c);
		}
		else
		{
			return fma32x4(a, b, c);
		}
	});

	const auto [a, b, c] = get_vrs<f32[4]>(op.ra, op.rb, op.rc);
	static const auto MT = match<f32[4]>();

	// Match sqrt
	// Pattern: FRSQEST seed + one FNMS/FM refinement step -> native sqrt(|src|).
	if (auto [ok_fnma, a1, b1] = match_expr(a, fnms(MT, MT, fsplat<f32[4]>(1.00000011920928955078125))); ok_fnma)
	{
		if (auto [ok_fm2, a2] = match_expr(b, fm(MT, fsplat<f32[4]>(0.5))); ok_fm2 && a2.eq(b1))
		{
			if (auto [ok_fm1, a3, b3] = match_expr(c, fm(MT, MT)); ok_fm1 && a3.eq(a1))
			{
				if (auto [ok_sqrte, src] = match_expr(a3, spu_rsqrte(MT)); ok_sqrte && src.eq(b3))
				{
					// Drop the intermediate stores of the matched sequence.
					erase_stores(a, b, c, a3);
					set_vr(op.rt4, fsqrt(fabs(src)));
					return;
				}
			}
		}
	}

	// Match division (fast)
	// Pattern: FREST seed + refinement -> native diva / divb.
	if (auto [ok_fnma, divb, diva] = match_expr(a, fnms(c, MT, MT)); ok_fnma)
	{
		if (auto [ok_fm] = match_expr(c, fm(diva, b)); ok_fm)
		{
			if (auto [ok_re] = match_expr(b, spu_re(divb)); ok_re)
			{
				erase_stores(b, c);
				set_vr(op.rt4, diva / divb);
				return;
			}
		}
	}

	set_vr(op.rt4, fma(a, b, c));
}
// Builds a deferred "spu_fms" (multiply-subtract: a*b - c) intrinsic call expression.
template <typename T, typename U, typename V>
static llvm_calli<f32[4], T, U, V> fms(T&& a, U&& b, V&& c)
{
	return {"spu_fms", {std::forward<T>(a), std::forward<U>(b), std::forward<V>(c)}};
}
2019-03-25 19:31:16 +01:00
// Floating Multiply and Subtract: rt4 = ra * rb - rc.
void FMS(spu_opcode_t op)
{
	// See FMA.
	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
	{
		const auto [a, b, c] = get_vrs<f64[4]>(op.ra, op.rb, op.rc);
		set_vr(op.rt4, fmuladd(a, b, -c));
		return;
	}

	register_intrinsic("spu_fms", [&](llvm::CallInst* ci)
	{
		const auto a = value<f32[4]>(ci->getOperand(0));
		const auto b = value<f32[4]>(ci->getOperand(1));
		const auto c = value<f32[4]>(ci->getOperand(2));

		if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
		{
			// Clamp both multiplicands away from extreme magnitudes first.
			return fma32x4(clamp_smax(a), clamp_smax(b), eval(-c));
		}
		else
		{
			return fma32x4(a, b, eval(-c));
		}
	});

	set_vr(op.rt4, fms(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb), get_vr<f32[4]>(op.rc)));
}
// Builds a deferred "spu_fi" (floating interpolate) intrinsic call expression.
template <typename T, typename U>
static llvm_calli<f32[4], T, U> fi(T&& a, U&& b)
{
	return {"spu_fi", {std::forward<T>(a), std::forward<U>(b)}};
}
2021-09-07 18:42:05 +02:00
// Builds a deferred "spu_re" (fused FREST+FI reciprocal estimate) intrinsic call.
template <typename T>
static llvm_calli<f32[4], T> spu_re(T&& a)
{
	return {"spu_re", {std::forward<T>(a)}};
}
// Builds a deferred "spu_rsqrte" (fused FRSQEST+FI rsqrt estimate) intrinsic call.
template <typename T>
static llvm_calli<f32[4], T> spu_rsqrte(T&& a)
{
	return {"spu_rsqrte", {std::forward<T>(a)}};
}
2019-03-25 19:31:16 +01:00
// Floating Interpolate: refines the FREST/FRSQEST seed in rb using ra.
// FREST/FRSQEST+FI pairs are fused into single reciprocal (sqrt) estimates.
void FI(spu_opcode_t op)
{
	// TODO
	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
	{
		// Accurate mode currently passes rb through unchanged; the commented-out
		// code below is an unfinished bit-exact interpolation implementation.
		set_vr(op.rt, get_vr<f64[4]>(op.rb));
		// const auto [a, b] = get_vrs<f64[4]>(op.ra, op.rb);
		// const auto mask_se = splat<s64[4]>(0xfff0000000000000ull);
		// const auto mask_bf = splat<s64[4]>(0x000fff8000000000ull);
		// const auto mask_sf = splat<s64[4]>(0x0000007fe0000000ull);
		// const auto mask_yf = splat<s64[4]>(0x0000ffffe0000000ull);
		// const auto base = bitcast<f64[4]>((bitcast<s64[4]>(b) & mask_bf) | 0x3ff0000000000000ull);
		// const auto step = fpcast<f64[4]>(bitcast<s64[4]>(b) & mask_sf) * fsplat<f64[4]>(std::exp2(-13.f));
		// const auto yval = fpcast<f64[4]>(bitcast<s64[4]>(a) & mask_yf) * fsplat<f64[4]>(std::exp2(-19.f));
		// set_vr(op.rt, bitcast<f64[4]>((bitcast<s64[4]>(b) & mask_se) | (bitcast<s64[4]>(base - step * yval) & ~mask_se)));
		return;
	}

	register_intrinsic("spu_fi", [&](llvm::CallInst* ci)
	{
		// Fixed-point interpolation on the raw float bit patterns.
		const auto a = bitcast<u32[4]>(value<f32[4]>(ci->getOperand(0)));
		const auto b = bitcast<u32[4]>(value<f32[4]>(ci->getOperand(1)));

		const auto base = (b & 0x007ffc00u) << 9; // Base fraction
		const auto ymul = (b & 0x3ff) * (a & 0x7ffff); // Step fraction * Y fraction (fixed point at 2^-32)
		const auto bnew = bitcast<s32[4]>((base - ymul) >> 9) + (sext<s32[4]>(ymul <= base) & (1 << 23)); // Subtract and correct invisible fraction bit
		return bitcast<f32[4]>((b & 0xff800000u) | (bitcast<u32[4]>(fpcast<f32[4]>(bnew)) & ~0xff800000u)); // Inject old sign and exponent
	});

	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
	{
		register_intrinsic("spu_re", [&](llvm::CallInst* ci)
		{
			const auto a = value<f32[4]>(ci->getOperand(0));

			// Gives accuracy penalty, frest result is within one newton-raphson iteration for accuracy
			const auto approx_result = fsplat<f32[4]>(0.999875069f) / a;
			return approx_result;
		});

		register_intrinsic("spu_rsqrte", [&](llvm::CallInst* ci)
		{
			const auto a = value<f32[4]>(ci->getOperand(0));

			// Gives accuracy penalty, frsqest result is within one newton-raphson iteration for accuracy
			const auto approx_result = fsplat<f32[4]>(0.999763668f) / fsqrt(fabs(a));
			return approx_result;
		});
	}
	else
	{
		// For relaxed use intrinsics, those make the results vary per cpu
		register_intrinsic("spu_re", [&](llvm::CallInst* ci)
		{
			const auto a = value<f32[4]>(ci->getOperand(0));

			return fre(a);
		});

		register_intrinsic("spu_rsqrte", [&](llvm::CallInst* ci)
		{
			const auto a = value<f32[4]>(ci->getOperand(0));

			return frsqe(a);
		});
	}

	const auto [a, b] = get_vrs<f32[4]>(op.ra, op.rb);

	// Fuse FREST(ra) followed by FI(ra, seed) into a single reciprocal estimate.
	if (const auto [ok, mb] = match_expr(b, frest(match<f32[4]>())); ok && mb.eq(a))
	{
		erase_stores(b);
		set_vr(op.rt, spu_re(a));
		return;
	}

	// Fuse FRSQEST(ra) followed by FI(ra, seed) likewise.
	if (const auto [ok, mb] = match_expr(b, frsqest(match<f32[4]>())); ok && mb.eq(a))
	{
		erase_stores(b);
		set_vr(op.rt, spu_rsqrte(a));
		return;
	}

	// No recognizable estimate pair: fall back to the raw interpolation.
	const auto r = eval(fi(a, b));

	if (!m_interp_magn)
		spu_log.todo("[%s:0x%05x] Unmatched spu_fi found", m_hash, m_pos);

	set_vr(op.rt, r);
}
2019-03-25 19:31:16 +01:00
// Convert Floating to Signed Integer with saturation; op.i8 encodes the scale
// exponent (result is ra * 2^(173 - i8) truncated to s32, saturating).
void CFLTS(spu_opcode_t op)
{
	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
	{
		value_t<f64[4]> a = get_vr<f64[4]>(op.ra);
		value_t<f64[4]> s;

		if (m_interp_magn)
			s = eval(vsplat<f64[4]>(bitcast<f64>(((1023 + 173) - get_imm<u64>(op.i8)) << 52)));
		else
			s = eval(fsplat<f64[4]>(std::exp2(static_cast<int>(173 - op.i8))));

		// Scale is exactly 1 when i8 == 173; skip the multiply then.
		if (op.i8 != 173 || m_interp_magn)
			a = eval(a * s);

		value_t<s32[4]> r;

		// Constant input: fold the saturating conversion at compile time.
		if (auto ca = llvm::dyn_cast<llvm::ConstantDataVector>(a.value))
		{
			const f64 data[4]
			{
				ca->getElementAsDouble(0),
				ca->getElementAsDouble(1),
				ca->getElementAsDouble(2),
				ca->getElementAsDouble(3)
			};

			v128 result;

			for (u32 i = 0; i < 4; i++)
			{
				if (data[i] >= std::exp2(31.f))
				{
					result._s32[i] = smax;
				}
				else if (data[i] < -std::exp2(31.f))
				{
					// Fixed: the lower saturation bound is -2^31. The previous
					// check used std::exp2(-31.f) (i.e. +2^-31), which wrongly
					// sent 0 and every in-range negative lane to INT32_MIN.
					result._s32[i] = smin;
				}
				else
				{
					result._s32[i] = static_cast<s32>(data[i]);
				}
			}

			r.value = make_const_vector(result, get_type<s32[4]>());
			set_vr(op.rt, r);
			return;
		}

		if (llvm::isa<llvm::ConstantAggregateZero>(a.value))
		{
			set_vr(op.rt, splat<u32[4]>(0));
			return;
		}

		r.value = m_ir->CreateFPToSI(a.value, get_type<s32[4]>());
		// Correct positive overflow: lanes >= 2^31 get their bits flipped so the
		// result saturates to INT32_MAX (ordered compare excludes NaN).
		set_vr(op.rt, r ^ sext<s32[4]>(fcmp_ord(a >= fsplat<f64[4]>(std::exp2(31.f)))));
	}
	else
	{
		value_t<f32[4]> a = get_vr<f32[4]>(op.ra);
		value_t<f32[4]> s;

		if (m_interp_magn)
			s = eval(vsplat<f32[4]>(load_const<f32>(m_scale_float_to, get_imm<u8>(op.i8))));
		else
			s = eval(fsplat<f32[4]>(std::exp2(static_cast<float>(static_cast<s16>(173 - op.i8)))));

		if (op.i8 != 173 || m_interp_magn)
			a = eval(a * s);

		value_t<s32[4]> r;
		r.value = m_ir->CreateFPToSI(a.value, get_type<s32[4]>());
		// Integer compare of the float bit pattern detects lanes >= 2^31; XOR
		// flips those to the saturated positive result.
		set_vr(op.rt, r ^ sext<s32[4]>(bitcast<s32[4]>(a) > splat<s32[4]>(((31 + 127) << 23) - 1)));
	}
}
2019-03-25 19:31:16 +01:00
// Convert Floating to Unsigned Integer with saturation; op.i8 encodes the scale
// exponent (result is ra * 2^(173 - i8) truncated to u32, saturating).
void CFLTU(spu_opcode_t op)
{
	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
	{
		value_t<f64[4]> a = get_vr<f64[4]>(op.ra);
		value_t<f64[4]> s;

		if (m_interp_magn)
			s = eval(vsplat<f64[4]>(bitcast<f64>(((1023 + 173) - get_imm<u64>(op.i8)) << 52)));
		else
			s = eval(fsplat<f64[4]>(std::exp2(static_cast<int>(173 - op.i8))));

		// Scale is exactly 1 when i8 == 173; skip the multiply then.
		if (op.i8 != 173 || m_interp_magn)
			a = eval(a * s);

		value_t<s32[4]> r;

		// Constant input: fold the saturating conversion at compile time.
		if (auto ca = llvm::dyn_cast<llvm::ConstantDataVector>(a.value))
		{
			const f64 data[4]
			{
				ca->getElementAsDouble(0),
				ca->getElementAsDouble(1),
				ca->getElementAsDouble(2),
				ca->getElementAsDouble(3)
			};

			v128 result;

			for (u32 i = 0; i < 4; i++)
			{
				if (data[i] >= std::exp2(32.f))
				{
					result._u32[i] = umax;
				}
				else if (data[i] < 0.)
				{
					result._u32[i] = 0;
				}
				else
				{
					result._u32[i] = static_cast<u32>(data[i]);
				}
			}

			r.value = make_const_vector(result, get_type<s32[4]>());
			set_vr(op.rt, r);
			return;
		}

		if (llvm::isa<llvm::ConstantAggregateZero>(a.value))
		{
			set_vr(op.rt, splat<u32[4]>(0));
			return;
		}

		r.value = m_ir->CreateFPToUI(a.value, get_type<s32[4]>());
		// Saturate: lanes >= 2^32 -> all-ones; negative/NaN lanes masked to 0.
		set_vr(op.rt, select(fcmp_ord(a >= fsplat<f64[4]>(std::exp2(32.f))), splat<s32[4]>(-1), r & sext<s32[4]>(fcmp_ord(a >= fsplat<f64[4]>(0.)))));
	}
	else
	{
		value_t<f32[4]> a = get_vr<f32[4]>(op.ra);
		value_t<f32[4]> s;

		if (m_interp_magn)
			s = eval(vsplat<f32[4]>(load_const<f32>(m_scale_float_to, get_imm<u8>(op.i8))));
		else
			s = eval(fsplat<f32[4]>(std::exp2(static_cast<float>(static_cast<s16>(173 - op.i8)))));

		if (op.i8 != 173 || m_interp_magn)
			a = eval(a * s);

		value_t<s32[4]> r;

		if (m_use_avx512)
		{
			// AVX-512 path: clamp negatives to zero via signed integer max on the
			// bit pattern, then rely on the hardware conversion's saturation.
			const auto sc = eval(bitcast<f32[4]>(max(bitcast<s32[4]>(a), splat<s32[4]>(0x0))));
			r.value = m_ir->CreateFPToUI(sc.value, get_type<s32[4]>());
			set_vr(op.rt, r);
			return;
		}

		r.value = m_ir->CreateFPToUI(a.value, get_type<s32[4]>());
		// Saturate via the bit pattern: >= 2^32 -> all-ones; negative sign -> 0.
		set_vr(op.rt, select(bitcast<s32[4]>(a) > splat<s32[4]>(((32 + 127) << 23) - 1), splat<s32[4]>(-1), r & ~(bitcast<s32[4]>(a) >> 31)));
	}
}
2019-03-25 19:31:16 +01:00
// Convert Signed Integer to Floating; op.i8 encodes the scale exponent
// (result is s32 value * 2^(i8 - 155)).
void CSFLT(spu_opcode_t op)
{
	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
	{
		value_t<s32[4]> a = get_vr<s32[4]>(op.ra);
		value_t<f64[4]> r;

		// Constant input: build the f64 vector directly.
		if (auto [ok, data] = get_const_vector(a.value, m_pos); ok)
		{
			r.value = build<f64[4]>(data._s32[0], data._s32[1], data._s32[2], data._s32[3]).eval(m_ir);
		}
		else
		{
			r.value = m_ir->CreateSIToFP(a.value, get_type<f64[4]>());
		}

		value_t<f64[4]> s;

		if (m_interp_magn)
			s = eval(vsplat<f64[4]>(bitcast<f64>((get_imm<u64>(op.i8) + (1023 - 155)) << 52)));
		else
			s = eval(fsplat<f64[4]>(std::exp2(static_cast<int>(op.i8 - 155))));

		// Scale is exactly 1 when i8 == 155; skip the multiply then.
		if (op.i8 != 155 || m_interp_magn)
			r = eval(r * s);

		set_vr(op.rt, r);
	}
	else
	{
		value_t<f32[4]> r;
		r.value = m_ir->CreateSIToFP(get_vr<s32[4]>(op.ra).value, get_type<f32[4]>());
		value_t<f32[4]> s;

		if (m_interp_magn)
			s = eval(vsplat<f32[4]>(load_const<f32>(m_scale_to_float, get_imm<u8>(op.i8))));
		else
			s = eval(fsplat<f32[4]>(std::exp2(static_cast<float>(static_cast<s16>(op.i8 - 155)))));

		if (op.i8 != 155 || m_interp_magn)
			r = eval(r * s);

		set_vr(op.rt, r);
	}
}
2019-03-25 19:31:16 +01:00
// Convert Unsigned Integer to Floating; op.i8 encodes the scale exponent
// (result is u32 value * 2^(i8 - 155)).
void CUFLT(spu_opcode_t op)
{
	if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
	{
		value_t<s32[4]> a = get_vr<s32[4]>(op.ra);
		value_t<f64[4]> r;

		// Constant input: build the f64 vector directly (note _u32 lanes).
		if (auto [ok, data] = get_const_vector(a.value, m_pos); ok)
		{
			r.value = build<f64[4]>(data._u32[0], data._u32[1], data._u32[2], data._u32[3]).eval(m_ir);
		}
		else
		{
			r.value = m_ir->CreateUIToFP(a.value, get_type<f64[4]>());
		}

		value_t<f64[4]> s;

		if (m_interp_magn)
			s = eval(vsplat<f64[4]>(bitcast<f64>((get_imm<u64>(op.i8) + (1023 - 155)) << 52)));
		else
			s = eval(fsplat<f64[4]>(std::exp2(static_cast<int>(op.i8 - 155))));

		// Scale is exactly 1 when i8 == 155; skip the multiply then.
		if (op.i8 != 155 || m_interp_magn)
			r = eval(r * s);

		set_vr(op.rt, r);
	}
	else
	{
		value_t<f32[4]> r;
		r.value = m_ir->CreateUIToFP(get_vr<s32[4]>(op.ra).value, get_type<f32[4]>());
		value_t<f32[4]> s;

		if (m_interp_magn)
			s = eval(vsplat<f32[4]>(load_const<f32>(m_scale_to_float, get_imm<u8>(op.i8))));
		else
			s = eval(fsplat<f32[4]>(std::exp2(static_cast<float>(static_cast<s16>(op.i8 - 155)))));

		if (op.i8 != 155 || m_interp_magn)
			r = eval(r * s);

		set_vr(op.rt, r);
	}
}
2019-04-24 15:05:29 +02:00
// Store a 16-byte register value to SPU local storage at the given byte offset.
void make_store_ls(value_t<u64> addr, value_t<u8[16]> data)
{
	// Local storage is big-endian, so swap bytes before storing.
	const auto ls_ptr = m_ir->CreateGEP(get_type<u8>(), m_lsptr, addr.value);
	m_ir->CreateStore(byteswap(data).eval(m_ir), ls_ptr);
}
// Load a 16-byte value from SPU local storage at the given byte offset.
auto make_load_ls(value_t<u64> addr)
{
	// Local storage is big-endian, so swap bytes after loading.
	const auto ls_ptr = m_ir->CreateGEP(get_type<u8>(), m_lsptr, addr.value);
	value_t<u8[16]> data;
	data.value = m_ir->CreateLoad(get_type<u8[16]>(), ls_ptr);
	return byteswap(data);
}
2019-03-25 19:31:16 +01:00
// STore Quadword (X-form): store rt to LS at (ra + rb) & 0x3fff0.
void STQX(spu_opcode_t op)
{
	const auto a = get_vr(op.ra);
	const auto b = get_vr(op.rb);

	// If either operand is a compile-time constant (try both orders), fold it
	// into the address so only the variable half needs masking at runtime.
	for (auto pair : std::initializer_list<std::pair<value_t<u32[4]>, value_t<u32[4]>>>{{a, b}, {b, a}})
	{
		if (auto [ok, data] = get_const_vector(pair.first.value, m_pos); ok)
		{
			data._u32[3] %= SPU_LS_SIZE;

			// Only safe when the constant part is 16-byte aligned; otherwise the
			// sum's alignment depends on the variable part and we fall through.
			if (data._u32[3] % 0x10 == 0)
			{
				value_t<u64> addr = eval(splat<u64>(data._u32[3]) + zext<u64>(extract(pair.second, 3) & 0x3fff0));
				make_store_ls(addr, get_vr<u8[16]>(op.rt));
				return;
			}
		}
	}

	// Generic path: add preferred slots, then mask to an aligned LS offset.
	value_t<u64> addr = eval(zext<u64>((extract(a, 3) + extract(b, 3)) & 0x3fff0));
	make_store_ls(addr, get_vr<u8[16]>(op.rt));
}
2019-03-25 19:31:16 +01:00
// Load Quadword (X-form): load rt from LS at (ra + rb) & 0x3fff0.
void LQX(spu_opcode_t op)
{
	const auto a = get_vr(op.ra);
	const auto b = get_vr(op.rb);

	// If either operand is a compile-time constant (try both orders), fold it
	// into the address so only the variable half needs masking at runtime.
	for (auto pair : std::initializer_list<std::pair<value_t<u32[4]>, value_t<u32[4]>>>{{a, b}, {b, a}})
	{
		if (auto [ok, data] = get_const_vector(pair.first.value, m_pos); ok)
		{
			data._u32[3] %= SPU_LS_SIZE;

			// Only safe when the constant part is 16-byte aligned.
			if (data._u32[3] % 0x10 == 0)
			{
				value_t<u64> addr = eval(splat<u64>(data._u32[3]) + zext<u64>(extract(pair.second, 3) & 0x3fff0));
				set_vr(op.rt, make_load_ls(addr));
				return;
			}
		}
	}

	// Generic path: add preferred slots, then mask to an aligned LS offset.
	value_t<u64> addr = eval(zext<u64>((extract(a, 3) + extract(b, 3)) & 0x3fff0));
	set_vr(op.rt, make_load_ls(addr));
}
2019-03-25 19:31:16 +01:00
// STore Quadword Absolute: store rt to the LS address encoded in the i16 field.
void STQA(spu_opcode_t op)
{
	// Absolute target = (i16 << 2), masked to a 16-byte-aligned LS offset.
	value_t<u64> addr = eval((get_imm<u64>(op.i16, false) << 2) & 0x3fff0);
	const auto value = get_vr<u8[16]>(op.rt);
	make_store_ls(addr, value);
}
2019-03-25 19:31:16 +01:00
// Load Quadword Absolute: load rt from the LS address encoded in the i16 field.
void LQA(spu_opcode_t op)
{
	// Absolute target = (i16 << 2), masked to a 16-byte-aligned LS offset.
	value_t<u64> addr = eval((get_imm<u64>(op.i16, false) << 2) & 0x3fff0);
	const auto loaded = make_load_ls(addr);
	set_vr(op.rt, loaded);
}
2022-06-03 03:37:40 +02:00
// Compute the 64-bit program counter value for the instruction at addr,
// relative to the block's base PC.
llvm::Value* get_pc_as_u64(u32 addr)
{
	const auto base = m_ir->CreateZExt(m_base_pc, get_type<u64>());
	return m_ir->CreateAdd(base, m_ir->getInt64(addr - m_base));
}
2018-05-02 20:49:19 +02:00
// STore Quadword pc-Relative: store rt to LS at pc + (i16 << 2), 16-byte aligned.
void STQR(spu_opcode_t op) //
{
	value_t<u64> addr;
	// Base is the runtime PC in interpreter mode, the static PC otherwise.
	addr.value = m_interp_magn ? m_ir->CreateZExt(m_interp_pc, get_type<u64>()) : get_pc_as_u64(m_pos);
	// Interpreter must wrap into LS bounds; compiled code only needs alignment,
	// since the static PC plus offset is already range-checked elsewhere
	// (NOTE(review): presumably — confirm against the dispatcher).
	addr = eval(((get_imm<u64>(op.i16, false) << 2) + addr) & (m_interp_magn ? 0x3fff0 : ~0xf));
	make_store_ls(addr, get_vr<u8[16]>(op.rt));
}
// Load Quadword pc-Relative: load rt from LS at pc + (i16 << 2), 16-byte aligned.
void LQR(spu_opcode_t op) //
{
	value_t<u64> addr;
	// Base is the runtime PC in interpreter mode, the static PC otherwise.
	addr.value = m_interp_magn ? m_ir->CreateZExt(m_interp_pc, get_type<u64>()) : get_pc_as_u64(m_pos);
	// Interpreter must wrap into LS bounds; compiled code only masks to alignment.
	addr = eval(((get_imm<u64>(op.i16, false) << 2) + addr) & (m_interp_magn ? 0x3fff0 : ~0xf));
	set_vr(op.rt, make_load_ls(addr));
}
2019-03-25 19:31:16 +01:00
// STore Quadword D-form: store rt to LS at (ra & 0x3fff0) + (si10 << 4).
void STQD(spu_opcode_t op)
{
	if (m_finfo && m_finfo->fn)
	{
		// Register-save elision inside true functions: if rt is a callee-saved
		// register (sp..s_reg_sp, or 80..127) ...
		if (op.rt <= s_reg_sp || (op.rt >= s_reg_80 && op.rt <= s_reg_127))
		{
			// ... and this store dominates all uses while the register still holds
			// the value loaded at function entry, the save is redundant — skip it.
			if (m_block->bb->reg_save_dom[op.rt] && get_reg_raw(op.rt) == m_finfo->load[op.rt])
			{
				return;
			}
		}
	}

	value_t<u64> addr = eval(zext<u64>(extract(get_vr(op.ra), 3) & 0x3fff0) + (get_imm<u64>(op.si10) << 4));
	make_store_ls(addr, get_vr<u8[16]>(op.rt));
}
2019-03-25 19:31:16 +01:00
// Load Quadword D-form: load rt from LS at (ra & 0x3fff0) + (si10 << 4).
void LQD(spu_opcode_t op)
{
	// Scaled displacement plus the aligned base from ra's preferred slot.
	value_t<u64> addr = eval((get_imm<u64>(op.si10) << 4) + zext<u64>(extract(get_vr(op.ra), 3) & 0x3fff0));
	set_vr(op.rt, make_load_ls(addr));
}
2018-07-05 17:41:04 +02:00
// Emit a conditional halt: if cond is true, record the PC and trap by writing
// to an unmapped-style guard address; otherwise fall through.
void make_halt(value_t<bool> cond)
{
	const auto next = llvm::BasicBlock::Create(m_context, "", m_function);
	const auto halt = llvm::BasicBlock::Create(m_context, "", m_function);
	// Halting is the exceptional case; hint the branch as unlikely.
	m_ir->CreateCondBr(cond.value, halt, next, m_md_unlikely);
	m_ir->SetInsertPoint(halt);

	if (m_interp_magn)
		// Interpreter: the current PC is passed as the third function argument.
		m_ir->CreateStore(m_function->getArg(2), spu_ptr<u32>(&spu_thread::pc));
	else
		update_pc();

	// Store the "HALT" tag through the memory base at a reserved offset;
	// the resulting access fault is how the halt escapes to the handler.
	const auto ptr = _ptr<u32>(m_memptr, 0xffdead00);
	m_ir->CreateStore(m_ir->getInt32("HALT"_u32), ptr);
	m_ir->CreateBr(next);
	m_ir->SetInsertPoint(next);
}
2019-03-25 19:31:16 +01:00
// Halt if Greater Than (signed compare of the preferred slots of ra and rb).
void HGT(spu_opcode_t op)
{
	const auto lhs = extract(get_vr<s32[4]>(op.ra), 3);
	const auto rhs = extract(get_vr<s32[4]>(op.rb), 3);
	make_halt(eval(lhs > rhs));
}
2019-03-25 19:31:16 +01:00
// Halt if EQual (compare of the preferred slots of ra and rb).
void HEQ(spu_opcode_t op)
{
	const auto lhs = extract(get_vr(op.ra), 3);
	const auto rhs = extract(get_vr(op.rb), 3);
	make_halt(eval(lhs == rhs));
}
2019-03-25 19:31:16 +01:00
// Halt if Logically Greater Than (unsigned compare of the preferred slots).
void HLGT(spu_opcode_t op)
{
	const auto lhs = extract(get_vr(op.ra), 3);
	const auto rhs = extract(get_vr(op.rb), 3);
	make_halt(eval(lhs > rhs));
}
2019-03-25 19:31:16 +01:00
// Halt if Greater Than Immediate (signed compare of ra's preferred slot with si10).
void HGTI(spu_opcode_t op)
{
	const auto lhs = extract(get_vr<s32[4]>(op.ra), 3);
	make_halt(eval(lhs > get_imm<s32>(op.si10)));
}
2019-03-25 19:31:16 +01:00
// Halt if EQual Immediate (compare of ra's preferred slot with si10).
void HEQI(spu_opcode_t op)
{
	const auto lhs = extract(get_vr(op.ra), 3);
	make_halt(eval(lhs == get_imm<u32>(op.si10)));
}
2019-03-25 19:31:16 +01:00
// Halt if Logically Greater Than Immediate (unsigned compare with si10).
void HLGTI(spu_opcode_t op)
{
	const auto lhs = extract(get_vr(op.ra), 3);
	make_halt(eval(lhs > get_imm<u32>(op.si10)));
}
2021-03-05 20:05:37 +01:00
// Hint for BRanch (register form). Branch hints affect only instruction timing
// on real hardware, so no code is generated here.
void HBR([[maybe_unused]] spu_opcode_t op) //
{
	// TODO: use the hint.
}
2021-03-05 20:05:37 +01:00
// Hint for BRanch (Absolute form). No code generated; hints affect only timing.
void HBRA([[maybe_unused]] spu_opcode_t op) //
{
	// TODO: use the hint.
}
2021-03-05 20:05:37 +01:00
// Hint for BRanch (Relative form). No code generated; hints affect only timing.
void HBRR([[maybe_unused]] spu_opcode_t op) //
{
	// TODO: use the hint.
}
// TODO
2018-10-11 00:17:19 +02:00
// Runtime helper: check for pending SPU interrupts at a branch with the 'e' bit.
// Returns the address to continue at — the interrupt entry point (possibly
// redirected through a BR/BRA at address 0) when an interrupt is taken,
// otherwise the original branch target.
static u32 exec_check_interrupts(spu_thread* _spu, u32 addr)
{
	_spu->set_interrupt_status(true);

	if (!_spu->ch_events.load().count)
	{
		// No pending events: continue at the requested target.
		return addr;
	}

	// Take the interrupt: disable further interrupts and save the return address.
	_spu->interrupts_enabled = false;
	_spu->srr0 = addr;

	// Test for BR/BRA instructions (they are equivalent at zero pc)
	const u32 inst = _spu->_ref<const u32>(0);

	if ((inst & 0xfd80007f) == 0x30000000)
	{
		// Follow the branch at address 0 directly.
		return (inst >> 5) & 0x3fffc;
	}

	return 0;
}
2018-06-10 14:46:01 +02:00
// Create (out of line) a basic block implementing an indirect branch to addr,
// honoring the opcode's interrupt-enable ('e') and interrupt-disable ('d') bits.
// 'ret' indicates the branch may be a function return (enables the stack-mirror
// fast path). Returns the created block; insertion point is restored on exit.
llvm::BasicBlock* add_block_indirect(spu_opcode_t op, value_t<u32> addr, bool ret = true)
{
	if (m_interp_magn)
	{
		// Interpreter mode: build a small diamond that conditionally runs the
		// interrupt check (e bit) and interrupt disable (d bit), merging the
		// possibly-redirected target through a PHI into m_interp_pc.
		m_interp_bblock = llvm::BasicBlock::Create(m_context, "", m_function);
		const auto cblock = m_ir->GetInsertBlock();
		const auto result = llvm::BasicBlock::Create(m_context, "", m_function);
		const auto e_exec = llvm::BasicBlock::Create(m_context, "", m_function);
		const auto d_test = llvm::BasicBlock::Create(m_context, "", m_function);
		const auto d_exec = llvm::BasicBlock::Create(m_context, "", m_function);
		const auto d_done = llvm::BasicBlock::Create(m_context, "", m_function);
		m_ir->SetInsertPoint(result);
		m_ir->CreateCondBr(get_imm<bool>(op.e).value, e_exec, d_test, m_md_unlikely);
		m_ir->SetInsertPoint(e_exec);
		const auto e_addr = call("spu_check_interrupts", &exec_check_interrupts, m_thread, addr.value);
		m_ir->CreateBr(d_test);
		m_ir->SetInsertPoint(d_test);
		// Target is either the original addr or the interrupt-redirected one.
		const auto target = m_ir->CreatePHI(get_type<u32>(), 2);
		target->addIncoming(addr.value, result);
		target->addIncoming(e_addr, e_exec);
		m_ir->CreateCondBr(get_imm<bool>(op.d).value, d_exec, d_done, m_md_unlikely);
		m_ir->SetInsertPoint(d_exec);
		m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled));
		m_ir->CreateBr(d_done);
		m_ir->SetInsertPoint(d_done);
		m_ir->CreateBr(m_interp_bblock);
		m_ir->SetInsertPoint(cblock);
		m_interp_pc = target;
		return result;
	}

	if (llvm::isa<llvm::Constant>(addr.value))
	{
		// Fixed branch excludes the possibility it's a function return (TODO)
		ret = false;
	}

	if (m_finfo && m_finfo->fn && op.opcode)
	{
		// Inside a true function, any indirect branch terminates the function.
		const auto cblock = m_ir->GetInsertBlock();
		const auto result = llvm::BasicBlock::Create(m_context, "", m_function);
		m_ir->SetInsertPoint(result);
		ret_function();
		m_ir->SetInsertPoint(cblock);
		return result;
	}

	// Load stack addr if necessary
	value_t<u32> sp;

	if (ret && g_cfg.core.spu_block_size != spu_block_size_type::safe)
	{
		if (op.opcode)
		{
			sp = eval(extract(get_reg_fixed(1), 3) & 0x3fff0);
		}
		else
		{
			// No opcode context (e.g. synthesized branch): read $SP from memory.
			sp.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::gpr, 1, &v128::_u32, 3));
		}
	}

	const auto cblock = m_ir->GetInsertBlock();
	const auto result = llvm::BasicBlock::Create(m_context, "", m_function);
	m_ir->SetInsertPoint(result);

	if (op.e)
	{
		// Interrupt check may redirect the branch target.
		addr.value = call("spu_check_interrupts", &exec_check_interrupts, m_thread, addr.value);
	}

	if (op.d)
	{
		m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled));
	}

	m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc));

	if (ret && g_cfg.core.spu_block_size >= spu_block_size_type::mega)
	{
		// Fast-return path. Compare address stored in stack mirror with addr;
		// also verify the first instruction at the return site still matches
		// what was recorded, to guard against self-modifying code.
		const auto stack0 = eval(zext<u64>(sp) + ::offset32(&spu_thread::stack_mirror));
		const auto stack1 = eval(stack0 + 8);
		const auto _ret = m_ir->CreateLoad(get_type<u64>(), m_ir->CreateGEP(get_type<u8>(), m_thread, stack0.value));
		const auto link = m_ir->CreateLoad(get_type<u64>(), m_ir->CreateGEP(get_type<u8>(), m_thread, stack1.value));
		const auto fail = llvm::BasicBlock::Create(m_context, "", m_function);
		const auto done = llvm::BasicBlock::Create(m_context, "", m_function);
		const auto next = llvm::BasicBlock::Create(m_context, "", m_function);
		m_ir->CreateCondBr(m_ir->CreateICmpEQ(addr.value, m_ir->CreateTrunc(link, get_type<u32>())), next, fail, m_md_likely);
		m_ir->SetInsertPoint(next);
		const auto cmp2 = m_ir->CreateLoad(get_type<u32>(), m_ir->CreateGEP(get_type<u8>(), m_lsptr, addr.value));
		m_ir->CreateCondBr(m_ir->CreateICmpEQ(cmp2, m_ir->CreateTrunc(_ret, get_type<u32>())), done, fail, m_md_likely);
		m_ir->SetInsertPoint(done);

		// Clear stack mirror and return by tail call to the provided return address
		m_ir->CreateStore(splat<u64[2]>(-1).eval(m_ir), m_ir->CreateGEP(get_type<u8>(), m_thread, stack0.value));
		const auto targ = m_ir->CreateAdd(m_ir->CreateLShr(_ret, 32), get_segment_base());
		const auto type = m_finfo->chunk->getFunctionType();
		const auto fval = m_ir->CreateIntToPtr(targ, type->getPointerTo());
		tail_chunk({type, fval}, m_ir->CreateTrunc(m_ir->CreateLShr(link, 32), get_type<u32>()));
		m_ir->SetInsertPoint(fail);
	}

	if (g_cfg.core.spu_block_size >= spu_block_size_type::mega)
	{
		// Try to load chunk address from the function table
		const auto fail = llvm::BasicBlock::Create(m_context, "", m_function);
		const auto done = llvm::BasicBlock::Create(m_context, "", m_function);
		// In-range check relative to this compilation unit's base PC.
		const auto ad32 = m_ir->CreateSub(addr.value, m_base_pc);
		m_ir->CreateCondBr(m_ir->CreateICmpULT(ad32, m_ir->getInt32(m_size)), done, fail, m_md_likely);
		m_ir->SetInsertPoint(done);
		const auto ad64 = m_ir->CreateZExt(ad32, get_type<u64>());
		// Index the table by instruction slot (address / 4) and tail-call the chunk.
		const auto pptr = dyn_cast<llvm::GetElementPtrInst>(m_ir->CreateGEP(m_function_table->getValueType(), m_function_table, {m_ir->getInt64(0), m_ir->CreateLShr(ad64, 2, "", true)}));
		tail_chunk({m_dispatch->getFunctionType(), m_ir->CreateLoad(pptr->getResultElementType(), pptr)});
		m_ir->SetInsertPoint(fail);
	}

	// Slow path: escape through the generic dispatcher.
	tail_chunk(nullptr);
	m_ir->SetInsertPoint(cblock);
	return result;
}
2019-03-25 19:31:16 +01:00
// Return the basic block for the fall-through path (pc + 4).
// In interpreter mode this merges into the shared continuation block,
// extending its PC PHI with the not-taken next-PC value.
llvm::BasicBlock* add_block_next()
{
	if (m_interp_magn)
	{
		const auto cblock = m_ir->GetInsertBlock();
		m_ir->SetInsertPoint(m_interp_bblock);
		const auto target = m_ir->CreatePHI(get_type<u32>(), 2);
		target->addIncoming(m_interp_pc_next, cblock);
		// The taken path (set up by add_block_indirect) contributes its own PC.
		target->addIncoming(m_interp_pc, m_interp_bblock->getSinglePredecessor());
		m_ir->SetInsertPoint(cblock);
		m_interp_pc = target;
		return m_interp_bblock;
	}

	return add_block(m_pos + 4);
}
2018-05-02 20:49:19 +02:00
// Branch Indirect If Zero: branch to ra's preferred slot if rt's preferred slot is 0.
void BIZ(spu_opcode_t op) //
{
	if (m_block) m_block->block_end = m_ir->GetInsertBlock();

	const auto rt = get_vr<u8[16]>(op.rt);

	// Checking for zero doesn't care about the order of the bytes,
	// so load the data before it's byteswapped
	if (auto [ok, as] = match_expr(rt, byteswap(match<u8[16]>())); ok)
	{
		m_block->block_end = m_ir->GetInsertBlock();
		const auto cond = eval(extract(bitcast<u32[4]>(as), 0) == 0);
		const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
		const auto target = add_block_indirect(op, addr);
		m_ir->CreateCondBr(cond.value, target, add_block_next());
		return;
	}

	const auto ox = get_vr<u32[4]>(op.rt);

	// Instead of extracting the value generated by orx, just test the input to orx with ptest
	if (auto [ok, as] = match_expr(ox, orx(match<u32[4]>())); ok)
	{
		m_block->block_end = m_ir->GetInsertBlock();
		const auto a = extract(bitcast<u64[2]>(as), 0);
		const auto b = extract(bitcast<u64[2]>(as), 1);
		const auto cond = eval((a | b) == 0);
		const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
		const auto target = add_block_indirect(op, addr);
		m_ir->CreateCondBr(cond.value, target, add_block_next());
		return;
	}

	// Check sign bit instead (optimization): when rt is a sign-extended boolean
	// mask, lane 3 is zero exactly when its sign bit is clear.
	if (match_vr<s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
	{
		using VT = typename decltype(MP)::type;

		if (auto [ok, x] = match_expr(c, sext<VT>(match<bool[std::extent_v<VT>]>())); ok)
		{
			const auto a = get_vr<s8[16]>(op.rt);
			const auto cond = eval(bitcast<s16>(trunc<bool[16]>(a)) >= 0);
			const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
			const auto target = add_block_indirect(op, addr);
			m_ir->CreateCondBr(cond.value, target, add_block_next());
			return true;
		}

		return false;
	}))
	{
		return;
	}

	// Generic path: compare rt's preferred slot against zero.
	const auto cond = eval(extract(get_vr(op.rt), 3) == 0);
	const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
	const auto target = add_block_indirect(op, addr);
	m_ir->CreateCondBr(cond.value, target, add_block_next());
}
// Branch Indirect If Not Zero: branch to ra's preferred slot if rt's preferred slot != 0.
void BINZ(spu_opcode_t op) //
{
	if (m_block) m_block->block_end = m_ir->GetInsertBlock();

	const auto rt = get_vr<u8[16]>(op.rt);

	// Checking for zero doesn't care about the order of the bytes,
	// so load the data before it's byteswapped
	if (auto [ok, as] = match_expr(rt, byteswap(match<u8[16]>())); ok)
	{
		m_block->block_end = m_ir->GetInsertBlock();
		const auto cond = eval(extract(bitcast<u32[4]>(as), 0) != 0);
		const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
		const auto target = add_block_indirect(op, addr);
		m_ir->CreateCondBr(cond.value, target, add_block_next());
		return;
	}

	const auto ox = get_vr<u32[4]>(op.rt);

	// Instead of extracting the value generated by orx, just test the input to orx with ptest
	if (auto [ok, as] = match_expr(ox, orx(match<u32[4]>())); ok)
	{
		m_block->block_end = m_ir->GetInsertBlock();
		const auto a = extract(bitcast<u64[2]>(as), 0);
		const auto b = extract(bitcast<u64[2]>(as), 1);
		const auto cond = eval((a | b) != 0);
		const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
		const auto target = add_block_indirect(op, addr);
		m_ir->CreateCondBr(cond.value, target, add_block_next());
		return;
	}

	// Check sign bit instead (optimization): when rt is a sign-extended boolean
	// mask, lane 3 is non-zero exactly when its sign bit is set.
	if (match_vr<s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
	{
		using VT = typename decltype(MP)::type;

		if (auto [ok, x] = match_expr(c, sext<VT>(match<bool[std::extent_v<VT>]>())); ok)
		{
			const auto a = get_vr<s8[16]>(op.rt);
			const auto cond = eval(bitcast<s16>(trunc<bool[16]>(a)) < 0);
			const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
			const auto target = add_block_indirect(op, addr);
			m_ir->CreateCondBr(cond.value, target, add_block_next());
			return true;
		}

		return false;
	}))
	{
		return;
	}

	// Generic path: compare rt's preferred slot against zero.
	const auto cond = eval(extract(get_vr(op.rt), 3) != 0);
	const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
	const auto target = add_block_indirect(op, addr);
	m_ir->CreateCondBr(cond.value, target, add_block_next());
}
// Branch Indirect If Halfword Zero: branch if the preferred halfword of rt is 0.
void BIHZ(spu_opcode_t op) //
{
	if (m_block) m_block->block_end = m_ir->GetInsertBlock();

	// Check sign bits of 2 vector elements (optimization): the preferred
	// halfword maps to mask bits 0x3000 of the packed per-byte sign bits.
	if (match_vr<s8[16], s16[8], s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
	{
		using VT = typename decltype(MP)::type;

		if (auto [ok, x] = match_expr(c, sext<VT>(match<bool[std::extent_v<VT>]>())); ok)
		{
			const auto a = get_vr<s8[16]>(op.rt);
			const auto cond = eval((bitcast<s16>(trunc<bool[16]>(a)) & 0x3000) == 0);
			const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
			const auto target = add_block_indirect(op, addr);
			m_ir->CreateCondBr(cond.value, target, add_block_next());
			return true;
		}

		return false;
	}))
	{
		return;
	}

	// Generic path: test halfword slot 6 (the preferred halfword) for zero.
	const auto cond = eval(extract(get_vr<u16[8]>(op.rt), 6) == 0);
	const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
	const auto target = add_block_indirect(op, addr);
	m_ir->CreateCondBr(cond.value, target, add_block_next());
}
// Branch Indirect If Halfword Not Zero: branch if the preferred halfword of rt != 0.
void BIHNZ(spu_opcode_t op) //
{
	if (m_block) m_block->block_end = m_ir->GetInsertBlock();

	// Check sign bits of 2 vector elements (optimization): the preferred
	// halfword maps to mask bits 0x3000 of the packed per-byte sign bits.
	if (match_vr<s8[16], s16[8], s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
	{
		using VT = typename decltype(MP)::type;

		if (auto [ok, x] = match_expr(c, sext<VT>(match<bool[std::extent_v<VT>]>())); ok)
		{
			const auto a = get_vr<s8[16]>(op.rt);
			const auto cond = eval((bitcast<s16>(trunc<bool[16]>(a)) & 0x3000) != 0);
			const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
			const auto target = add_block_indirect(op, addr);
			m_ir->CreateCondBr(cond.value, target, add_block_next());
			return true;
		}

		return false;
	}))
	{
		return;
	}

	// Generic path: test halfword slot 6 (the preferred halfword) for non-zero.
	const auto cond = eval(extract(get_vr<u16[8]>(op.rt), 6) != 0);
	const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
	const auto target = add_block_indirect(op, addr);
	m_ir->CreateCondBr(cond.value, target, add_block_next());
}
// Branch Indirect: jump to ra's preferred slot. When the analyser recorded a
// known target set, a switch (aiming at a jump table) is emitted instead of a
// generic indirect branch.
void BI(spu_opcode_t op) //
{
	if (m_block) m_block->block_end = m_ir->GetInsertBlock();
	const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);

	if (m_interp_magn)
	{
		m_ir->CreateBr(add_block_indirect(op, addr));
		return;
	}

	// Create jump table if necessary (TODO)
	const auto tfound = m_targets.find(m_pos);

	if (op.d && tfound != m_targets.end() && tfound->second.size() == 1 && tfound->second[0] == spu_branch_target(m_pos, 1))
	{
		// Interrupts-disable pattern: 'bid' to the next instruction only clears
		// the interrupt flag; no actual control transfer is needed.
		m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled));
		return;
	}

	if (!op.d && !op.e && tfound != m_targets.end() && tfound->second.size() > 1)
	{
		// Shift aligned address for switch
		const auto addrfx = m_ir->CreateSub(addr.value, m_base_pc);
		const auto sw_arg = m_ir->CreateLShr(addrfx, 2, "", true);

		// Initialize jump table targets
		std::map<u32, llvm::BasicBlock*> targets;

		for (u32 target : tfound->second)
		{
			if (m_block_info[target / 4])
			{
				targets.emplace(target, nullptr);
			}
		}

		// Initialize target basic blocks
		for (auto& pair : targets)
		{
			pair.second = add_block(pair.first);
		}

		if (targets.empty())
		{
			// Emergency exit
			spu_log.error("[%s] [0x%05x] No jump table targets at 0x%05x (%u)", m_hash, m_entry, m_pos, tfound->second.size());
			m_ir->CreateBr(add_block_indirect(op, addr));
			return;
		}

		// Get jump table bounds (optimization)
		const u32 start = targets.begin()->first;
		const u32 end = targets.rbegin()->first + 4;

		// Emit switch instruction aiming for a jumptable in the end (indirectbr could guarantee it)
		const auto sw = m_ir->CreateSwitch(sw_arg, llvm::BasicBlock::Create(m_context, "", m_function), (end - start) / 4);

		for (u32 pos = start; pos < end; pos += 4)
		{
			if (m_block_info[pos / 4] && targets.count(pos))
			{
				const auto found = targets.find(pos);

				if (found != targets.end())
				{
					sw->addCase(m_ir->getInt32(pos / 4 - m_base / 4), found->second);
					continue;
				}
			}

			// Fill gaps with the default destination to keep cases dense.
			sw->addCase(m_ir->getInt32(pos / 4 - m_base / 4), sw->getDefaultDest());
		}

		// Exit function on unexpected target
		m_ir->SetInsertPoint(sw->getDefaultDest());
		m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc));

		if (m_finfo && m_finfo->fn)
		{
			// Can't afford external tail call in true functions
			m_ir->CreateStore(m_ir->getInt32("BIJT"_u32), _ptr<u32>(m_memptr, 0xffdead20));
			m_ir->CreateCall(m_test_state, {m_thread});
			m_ir->CreateBr(sw->getDefaultDest());
		}
		else
		{
			tail_chunk(nullptr);
		}
	}
	else
	{
		// Simple indirect branch
		m_ir->CreateBr(add_block_indirect(op, addr));
	}
}
// Branch Indirect and Set Link: jump to ra's preferred slot and write the
// return address into rt.
void BISL(spu_opcode_t op) //
{
	if (m_block)
	{
		m_block->block_end = m_ir->GetInsertBlock();
	}

	// Word-align the target address within LS bounds.
	const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
	set_link(op);

	// A call site is never a function return, hence ret = false.
	const auto target = add_block_indirect(op, addr, false);
	m_ir->CreateBr(target);
}
// Interrupt RETurn: branch indirectly to the address saved in SRR0.
void IRET(spu_opcode_t op) //
{
	if (m_block) m_block->block_end = m_ir->GetInsertBlock();
	value_t<u32> srr0;
	srr0.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::srr0));
	m_ir->CreateBr(add_block_indirect(op, srr0));
}
// Branch Indirect and Set Link if External Data: set the link register, then
// branch only if enabled events are pending; otherwise fall through.
void BISLED(spu_opcode_t op) //
{
	if (m_block) m_block->block_end = m_ir->GetInsertBlock();
	const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
	set_link(op);
	// Event mask is the upper 32 bits of the ch_events channel (volatile load).
	const auto mask = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::ch_events), true), 32), get_type<u32>());
	const auto res = call("spu_get_events", &exec_get_events, m_thread, mask);
	const auto target = add_block_indirect(op, addr);
	// Take the branch only when pending events were reported.
	m_ir->CreateCondBr(m_ir->CreateICmpNE(res, m_ir->getInt32(0)), target, add_block_next());
}
// BRanch relative if Zero: branch to pc + (i16 << 2) if rt's preferred slot is 0.
void BRZ(spu_opcode_t op) //
{
	if (m_interp_magn)
	{
		// Interpreter mode: select between taken target and next PC at runtime.
		value_t<u32> target;
		target.value = m_interp_pc;
		target = eval((target + (get_imm<u32>(op.i16, false) << 2)) & 0x3fffc);
		m_interp_pc = m_ir->CreateSelect(eval(extract(get_vr(op.rt), 3) == 0).value, target.value, m_interp_pc_next);
		return;
	}

	const u32 target = spu_branch_target(m_pos, op.i16);

	const auto rt = get_vr<u8[16]>(op.rt);

	// Checking for zero doesn't care about the order of the bytes,
	// so load the data before it's byteswapped
	if (auto [ok, as] = match_expr(rt, byteswap(match<u8[16]>())); ok)
	{
		// A branch to the next instruction is a no-op; handled by the tail check.
		if (target != m_pos + 4)
		{
			m_block->block_end = m_ir->GetInsertBlock();
			const auto cond = eval(extract(bitcast<u32[4]>(as), 0) == 0);
			m_ir->CreateCondBr(cond.value, add_block(target), add_block(m_pos + 4));
			return;
		}
	}

	const auto ox = get_vr<u32[4]>(op.rt);

	// Instead of extracting the value generated by orx, just test the input to orx with ptest
	if (auto [ok, as] = match_expr(ox, orx(match<u32[4]>())); ok)
	{
		if (target != m_pos + 4)
		{
			m_block->block_end = m_ir->GetInsertBlock();
			const auto a = extract(bitcast<u64[2]>(as), 0);
			const auto b = extract(bitcast<u64[2]>(as), 1);
			const auto cond = eval((a | b) == 0);
			m_ir->CreateCondBr(cond.value, add_block(target), add_block(m_pos + 4));
			return;
		}
	}

	// Check sign bit instead (optimization): when rt is a sign-extended boolean
	// mask, lane 3 is zero exactly when its sign bit is clear.
	if (match_vr<s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
	{
		using VT = typename decltype(MP)::type;

		if (auto [ok, x] = match_expr(c, sext<VT>(match<bool[std::extent_v<VT>]>())); ok)
		{
			if (target != m_pos + 4)
			{
				m_block->block_end = m_ir->GetInsertBlock();
				const auto a = get_vr<s8[16]>(op.rt);
				const auto cond = eval(bitcast<s16>(trunc<bool[16]>(a)) >= 0);
				m_ir->CreateCondBr(cond.value, add_block(target), add_block(m_pos + 4));
				return true;
			}
		}

		return false;
	}))
	{
		return;
	}

	// Generic path (branch-to-next is still a no-op).
	if (target != m_pos + 4)
	{
		m_block->block_end = m_ir->GetInsertBlock();
		const auto cond = eval(extract(get_vr(op.rt), 3) == 0);
		m_ir->CreateCondBr(cond.value, add_block(target), add_block(m_pos + 4));
	}
}
// Branch Relative if Not Zero: branch to m_pos + (i16 << 2) when the preferred
// word slot (element 3) of rt is nonzero, otherwise fall through to m_pos + 4.
// Mirrors BRZ with all conditions inverted.
void BRNZ(spu_opcode_t op) //
{
	if (m_interp_magn)
	{
		// Interpreter path: compute the wrapped branch target from the current
		// interpreter PC and select between it and the next PC.
		value_t<u32> target;
		target.value = m_interp_pc;
		target = eval((target + (get_imm<u32>(op.i16, false) << 2)) & 0x3fffc);
		m_interp_pc = m_ir->CreateSelect(eval(extract(get_vr(op.rt), 3) != 0).value, target.value, m_interp_pc_next);
		return;
	}

	const u32 target = spu_branch_target(m_pos, op.i16);

	const auto rt = get_vr<u8[16]>(op.rt);

	// Checking for zero doesn't care about the order of the bytes,
	// so load the data before it's byteswapped
	if (auto [ok, as] = match_expr(rt, byteswap(match<u8[16]>())); ok)
	{
		if (target != m_pos + 4)
		{
			m_block->block_end = m_ir->GetInsertBlock();
			const auto cond = eval(extract(bitcast<u32[4]>(as), 0) != 0);
			m_ir->CreateCondBr(cond.value, add_block(target), add_block(m_pos + 4));
			return;
		}
	}

	const auto ox = get_vr<u32[4]>(op.rt);

	// Instead of extracting the value generated by orx, just test the input to orx with ptest
	if (auto [ok, as] = match_expr(ox, orx(match<u32[4]>())); ok)
	{
		if (target != m_pos + 4)
		{
			m_block->block_end = m_ir->GetInsertBlock();
			// orx ORs all words together, so "result != 0" is equivalent to
			// "any input word is nonzero"; test via two 64-bit halves.
			const auto a = extract(bitcast<u64[2]>(as), 0);
			const auto b = extract(bitcast<u64[2]>(as), 1);
			const auto cond = eval((a | b) != 0);
			m_ir->CreateCondBr(cond.value, add_block(target), add_block(m_pos + 4));
			return;
		}
	}

	// Check sign bit instead (optimization)
	if (match_vr<s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
	{
		using VT = typename decltype(MP)::type;

		// rt was produced by sign-extending a comparison result, so every byte
		// of an element is identical; testing one byte's bit per lane suffices.
		if (auto [ok, x] = match_expr(c, sext<VT>(match<bool[std::extent_v<VT>]>())); ok)
		{
			if (target != m_pos + 4)
			{
				m_block->block_end = m_ir->GetInsertBlock();

				// Pack one bit per byte into an s16; it is negative exactly when
				// byte 15 (inside word element 3, the preferred slot) is set.
				const auto a = get_vr<s8[16]>(op.rt);
				const auto cond = eval(bitcast<s16>(trunc<bool[16]>(a)) < 0);
				m_ir->CreateCondBr(cond.value, add_block(target), add_block(m_pos + 4));
				return true;
			}
		}

		return false;
	}))
	{
		return;
	}

	// Generic path: extract word element 3 and compare against zero.
	if (target != m_pos + 4)
	{
		m_block->block_end = m_ir->GetInsertBlock();
		const auto cond = eval(extract(get_vr(op.rt), 3) != 0);
		m_ir->CreateCondBr(cond.value, add_block(target), add_block(m_pos + 4));
	}
}
// Branch Relative if Halfword Zero: branch when the preferred halfword slot
// (element 6) of rt is zero, otherwise fall through to m_pos + 4.
void BRHZ(spu_opcode_t op) //
{
	if (m_interp_magn)
	{
		// Interpreter path: compute the wrapped branch target and select
		// between it and the next PC based on halfword element 6.
		value_t<u32> target;
		target.value = m_interp_pc;
		target = eval((target + (get_imm<u32>(op.i16, false) << 2)) & 0x3fffc);
		m_interp_pc = m_ir->CreateSelect(eval(extract(get_vr<u16[8]>(op.rt), 6) == 0).value, target.value, m_interp_pc_next);
		return;
	}

	const u32 target = spu_branch_target(m_pos, op.i16);

	// Check sign bits of 2 vector elements (optimization)
	if (match_vr<s8[16], s16[8], s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
	{
		using VT = typename decltype(MP)::type;

		// rt was produced by sign-extending a comparison result, so every byte
		// of an element is identical; one bit per byte is enough to test.
		if (auto [ok, x] = match_expr(c, sext<VT>(match<bool[std::extent_v<VT>]>())); ok)
		{
			if (target != m_pos + 4)
			{
				m_block->block_end = m_ir->GetInsertBlock();
				const auto a = get_vr<s8[16]>(op.rt);
				// Mask 0x3000 selects bits 12-13, i.e. bytes 12-13, which make up
				// halfword element 6 (the preferred halfword slot).
				const auto cond = eval((bitcast<s16>(trunc<bool[16]>(a)) & 0x3000) == 0);
				m_ir->CreateCondBr(cond.value, add_block(target), add_block(m_pos + 4));
				return true;
			}
		}
		return false;
	}))
	{
		return;
	}

	// Generic path: extract halfword element 6 and compare against zero.
	if (target != m_pos + 4)
	{
		m_block->block_end = m_ir->GetInsertBlock();
		const auto cond = eval(extract(get_vr<u16[8]>(op.rt), 6) == 0);
		m_ir->CreateCondBr(cond.value, add_block(target), add_block(m_pos + 4));
	}
}
// Branch Relative if Halfword Not Zero: branch when the preferred halfword
// slot (element 6) of rt is nonzero. Mirrors BRHZ with the condition inverted.
void BRHNZ(spu_opcode_t op) //
{
	if (m_interp_magn)
	{
		// Interpreter path: compute the wrapped branch target and select
		// between it and the next PC based on halfword element 6.
		value_t<u32> target;
		target.value = m_interp_pc;
		target = eval((target + (get_imm<u32>(op.i16, false) << 2)) & 0x3fffc);
		m_interp_pc = m_ir->CreateSelect(eval(extract(get_vr<u16[8]>(op.rt), 6) != 0).value, target.value, m_interp_pc_next);
		return;
	}

	const u32 target = spu_branch_target(m_pos, op.i16);

	// Check sign bits of 2 vector elements (optimization)
	if (match_vr<s8[16], s16[8], s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
	{
		using VT = typename decltype(MP)::type;

		// rt was produced by sign-extending a comparison result, so every byte
		// of an element is identical; one bit per byte is enough to test.
		if (auto [ok, x] = match_expr(c, sext<VT>(match<bool[std::extent_v<VT>]>())); ok)
		{
			if (target != m_pos + 4)
			{
				m_block->block_end = m_ir->GetInsertBlock();
				const auto a = get_vr<s8[16]>(op.rt);
				// Mask 0x3000 selects bits 12-13, i.e. bytes 12-13, which make up
				// halfword element 6 (the preferred halfword slot).
				const auto cond = eval((bitcast<s16>(trunc<bool[16]>(a)) & 0x3000) != 0);
				m_ir->CreateCondBr(cond.value, add_block(target), add_block(m_pos + 4));
				return true;
			}
		}
		return false;
	}))
	{
		return;
	}

	// Generic path: extract halfword element 6 and compare against zero.
	if (target != m_pos + 4)
	{
		m_block->block_end = m_ir->GetInsertBlock();
		const auto cond = eval(extract(get_vr<u16[8]>(op.rt), 6) != 0);
		m_ir->CreateCondBr(cond.value, add_block(target), add_block(m_pos + 4));
	}
}
// Branch Absolute (unconditional): the target is formed from the immediate
// alone, independent of the current PC.
void BRA(spu_opcode_t op) //
{
	if (m_interp_magn)
	{
		// Interpreter path: the absolute target is just the shifted, wrapped immediate.
		m_interp_pc = eval((get_imm<u32>(op.i16, false) << 2) & 0x3fffc).value;
		return;
	}

	// Close the current block and branch unconditionally to the absolute target.
	m_block->block_end = m_ir->GetInsertBlock();

	const u32 abs_target = spu_branch_target(0, op.i16);
	m_ir->CreateBr(add_block(abs_target, true));
}
// Branch Absolute and Set Link: writes the return address into rt via
// set_link, then performs an absolute branch like BRA.
void BRASL(spu_opcode_t op) //
{
	set_link(op);
	BRA(op);
}
// Branch Relative (unconditional): jump to m_pos + (i16 << 2).
void BR(spu_opcode_t op) //
{
	if (m_interp_magn)
	{
		// Interpreter path: add the shifted immediate to the current PC, wrapped to LS.
		value_t<u32> next_pc;
		next_pc.value = m_interp_pc;
		next_pc = eval((next_pc + (get_imm<u32>(op.i16, false) << 2)) & 0x3fffc);
		m_interp_pc = next_pc.value;
		return;
	}

	const u32 dest = spu_branch_target(m_pos, op.i16);

	if (dest == m_pos + 4)
	{
		// Branch to the very next instruction: nothing to emit.
		return;
	}

	m_block->block_end = m_ir->GetInsertBlock();
	m_ir->CreateBr(add_block(dest));
}
// Branch Relative and Set Link: stores the return address, then either emits
// a direct call (when compiling with function support) or a plain branch.
void BRSL(spu_opcode_t op) //
{
	set_link(op);

	const u32 dest = spu_branch_target(m_pos, op.i16);

	if (m_finfo && m_finfo->fn && dest != m_pos + 4)
	{
		// Function-call path: resolve the callee and call it directly.
		const auto callee = add_function(dest)->fn;

		if (!callee)
		{
			spu_log.fatal("[0x%x] Can't add function 0x%x", m_pos, dest);
			return;
		}

		call_function(callee);
		return;
	}

	// Fall back to an ordinary relative branch.
	BR(op);
}
// Writes the link register: rt = address of the next instruction in the
// preferred word slot (element 3), other elements zeroed. For large block
// sizes it additionally spills return information to the stack mirror.
void set_link(spu_opcode_t op)
{
	if (m_interp_magn)
	{
		// Interpreter path: store the precomputed next PC into element 3 of rt.
		value_t<u32> next;
		next.value = m_interp_pc_next;
		set_vr(op.rt, insert(splat<u32[4]>(0), 3, next));
		return;
	}

	set_vr(op.rt, insert(splat<u32[4]>(0), 3, value<u32>(get_pc(m_pos + 4)) & 0x3fffc));

	if (m_finfo && m_finfo->fn)
	{
		// Compiling as a real function: no stack-mirror bookkeeping required.
		return;
	}

	// Only for mega-sized blocks and above, and only when the instruction
	// following this one starts a known block that is also an entry point.
	if (g_cfg.core.spu_block_size >= spu_block_size_type::mega && m_block_info[m_pos / 4 + 1] && m_entry_info[m_pos / 4 + 1])
	{
		// Store the return function chunk address at the stack mirror
		const auto pfunc = add_function(m_pos + 4);
		// Slot address: SP (reg 1, preferred word, masked to 0x3fff0) offset into
		// the spu_thread::stack_mirror area; second qword lives 8 bytes further.
		const auto stack0 = eval(zext<u64>(extract(get_reg_fixed(1), 3) & 0x3fff0) + ::offset32(&spu_thread::stack_mirror));
		const auto stack1 = eval(stack0 + 8);
		// First qword: (chunk address relative to the segment base) << 32 | m_next_op.
		const auto rel_ptr = m_ir->CreateSub(m_ir->CreatePtrToInt(pfunc->chunk, get_type<u64>()), get_segment_base());
		const auto ptr_plus_op = m_ir->CreateOr(m_ir->CreateShl(rel_ptr, 32), m_ir->getInt64(m_next_op));
		// Second qword: (base PC) << 32 | return address (m_pos + 4).
		// NOTE(review): presumably consulted/validated by the return path — confirm.
		const auto base_plus_pc = m_ir->CreateOr(m_ir->CreateShl(m_ir->CreateZExt(m_base_pc, get_type<u64>()), 32), m_ir->getInt64(m_pos + 4));
		m_ir->CreateStore(ptr_plus_op, m_ir->CreateGEP(get_type<u8>(), m_thread, stack0.value));
		m_ir->CreateStore(base_plus_pc, m_ir->CreateGEP(get_type<u8>(), m_thread, stack1.value));
	}
}
2020-05-27 17:53:09 +02:00
// Returns the JIT segment base address as an i64 IR value.
llvm::Value* get_segment_base()
{
	// Declare a dummy void() function used purely as a named address anchor.
	const auto fn_type = llvm::FunctionType::get(get_type<void>(), {}, false);
	const auto anchor = llvm::cast<llvm::Function>(m_module->getOrInsertFunction("spu_segment_base", fn_type).getCallee());

	// Bind the symbol to the address returned by a zero-size JIT allocation.
	m_engine->updateGlobalMapping("spu_segment_base", reinterpret_cast<u64>(jit_runtime::alloc(0, 0)));

	return m_ir->CreatePtrToInt(anchor, get_type<u64>());
}
2020-03-24 09:40:22 +01:00
static decltype ( & spu_llvm_recompiler : : UNK ) decode ( u32 op ) ;
2018-05-02 20:49:19 +02:00
} ;
2019-03-25 19:31:16 +01:00
std : : unique_ptr < spu_recompiler_base > spu_recompiler_base : : make_llvm_recompiler ( u8 magn )
2018-05-02 20:49:19 +02:00
{
2019-03-25 19:31:16 +01:00
return std : : make_unique < spu_llvm_recompiler > ( magn ) ;
2018-05-02 20:49:19 +02:00
}
2021-12-30 17:39:18 +01:00
const spu_decoder < spu_llvm_recompiler > s_spu_llvm_decoder ;
2020-03-24 09:40:22 +01:00
// Looks up the recompiler handler for the given raw SPU instruction word.
decltype(&spu_llvm_recompiler::UNK) spu_llvm_recompiler::decode(u32 op)
{
	return s_spu_llvm_decoder.decode(op);
}
2018-05-12 21:55:52 +02:00
# else
2019-03-25 19:31:16 +01:00
std : : unique_ptr < spu_recompiler_base > spu_recompiler_base : : make_llvm_recompiler ( u8 magn )
2018-05-12 21:55:52 +02:00
{
2019-03-25 19:31:16 +01:00
if ( magn )
{
return nullptr ;
}
2018-05-12 21:55:52 +02:00
fmt : : throw_exception ( " LLVM is not available in this build. " ) ;
}
2023-12-29 15:14:22 +01:00
# endif // LLVM_AVAILABLE