// rpcsx/rpcs3/Emu/Cell/PPUTranslator.cpp

#include <bit>
#ifdef LLVM_AVAILABLE
#include "Emu/system_config.h"
#include "Emu/Cell/Common.h"
#include "PPUTranslator.h"
#include "PPUThread.h"
#include "util/types.hpp"
#include "util/endian.hpp"
#include "util/logs.hpp"
#include "util/v128.hpp"
#include "util/simd.hpp"
#include <algorithm>
using namespace llvm;
const ppu_decoder<PPUTranslator> s_ppu_decoder;
extern const ppu_decoder<ppu_itype> g_ppu_itype;
extern const ppu_decoder<ppu_iname> g_ppu_iname;
PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_module& info, ExecutionEngine& engine)
: cpu_translator(_module, false)
, m_info(info)
, m_pure_attr()
{
// Bind context
cpu_translator::initialize(context, engine);
// Thread context struct (TODO: safer member access)
const u32 off0 = offset32(&ppu_thread::state);
const u32 off1 = offset32(&ppu_thread::gpr);
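// The struct built below mirrors the ppu_thread layout: the char arrays pad over
// members that are never touched directly, and the field order has to line up with
// the m_locals/m_globals register arrays so that the CreateStructGEP index computed
// in RegInit/RegLoad (&local - m_locals) lands on the right member.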
std::vector<Type*> thread_struct;
thread_struct.emplace_back(ArrayType::get(GetType<char>(), off0));
thread_struct.emplace_back(GetType<u32>()); // state
thread_struct.emplace_back(ArrayType::get(GetType<char>(), off1 - off0 - 4));
thread_struct.insert(thread_struct.end(), 32, GetType<u64>()); // gpr[0..31]
thread_struct.insert(thread_struct.end(), 32, GetType<f64>()); // fpr[0..31]
thread_struct.insert(thread_struct.end(), 32, GetType<u32[4]>()); // vr[0..31]
thread_struct.insert(thread_struct.end(), 32, GetType<bool>()); // cr[0..31]
thread_struct.insert(thread_struct.end(), 32, GetType<bool>()); // fpscr
thread_struct.insert(thread_struct.end(), 2, GetType<u64>()); // lr, ctr
thread_struct.insert(thread_struct.end(), 2, GetType<u32>()); // vrsave, cia
thread_struct.insert(thread_struct.end(), 3, GetType<bool>()); // so, ov, ca
thread_struct.insert(thread_struct.end(), 1, GetType<u8>()); // cnt
thread_struct.insert(thread_struct.end(), 1, GetType<bool>()); // nj
thread_struct.emplace_back(ArrayType::get(GetType<char>(), 3)); // Padding
thread_struct.insert(thread_struct.end(), 1, GetType<u32[4]>()); // sat
thread_struct.insert(thread_struct.end(), 1, GetType<u32>()); // jm_mask
m_thread_type = StructType::create(m_context, thread_struct, "context_t");
const auto md_name = MDString::get(m_context, "branch_weights");
const auto md_low = ValueAsMetadata::get(ConstantInt::get(GetType<u32>(), 1));
const auto md_high = ValueAsMetadata::get(ConstantInt::get(GetType<u32>(), 666));
// Metadata for branch weights
m_md_likely = MDTuple::get(m_context, {md_name, md_high, md_low});
m_md_unlikely = MDTuple::get(m_context, {md_name, md_low, md_high});
// Sort relevant relocations (TODO)
const auto caddr = m_info.segs[0].addr;
const auto cend = caddr + m_info.segs[0].size;
for (const auto& rel : m_info.relocs)
{
if (rel.addr >= caddr && rel.addr < cend)
{
// Check relocation type
switch (rel.type)
{
// Ignore relative relocations, they are handled in emitted code
// Comment out types we haven't confirmed as used and working
case 10:
case 11:
// case 12:
// case 13:
// case 26:
// case 28:
{
ppu_log.notice("Ignoring relative relocation at 0x%x (%u)", rel.addr, rel.type);
continue;
}
// Ignore 64-bit relocations
case 20:
case 22:
case 38:
case 43:
case 44:
case 45:
case 46:
case 51:
case 68:
case 73:
case 78:
{
ppu_log.error("Ignoring 64-bit relocation at 0x%x (%u)", rel.addr, rel.type);
continue;
}
default: break;
}
// Align relocation address (TODO)
if (!m_relocs.emplace(rel.addr & ~3, &rel).second)
{
ppu_log.error("Relocation repeated at 0x%x (%u)", rel.addr, rel.type);
}
}
}
if (!m_info.relocs.empty())
{
m_reloc = &m_info.segs[0];
}
const auto nan_v = v128::from32p(0x7FC00000u);
nan_vec4 = make_const_vector(nan_v, get_type<f32[4]>());
}
PPUTranslator::~PPUTranslator()
{
}
Type* PPUTranslator::GetContextType()
{
return m_thread_type;
}
u32 ppu_get_far_jump(u32 pc);
Function* PPUTranslator::Translate(const ppu_function& info)
{
m_function = m_module->getFunction(info.name);
std::fill(std::begin(m_globals), std::end(m_globals), nullptr);
std::fill(std::begin(m_locals), std::end(m_locals), nullptr);
IRBuilder<> irb(BasicBlock::Create(m_context, "__entry", m_function));
m_ir = &irb;
// Instruction address is (m_addr + base)
const u64 base = m_reloc ? m_reloc->addr : 0;
m_addr = info.addr - base;
m_attr = info.attr;
// Don't emit check in small blocks without terminator
bool need_check = info.size >= 16;
for (u32 addr = m_addr; addr < m_addr + info.size; addr += 4)
{
const u32 op = vm::read32(vm::cast(addr + base));
switch (g_ppu_itype.decode(op))
{
case ppu_itype::UNK:
case ppu_itype::ECIWX:
case ppu_itype::ECOWX:
case ppu_itype::TD:
case ppu_itype::TDI:
case ppu_itype::TW:
case ppu_itype::TWI:
case ppu_itype::B:
case ppu_itype::BC:
case ppu_itype::BCCTR:
case ppu_itype::BCLR:
case ppu_itype::SC:
{
need_check = true;
break;
}
default:
{
break;
}
}
}
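// Arguments of the GHC-convention block function: arg0 = dispatch table (m_exec),
// arg1 = ppu_thread context, arg2 = segment base (m_seg0), arg3 = memory base
// (m_base), args 4..6 = preloaded r0..r2 (the same order is used for the tail call
// emitted in CallFunction).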
m_thread = m_function->getArg(1);
m_base = m_function->getArg(3);
m_exec = m_function->getArg(0);
m_seg0 = m_function->getArg(2);
m_gpr[0] = m_function->getArg(4);
m_gpr[1] = m_function->getArg(5);
m_gpr[2] = m_function->getArg(6);
const auto body = BasicBlock::Create(m_context, "__body", m_function);
//Call(GetType<void>(), "__trace", GetAddr());
if (need_check)
{
// Check status register in the entry block
auto ptr = llvm::dyn_cast<GetElementPtrInst>(m_ir->CreateStructGEP(m_thread_type, m_thread, 1));
assert(ptr->getResultElementType() == GetType<u32>());
const auto vstate = m_ir->CreateLoad(ptr->getResultElementType(), ptr, true);
const auto vcheck = BasicBlock::Create(m_context, "__test", m_function);
m_ir->CreateCondBr(m_ir->CreateIsNull(vstate), body, vcheck, m_md_likely);
// Create tail call to the check function
m_ir->SetInsertPoint(vcheck);
Call(GetType<void>(), "__check", m_thread, GetAddr())->setTailCall();
m_ir->CreateRetVoid();
}
else
{
m_ir->CreateBr(body);
}
m_ir->SetInsertPoint(body);
// Process blocks
const auto block = std::make_pair(info.addr, info.size);
{
// Optimize BLR (prefetch LR)
if (vm::read32(vm::cast(block.first + block.second - 4)) == ppu_instructions::BLR())
{
RegLoad(m_lr);
}
// Process the instructions
for (m_addr = block.first - base; m_addr < block.first + block.second - base; m_addr += 4)
{
if (m_ir->GetInsertBlock()->getTerminator())
{
break;
}
// Find the relocation at current address
const auto rel_found = m_relocs.find(m_addr + base);
if (rel_found != m_relocs.end())
{
m_rel = rel_found->second;
}
else
{
m_rel = nullptr;
}
const u32 op = vm::read32(vm::cast(m_addr + base));
(this->*(s_ppu_decoder.decode(op)))({op});
if (m_rel)
{
// This is very bad: m_rel is normally reset to nullptr once a relocation has been handled, so this one was not handled
ppu_log.error("LLVM: [0x%x] Unsupported relocation(%u) in '%s' (opcode=0x%x '%s'). Please report.", rel_found->first, m_rel->type, m_info.name, op, g_ppu_iname.decode(op));
return nullptr;
}
}
// Finalize current block if necessary (create branch to the next address)
if (!m_ir->GetInsertBlock()->getTerminator())
{
FlushRegisters();
CallFunction(m_addr);
}
}
replace_intrinsics(*m_function);
return m_function;
}
Value* PPUTranslator::VecHandleNan(Value* val)
{
const auto is_nan = m_ir->CreateFCmpUNO(val, val);
val = m_ir->CreateSelect(is_nan, nan_vec4, val);
return val;
}
Value* PPUTranslator::VecHandleDenormal(Value* val)
{
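// Non-java (NJ) mode emulation: lanes whose bits all vanish under the current
// jm_mask (denormals when NJ is set) are flushed to zero, keeping only the sign
// bit. With NJ clear the mask covers every non-sign bit, so only ±0 matches and
// nothing is altered.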
const auto type = val->getType();
const auto value = bitcast(val, GetType<u32[4]>());
const auto mask = SExt(m_ir->CreateICmpEQ(m_ir->CreateAnd(value, Broadcast(RegLoad(m_jm_mask), 4)), ConstantAggregateZero::get(value->getType())), GetType<s32[4]>());
const auto nz = m_ir->CreateLShr(mask, 1);
const auto result = m_ir->CreateAnd(m_ir->CreateNot(nz), value);
return bitcast(result, type);
}
Value* PPUTranslator::VecHandleResult(Value* val)
{
val = g_cfg.core.ppu_fix_vnan ? VecHandleNan(val) : val;
val = g_cfg.core.ppu_llvm_nj_fixup ? VecHandleDenormal(val) : val;
return val;
}
Value* PPUTranslator::GetAddr(u64 _add)
{
if (m_reloc)
{
// Load segment address from global variable, compute actual instruction address
return m_ir->CreateAdd(m_ir->getInt64(m_addr + _add), m_seg0);
}
return m_ir->getInt64(m_addr + _add);
}
Type* PPUTranslator::ScaleType(Type* type, s32 pow2)
{
ensure(type->getScalarType()->isIntegerTy());
ensure(pow2 > -32 && pow2 < 32);
uint scaled = type->getScalarSizeInBits();
ensure((scaled & (scaled - 1)) == 0);
if (pow2 > 0)
{
scaled <<= pow2;
}
else if (pow2 < 0)
{
scaled >>= -pow2;
}
ensure(scaled);
const auto new_type = m_ir->getIntNTy(scaled);
const auto vec_type = dyn_cast<FixedVectorType>(type);
return vec_type ? VectorType::get(new_type, vec_type->getNumElements(), false) : cast<Type>(new_type);
}
Value* PPUTranslator::DuplicateExt(Value* arg)
{
const auto extended = ZExt(arg);
return m_ir->CreateOr(extended, m_ir->CreateShl(extended, arg->getType()->getScalarSizeInBits()));
}
Value* PPUTranslator::RotateLeft(Value* arg, u64 n)
{
return !n ? arg : m_ir->CreateOr(m_ir->CreateShl(arg, n), m_ir->CreateLShr(arg, arg->getType()->getScalarSizeInBits() - n));
}
Value* PPUTranslator::RotateLeft(Value* arg, Value* n)
{
const u64 mask = arg->getType()->getScalarSizeInBits() - 1;
return m_ir->CreateOr(m_ir->CreateShl(arg, m_ir->CreateAnd(n, mask)), m_ir->CreateLShr(arg, m_ir->CreateAnd(m_ir->CreateNeg(n), mask)));
}
void PPUTranslator::CallFunction(u64 target, Value* indirect)
{
const auto type = m_function->getFunctionType();
const auto block = m_ir->GetInsertBlock();
FunctionCallee callee;
auto seg0 = m_seg0;
if (!indirect)
{
const u64 base = m_reloc ? m_reloc->addr : 0;
const u32 caddr = m_info.segs[0].addr;
const u32 cend = caddr + m_info.segs[0].size - 1;
const u64 _target = target + base;
if (_target >= caddr && _target <= cend)
{
callee = m_module->getOrInsertFunction(fmt::format("__0x%x", target), type);
cast<Function>(callee.getCallee())->setCallingConv(CallingConv::GHC);
}
else
{
indirect = m_reloc ? m_ir->CreateAdd(m_ir->getInt64(target), seg0) : m_ir->getInt64(target);
}
}
if (indirect)
{
m_ir->CreateStore(Trunc(indirect, GetType<u32>()), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_cia - m_locals)), true);
// Try to optimize
if (auto inst = dyn_cast_or_null<Instruction>(indirect))
{
if (auto next = inst->getNextNode())
{
m_ir->SetInsertPoint(next);
}
}
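// Look up the callee in the dispatch table: every 4-byte PPU address maps to an
// 8-byte entry (byte offset = address * 2). The low 48 bits of the entry hold the
// host code pointer, the top 16 bits hold the callee's segment base pre-shifted
// right by 13; both are unpacked below.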
const auto pos = m_ir->CreateShl(indirect, 1);
const auto ptr = dyn_cast<GetElementPtrInst>(m_ir->CreateGEP(get_type<u8>(), m_exec, pos));
const auto val = m_ir->CreateLoad(get_type<u64>(), m_ir->CreateBitCast(ptr, get_type<u64*>()));
callee = FunctionCallee(type, m_ir->CreateIntToPtr(m_ir->CreateAnd(val, 0xffff'ffff'ffff), type->getPointerTo()));
// Load new segment address
seg0 = m_ir->CreateShl(m_ir->CreateLShr(val, 48), 13);
}
m_ir->SetInsertPoint(block);
const auto c = m_ir->CreateCall(callee, {m_exec, m_thread, seg0, m_base, GetGpr(0), GetGpr(1), GetGpr(2)});
c->setTailCallKind(llvm::CallInst::TCK_Tail);
c->setCallingConv(CallingConv::GHC);
m_ir->CreateRetVoid();
}
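// Register caching model: m_locals holds the current LLVM value of each PPU
// register within the block, while m_globals holds the pending pointer (GEP) into
// the thread context for registers that were written; FlushRegisters emits the
// actual stores before control can leave the generated code.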
Value* PPUTranslator::RegInit(Value*& local)
{
const auto index = ::narrow<uint>(&local - m_locals);
if (auto old = cast_or_null<Instruction>(m_globals[index]))
{
old->eraseFromParent();
}
// (Re)Initialize global, will be written in FlushRegisters
m_globals[index] = m_ir->CreateStructGEP(m_thread_type, m_thread, index);
return m_globals[index];
}
Value* PPUTranslator::RegLoad(Value*& local)
{
const auto index = ::narrow<uint>(&local - m_locals);
if (local)
{
// Simple load
return local;
}
// Load from the global value
auto ptr = llvm::dyn_cast<llvm::GetElementPtrInst>(m_ir->CreateStructGEP(m_thread_type, m_thread, index));
local = m_ir->CreateLoad(ptr->getResultElementType(), ptr);
return local;
}
void PPUTranslator::RegStore(llvm::Value* value, llvm::Value*& local)
{
RegInit(local);
local = value;
}
void PPUTranslator::FlushRegisters()
{
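// Each pending store is inserted right after the GEP created by RegInit rather
// than at the current position; the original insert point is restored at the end.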
const auto block = m_ir->GetInsertBlock();
for (auto& local : m_locals)
{
const auto index = ::narrow<uint>(&local - m_locals);
// Store value if necessary
if (local && m_globals[index])
{
if (auto next = cast<Instruction>(m_globals[index])->getNextNode())
{
m_ir->SetInsertPoint(next);
}
else
{
m_ir->SetInsertPoint(block);
}
m_ir->CreateStore(local, bitcast(m_globals[index], local->getType()->getPointerTo()));
m_globals[index] = nullptr;
}
}
m_ir->SetInsertPoint(block);
}
Value* PPUTranslator::Solid(Value* value)
{
const u32 size = ::narrow<u32>(+value->getType()->getPrimitiveSizeInBits());
/* Workarounds (casting bool vectors directly may produce invalid code) */
if (value->getType() == GetType<bool[4]>())
{
return bitcast(SExt(value, GetType<u32[4]>()), m_ir->getIntNTy(128));
}
if (value->getType() == GetType<bool[8]>())
{
return bitcast(SExt(value, GetType<u16[8]>()), m_ir->getIntNTy(128));
}
if (value->getType() == GetType<bool[16]>())
{
return bitcast(SExt(value, GetType<u8[16]>()), m_ir->getIntNTy(128));
}
return bitcast(value, m_ir->getIntNTy(size));
}
Value* PPUTranslator::IsZero(Value* value)
{
return m_ir->CreateIsNull(Solid(value));
}
Value* PPUTranslator::IsNotZero(Value* value)
{
return m_ir->CreateIsNotNull(Solid(value));
}
Value* PPUTranslator::IsOnes(Value* value)
{
value = Solid(value);
return m_ir->CreateICmpEQ(value, ConstantInt::getSigned(value->getType(), -1));
}
Value* PPUTranslator::IsNotOnes(Value* value)
{
value = Solid(value);
return m_ir->CreateICmpNE(value, ConstantInt::getSigned(value->getType(), -1));
}
Value* PPUTranslator::Broadcast(Value* value, u32 count)
{
if (const auto cv = dyn_cast<Constant>(value))
{
return ConstantVector::getSplat(llvm::ElementCount::get(count, false), cv);
}
return m_ir->CreateVectorSplat(count, value);
}
Value* PPUTranslator::Shuffle(Value* left, Value* right, std::initializer_list<u32> indices)
{
const auto type = left->getType();
if (!right)
{
right = UndefValue::get(type);
}
if (!m_is_be)
{
std::vector<u32> data; data.reserve(indices.size());
const u32 mask = cast<FixedVectorType>(type)->getNumElements() - 1;
// Transform indices (works for vectors with size 2^N)
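// (On a little-endian host the index list is reversed and every index is XORed
// with elements-1, which stays within the same source operand while mirroring the
// lane order.)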
for (usz i = 0; i < indices.size(); i++)
{
data.push_back(*(indices.begin() + indices.size() - 1 - i) ^ mask);
}
return m_ir->CreateShuffleVector(left, right, ConstantDataVector::get(m_context, data));
}
return m_ir->CreateShuffleVector(left, right, ConstantDataVector::get(m_context, { indices.begin(), indices.end() }));
}
Value* PPUTranslator::SExt(Value* value, Type* type)
{
type = type ? type : ScaleType(value->getType(), 1);
return value->getType() != type ? m_ir->CreateSExt(value, type) : value;
}
Value* PPUTranslator::ZExt(Value* value, Type* type)
{
type = type ? type : ScaleType(value->getType(), 1);
return value->getType() != type ? m_ir->CreateZExt(value, type) : value;
}
Value* PPUTranslator::Add(std::initializer_list<Value*> args)
{
Value* result{};
for (auto arg : args)
{
result = result ? m_ir->CreateAdd(result, arg) : arg;
}
return result;
}
Value* PPUTranslator::Trunc(Value* value, Type* type)
{
type = type ? type : ScaleType(value->getType(), -1);
return type != value->getType() ? m_ir->CreateTrunc(value, type) : value;
}
void PPUTranslator::UseCondition(MDNode* hint, Value* cond)
{
FlushRegisters();
if (cond)
{
const auto local = BasicBlock::Create(m_context, "__cond", m_function);
const auto next = BasicBlock::Create(m_context, "__next", m_function);
m_ir->CreateCondBr(cond, local, next, hint);
m_ir->SetInsertPoint(next);
CallFunction(m_addr + 4);
m_ir->SetInsertPoint(local);
}
}
llvm::Value* PPUTranslator::GetMemory(llvm::Value* addr, llvm::Type* type)
{
return bitcast(m_ir->CreateGEP(get_type<u8>(), m_base, addr), type->getPointerTo());
}
Value* PPUTranslator::ReadMemory(Value* addr, Type* type, bool is_be, u32 align)
{
const u32 size = ::narrow<u32>(+type->getPrimitiveSizeInBits());
if (is_be ^ m_is_be && size > 8)
{
// Read, byteswap, bitcast
const auto int_type = m_ir->getIntNTy(size);
const auto value = m_ir->CreateAlignedLoad(int_type, GetMemory(addr, int_type), llvm::MaybeAlign{align});
return bitcast(Call(int_type, fmt::format("llvm.bswap.i%u", size), value), type);
}
// Read normally
return m_ir->CreateAlignedLoad(type, GetMemory(addr, type), llvm::MaybeAlign{align});
}
void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align)
{
const auto type = value->getType();
const u32 size = ::narrow<u32>(+type->getPrimitiveSizeInBits());
if (is_be ^ m_is_be && size > 8)
{
// Bitcast, byteswap
const auto int_type = m_ir->getIntNTy(size);
value = Call(int_type, fmt::format("llvm.bswap.i%u", size), bitcast(value, int_type));
}
// Write
m_ir->CreateAlignedStore(value, GetMemory(addr, value->getType()), llvm::MaybeAlign{align});
}
void PPUTranslator::CompilationError(const std::string& error)
{
ppu_log.error("LLVM: [0x%08x] Error: %s", m_addr + (m_reloc ? m_reloc->addr : 0), error);
}
void PPUTranslator::MFVSCR(ppu_opcode_t op)
{
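// VSCR layout used here: bit 0 = SAT (sticky saturation), bit 16 = NJ (non-java
// mode); the value is materialized in a single 32-bit element.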
const auto vsat = g_cfg.core.ppu_set_sat_bit ? ZExt(IsNotZero(RegLoad(m_sat)), GetType<u32>()) : m_ir->getInt32(0);
const auto vscr = m_ir->CreateOr(vsat, m_ir->CreateShl(ZExt(RegLoad(m_nj), GetType<u32>()), 16));
SetVr(op.vd, m_ir->CreateInsertElement(ConstantAggregateZero::get(GetType<u32[4]>()), vscr, m_ir->getInt32(m_is_be ? 3 : 0)));
}
void PPUTranslator::MTVSCR(ppu_opcode_t op)
{
const auto vscr = m_ir->CreateExtractElement(GetVr(op.vb, VrType::vi32), m_ir->getInt32(m_is_be ? 3 : 0));
const auto nj = Trunc(m_ir->CreateLShr(vscr, 16), GetType<bool>());
RegStore(nj, m_nj);
if (g_cfg.core.ppu_llvm_nj_fixup)
RegStore(m_ir->CreateSelect(nj, m_ir->getInt32(0x7f80'0000), m_ir->getInt32(0x7fff'ffff)), m_jm_mask);
if (g_cfg.core.ppu_set_sat_bit)
RegStore(m_ir->CreateInsertElement(ConstantAggregateZero::get(GetType<u32[4]>()), m_ir->CreateAnd(vscr, 1), m_ir->getInt32(0)), m_sat);
}
void PPUTranslator::VADDCUW(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
set_vr(op.vd, zext<u32[4]>(a + b < a));
}
void PPUTranslator::VADDFP(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<f32[4]>(op.va, op.vb);
set_vr(op.vd, vec_handle_result(a + b));
}
void PPUTranslator::VADDSBS(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s8[16]>(op.va, op.vb);
const auto r = add_sat(a, b);
set_vr(op.vd, r);
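// The wrapped sum differs from the saturated result only when saturation occurred,
// so XORing them feeds a nonzero value into the sticky SAT bit exactly in that case
// (the same idiom is used by the other saturating ops below).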
set_sat(r ^ (a + b));
}
void PPUTranslator::VADDSHS(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s16[8]>(op.va, op.vb);
const auto r = add_sat(a, b);
set_vr(op.vd, r);
set_sat(r ^ (a + b));
}
void PPUTranslator::VADDSWS(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s32[4]>(op.va, op.vb);
const auto r = add_sat(a, b);
set_vr(op.vd, r);
set_sat(r ^ (a + b));
}
void PPUTranslator::VADDUBM(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u8[16]>(op.va, op.vb);
set_vr(op.vd, a + b);
}
void PPUTranslator::VADDUBS(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u8[16]>(op.va, op.vb);
const auto r = add_sat(a, b);
set_vr(op.vd, r);
set_sat(r ^ (a + b));
}
void PPUTranslator::VADDUHM(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u16[8]>(op.va, op.vb);
set_vr(op.vd, a + b);
}
void PPUTranslator::VADDUHS(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u16[8]>(op.va, op.vb);
const auto r = add_sat(a, b);
set_vr(op.vd, r);
set_sat(r ^ (a + b));
}
void PPUTranslator::VADDUWM(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
set_vr(op.vd, a + b);
}
void PPUTranslator::VADDUWS(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
const auto r = add_sat(a, b);
set_vr(op.vd, r);
set_sat(r ^ (a + b));
}
void PPUTranslator::VAND(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
set_vr(op.vd, a & b);
}
void PPUTranslator::VANDC(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
set_vr(op.vd, a & ~b);
}
void PPUTranslator::VAVGSB(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s8[16]>(op.va, op.vb);
set_vr(op.vd, avg(a, b));
}
void PPUTranslator::VAVGSH(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s16[8]>(op.va, op.vb);
set_vr(op.vd, avg(a, b));
}
void PPUTranslator::VAVGSW(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s32[4]>(op.va, op.vb);
set_vr(op.vd, avg(a, b));
}
void PPUTranslator::VAVGUB(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u8[16]>(op.va, op.vb);
set_vr(op.vd, avg(a, b));
}
void PPUTranslator::VAVGUH(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u16[8]>(op.va, op.vb);
set_vr(op.vd, avg(a, b));
}
void PPUTranslator::VAVGUW(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
set_vr(op.vd, avg(a, b));
}
void PPUTranslator::VCFSX(ppu_opcode_t op)
{
const auto b = get_vr<s32[4]>(op.vb);
set_vr(op.vd, fpcast<f32[4]>(b) * fsplat<f32[4]>(std::pow(2, -static_cast<int>(op.vuimm))));
}
void PPUTranslator::VCFUX(ppu_opcode_t op)
{
const auto b = get_vr<u32[4]>(op.vb);
set_vr(op.vd, fpcast<f32[4]>(b) * fsplat<f32[4]>(std::pow(2, -static_cast<int>(op.vuimm))));
}
void PPUTranslator::VCMPBFP(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<f32[4]>(op.va, op.vb);
const auto nle = sext<s32[4]>(fcmp_uno(a > b)) & 0x8000'0000;
const auto nge = sext<s32[4]>(fcmp_uno(a < -b)) & 0x4000'0000;
const auto r = eval(nle | nge);
set_vr(op.vd, r);
if (op.oe) SetCrField(6, m_ir->getFalse(), m_ir->getFalse(), IsZero(r.value), m_ir->getFalse());
}
void PPUTranslator::VCMPEQFP(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<f32[4]>(op.va, op.vb);
const auto r = eval(sext<s32[4]>(fcmp_ord(a == b)));
set_vr(op.vd, r);
if (op.oe) SetCrField(6, IsOnes(r.value), m_ir->getFalse(), IsZero(r.value), m_ir->getFalse());
}
void PPUTranslator::VCMPEQUB(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u8[16]>(op.va, op.vb);
const auto r = eval(sext<s8[16]>(a == b));
set_vr(op.vd, r);
if (op.oe) SetCrField(6, IsOnes(r.value), m_ir->getFalse(), IsZero(r.value), m_ir->getFalse());
}
void PPUTranslator::VCMPEQUH(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u16[8]>(op.va, op.vb);
const auto r = eval(sext<s16[8]>(a == b));
set_vr(op.vd, r);
if (op.oe) SetCrField(6, IsOnes(r.value), m_ir->getFalse(), IsZero(r.value), m_ir->getFalse());
}
void PPUTranslator::VCMPEQUW(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
const auto r = eval(sext<s32[4]>(a == b));
set_vr(op.vd, r);
if (op.oe) SetCrField(6, IsOnes(r.value), m_ir->getFalse(), IsZero(r.value), m_ir->getFalse());
}
void PPUTranslator::VCMPGEFP(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<f32[4]>(op.va, op.vb);
const auto r = eval(sext<s32[4]>(fcmp_ord(a >= b)));
set_vr(op.vd, r);
if (op.oe) SetCrField(6, IsOnes(r.value), m_ir->getFalse(), IsZero(r.value), m_ir->getFalse());
}
void PPUTranslator::VCMPGTFP(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<f32[4]>(op.va, op.vb);
const auto r = eval(sext<s32[4]>(fcmp_ord(a > b)));
set_vr(op.vd, r);
if (op.oe) SetCrField(6, IsOnes(r.value), m_ir->getFalse(), IsZero(r.value), m_ir->getFalse());
}
void PPUTranslator::VCMPGTSB(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s8[16]>(op.va, op.vb);
const auto r = eval(sext<s8[16]>(a > b));
set_vr(op.vd, r);
if (op.oe) SetCrField(6, IsOnes(r.value), m_ir->getFalse(), IsZero(r.value), m_ir->getFalse());
}
void PPUTranslator::VCMPGTSH(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s16[8]>(op.va, op.vb);
const auto r = eval(sext<s16[8]>(a > b));
set_vr(op.vd, r);
if (op.oe) SetCrField(6, IsOnes(r.value), m_ir->getFalse(), IsZero(r.value), m_ir->getFalse());
}
void PPUTranslator::VCMPGTSW(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s32[4]>(op.va, op.vb);
const auto r = eval(sext<s32[4]>(a > b));
set_vr(op.vd, r);
if (op.oe) SetCrField(6, IsOnes(r.value), m_ir->getFalse(), IsZero(r.value), m_ir->getFalse());
}
void PPUTranslator::VCMPGTUB(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u8[16]>(op.va, op.vb);
const auto r = eval(sext<s8[16]>(a > b));
set_vr(op.vd, r);
if (op.oe) SetCrField(6, IsOnes(r.value), m_ir->getFalse(), IsZero(r.value), m_ir->getFalse());
}
void PPUTranslator::VCMPGTUH(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u16[8]>(op.va, op.vb);
const auto r = eval(sext<s16[8]>(a > b));
set_vr(op.vd, r);
if (op.oe) SetCrField(6, IsOnes(r.value), m_ir->getFalse(), IsZero(r.value), m_ir->getFalse());
}
void PPUTranslator::VCMPGTUW(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
const auto r = eval(sext<s32[4]>(a > b));
set_vr(op.vd, r);
if (op.oe) SetCrField(6, IsOnes(r.value), m_ir->getFalse(), IsZero(r.value), m_ir->getFalse());
}
void PPUTranslator::VCTSXS(ppu_opcode_t op)
{
const auto b = get_vr<f32[4]>(op.vb);
const auto scaled = b * fsplat<f32[4]>(std::pow(2, 0 + op.vuimm));
const auto const1 = fsplat<f32[4]>(-std::pow(2, 31));
const auto is_nan = fcmp_uno(b != b);
const auto sat_l = fcmp_ord(scaled < const1);
const auto sat_h = fcmp_ord(scaled >= fsplat<f32[4]>(std::pow(2, 31)));
value_t<s32[4]> converted = eval(fpcast<s32[4]>(select(sat_l, const1, scaled)));
if (g_cfg.core.ppu_fix_vnan)
converted = eval(select(is_nan, splat<s32[4]>(0), converted)); // NaN -> 0
set_vr(op.vd, select(sat_h, splat<s32[4]>(0x7fff'ffff), converted));
set_sat(sext<s32[4]>(sat_l) | sext<s32[4]>(sat_h));
}
void PPUTranslator::VCTUXS(ppu_opcode_t op)
{
const auto b = get_vr<f32[4]>(op.vb);
const auto scaled = b * fsplat<f32[4]>(std::pow(2, 0 + op.vuimm));
const auto const0 = fsplat<f32[4]>(0.);
const auto is_nan = fcmp_uno(b != b);
const auto sat_l = fcmp_ord(scaled < const0);
const auto sat_h = fcmp_ord(scaled >= fsplat<f32[4]>(std::pow(2, 32)));
value_t<u32[4]> converted = eval(fpcast<u32[4]>(select(sat_l, const0, scaled)));
if (g_cfg.core.ppu_fix_vnan)
converted = eval(select(is_nan, splat<u32[4]>(0), converted)); // NaN -> 0
set_vr(op.vd, select(sat_h, splat<u32[4]>(0xffff'ffff), converted));
set_sat(sext<s32[4]>(sat_l) | sext<s32[4]>(sat_h));
}
void PPUTranslator::VEXPTEFP(ppu_opcode_t op)
{
const auto b = get_vr<f32[4]>(op.vb);
set_vr(op.vd, vec_handle_result(llvm_calli<f32[4], decltype(b)>{"llvm.exp2.v4f32", {b}}));
}
void PPUTranslator::VLOGEFP(ppu_opcode_t op)
{
const auto b = get_vr<f32[4]>(op.vb);
set_vr(op.vd, vec_handle_result(llvm_calli<f32[4], decltype(b)>{"llvm.log2.v4f32", {b}}));
}
void PPUTranslator::VMADDFP(ppu_opcode_t op)
{
auto [a, b, c] = get_vrs<f32[4]>(op.va, op.vb, op.vc);
// Optimization: Emit only a floating multiply if the addend is zero
if (auto [ok, data] = get_const_vector(b.value, m_addr); ok)
{
if (data == v128::from32p(1u << 31))
{
set_vr(op.vd, vec_handle_result(a * c));
ppu_log.notice("LLVM: VMADDFP with -0 addend at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0));
return;
}
if (!m_use_fma && data == v128{})
{
set_vr(op.vd, vec_handle_result(a * c + fsplat<f32[4]>(0.f)));
ppu_log.notice("LLVM: VMADDFP with -0 addend at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0));
return;
}
}
if (m_use_fma)
{
set_vr(op.vd, vec_handle_result(fmuladd(a, c, b)));
return;
}
// Emulated FMA via double precision (caution: out-of-lane algorithm)
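// The product of two f32 values is exact in f64, so a double-precision
// multiply-add rounded back to f32 closely approximates (but does not always
// match) a true fused multiply-add.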
const auto xa = fpcast<f64[4]>(a);
const auto xb = fpcast<f64[4]>(b);
const auto xc = fpcast<f64[4]>(c);
const auto xr = fmuladd(xa, xc, xb);
set_vr(op.vd, vec_handle_result(fpcast<f32[4]>(xr)));
}
void PPUTranslator::VMAXFP(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<f32[4]>(op.va, op.vb);
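// ANDing fmax(a, b) with fmax(b, a) makes the result independent of operand order
// for signed zeros (+0 wins here; VMINFP below uses OR so that -0 wins).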
set_vr(op.vd, vec_handle_result(bitcast<f32[4]>(bitcast<u32[4]>(fmax(a, b)) & bitcast<u32[4]>(fmax(b, a)))));
}
void PPUTranslator::VMAXSB(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s8[16]>(op.va, op.vb);
set_vr(op.vd, max(a, b));
}
void PPUTranslator::VMAXSH(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s16[8]>(op.va, op.vb);
set_vr(op.vd, max(a, b));
}
void PPUTranslator::VMAXSW(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s32[4]>(op.va, op.vb);
set_vr(op.vd, max(a, b));
}
void PPUTranslator::VMAXUB(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u8[16]>(op.va, op.vb);
set_vr(op.vd, max(a, b));
}
void PPUTranslator::VMAXUH(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u16[8]>(op.va, op.vb);
set_vr(op.vd, max(a, b));
}
void PPUTranslator::VMAXUW(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
set_vr(op.vd, max(a, b));
}
void PPUTranslator::VMHADDSHS(ppu_opcode_t op)
{
// Caution: out-of-lane algorithm
const auto [a, b, c] = get_vrs<s16[8]>(op.va, op.vb, op.vc);
const auto m = ((sext<s32[8]>(a) * sext<s32[8]>(b)) >> 15) + sext<s32[8]>(c);
const auto r = trunc<u16[8]>(min(max(m, splat<s32[8]>(-0x8000)), splat<s32[8]>(0x7fff)));
set_vr(op.vd, r);
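// Adding 0x8000 makes any intermediate outside [-0x8000, 0x7fff] spill into the
// upper half, so the truncated high 16 bits are nonzero exactly when the clamp
// above changed the value.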
set_sat(trunc<u16[8]>((m + 0x8000) >> 16));
}
void PPUTranslator::VMHRADDSHS(ppu_opcode_t op)
{
// Caution: out-of-lane algorithm
const auto [a, b, c] = get_vrs<s16[8]>(op.va, op.vb, op.vc);
const auto m = ((sext<s32[8]>(a) * sext<s32[8]>(b) + splat<s32[8]>(0x4000)) >> 15) + sext<s32[8]>(c);
const auto r = trunc<u16[8]>(min(max(m, splat<s32[8]>(-0x8000)), splat<s32[8]>(0x7fff)));
set_vr(op.vd, r);
set_sat(trunc<u16[8]>((m + 0x8000) >> 16));
}
void PPUTranslator::VMINFP(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<f32[4]>(op.va, op.vb);
set_vr(op.vd, vec_handle_result(bitcast<f32[4]>(bitcast<u32[4]>(fmin(a, b)) | bitcast<u32[4]>(fmin(b, a)))));
}
void PPUTranslator::VMINSB(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s8[16]>(op.va, op.vb);
set_vr(op.vd, min(a, b));
}
void PPUTranslator::VMINSH(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s16[8]>(op.va, op.vb);
set_vr(op.vd, min(a, b));
}
void PPUTranslator::VMINSW(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s32[4]>(op.va, op.vb);
set_vr(op.vd, min(a, b));
}
void PPUTranslator::VMINUB(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u8[16]>(op.va, op.vb);
set_vr(op.vd, min(a, b));
}
void PPUTranslator::VMINUH(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u16[8]>(op.va, op.vb);
set_vr(op.vd, min(a, b));
}
void PPUTranslator::VMINUW(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
set_vr(op.vd, min(a, b));
}
void PPUTranslator::VMLADDUHM(ppu_opcode_t op)
{
const auto [a, b, c] = get_vrs<u16[8]>(op.va, op.vb, op.vc);
set_vr(op.vd, a * b + c);
}
void PPUTranslator::VMRGHB(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u8[16]>(op.va, op.vb);
set_vr(op.vd, shuffle2(a, b, 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15));
}
void PPUTranslator::VMRGHH(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u16[8]>(op.va, op.vb);
set_vr(op.vd, shuffle2(a, b, 12, 4, 13, 5, 14, 6, 15, 7));
}
void PPUTranslator::VMRGHW(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
set_vr(op.vd, shuffle2(a, b, 6, 2, 7, 3));
}
void PPUTranslator::VMRGLB(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u8[16]>(op.va, op.vb);
set_vr(op.vd, shuffle2(a, b, 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7));
}
void PPUTranslator::VMRGLH(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u16[8]>(op.va, op.vb);
set_vr(op.vd, shuffle2(a, b, 8, 0, 9, 1, 10, 2, 11, 3));
}
void PPUTranslator::VMRGLW(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
set_vr(op.vd, shuffle2(a, b, 4, 0, 5, 1));
}
void PPUTranslator::VMSUMMBM(ppu_opcode_t op)
{
const auto a = get_vr<s16[8]>(op.va);
const auto b = get_vr<u16[8]>(op.vb);
const auto c = get_vr<s32[4]>(op.vc);
const auto ml = bitcast<s32[4]>((a << 8 >> 8) * noncast<s16[8]>(b << 8 >> 8));
const auto mh = bitcast<s32[4]>((a >> 8) * noncast<s16[8]>(b >> 8));
set_vr(op.vd, ((ml << 16 >> 16) + (ml >> 16)) + ((mh << 16 >> 16) + (mh >> 16)) + c);
}
void PPUTranslator::VMSUMSHM(ppu_opcode_t op)
{
const auto [a, b, c] = get_vrs<s32[4]>(op.va, op.vb, op.vc);
const auto ml = (a << 16 >> 16) * (b << 16 >> 16);
const auto mh = (a >> 16) * (b >> 16);
set_vr(op.vd, ml + mh + c);
}
void PPUTranslator::VMSUMSHS(ppu_opcode_t op)
{
const auto [a, b, c] = get_vrs<s32[4]>(op.va, op.vb, op.vc);
const auto ml = (a << 16 >> 16) * (b << 16 >> 16);
const auto mh = (a >> 16) * (b >> 16);
const auto m = eval(ml + mh);
const auto s = eval(m + c);
const auto z = eval((c >> 31) ^ 0x7fffffff);
const auto mx = eval(m ^ sext<s32[4]>(m == 0x80000000u));
const auto x = eval(((mx ^ s) & ~(c ^ mx)) >> 31);
set_vr(op.vd, eval((z & x) | (s & ~x)));
set_sat(x);
}
void PPUTranslator::VMSUMUBM(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u16[8]>(op.va, op.vb);
const auto c = get_vr<u32[4]>(op.vc);
const auto ml = bitcast<u32[4]>((a << 8 >> 8) * (b << 8 >> 8));
const auto mh = bitcast<u32[4]>((a >> 8) * (b >> 8));
set_vr(op.vd, eval(((ml << 16 >> 16) + (ml >> 16)) + ((mh << 16 >> 16) + (mh >> 16)) + c));
}
void PPUTranslator::VMSUMUHM(ppu_opcode_t op)
{
const auto [a, b, c] = get_vrs<u32[4]>(op.va, op.vb, op.vc);
const auto ml = (a << 16 >> 16) * (b << 16 >> 16);
const auto mh = (a >> 16) * (b >> 16);
set_vr(op.vd, ml + mh + c);
}
void PPUTranslator::VMSUMUHS(ppu_opcode_t op)
{
const auto [a, b, c] = get_vrs<u32[4]>(op.va, op.vb, op.vc);
const auto ml = (a << 16 >> 16) * (b << 16 >> 16);
const auto mh = (a >> 16) * (b >> 16);
const auto s = eval(ml + mh);
const auto s2 = eval(s + c);
const auto x = eval((s < ml) | (s2 < s));
set_vr(op.vd, select(x, splat<u32[4]>(-1), s2));
set_sat(x);
}
void PPUTranslator::VMULESB(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s16[8]>(op.va, op.vb);
set_vr(op.vd, (a >> 8) * (b >> 8));
}
void PPUTranslator::VMULESH(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s32[4]>(op.va, op.vb);
set_vr(op.vd, (a >> 16) * (b >> 16));
}
void PPUTranslator::VMULEUB(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u16[8]>(op.va, op.vb);
set_vr(op.vd, (a >> 8) * (b >> 8));
}
void PPUTranslator::VMULEUH(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
set_vr(op.vd, (a >> 16) * (b >> 16));
}
void PPUTranslator::VMULOSB(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s16[8]>(op.va, op.vb);
set_vr(op.vd, (a << 8 >> 8) * (b << 8 >> 8));
}
void PPUTranslator::VMULOSH(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s32[4]>(op.va, op.vb);
set_vr(op.vd, (a << 16 >> 16) * (b << 16 >> 16));
}
void PPUTranslator::VMULOUB(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u16[8]>(op.va, op.vb);
set_vr(op.vd, (a << 8 >> 8) * (b << 8 >> 8));
}
void PPUTranslator::VMULOUH(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
set_vr(op.vd, (a << 16 >> 16) * (b << 16 >> 16));
}
void PPUTranslator::VNMSUBFP(ppu_opcode_t op)
{
auto [a, b, c] = get_vrs<f32[4]>(op.va, op.vb, op.vc);
// Optimization: Emit only a floating multiply if the addend is zero
if (const auto [ok, data] = get_const_vector(b.value, m_addr); ok)
{
if (data == v128{})
{
set_vr(op.vd, vec_handle_result(-(a * c)));
ppu_log.notice("LLVM: VNMSUBFP with 0 addend at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0));
return;
}
if (!m_use_fma && data == v128::from32p(1u << 31))
{
set_vr(op.vd, vec_handle_result(-(a * c - fsplat<f32[4]>(0.f))));
ppu_log.notice("LLVM: VNMSUBFP with -0 addend at [0x%08x]", m_addr + (m_reloc ? m_reloc->addr : 0));
return;
}
}
// Differs from the emulated path with regard to negative zero
if (m_use_fma)
{
set_vr(op.vd, vec_handle_result(-fmuladd(a, c, -b)));
return;
}
// Emulated FMA via double precision (caution: out-of-lane algorithm)
const auto xa = fpcast<f64[4]>(a);
const auto xb = fpcast<f64[4]>(b);
const auto xc = fpcast<f64[4]>(c);
const auto nr = xa * xc - xb;
set_vr(op.vd, vec_handle_result(fpcast<f32[4]>(-nr)));
}
void PPUTranslator::VNOR(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
set_vr(op.vd, ~(a | b));
}
void PPUTranslator::VOR(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
set_vr(op.vd, a | b);
}
void PPUTranslator::VPERM(ppu_opcode_t op)
{
const auto [a, b, c] = get_vrs<u8[16]>(op.va, op.vb, op.vc);
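// VPERM's selector uses big-endian byte numbering; on the little-endian host the
// selector is complemented so it indexes from the opposite end, while the
// operand-select bit of the original selector chooses between a and b.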
if (op.ra == op.rb)
{
set_vr(op.vd, pshufb(a, ~c & 0xf));
return;
}
if (m_use_avx512_icl)
{
const auto i = eval(~c);
set_vr(op.vd, vperm2b(b, a, i));
return;
}
const auto i = eval(~c & 0x1f);
set_vr(op.vd, select(noncast<s8[16]>(c << 3) >= 0, pshufb(a, i), pshufb(b, i)));
}
void PPUTranslator::VPKPX(ppu_opcode_t op)
{
// Caution: out-of-lane algorithm
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
const auto ab = shuffle2(b, a, 0, 1, 2, 3, 4, 5, 6, 7);
const auto e1 = (ab & 0x01f80000) >> 9;
const auto e2 = (ab & 0xf800) >> 6;
const auto e3 = (ab & 0xf8) >> 3;
set_vr(op.vd, trunc<u16[8]>(e1 | e2 | e3));
}
void PPUTranslator::VPKSHSS(ppu_opcode_t op)
{
// Caution: potentially out-of-lane algorithm
const auto [a, b] = get_vrs<s16[8]>(op.va, op.vb);
const auto ab = shuffle2(b, a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
const auto r = trunc<u8[16]>(min(max(ab, splat<s16[16]>(-0x80)), splat<s16[16]>(0x7f)));
set_vr(op.vd, r);
set_sat(bitcast<u16[8]>((a + 0x80) | (b + 0x80)) >> 8);
}
void PPUTranslator::VPKSHUS(ppu_opcode_t op)
{
// Caution: potentially out-of-lane algorithm
const auto [a, b] = get_vrs<s16[8]>(op.va, op.vb);
const auto ab = shuffle2(b, a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
const auto r = trunc<u8[16]>(min(max(ab, splat<s16[16]>(0)), splat<s16[16]>(0xff)));
set_vr(op.vd, r);
set_sat(bitcast<u16[8]>(a | b) >> 8);
}
void PPUTranslator::VPKSWSS(ppu_opcode_t op)
{
// Caution: potentially out-of-lane algorithm
const auto [a, b] = get_vrs<s32[4]>(op.va, op.vb);
const auto ab = shuffle2(b, a, 0, 1, 2, 3, 4, 5, 6, 7);
const auto r = trunc<u16[8]>(min(max(ab, splat<s32[8]>(-0x8000)), splat<s32[8]>(0x7fff)));
set_vr(op.vd, r);
set_sat(bitcast<u32[4]>((a + 0x8000) | (b + 0x8000)) >> 16);
}
void PPUTranslator::VPKSWUS(ppu_opcode_t op)
{
// Caution: potentially out-of-lane algorithm
const auto [a, b] = get_vrs<s32[4]>(op.va, op.vb);
const auto ab = shuffle2(b, a, 0, 1, 2, 3, 4, 5, 6, 7);
const auto r = trunc<u16[8]>(min(max(ab, splat<s32[8]>(0)), splat<s32[8]>(0xffff)));
set_vr(op.vd, r);
set_sat(bitcast<u32[4]>(a | b) >> 16);
}
void PPUTranslator::VPKUHUM(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u8[16]>(op.va, op.vb);
const auto r = shuffle2(b, a, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
set_vr(op.vd, r);
}
void PPUTranslator::VPKUHUS(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u16[8]>(op.va, op.vb);
const auto ta = bitcast<u8[16]>(min(a, splat<u16[8]>(0xff)));
const auto tb = bitcast<u8[16]>(min(b, splat<u16[8]>(0xff)));
const auto r = shuffle2(tb, ta, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
set_vr(op.vd, r);
set_sat((a | b) >> 8);
}
void PPUTranslator::VPKUWUM(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u16[8]>(op.va, op.vb);
const auto r = shuffle2(b, a, 0, 2, 4, 6, 8, 10, 12, 14);
set_vr(op.vd, r);
}
void PPUTranslator::VPKUWUS(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
const auto ta = bitcast<u16[8]>(min(a, splat<u32[4]>(0xffff)));
const auto tb = bitcast<u16[8]>(min(b, splat<u32[4]>(0xffff)));
const auto r = shuffle2(tb, ta, 0, 2, 4, 6, 8, 10, 12, 14);
set_vr(op.vd, r);
set_sat((a | b) >> 16);
}
void PPUTranslator::VREFP(ppu_opcode_t op)
{
set_vr(op.vd, vec_handle_result(fsplat<f32[4]>(1.0) / get_vr<f32[4]>(op.vb)));
}
void PPUTranslator::VRFIM(ppu_opcode_t op)
{
set_vr(op.vd, vec_handle_result(call<f32[4]>(get_intrinsic<f32[4]>(Intrinsic::floor), get_vr<f32[4]>(op.vb))));
}
void PPUTranslator::VRFIN(ppu_opcode_t op)
{
set_vr(op.vd, vec_handle_result(call<f32[4]>(get_intrinsic<f32[4]>(Intrinsic::roundeven), get_vr<f32[4]>(op.vb))));
}
void PPUTranslator::VRFIP(ppu_opcode_t op)
{
set_vr(op.vd, vec_handle_result(call<f32[4]>(get_intrinsic<f32[4]>(Intrinsic::ceil), get_vr<f32[4]>(op.vb))));
}
void PPUTranslator::VRFIZ(ppu_opcode_t op)
{
set_vr(op.vd, vec_handle_result(call<f32[4]>(get_intrinsic<f32[4]>(Intrinsic::trunc), get_vr<f32[4]>(op.vb))));
}
void PPUTranslator::VRLB(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u8[16]>(op.va, op.vb);
set_vr(op.vd, rol(a, b));
}
void PPUTranslator::VRLH(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u16[8]>(op.va, op.vb);
set_vr(op.vd, rol(a, b));
}
void PPUTranslator::VRLW(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
set_vr(op.vd, rol(a, b));
}
void PPUTranslator::VRSQRTEFP(ppu_opcode_t op)
{
set_vr(op.vd, vec_handle_result(fsplat<f32[4]>(1.0) / call<f32[4]>(get_intrinsic<f32[4]>(Intrinsic::sqrt), get_vr<f32[4]>(op.vb))));
}
void PPUTranslator::VSEL(ppu_opcode_t op)
{
const auto c = get_vr<u32[4]>(op.vc);
// Check if the constant mask doesn't require bit granularity
if (auto [ok, mask] = get_const_vector(c.value, m_addr); ok)
{
bool sel_32 = true;
for (u32 i = 0; i < 4; i++)
{
if (mask._u32[i] && mask._u32[i] != 0xFFFFFFFF)
{
sel_32 = false;
break;
}
}
if (sel_32)
{
set_vr(op.vd, select(noncast<s32[4]>(c) != 0, get_vr<u32[4]>(op.vb), get_vr<u32[4]>(op.va)));
return;
}
bool sel_16 = true;
for (u32 i = 0; i < 8; i++)
{
if (mask._u16[i] && mask._u16[i] != 0xFFFF)
{
sel_16 = false;
break;
}
}
if (sel_16)
{
set_vr(op.vd, select(bitcast<s16[8]>(c) != 0, get_vr<u16[8]>(op.vb), get_vr<u16[8]>(op.va)));
return;
}
bool sel_8 = true;
for (u32 i = 0; i < 16; i++)
{
if (mask._u8[i] && mask._u8[i] != 0xFF)
{
sel_8 = false;
break;
}
}
if (sel_8)
{
set_vr(op.vd, select(bitcast<s8[16]>(c) != 0, get_vr<u8[16]>(op.vb), get_vr<u8[16]>(op.va)));
return;
}
}
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
set_vr(op.vd, eval((b & c) | (a & ~c)));
}
void PPUTranslator::VSL(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u8[16]>(op.va, op.vb);
set_vr(op.vd, fshl(a, zshuffle(a, 16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14), b));
}
void PPUTranslator::VSLB(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u8[16]>(op.va, op.vb);
set_vr(op.vd, a << (b & 7));
}
void PPUTranslator::VSLDOI(ppu_opcode_t op)
{
if (op.vsh == 0)
{
set_vr(op.vd, get_vr<u32[4]>(op.va));
}
else if ((op.vsh % 4) == 0)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
const auto s = op.vsh / 4;
const auto x = 7;
set_vr(op.vd, shuffle2(b, a, (s + 3) ^ x, (s + 2) ^ x, (s + 1) ^ x, (s) ^ x));
}
else if ((op.vsh % 2) == 0)
{
const auto [a, b] = get_vrs<u16[8]>(op.va, op.vb);
const auto s = op.vsh / 2;
const auto x = 15;
set_vr(op.vd, shuffle2(b, a, (s + 7) ^ x, (s + 6) ^ x, (s + 5) ^ x, (s + 4) ^ x, (s + 3) ^ x, (s + 2) ^ x, (s + 1) ^ x, (s) ^ x));
}
else
{
const auto [a, b] = get_vrs<u8[16]>(op.va, op.vb);
const auto s = op.vsh;
const auto x = 31;
set_vr(op.vd, shuffle2(b, a, (s + 15) ^ x, (s + 14) ^ x, (s + 13) ^ x, (s + 12) ^ x, (s + 11) ^ x, (s + 10) ^ x, (s + 9) ^ x, (s + 8) ^ x, (s + 7) ^ x, (s + 6) ^ x, (s + 5) ^ x, (s + 4) ^ x, (s + 3) ^ x, (s + 2) ^ x, (s + 1) ^ x, (s) ^ x));
}
}
void PPUTranslator::VSLH(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u16[8]>(op.va, op.vb);
set_vr(op.vd, a << (b & 15));
}
void PPUTranslator::VSLO(ppu_opcode_t op)
{
// TODO (rare)
const auto [a, b] = get_vrs<u128>(op.va, op.vb);
set_vr(op.vd, a << (b & 0x78));
}
void PPUTranslator::VSLW(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
set_vr(op.vd, a << (b & 31));
}
void PPUTranslator::VSPLTB(ppu_opcode_t op)
{
const u32 ui = ~op.vuimm & 0xf;
set_vr(op.vd, zshuffle(get_vr<u8[16]>(op.vb), ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui));
}
void PPUTranslator::VSPLTH(ppu_opcode_t op)
{
const u32 ui = ~op.vuimm & 0x7;
set_vr(op.vd, zshuffle(get_vr<u16[8]>(op.vb), ui, ui, ui, ui, ui, ui, ui, ui));
}
void PPUTranslator::VSPLTISB(ppu_opcode_t op)
{
set_vr(op.vd, splat<u8[16]>(op.vsimm));
}
void PPUTranslator::VSPLTISH(ppu_opcode_t op)
{
set_vr(op.vd, splat<u16[8]>(op.vsimm));
}
void PPUTranslator::VSPLTISW(ppu_opcode_t op)
{
set_vr(op.vd, splat<u32[4]>(op.vsimm));
}
void PPUTranslator::VSPLTW(ppu_opcode_t op)
{
const u32 ui = ~op.vuimm & 0x3;
set_vr(op.vd, zshuffle(get_vr<u32[4]>(op.vb), ui, ui, ui, ui));
}
void PPUTranslator::VSR(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u8[16]>(op.va, op.vb);
set_vr(op.vd, fshr(zshuffle(a, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), a, b));
}
void PPUTranslator::VSRAB(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s8[16]>(op.va, op.vb);
set_vr(op.vd, a >> (b & 7));
}
void PPUTranslator::VSRAH(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s16[8]>(op.va, op.vb);
set_vr(op.vd, a >> (b & 15));
}
void PPUTranslator::VSRAW(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s32[4]>(op.va, op.vb);
set_vr(op.vd, a >> (b & 31));
}
void PPUTranslator::VSRB(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u8[16]>(op.va, op.vb);
set_vr(op.vd, a >> (b & 7));
}
void PPUTranslator::VSRH(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u16[8]>(op.va, op.vb);
set_vr(op.vd, a >> (b & 15));
}
void PPUTranslator::VSRO(ppu_opcode_t op)
{
// TODO (very rare)
const auto [a, b] = get_vrs<u128>(op.va, op.vb);
set_vr(op.vd, a >> (b & 0x78));
}
void PPUTranslator::VSRW(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
set_vr(op.vd, a >> (b & 31));
}
void PPUTranslator::VSUBCUW(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
set_vr(op.vd, zext<u32[4]>(a >= b));
}
void PPUTranslator::VSUBFP(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<f32[4]>(op.va, op.vb);
set_vr(op.vd, vec_handle_result(a - b));
}
void PPUTranslator::VSUBSBS(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s8[16]>(op.va, op.vb);
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
set_sat(r ^ (a - b));
}
void PPUTranslator::VSUBSHS(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s16[8]>(op.va, op.vb);
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
set_sat(r ^ (a - b));
}
void PPUTranslator::VSUBSWS(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s32[4]>(op.va, op.vb);
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
set_sat(r ^ (a - b));
}
void PPUTranslator::VSUBUBM(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u8[16]>(op.va, op.vb);
set_vr(op.vd, eval(a - b));
}
void PPUTranslator::VSUBUBS(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u8[16]>(op.va, op.vb);
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
set_sat(r ^ (a - b));
}
void PPUTranslator::VSUBUHM(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u16[8]>(op.va, op.vb);
set_vr(op.vd, eval(a - b));
}
void PPUTranslator::VSUBUHS(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u16[8]>(op.va, op.vb);
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
set_sat(r ^ (a - b));
}
void PPUTranslator::VSUBUWM(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
set_vr(op.vd, eval(a - b));
}
void PPUTranslator::VSUBUWS(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
set_sat(r ^ (a - b));
}
void PPUTranslator::VSUMSWS(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s32[4]>(op.va, op.vb);
const auto x = sext<s64[2]>(zshuffle(a, 0, 1));
const auto y = sext<s64[2]>(zshuffle(a, 2, 3));
const auto z = sext<s64[2]>(zshuffle(b, 0, 4));
const auto s = eval(x + y + z);
const auto r = min(max(zshuffle(s, 0, 2) + zshuffle(s, 1, 2), splat<s64[2]>(-0x8000'0000ll)), splat<s64[2]>(0x7fff'ffff));
set_vr(op.vd, zshuffle(bitcast<u32[4]>(r), 0, 4, 4, 4));
set_sat(bitcast<u64[2]>(r + 0x8000'0000) >> 32);
}
void PPUTranslator::VSUM2SWS(ppu_opcode_t op)
{
const auto [a, b] = get_vrs<s64[2]>(op.va, op.vb);
const auto x = a << 32 >> 32;
const auto y = a >> 32;
const auto z = b >> 32;
const auto r = min(max(x + y + z, splat<s64[2]>(-0x8000'0000ll)), splat<s64[2]>(0x7fff'ffff));
set_vr(op.vd, zshuffle(bitcast<u32[4]>(r), 0, 4, 2, 4));
set_sat(bitcast<u64[2]>(r + 0x8000'0000) >> 32);
}
void PPUTranslator::VSUM4SBS(ppu_opcode_t op)
{
const auto a = get_vr<s16[8]>(op.va);
const auto b = get_vr<s32[4]>(op.vb);
const auto x = eval(bitcast<s32[4]>((a << 8 >> 8) + (a >> 8)));
const auto s = eval((x << 16 >> 16) + (x >> 16));
const auto r = add_sat(s, b);
set_vr(op.vd, r);
set_sat(r ^ (s + b));
}
void PPUTranslator::VSUM4SHS(ppu_opcode_t op)
{
const auto a = get_vr<s32[4]>(op.va);
const auto b = get_vr<s32[4]>(op.vb);
const auto s = eval((a << 16 >> 16) + (a >> 16));
const auto r = add_sat(s, b);
set_vr(op.vd, r);
set_sat(r ^ (s + b));
}
void PPUTranslator::VSUM4UBS(ppu_opcode_t op)
{
const auto a = get_vr<u16[8]>(op.va);
const auto b = get_vr<u32[4]>(op.vb);
const auto x = eval(bitcast<u32[4]>((a & 0xff) + (a >> 8)));
const auto s = eval((x & 0xffff) + (x >> 16));
const auto r = add_sat(s, b);
set_vr(op.vd, r);
set_sat(r ^ (s + b));
}
#define UNPACK_PIXEL_OP(px) (px & 0xff00001f) | ((px << 6) & 0x1f0000) | ((px << 3) & 0x1f00)
void PPUTranslator::VUPKHPX(ppu_opcode_t op)
{
// Caution: potentially out-of-lane algorithm
const auto px = sext<s32[4]>(zshuffle(get_vr<s16[8]>(op.vb), 4, 5, 6, 7));
set_vr(op.vd, UNPACK_PIXEL_OP(px));
}
void PPUTranslator::VUPKHSB(ppu_opcode_t op)
{
// Caution: potentially out-of-lane algorithm
const auto r = sext<s16[8]>(zshuffle(get_vr<s8[16]>(op.vb), 8, 9, 10, 11, 12, 13, 14, 15));
set_vr(op.vd, r);
}
void PPUTranslator::VUPKHSH(ppu_opcode_t op)
{
// Caution: potentially out-of-lane algorithm
const auto r = sext<s32[4]>(zshuffle(get_vr<s16[8]>(op.vb), 4, 5, 6, 7));
set_vr(op.vd, r);
}
void PPUTranslator::VUPKLPX(ppu_opcode_t op)
{
// Caution: potentially out-of-lane algorithm
const auto px = sext<s32[4]>(zshuffle(get_vr<s16[8]>(op.vb), 0, 1, 2, 3));
set_vr(op.vd, UNPACK_PIXEL_OP(px));
}
void PPUTranslator::VUPKLSB(ppu_opcode_t op)
{
// Caution: potentially out-of-lane algorithm
const auto r = sext<s16[8]>(zshuffle(get_vr<s8[16]>(op.vb), 0, 1, 2, 3, 4, 5, 6, 7));
set_vr(op.vd, r);
}
void PPUTranslator::VUPKLSH(ppu_opcode_t op)
{
// Caution: potentially out-of-lane algorithm
const auto r = sext<s32[4]>(zshuffle(get_vr<s16[8]>(op.vb), 0, 1, 2, 3));
set_vr(op.vd, r);
}
void PPUTranslator::VXOR(ppu_opcode_t op)
{
if (op.va == op.vb)
{
// Assign zero, break dependencies
set_vr(op.vd, splat<u32[4]>(0));
return;
}
const auto [a, b] = get_vrs<u32[4]>(op.va, op.vb);
set_vr(op.vd, a ^ b);
}
void PPUTranslator::TDI(ppu_opcode_t op)
{
UseCondition(m_md_unlikely, CheckTrapCondition(op.bo, GetGpr(op.ra), m_ir->getInt64(op.simm16)));
Trap();
}
void PPUTranslator::TWI(ppu_opcode_t op)
{
UseCondition(m_md_unlikely, CheckTrapCondition(op.bo, GetGpr(op.ra, 32), m_ir->getInt32(op.simm16)));
Trap();
}
void PPUTranslator::MULLI(ppu_opcode_t op)
{
SetGpr(op.rd, m_ir->CreateMul(GetGpr(op.ra), m_ir->getInt64(op.simm16)));
}
void PPUTranslator::SUBFIC(ppu_opcode_t op)
{
const auto a = GetGpr(op.ra);
const auto imm = m_ir->getInt64(op.simm16);
const auto result = m_ir->CreateSub(imm, a);
SetGpr(op.rd, result);
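// subfic computes ~RA + SIMM + 1; a carry out of that sum occurs exactly when
// the subtraction did not borrow, i.e. when (SIMM - RA) <= SIMM unsigned.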
SetCarry(m_ir->CreateICmpULE(result, imm));
}
void PPUTranslator::CMPLI(ppu_opcode_t op)
{
SetCrFieldUnsignedCmp(op.crfd, GetGpr(op.ra, op.l10 ? 64 : 32), op.l10 ? m_ir->getInt64(op.uimm16) : m_ir->getInt32(op.uimm16));
}
void PPUTranslator::CMPI(ppu_opcode_t op)
{
SetCrFieldSignedCmp(op.crfd, GetGpr(op.ra, op.l10 ? 64 : 32), op.l10 ? m_ir->getInt64(op.simm16) : m_ir->getInt32(op.simm16));
}
void PPUTranslator::ADDIC(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
const auto a = GetGpr(op.ra);
const auto result = m_ir->CreateAdd(a, imm);
SetGpr(op.rd, result);
SetCarry(m_ir->CreateICmpULT(result, imm));
if (op.main & 1) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::ADDI(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
SetGpr(op.rd, op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm);
}
void PPUTranslator::ADDIS(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16 << 16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = m_ir->CreateShl(SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>()), 16);
m_rel = nullptr;
}
SetGpr(op.rd, op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm);
}
void PPUTranslator::BC(ppu_opcode_t op)
{
const s32 bt14 = op.bt14; // Workaround for VS 16.5
const u64 target = (op.aa ? 0 : m_addr) + bt14;
if (op.aa && m_reloc)
{
CompilationError("Branch with absolute address");
}
if (op.lk)
{
m_ir->CreateStore(GetAddr(+4), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_lr - m_locals)));
}
UseCondition(CheckBranchProbability(op.bo), CheckBranchCondition(op.bo, op.bi));
CallFunction(target);
}
void PPUTranslator::SC(ppu_opcode_t op)
{
if (op.opcode != ppu_instructions::SC(0) && op.opcode != ppu_instructions::SC(1))
{
return UNK(op);
}
const auto num = GetGpr(11);
RegStore(Trunc(GetAddr()), m_cia);
FlushRegisters();
if (!op.lev && isa<ConstantInt>(num))
{
// Try to determine syscall using the constant value from r11
const u64 index = cast<ConstantInt>(num)->getZExtValue();
if (index < 1024)
{
Call(GetType<void>(), fmt::format("%s", ppu_syscall_code(index)), m_thread);
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid();
return;
}
}
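// Unknown or non-constant syscall number: dispatch at runtime. A non-zero lev
// field selects the lv1 (hypervisor) call path instead of the lv2 syscall table.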
Call(GetType<void>(), op.lev ? "__lv1call" : "__syscall", m_thread, num);
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid();
}
void PPUTranslator::B(ppu_opcode_t op)
{
const s32 bt24 = op.bt24; // Workaround for VS 16.5
const u64 target = (op.aa ? 0 : m_addr) + bt24;
if (op.aa && m_reloc)
{
CompilationError("Branch with absolute address");
}
if (op.lk)
{
RegStore(GetAddr(+4), m_lr);
}
FlushRegisters();
CallFunction(target);
}
void PPUTranslator::MCRF(ppu_opcode_t op)
{
const auto le = GetCrb(op.crfs * 4 + 0);
const auto ge = GetCrb(op.crfs * 4 + 1);
const auto eq = GetCrb(op.crfs * 4 + 2);
const auto so = GetCrb(op.crfs * 4 + 3);
SetCrField(op.crfd, le, ge, eq, so);
}
void PPUTranslator::BCLR(ppu_opcode_t op)
{
const auto target = RegLoad(m_lr);
if (op.lk)
{
m_ir->CreateStore(GetAddr(+4), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_lr - m_locals)));
}
UseCondition(CheckBranchProbability(op.bo), CheckBranchCondition(op.bo, op.bi));
CallFunction(0, target);
}
void PPUTranslator::CRNOR(ppu_opcode_t op)
{
SetCrb(op.crbd, m_ir->CreateNot(m_ir->CreateOr(GetCrb(op.crba), GetCrb(op.crbb))));
}
void PPUTranslator::CRANDC(ppu_opcode_t op)
{
SetCrb(op.crbd, m_ir->CreateAnd(GetCrb(op.crba), m_ir->CreateNot(GetCrb(op.crbb))));
}
void PPUTranslator::ISYNC(ppu_opcode_t)
{
m_ir->CreateFence(AtomicOrdering::Acquire);
}
void PPUTranslator::CRXOR(ppu_opcode_t op)
{
SetCrb(op.crbd, m_ir->CreateXor(GetCrb(op.crba), GetCrb(op.crbb)));
}
void PPUTranslator::DCBI(ppu_opcode_t)
{
}
void PPUTranslator::CRNAND(ppu_opcode_t op)
{
SetCrb(op.crbd, m_ir->CreateNot(m_ir->CreateAnd(GetCrb(op.crba), GetCrb(op.crbb))));
}
void PPUTranslator::CRAND(ppu_opcode_t op)
{
SetCrb(op.crbd, m_ir->CreateAnd(GetCrb(op.crba), GetCrb(op.crbb)));
}
void PPUTranslator::CREQV(ppu_opcode_t op)
{
SetCrb(op.crbd, m_ir->CreateNot(m_ir->CreateXor(GetCrb(op.crba), GetCrb(op.crbb))));
}
void PPUTranslator::CRORC(ppu_opcode_t op)
{
SetCrb(op.crbd, m_ir->CreateOr(GetCrb(op.crba), m_ir->CreateNot(GetCrb(op.crbb))));
}
void PPUTranslator::CROR(ppu_opcode_t op)
{
SetCrb(op.crbd, m_ir->CreateOr(GetCrb(op.crba), GetCrb(op.crbb)));
}
void PPUTranslator::BCCTR(ppu_opcode_t op)
{
const auto target = RegLoad(m_ctr);
if (op.lk)
{
m_ir->CreateStore(GetAddr(+4), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_lr - m_locals)));
}
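// bcctr forms that decrement and test CTR are invalid, so the "no CTR test"
// BO bit (0x4) is forced on before evaluating the branch condition.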
UseCondition(CheckBranchProbability(op.bo | 0x4), CheckBranchCondition(op.bo | 0x4, op.bi));
CallFunction(0, target);
}
void PPUTranslator::RLWIMI(ppu_opcode_t op)
{
const u64 mask = ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
Value* result;
if (op.mb32 <= op.me32)
{
if (op.mb32 == 0 && op.me32 == 31)
{
result = RotateLeft(GetGpr(op.rs, 32), op.sh32);
}
else if (op.mb32 == 0 && op.sh32 == 31 - op.me32)
{
result = m_ir->CreateShl(GetGpr(op.rs, 32), op.sh32);
}
else if (op.me32 == 31 && op.sh32 == 32 - op.mb32)
{
result = m_ir->CreateLShr(GetGpr(op.rs, 32), 32 - op.sh32);
}
else if (op.mb32 == 0 && op.sh32 < 31 - op.me32)
{
// INSLWI and other possible mnemonics
result = m_ir->CreateAnd(m_ir->CreateShl(GetGpr(op.rs, 32), op.sh32), mask);
}
else if (op.me32 == 31 && 32 - op.sh32 < op.mb32)
{
// INSRWI and other possible mnemonics
result = m_ir->CreateAnd(m_ir->CreateLShr(GetGpr(op.rs, 32), 32 - op.sh32), mask);
}
else
{
// Generic op
result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs, 32), op.sh32), mask);
}
// Extend 32-bit op result
result = ZExt(result);
}
else
{
// Full 64-bit op with duplication
result = m_ir->CreateAnd(RotateLeft(DuplicateExt(GetGpr(op.rs, 32)), op.sh32), mask);
}
if (mask != umax)
{
// Insertion
result = m_ir->CreateOr(result, m_ir->CreateAnd(GetGpr(op.ra), ~mask));
}
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::RLWINM(ppu_opcode_t op)
{
const u64 mask = ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
Value* result;
if (op.mb32 <= op.me32)
{
if (op.mb32 == 0 && op.me32 == 31)
{
// ROTLWI, ROTRWI mnemonics
result = RotateLeft(GetGpr(op.rs, 32), op.sh32);
}
else if (op.mb32 == 0 && op.sh32 == 31 - op.me32)
{
// SLWI mnemonic
result = m_ir->CreateShl(GetGpr(op.rs, 32), op.sh32);
}
else if (op.me32 == 31 && op.sh32 == 32 - op.mb32)
{
// SRWI mnemonic
result = m_ir->CreateLShr(GetGpr(op.rs, 32), 32 - op.sh32);
}
else if (op.mb32 == 0 && op.sh32 < 31 - op.me32)
{
// EXTLWI and other possible mnemonics
result = m_ir->CreateAnd(m_ir->CreateShl(GetGpr(op.rs, 32), op.sh32), mask);
}
else if (op.me32 == 31 && 32 - op.sh32 < op.mb32)
{
// EXTRWI and other possible mnemonics
result = m_ir->CreateAnd(m_ir->CreateLShr(GetGpr(op.rs, 32), 32 - op.sh32), mask);
}
else
{
// Generic op, including CLRLWI, CLRRWI mnemonics
result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs, 32), op.sh32), mask);
}
// Extend 32-bit op result
result = ZExt(result);
}
else
{
// Full 64-bit op with duplication
result = m_ir->CreateAnd(RotateLeft(DuplicateExt(GetGpr(op.rs, 32)), op.sh32), mask);
}
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::RLWNM(ppu_opcode_t op)
{
const u64 mask = ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
Value* result;
if (op.mb32 <= op.me32)
{
if (op.mb32 == 0 && op.me32 == 31)
{
// ROTLW mnemonic
result = RotateLeft(GetGpr(op.rs, 32), GetGpr(op.rb, 32));
}
else
{
// Generic op
result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs, 32), GetGpr(op.rb, 32)), mask);
}
// Extend 32-bit op result
result = ZExt(result);
}
else
{
// Full 64-bit op with duplication
result = m_ir->CreateAnd(RotateLeft(DuplicateExt(GetGpr(op.rs, 32)), GetGpr(op.rb)), mask);
}
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::ORI(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.uimm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = ZExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
SetGpr(op.ra, m_ir->CreateOr(GetGpr(op.rs), imm));
}
void PPUTranslator::ORIS(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.uimm16 << 16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = m_ir->CreateShl(ZExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>()), 16);
m_rel = nullptr;
}
SetGpr(op.ra, m_ir->CreateOr(GetGpr(op.rs), imm));
}
void PPUTranslator::XORI(ppu_opcode_t op)
{
SetGpr(op.ra, m_ir->CreateXor(GetGpr(op.rs), op.uimm16));
}
void PPUTranslator::XORIS(ppu_opcode_t op)
{
SetGpr(op.ra, m_ir->CreateXor(GetGpr(op.rs), op.uimm16 << 16));
}
void PPUTranslator::ANDI(ppu_opcode_t op)
{
const auto result = m_ir->CreateAnd(GetGpr(op.rs), op.uimm16);
SetGpr(op.ra, result);
SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::ANDIS(ppu_opcode_t op)
{
const auto result = m_ir->CreateAnd(GetGpr(op.rs), op.uimm16 << 16);
SetGpr(op.ra, result);
SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::RLDICL(ppu_opcode_t op)
{
const u32 sh = op.sh64;
const u32 mb = op.mbe64;
const u64 mask = ~0ull >> mb;
Value* result;
if (64 - sh < mb)
{
// EXTRDI and other possible mnemonics
result = m_ir->CreateAnd(m_ir->CreateLShr(GetGpr(op.rs), 64 - sh), mask);
}
else if (64 - sh == mb)
{
// SRDI mnemonic
result = m_ir->CreateLShr(GetGpr(op.rs), 64 - sh);
}
else
{
// Generic op, including CLRLDI mnemonic
result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs), sh), mask);
}
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::RLDICR(ppu_opcode_t op)
{
const u32 sh = op.sh64;
const u32 me = op.mbe64;
const u64 mask = ~0ull << (63 - me);
Value* result;
if (sh < 63 - me)
{
// EXTLDI and other possible mnemonics
result = m_ir->CreateAnd(m_ir->CreateShl(GetGpr(op.rs), sh), mask);
}
else if (sh == 63 - me)
{
// SLDI mnemonic
result = m_ir->CreateShl(GetGpr(op.rs), sh);
}
else
{
// Generic op, including CLRRDI mnemonic
result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs), sh), mask);
}
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::RLDIC(ppu_opcode_t op)
{
const u32 sh = op.sh64;
const u32 mb = op.mbe64;
const u64 mask = ppu_rotate_mask(mb, 63 - sh);
Value* result;
if (mb == 0 && sh == 0)
{
result = GetGpr(op.rs);
}
else if (mb <= 63 - sh)
{
// CLRLSLDI and other possible mnemonics
result = m_ir->CreateAnd(m_ir->CreateShl(GetGpr(op.rs), sh), mask);
}
else
{
// Generic op
result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs), sh), mask);
}
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::RLDIMI(ppu_opcode_t op)
{
const u32 sh = op.sh64;
const u32 mb = op.mbe64;
const u64 mask = ppu_rotate_mask(mb, 63 - sh);
Value* result;
if (mb == 0 && sh == 0)
{
result = GetGpr(op.rs);
}
else if (mb <= 63 - sh)
{
// INSRDI and other possible mnemonics
result = m_ir->CreateAnd(m_ir->CreateShl(GetGpr(op.rs), sh), mask);
}
else
{
// Generic op
result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs), sh), mask);
}
if (mask != umax)
{
// Insertion
result = m_ir->CreateOr(result, m_ir->CreateAnd(GetGpr(op.ra), ~mask));
}
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::RLDCL(ppu_opcode_t op)
{
const u32 mb = op.mbe64;
const u64 mask = ~0ull >> mb;
const auto result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs), GetGpr(op.rb)), mask);
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::RLDCR(ppu_opcode_t op)
{
const u32 me = op.mbe64;
const u64 mask = ~0ull << (63 - me);
const auto result = m_ir->CreateAnd(RotateLeft(GetGpr(op.rs), GetGpr(op.rb)), mask);
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::CMP(ppu_opcode_t op)
{
SetCrFieldSignedCmp(op.crfd, GetGpr(op.ra, op.l10 ? 64 : 32), GetGpr(op.rb, op.l10 ? 64 : 32));
}
void PPUTranslator::TW(ppu_opcode_t op)
{
if (op.opcode != ppu_instructions::TRAP())
{
UseCondition(m_md_unlikely, CheckTrapCondition(op.bo, GetGpr(op.ra, 32), GetGpr(op.rb, 32)));
}
else
{
FlushRegisters();
}
Trap();
}
void PPUTranslator::LVSL(ppu_opcode_t op)
{
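// lvsl yields the permute control vector {sh, sh+1, ..., sh+15} with sh = EA & 0xf;
// the descending 15..0 base accounts for the byte-reversed internal vector layout.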
const auto addr = value<u64>(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb));
set_vr(op.vd, build<u8[16]>(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + vsplat<u8[16]>(trunc<u8>(addr & 0xf)));
}
void PPUTranslator::LVEBX(ppu_opcode_t op)
{
return LVX(op);
}
void PPUTranslator::SUBFC(ppu_opcode_t op)
{
const auto a = GetGpr(op.ra);
const auto b = GetGpr(op.rb);
const auto result = m_ir->CreateSub(b, a);
SetGpr(op.rd, result);
SetCarry(m_ir->CreateICmpULE(result, b));
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
if (op.oe) SetOverflow(Call(GetType<bool>(), m_pure_attr, "__subfc_get_ov", a, b));
}
void PPUTranslator::ADDC(ppu_opcode_t op)
{
const auto a = GetGpr(op.ra);
const auto b = GetGpr(op.rb);
const auto result = m_ir->CreateAdd(a, b);
SetGpr(op.rd, result);
SetCarry(m_ir->CreateICmpULT(result, b));
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
if (op.oe) SetOverflow(Call(GetType<bool>(), m_pure_attr, "__addc_get_ov", a, b));
}
void PPUTranslator::MULHDU(ppu_opcode_t op)
{
const auto a = ZExt(GetGpr(op.ra));
const auto b = ZExt(GetGpr(op.rb));
const auto result = Trunc(m_ir->CreateLShr(m_ir->CreateMul(a, b), 64));
SetGpr(op.rd, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::MULHWU(ppu_opcode_t op)
{
const auto a = ZExt(GetGpr(op.ra, 32));
const auto b = ZExt(GetGpr(op.rb, 32));
SetGpr(op.rd, m_ir->CreateLShr(m_ir->CreateMul(a, b), 32));
if (op.rc) SetCrFieldSignedCmp(0, GetGpr(op.rd), m_ir->getInt64(0));
}
void PPUTranslator::MFOCRF(ppu_opcode_t op)
{
if (op.l11)
{
// MFOCRF
#if LLVM_VERSION_MAJOR < 17
const u64 pos = countLeadingZeros<u32>(op.crm) - 24;
#else
const u64 pos = countl_zero<u32>(op.crm) - 24;
#endif
if (pos >= 8 || 0x80u >> pos != op.crm)
{
SetGpr(op.rd, UndefValue::get(GetType<u64>()));
return;
}
}
else if (std::none_of(m_cr + 0, m_cr + 32, [](auto* p) { return p; }))
{
// MFCR (optimized)
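// No CR bits are cached in local values, so load the 32 CR bit bytes straight
// from the thread context as two 16-byte vectors, reverse them on little-endian
// hosts, and pack each half into a 16-bit mask by truncating the bytes to i1
// and bitcasting the resulting <16 x i1> vector.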
Value* ln0 = m_ir->CreateIntToPtr(m_ir->CreatePtrToInt(m_ir->CreateStructGEP(m_thread_type, m_thread, 99), GetType<uptr>()), GetType<u8[16]>()->getPointerTo());
Value* ln1 = m_ir->CreateIntToPtr(m_ir->CreatePtrToInt(m_ir->CreateStructGEP(m_thread_type, m_thread, 115), GetType<uptr>()), GetType<u8[16]>()->getPointerTo());
ln0 = m_ir->CreateLoad(GetType<u8[16]>(), ln0);
ln1 = m_ir->CreateLoad(GetType<u8[16]>(), ln1);
if (!m_is_be)
{
ln0 = Shuffle(ln0, nullptr, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0});
ln1 = Shuffle(ln1, nullptr, {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0});
}
const auto m0 = ZExt(bitcast<u16>(Trunc(ln0, GetType<bool[16]>())));
const auto m1 = ZExt(bitcast<u16>(Trunc(ln1, GetType<bool[16]>())));
SetGpr(op.rd, m_ir->CreateOr(m_ir->CreateShl(m0, 16), m1));
return;
}
Value* result{};
for (u32 i = 0; i < 8; i++)
{
if (!op.l11 || op.crm & (128 >> i))
{
for (u32 b = i * 4; b < i * 4 + 4; b++)
{
const auto value = m_ir->CreateShl(ZExt(GetCrb(b), GetType<u64>()), 31 - b);
result = result ? m_ir->CreateOr(result, value) : value;
}
}
}
SetGpr(op.rd, result);
}
void PPUTranslator::LWARX(ppu_opcode_t op)
{
if (g_cfg.core.ppu_128_reservations_loop_max_length)
{
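// Accurate 128-byte reservations are enabled: store the current instruction
// address and defer this load-and-reserve to the __resinterp runtime helper
// (which presumably re-executes the reservation loop outside the JIT).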
RegStore(Trunc(GetAddr()), m_cia);
FlushRegisters();
Call(GetType<void>(), "__resinterp", m_thread);
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid();
return;
}
SetGpr(op.rd, Call(GetType<u32>(), "__lwarx", m_thread, op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb)));
}
void PPUTranslator::LDX(ppu_opcode_t op)
{
SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType<u64>()));
}
void PPUTranslator::LWZX(ppu_opcode_t op)
{
SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType<u32>()));
}
void PPUTranslator::SLW(ppu_opcode_t op)
{
const auto shift_num = m_ir->CreateAnd(GetGpr(op.rb), 0x3f);
const auto shift_res = m_ir->CreateShl(GetGpr(op.rs), shift_num);
const auto result = m_ir->CreateAnd(shift_res, 0xffffffff);
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::CNTLZW(ppu_opcode_t op)
{
const auto result = Call(GetType<u32>(), "llvm.ctlz.i32", GetGpr(op.rs, 32), m_ir->getFalse());
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt32(0));
}
void PPUTranslator::SLD(ppu_opcode_t op)
{
const auto shift_num = m_ir->CreateAnd(GetGpr(op.rb), 0x7f);
const auto shift_arg = GetGpr(op.rs);
const auto result = Trunc(m_ir->CreateShl(ZExt(shift_arg), ZExt(shift_num)));
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::AND(ppu_opcode_t op)
{
const auto result = op.rs == op.rb ? GetGpr(op.rs) : m_ir->CreateAnd(GetGpr(op.rs), GetGpr(op.rb));
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::CMPL(ppu_opcode_t op)
{
SetCrFieldUnsignedCmp(op.crfd, GetGpr(op.ra, op.l10 ? 64 : 32), GetGpr(op.rb, op.l10 ? 64 : 32));
}
void PPUTranslator::LVSR(ppu_opcode_t op)
{
const auto addr = value<u64>(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb));
set_vr(op.vd, build<u8[16]>(31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16) - vsplat<u8[16]>(trunc<u8>(addr & 0xf)));
}
void PPUTranslator::LVEHX(ppu_opcode_t op)
{
return LVX(op);
}
void PPUTranslator::SUBF(ppu_opcode_t op)
{
const auto a = GetGpr(op.ra);
const auto b = GetGpr(op.rb);
const auto result = m_ir->CreateSub(b, a);
SetGpr(op.rd, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
if (op.oe) SetOverflow(Call(GetType<bool>(), m_pure_attr, "__subf_get_ov", a, b));
}
void PPUTranslator::LDUX(ppu_opcode_t op)
{
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb));
SetGpr(op.rd, ReadMemory(addr, GetType<u64>()));
SetGpr(op.ra, addr);
}
void PPUTranslator::DCBST(ppu_opcode_t)
{
}
void PPUTranslator::LWZUX(ppu_opcode_t op)
{
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb));
SetGpr(op.rd, ReadMemory(addr, GetType<u32>()));
SetGpr(op.ra, addr);
}
void PPUTranslator::CNTLZD(ppu_opcode_t op)
{
const auto result = Call(GetType<u64>(), "llvm.ctlz.i64", GetGpr(op.rs), m_ir->getFalse());
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::ANDC(ppu_opcode_t op)
{
const auto result = m_ir->CreateAnd(GetGpr(op.rs), m_ir->CreateNot(GetGpr(op.rb)));
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::TD(ppu_opcode_t op)
{
UseCondition(m_md_unlikely, CheckTrapCondition(op.bo, GetGpr(op.ra), GetGpr(op.rb)));
Trap();
}
void PPUTranslator::LVEWX(ppu_opcode_t op)
{
return LVX(op);
}
void PPUTranslator::MULHD(ppu_opcode_t op)
{
const auto a = SExt(GetGpr(op.ra)); // i128
const auto b = SExt(GetGpr(op.rb));
const auto result = Trunc(m_ir->CreateLShr(m_ir->CreateMul(a, b), 64));
SetGpr(op.rd, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::MULHW(ppu_opcode_t op)
{
const auto a = SExt(GetGpr(op.ra, 32));
const auto b = SExt(GetGpr(op.rb, 32));
SetGpr(op.rd, m_ir->CreateAShr(m_ir->CreateMul(a, b), 32));
if (op.rc) SetCrFieldSignedCmp(0, GetGpr(op.rd), m_ir->getInt64(0));
}
void PPUTranslator::LDARX(ppu_opcode_t op)
{
if (g_cfg.core.ppu_128_reservations_loop_max_length)
{
RegStore(Trunc(GetAddr()), m_cia);
FlushRegisters();
Call(GetType<void>(), "__resinterp", m_thread);
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid();
return;
}
SetGpr(op.rd, Call(GetType<u64>(), "__ldarx", m_thread, op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb)));
}
void PPUTranslator::DCBF(ppu_opcode_t)
{
}
void PPUTranslator::LBZX(ppu_opcode_t op)
{
SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType<u8>()));
}
void PPUTranslator::LVX(ppu_opcode_t op)
{
const auto addr = m_ir->CreateAnd(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), ~0xfull);
const auto data = ReadMemory(addr, GetType<u8[16]>(), m_is_be, 16);
SetVr(op.vd, m_is_be ? data : Shuffle(data, nullptr, { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }));
}
void PPUTranslator::NEG(ppu_opcode_t op)
{
const auto reg = GetGpr(op.ra);
const auto result = m_ir->CreateNeg(reg);
SetGpr(op.rd, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
if (op.oe) SetOverflow(Call(GetType<bool>(), m_pure_attr, "__neg_get_ov", reg));
}
void PPUTranslator::LBZUX(ppu_opcode_t op)
{
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb));
SetGpr(op.rd, ReadMemory(addr, GetType<u8>()));
SetGpr(op.ra, addr);
}
void PPUTranslator::NOR(ppu_opcode_t op)
{
const auto result = m_ir->CreateNot(op.rs == op.rb ? GetGpr(op.rs) : m_ir->CreateOr(GetGpr(op.rs), GetGpr(op.rb)));
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::STVEBX(ppu_opcode_t op)
{
const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb);
WriteMemory(addr, m_ir->CreateExtractElement(GetVr(op.vs, VrType::vi8), m_ir->CreateXor(m_ir->CreateAnd(addr, 15), m_is_be ? 0 : 15)));
}
void PPUTranslator::SUBFE(ppu_opcode_t op)
{
const auto a = m_ir->CreateNot(GetGpr(op.ra));
const auto b = GetGpr(op.rb);
const auto c = GetCarry();
const auto r1 = m_ir->CreateAdd(a, b);
const auto r2 = m_ir->CreateAdd(r1, ZExt(c, GetType<u64>()));
SetGpr(op.rd, r2);
SetCarry(m_ir->CreateOr(m_ir->CreateICmpULT(r1, a), m_ir->CreateICmpULT(r2, r1)));
if (op.rc) SetCrFieldSignedCmp(0, r2, m_ir->getInt64(0));
if (op.oe) SetOverflow(Call(GetType<bool>(), m_pure_attr, "__subfe_get_ov", a, b, c));
}
void PPUTranslator::ADDE(ppu_opcode_t op)
{
const auto a = GetGpr(op.ra);
const auto b = GetGpr(op.rb);
const auto c = GetCarry();
const auto r1 = m_ir->CreateAdd(a, b);
const auto r2 = m_ir->CreateAdd(r1, ZExt(c, GetType<u64>()));
SetGpr(op.rd, r2);
SetCarry(m_ir->CreateOr(m_ir->CreateICmpULT(r1, a), m_ir->CreateICmpULT(r2, r1)));
if (op.rc) SetCrFieldSignedCmp(0, r2, m_ir->getInt64(0));
if (op.oe) SetOverflow(Call(GetType<bool>(), m_pure_attr, "__adde_get_ov", a, b, c));
}
void PPUTranslator::MTOCRF(ppu_opcode_t op)
{
if (op.l11)
{
// MTOCRF
#if LLVM_VERSION_MAJOR < 17
const u64 pos = countLeadingZeros<u32>(op.crm) - 24;
#else
const u64 pos = countl_zero<u32>(op.crm) - 24;
#endif
if (pos >= 8 || 0x80u >> pos != op.crm)
{
return;
}
}
else
{
// MTCRF
}
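// Each group of four bytes in this table spells out the four CR bits of its
// 4-bit index, so a selected CR field can be written with a single 4-byte
// memcpy instead of four separate bit extractions.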
static u8 s_table[64]
{
0, 0, 0, 0,
0, 0, 0, 1,
0, 0, 1, 0,
0, 0, 1, 1,
0, 1, 0, 0,
0, 1, 0, 1,
0, 1, 1, 0,
0, 1, 1, 1,
1, 0, 0, 0,
1, 0, 0, 1,
1, 0, 1, 0,
1, 0, 1, 1,
1, 1, 0, 0,
1, 1, 0, 1,
1, 1, 1, 0,
1, 1, 1, 1,
};
if (!m_mtocr_table)
{
m_mtocr_table = new GlobalVariable(*m_module, ArrayType::get(GetType<u8>(), 64), true, GlobalValue::PrivateLinkage, ConstantDataArray::get(m_context, s_table));
}
const auto value = GetGpr(op.rs, 32);
for (u32 i = 0; i < 8; i++)
{
if (op.crm & (128 >> i))
{
// Discard pending values
std::fill_n(m_cr + i * 4, 4, nullptr);
std::fill_n(m_g_cr + i * 4, 4, nullptr);
const auto index = m_ir->CreateAnd(m_ir->CreateLShr(value, 28 - i * 4), 15);
const auto src = m_ir->CreateGEP(dyn_cast<GlobalVariable>(m_mtocr_table)->getValueType(), m_mtocr_table, {m_ir->getInt32(0), m_ir->CreateShl(index, 2)});
const auto dst = bitcast(m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(m_cr - m_locals) + i * 4), GetType<u8*>());
Call(GetType<void>(), "llvm.memcpy.p0.p0.i32", dst, src, m_ir->getInt32(4), m_ir->getFalse());
}
}
}
void PPUTranslator::STDX(ppu_opcode_t op)
{
WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs));
}
void PPUTranslator::STWCX(ppu_opcode_t op)
{
const auto bit = Call(GetType<bool>(), "__stwcx", m_thread, op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs, 32));
SetCrField(0, m_ir->getFalse(), m_ir->getFalse(), bit);
}
void PPUTranslator::STWX(ppu_opcode_t op)
{
WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs, 32));
}
void PPUTranslator::STVEHX(ppu_opcode_t op)
{
const auto addr = m_ir->CreateAnd(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), -2);
WriteMemory(addr, m_ir->CreateExtractElement(GetVr(op.vs, VrType::vi16), m_ir->CreateLShr(m_ir->CreateXor(m_ir->CreateAnd(addr, 15), m_is_be ? 0 : 15), 1)), true, 2);
}
void PPUTranslator::STDUX(ppu_opcode_t op)
{
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb));
WriteMemory(addr, GetGpr(op.rs));
SetGpr(op.ra, addr);
}
void PPUTranslator::STWUX(ppu_opcode_t op)
{
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb));
WriteMemory(addr, GetGpr(op.rs, 32));
SetGpr(op.ra, addr);
}
void PPUTranslator::STVEWX(ppu_opcode_t op)
{
const auto addr = m_ir->CreateAnd(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), -4);
WriteMemory(addr, m_ir->CreateExtractElement(GetVr(op.vs, VrType::vi32), m_ir->CreateLShr(m_ir->CreateXor(m_ir->CreateAnd(addr, 15), m_is_be ? 0 : 15), 2)), true, 4);
}
void PPUTranslator::ADDZE(ppu_opcode_t op)
{
const auto a = GetGpr(op.ra);
const auto c = GetCarry();
const auto result = m_ir->CreateAdd(a, ZExt(c, GetType<u64>()));
SetGpr(op.rd, result);
SetCarry(m_ir->CreateICmpULT(result, a));
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
if (op.oe) SetOverflow(Call(GetType<bool>(), m_pure_attr, "__addze_get_ov", a, c));
}
void PPUTranslator::SUBFZE(ppu_opcode_t op)
{
const auto a = m_ir->CreateNot(GetGpr(op.ra));
const auto c = GetCarry();
const auto result = m_ir->CreateAdd(a, ZExt(c, GetType<u64>()));
SetGpr(op.rd, result);
SetCarry(m_ir->CreateICmpULT(result, a));
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
if (op.oe) SetOverflow(Call(GetType<bool>(), m_pure_attr, "__subfze_get_ov", a, c));
}
void PPUTranslator::STDCX(ppu_opcode_t op)
{
2017-02-26 16:56:31 +01:00
const auto bit = Call(GetType<bool>(), "__stdcx", m_thread, op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs));
SetCrField(0, m_ir->getFalse(), m_ir->getFalse(), bit);
}
void PPUTranslator::STBX(ppu_opcode_t op)
{
WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs, 8));
}
void PPUTranslator::STVX(ppu_opcode_t op)
{
const auto value = GetVr(op.vs, VrType::vi8);
const auto data = m_is_be ? value : Shuffle(value, nullptr, { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 });
WriteMemory(m_ir->CreateAnd(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), -16), data, m_is_be, 16);
}
void PPUTranslator::SUBFME(ppu_opcode_t op)
{
const auto a = m_ir->CreateNot(GetGpr(op.ra));
const auto c = GetCarry();
const auto result = m_ir->CreateSub(a, ZExt(m_ir->CreateNot(c), GetType<u64>()));
SetGpr(op.rd, result);
SetCarry(m_ir->CreateOr(c, IsNotZero(a)));
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
if (op.oe) SetOverflow(Call(GetType<bool>(), m_pure_attr, "__subfme_get_ov", a, c));
}
void PPUTranslator::MULLD(ppu_opcode_t op)
{
const auto a = GetGpr(op.ra);
const auto b = GetGpr(op.rb);
const auto result = m_ir->CreateMul(a, b);
SetGpr(op.rd, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
if (op.oe) SetOverflow(Call(GetType<bool>(), m_pure_attr, "__mulld_get_ov", a, b));
}
void PPUTranslator::ADDME(ppu_opcode_t op)
{
const auto a = GetGpr(op.ra);
const auto c = GetCarry();
const auto result = m_ir->CreateSub(a, ZExt(m_ir->CreateNot(c), GetType<u64>()));
SetGpr(op.rd, result);
SetCarry(m_ir->CreateOr(c, IsNotZero(a)));
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
if (op.oe) SetOverflow(Call(GetType<bool>(), m_pure_attr, "__addme_get_ov", a, c));
}
void PPUTranslator::MULLW(ppu_opcode_t op)
{
const auto a = SExt(GetGpr(op.ra, 32));
const auto b = SExt(GetGpr(op.rb, 32));
const auto result = m_ir->CreateMul(a, b);
SetGpr(op.rd, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
if (op.oe) SetOverflow(Call(GetType<bool>(), m_pure_attr, "__mullw_get_ov", a, b));
}
void PPUTranslator::DCBTST(ppu_opcode_t)
{
}
void PPUTranslator::STBUX(ppu_opcode_t op)
{
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb));
WriteMemory(addr, GetGpr(op.rs, 8));
SetGpr(op.ra, addr);
}
void PPUTranslator::ADD(ppu_opcode_t op)
{
const auto a = GetGpr(op.ra);
const auto b = GetGpr(op.rb);
const auto result = m_ir->CreateAdd(a, b);
SetGpr(op.rd, result);
if (op.oe)
{
//const auto s = m_ir->CreateCall(get_intrinsic<u64>(llvm::Intrinsic::sadd_with_overflow), {a, b});
//SetOverflow(m_ir->CreateExtractValue(s, {1}));
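// Signed overflow: both operands have the same sign and the result's sign
// differs, i.e. ((a ^ ~b) & (a ^ result)) is negative.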
SetOverflow(m_ir->CreateICmpSLT(m_ir->CreateAnd(m_ir->CreateXor(a, m_ir->CreateNot(b)), m_ir->CreateXor(a, result)), m_ir->getInt64(0)));
}
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::DCBT(ppu_opcode_t)
{
}
void PPUTranslator::LHZX(ppu_opcode_t op)
{
SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType<u16>()));
}
void PPUTranslator::EQV(ppu_opcode_t op)
{
const auto result = m_ir->CreateNot(m_ir->CreateXor(GetGpr(op.rs), GetGpr(op.rb)));
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::ECIWX(ppu_opcode_t op)
{
UNK(op);
}
void PPUTranslator::LHZUX(ppu_opcode_t op)
{
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb));
SetGpr(op.rd, ReadMemory(addr, GetType<u16>()));
SetGpr(op.ra, addr);
}
void PPUTranslator::XOR(ppu_opcode_t op)
{
const auto result = op.rs == op.rb ? static_cast<Value*>(m_ir->getInt64(0)) : m_ir->CreateXor(GetGpr(op.rs), GetGpr(op.rb));
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::MFSPR(ppu_opcode_t op)
{
Value* result;
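// The 10-bit SPR field is encoded with its two 5-bit halves swapped; swap them
// back to recover the architectural SPR number.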
switch (const u32 n = (op.spr >> 5) | ((op.spr & 0x1f) << 5))
{
case 0x001: // MFXER
result = ZExt(RegLoad(m_cnt), GetType<u64>());
result = m_ir->CreateOr(result, m_ir->CreateShl(ZExt(RegLoad(m_so), GetType<u64>()), 29));
result = m_ir->CreateOr(result, m_ir->CreateShl(ZExt(RegLoad(m_ov), GetType<u64>()), 30));
result = m_ir->CreateOr(result, m_ir->CreateShl(ZExt(RegLoad(m_ca), GetType<u64>()), 31));
break;
case 0x008: // MFLR
result = RegLoad(m_lr);
break;
case 0x009: // MFCTR
result = RegLoad(m_ctr);
break;
case 0x100:
result = ZExt(RegLoad(m_vrsave));
break;
case 0x10C: // MFTB
result = Call(GetType<u64>(), m_pure_attr, "__get_tb");
break;
case 0x10D: // MFTBU
result = m_ir->CreateLShr(Call(GetType<u64>(), m_pure_attr, "__get_tb"), 32);
break;
default:
result = Call(GetType<u64>(), fmt::format("__mfspr_%u", n));
break;
}
SetGpr(op.rd, result);
}
void PPUTranslator::LWAX(ppu_opcode_t op)
{
SetGpr(op.rd, SExt(ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType<s32>())));
}
void PPUTranslator::DST(ppu_opcode_t)
{
}
void PPUTranslator::LHAX(ppu_opcode_t op)
{
SetGpr(op.rd, SExt(ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType<s16>()), GetType<s64>()));
}
void PPUTranslator::LVXL(ppu_opcode_t op)
{
return LVX(op);
}
void PPUTranslator::MFTB(ppu_opcode_t op)
{
Value* result;
switch (const u32 n = (op.spr >> 5) | ((op.spr & 0x1f) << 5))
{
case 0x10C: // MFTB
result = Call(GetType<u64>(), m_pure_attr, "__get_tb");
break;
case 0x10D: // MFTBU
result = m_ir->CreateLShr(Call(GetType<u64>(), m_pure_attr, "__get_tb"), 32);
break;
default:
result = Call(GetType<u64>(), fmt::format("__mftb_%u", n));
break;
}
SetGpr(op.rd, result);
}
void PPUTranslator::LWAUX(ppu_opcode_t op)
{
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb));
SetGpr(op.rd, SExt(ReadMemory(addr, GetType<s32>())));
SetGpr(op.ra, addr);
}
void PPUTranslator::DSTST(ppu_opcode_t)
{
}
void PPUTranslator::LHAUX(ppu_opcode_t op)
{
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb));
SetGpr(op.rd, SExt(ReadMemory(addr, GetType<s16>()), GetType<s64>()));
SetGpr(op.ra, addr);
}
void PPUTranslator::STHX(ppu_opcode_t op)
{
WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs, 16));
}
void PPUTranslator::ORC(ppu_opcode_t op)
{
const auto result = op.rs == op.rb ? static_cast<Value*>(m_ir->getInt64(-1)) : m_ir->CreateOr(GetGpr(op.rs), m_ir->CreateNot(GetGpr(op.rb)));
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::ECOWX(ppu_opcode_t op)
{
UNK(op);
}
void PPUTranslator::STHUX(ppu_opcode_t op)
{
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb));
WriteMemory(addr, GetGpr(op.rs, 16));
SetGpr(op.ra, addr);
}
void PPUTranslator::OR(ppu_opcode_t op)
{
const auto result = op.rs == op.rb ? GetGpr(op.rs) : m_ir->CreateOr(GetGpr(op.rs), GetGpr(op.rb));
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::DIVDU(ppu_opcode_t op)
{
const auto a = GetGpr(op.ra);
const auto b = GetGpr(op.rb);
const auto o = IsZero(b);
const auto result = m_ir->CreateUDiv(a, m_ir->CreateSelect(o, m_ir->getInt64(-1), b));
SetGpr(op.rd, m_ir->CreateSelect(o, m_ir->getInt64(0), result));
if (op.rc) SetCrFieldSignedCmp(0, GetGpr(op.rd), m_ir->getInt64(0));
if (op.oe) SetOverflow(o);
}
void PPUTranslator::DIVWU(ppu_opcode_t op)
{
const auto a = GetGpr(op.ra, 32);
const auto b = GetGpr(op.rb, 32);
const auto o = IsZero(b);
const auto result = m_ir->CreateUDiv(a, m_ir->CreateSelect(o, m_ir->getInt32(0xffffffff), b));
SetGpr(op.rd, m_ir->CreateSelect(o, m_ir->getInt32(0), result));
if (op.rc) SetCrFieldSignedCmp(0, GetGpr(op.rd), m_ir->getInt64(0));
if (op.oe) SetOverflow(o);
}
void PPUTranslator::MTSPR(ppu_opcode_t op)
{
const auto value = GetGpr(op.rs);
switch (const u32 n = (op.spr >> 5) | ((op.spr & 0x1f) << 5))
{
case 0x001: // MTXER
RegStore(Trunc(m_ir->CreateLShr(value, 31), GetType<bool>()), m_ca);
RegStore(Trunc(m_ir->CreateLShr(value, 30), GetType<bool>()), m_ov);
RegStore(Trunc(m_ir->CreateLShr(value, 29), GetType<bool>()), m_so);
RegStore(Trunc(value, GetType<u8>()), m_cnt);
break;
case 0x008: // MTLR
RegStore(value, m_lr);
break;
case 0x009: // MTCTR
RegStore(value, m_ctr);
break;
case 0x100:
RegStore(Trunc(value), m_vrsave);
break;
default:
Call(GetType<void>(), fmt::format("__mtspr_%u", n), value);
break;
}
}
void PPUTranslator::NAND(ppu_opcode_t op)
{
const auto result = m_ir->CreateNot(op.rs == op.rb ? GetGpr(op.rs) : m_ir->CreateAnd(GetGpr(op.rs), GetGpr(op.rb)));
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::STVXL(ppu_opcode_t op)
{
return STVX(op);
}
void PPUTranslator::DIVD(ppu_opcode_t op)
{
const auto a = GetGpr(op.ra);
const auto b = GetGpr(op.rb);
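// Avoid trapping IR: when the quotient is undefined (division by zero or
// INT64_MIN / -1) the divisor is replaced with a safe value, the destination
// is forced to zero, and OV (if requested) reports the condition.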
const auto o = m_ir->CreateOr(IsZero(b), m_ir->CreateAnd(m_ir->CreateICmpEQ(a, m_ir->getInt64(1ull << 63)), IsOnes(b)));
const auto result = m_ir->CreateSDiv(a, m_ir->CreateSelect(o, m_ir->getInt64(1ull << 63), b));
SetGpr(op.rd, m_ir->CreateSelect(o, m_ir->getInt64(0), result));
if (op.rc) SetCrFieldSignedCmp(0, GetGpr(op.rd), m_ir->getInt64(0));
if (op.oe) SetOverflow(o);
}
void PPUTranslator::DIVW(ppu_opcode_t op)
{
const auto a = GetGpr(op.ra, 32);
const auto b = GetGpr(op.rb, 32);
const auto o = m_ir->CreateOr(IsZero(b), m_ir->CreateAnd(m_ir->CreateICmpEQ(a, m_ir->getInt32(s32{smin})), IsOnes(b)));
const auto result = m_ir->CreateSDiv(a, m_ir->CreateSelect(o, m_ir->getInt32(s32{smin}), b));
SetGpr(op.rd, m_ir->CreateSelect(o, m_ir->getInt32(0), result));
if (op.rc) SetCrFieldSignedCmp(0, GetGpr(op.rd), m_ir->getInt64(0));
if (op.oe) SetOverflow(o);
}
void PPUTranslator::LVLX(ppu_opcode_t op)
{
const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb);
const auto data = ReadMemory(m_ir->CreateAnd(addr, ~0xfull), GetType<u8[16]>(), m_is_be, 16);
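// lvlx: load the bytes from EA up to the next 16-byte boundary into the
// high-order end of vd and zero the rest. Selector bytes with the top bit set
// make pshufb return zero, masking off the bytes below EA.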
set_vr(op.vd, pshufb(value<u8[16]>(data), build<u8[16]>(127, 126, 125, 124, 123, 122, 121, 120, 119, 118, 117, 116, 115, 114, 113, 112) + vsplat<u8[16]>(trunc<u8>(value<u64>(addr) & 0xf))));
}
void PPUTranslator::LDBRX(ppu_opcode_t op)
{
SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType<u64>(), false));
}
void PPUTranslator::LSWX(ppu_opcode_t op)
{
Call(GetType<void>(), "__lswx_not_supported", m_ir->getInt32(op.rd), RegLoad(m_cnt), op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb));
}
void PPUTranslator::LWBRX(ppu_opcode_t op)
{
SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType<u32>(), false));
}
void PPUTranslator::LFSX(ppu_opcode_t op)
{
SetFpr(op.frd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType<f32>()));
}
void PPUTranslator::SRW(ppu_opcode_t op)
{
const auto shift_num = m_ir->CreateAnd(GetGpr(op.rb), 0x3f);
const auto shift_arg = m_ir->CreateAnd(GetGpr(op.rs), 0xffffffff);
const auto result = m_ir->CreateLShr(shift_arg, shift_num);
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::SRD(ppu_opcode_t op)
{
const auto shift_num = m_ir->CreateAnd(GetGpr(op.rb), 0x7f);
const auto shift_arg = GetGpr(op.rs);
const auto result = Trunc(m_ir->CreateLShr(ZExt(shift_arg), ZExt(shift_num)));
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::LVRX(ppu_opcode_t op)
{
const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb);
const auto data = ReadMemory(m_ir->CreateAnd(addr, ~0xfull), GetType<u8[16]>(), m_is_be, 16);
set_vr(op.vd, pshufb(value<u8[16]>(data), build<u8[16]>(255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240) + vsplat<u8[16]>(trunc<u8>(value<u64>(addr) & 0xf))));
}
void PPUTranslator::LSWI(ppu_opcode_t op)
{
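// lswi: copy NB bytes (NB = 0 encodes 32) from EA into successive registers,
// four bytes per register and wrapping from r31 to r0; a final partial word is
// left-justified with its remaining low-order bytes cleared.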
Value* addr = op.ra ? GetGpr(op.ra) : m_ir->getInt64(0);
u32 index = op.rb ? op.rb : 32;
u32 reg = op.rd;
while (index)
{
if (index > 3)
{
SetGpr(reg, ReadMemory(addr, GetType<u32>()));
index -= 4;
if (index)
{
addr = m_ir->CreateAdd(addr, m_ir->getInt64(4));
}
}
else
{
Value* buf = nullptr;
u32 i = 3;
while (index)
{
const auto byte = m_ir->CreateShl(ZExt(ReadMemory(addr, GetType<u8>()), GetType<u32>()), i * 8);
buf = buf ? m_ir->CreateOr(buf, byte) : byte;
if (--index)
{
addr = m_ir->CreateAdd(addr, m_ir->getInt64(1));
i--;
}
}
SetGpr(reg, buf);
}
reg = (reg + 1) % 32;
}
}
void PPUTranslator::LFSUX(ppu_opcode_t op)
{
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb));
SetFpr(op.frd, ReadMemory(addr, GetType<f32>()));
SetGpr(op.ra, addr);
}
void PPUTranslator::SYNC(ppu_opcode_t op)
{
// sync: Full seq cst barrier
// lwsync: Acq/Release barrier
m_ir->CreateFence(op.l10 ? AtomicOrdering::AcquireRelease : AtomicOrdering::SequentiallyConsistent);
}
void PPUTranslator::LFDX(ppu_opcode_t op)
{
SetFpr(op.frd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType<f64>()));
}
void PPUTranslator::LFDUX(ppu_opcode_t op)
{
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb));
SetFpr(op.frd, ReadMemory(addr, GetType<f64>()));
SetGpr(op.ra, addr);
}
void PPUTranslator::STVLX(ppu_opcode_t op)
{
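// stvlx: store the high-order part of vs to the bytes from EA up to the next
// 16-byte boundary. The source is byte-rotated into place with pshufb, and
// llvm.masked.store with a (0xffff << sh) lane mask writes only those bytes of
// the aligned quadword.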
const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb);
const auto data = pshufb(get_vr<u8[16]>(op.vs), build<u8[16]>(127, 126, 125, 124, 123, 122, 121, 120, 119, 118, 117, 116, 115, 114, 113, 112) + vsplat<u8[16]>(trunc<u8>(value<u64>(addr) & 0xf)));
const auto mask = bitcast<bool[16]>(splat<u16>(0xffff) << trunc<u16>(value<u64>(addr) & 0xf));
const auto ptr = value<u8(*)[16]>(GetMemory(m_ir->CreateAnd(addr, ~0xfull), GetType<u8[16]>()));
const auto align = splat<u32>(16);
eval(llvm_calli<void, decltype(data), decltype(ptr), decltype(align), decltype(mask)>{"llvm.masked.store.v16i8.p0", {data, ptr, align, mask}});
}
void PPUTranslator::STDBRX(ppu_opcode_t op)
{
WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs), false);
}
void PPUTranslator::STSWX(ppu_opcode_t op)
{
Call(GetType<void>(), "__stswx_not_supported", m_ir->getInt32(op.rs), RegLoad(m_cnt), op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb));
}
void PPUTranslator::STWBRX(ppu_opcode_t op)
{
WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs, 32), false);
}
void PPUTranslator::STFSX(ppu_opcode_t op)
{
WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetFpr(op.frs, 32));
}
void PPUTranslator::STVRX(ppu_opcode_t op)
{
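// stvrx: counterpart of stvlx, writing the low-order (EA & 0xf) bytes of the
// aligned quadword that precede EA.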
const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb);
const auto data = pshufb(get_vr<u8[16]>(op.vs), build<u8[16]>(255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240) + vsplat<u8[16]>(trunc<u8>(value<u64>(addr) & 0xf)));
const auto mask = bitcast<bool[16]>(trunc<u16>(splat<u64>(0xffff) << (value<u64>(addr) & 0xf) >> 16));
const auto ptr = value<u8(*)[16]>(GetMemory(m_ir->CreateAnd(addr, ~0xfull), GetType<u8[16]>()));
const auto align = splat<u32>(16);
eval(llvm_calli<void, decltype(data), decltype(ptr), decltype(align), decltype(mask)>{"llvm.masked.store.v16i8.p0", {data, ptr, align, mask}});
}
void PPUTranslator::STFSUX(ppu_opcode_t op)
{
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb));
WriteMemory(addr, GetFpr(op.frs, 32));
SetGpr(op.ra, addr);
}
void PPUTranslator::STSWI(ppu_opcode_t op)
{
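// stswi: mirror of lswi, storing NB bytes (NB = 0 encodes 32) from successive
// registers to EA, a final partial word written most-significant byte first.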
Value* addr = op.ra ? GetGpr(op.ra) : m_ir->getInt64(0);
u32 index = op.rb ? op.rb : 32;
u32 reg = op.rd;
while (index)
{
if (index > 3)
{
WriteMemory(addr, GetGpr(reg, 32));
index -= 4;
if (index)
{
addr = m_ir->CreateAdd(addr, m_ir->getInt64(4));
}
}
else
{
Value* buf = GetGpr(reg, 32);
while (index)
{
WriteMemory(addr, Trunc(m_ir->CreateLShr(buf, 24), GetType<u8>()));
if (--index)
{
buf = m_ir->CreateShl(buf, 8);
addr = m_ir->CreateAdd(addr, m_ir->getInt64(1));
}
}
}
reg = (reg + 1) % 32;
}
}
void PPUTranslator::STFDX(ppu_opcode_t op)
{
WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetFpr(op.frs));
}
void PPUTranslator::STFDUX(ppu_opcode_t op)
{
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb));
WriteMemory(addr, GetFpr(op.frs));
SetGpr(op.ra, addr);
}
void PPUTranslator::LVLXL(ppu_opcode_t op)
{
return LVLX(op);
}
void PPUTranslator::LHBRX(ppu_opcode_t op)
{
SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetType<u16>(), false));
}
void PPUTranslator::SRAW(ppu_opcode_t op)
{
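// The 32-bit source is sign-extended to 64 bits, so shift counts of 32..63 naturally yield all sign bits;
// CA is set when the source is negative and any 1 bits are shifted out (shifting the result back does not reproduce the original).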
const auto shift_num = m_ir->CreateAnd(GetGpr(op.rb), 0x3f);
const auto shift_arg = GetGpr(op.rs, 32);
const auto arg_ext = SExt(shift_arg);
const auto result = m_ir->CreateAShr(arg_ext, shift_num);
SetGpr(op.ra, result);
SetCarry(m_ir->CreateAnd(m_ir->CreateICmpSLT(shift_arg, m_ir->getInt32(0)), m_ir->CreateICmpNE(arg_ext, m_ir->CreateShl(result, shift_num))));
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::SRAD(ppu_opcode_t op)
{
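// Both operands are widened to i128, so shift counts of 64..127 yield all sign bits; CA is computed the same way as for SRAW.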
const auto shift_num = ZExt(m_ir->CreateAnd(GetGpr(op.rb), 0x7f)); // i128
const auto shift_arg = GetGpr(op.rs);
const auto arg_ext = SExt(shift_arg); // i128
const auto res_128 = m_ir->CreateAShr(arg_ext, shift_num); // i128
const auto result = Trunc(res_128);
SetGpr(op.ra, result);
SetCarry(m_ir->CreateAnd(m_ir->CreateICmpSLT(shift_arg, m_ir->getInt64(0)), m_ir->CreateICmpNE(arg_ext, m_ir->CreateShl(res_128, shift_num))));
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::LVRXL(ppu_opcode_t op)
{
return LVRX(op);
}
void PPUTranslator::DSS(ppu_opcode_t)
{
}
void PPUTranslator::SRAWI(ppu_opcode_t op)
{
const auto shift_arg = GetGpr(op.rs, 32);
const auto res_32 = m_ir->CreateAShr(shift_arg, op.sh32);
const auto result = SExt(res_32);
SetGpr(op.ra, result);
SetCarry(m_ir->CreateAnd(m_ir->CreateICmpSLT(shift_arg, m_ir->getInt32(0)), m_ir->CreateICmpNE(shift_arg, m_ir->CreateShl(res_32, op.sh32))));
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::SRADI(ppu_opcode_t op)
{
const auto shift_arg = GetGpr(op.rs);
const auto result = m_ir->CreateAShr(shift_arg, op.sh64);
SetGpr(op.ra, result);
SetCarry(m_ir->CreateAnd(m_ir->CreateICmpSLT(shift_arg, m_ir->getInt64(0)), m_ir->CreateICmpNE(shift_arg, m_ir->CreateShl(result, op.sh64))));
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::EIEIO(ppu_opcode_t)
{
// TODO
m_ir->CreateFence(AtomicOrdering::SequentiallyConsistent);
}
void PPUTranslator::STVLXL(ppu_opcode_t op)
{
return STVLX(op);
}
void PPUTranslator::STHBRX(ppu_opcode_t op)
{
WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs, 16), false);
}
void PPUTranslator::EXTSH(ppu_opcode_t op)
{
const auto result = SExt(GetGpr(op.rs, 16), GetType<s64>());
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::STVRXL(ppu_opcode_t op)
{
return STVRX(op);
}
void PPUTranslator::EXTSB(ppu_opcode_t op)
{
const auto result = SExt(GetGpr(op.rs, 8), GetType<s64>());
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::STFIWX(ppu_opcode_t op)
{
WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetFpr(op.frs, 32, true));
}
void PPUTranslator::EXTSW(ppu_opcode_t op)
{
const auto result = SExt(GetGpr(op.rs, 32));
SetGpr(op.ra, result);
if (op.rc) SetCrFieldSignedCmp(0, result, m_ir->getInt64(0));
}
void PPUTranslator::ICBI(ppu_opcode_t)
{
}
void PPUTranslator::DCBZ(ppu_opcode_t op)
{
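// Align the EA down to its 128-byte cache line and zero the whole line, either via the __dcbz helper
// (when accurate cache line stores are enabled) or an inline memset.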
const auto addr = m_ir->CreateAnd(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), -128);
if (g_cfg.core.accurate_cache_line_stores)
{
Call(GetType<void>(), "__dcbz", addr);
}
else
{
Call(GetType<void>(), "llvm.memset.p0.i32", GetMemory(addr, GetType<u8>()), m_ir->getInt8(0), m_ir->getInt32(128), m_ir->getFalse());
}
}
void PPUTranslator::LWZ(ppu_opcode_t op)
{
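// If this instruction's displacement is targeted by a half-word relocation (types 4..6), read the 16-bit immediate
// from the instruction image at runtime so the patched value is used instead of the encoded constant.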
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetType<u32>()));
}
void PPUTranslator::LWZU(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm);
SetGpr(op.rd, ReadMemory(addr, GetType<u32>()));
SetGpr(op.ra, addr);
}
void PPUTranslator::LBZ(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetType<u8>()));
}
void PPUTranslator::LBZU(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm);
SetGpr(op.rd, ReadMemory(addr, GetType<u8>()));
SetGpr(op.ra, addr);
}
void PPUTranslator::STW(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
const auto value = GetGpr(op.rs, 32);
const auto addr = op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm;
WriteMemory(addr, value);
// Insomniac engine v3 & v4 (newer R&C, Fuse, Resistance 3)
if (auto ci = llvm::dyn_cast<ConstantInt>(value))
{
if (ci->getZExtValue() == 0xAAAAAAAA)
{
Call(GetType<void>(), "__resupdate", addr, m_ir->getInt32(128));
}
}
}
void PPUTranslator::STWU(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm);
WriteMemory(addr, GetGpr(op.rs, 32));
SetGpr(op.ra, addr);
}
void PPUTranslator::STB(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetGpr(op.rs, 8));
}
void PPUTranslator::STBU(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm);
WriteMemory(addr, GetGpr(op.rs, 8));
SetGpr(op.ra, addr);
}
void PPUTranslator::LHZ(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetType<u16>()));
}
void PPUTranslator::LHZU(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm);
SetGpr(op.rd, ReadMemory(addr, GetType<u16>()));
SetGpr(op.ra, addr);
}
void PPUTranslator::LHA(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
SetGpr(op.rd, SExt(ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetType<s16>()), GetType<s64>()));
}
void PPUTranslator::LHAU(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm);
SetGpr(op.rd, SExt(ReadMemory(addr, GetType<s16>()), GetType<s64>()));
SetGpr(op.ra, addr);
}
void PPUTranslator::STH(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetGpr(op.rs, 16));
}
void PPUTranslator::STHU(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm);
WriteMemory(addr, GetGpr(op.rs, 16));
SetGpr(op.ra, addr);
}
void PPUTranslator::LMW(ppu_opcode_t op)
{
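// Load Multiple Word: fill GPRs rd..r31 from consecutive words starting at EA.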
for (u32 i = 0; i < 32 - op.rd; i++)
{
SetGpr(i + op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(m_ir->getInt64(op.simm16 + i * 4), GetGpr(op.ra)) : m_ir->getInt64(op.simm16 + i * 4), GetType<u32>()));
}
}
void PPUTranslator::STMW(ppu_opcode_t op)
{
for (u32 i = 0; i < 32 - op.rs; i++)
{
WriteMemory(op.ra ? m_ir->CreateAdd(m_ir->getInt64(op.simm16 + i * 4), GetGpr(op.ra)) : m_ir->getInt64(op.simm16 + i * 4), GetGpr(i + op.rs, 32));
}
}
void PPUTranslator::LFS(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
SetFpr(op.frd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetType<f32>()));
}
void PPUTranslator::LFSU(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm);
SetFpr(op.frd, ReadMemory(addr, GetType<f32>()));
SetGpr(op.ra, addr);
}
void PPUTranslator::LFD(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
SetFpr(op.frd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetType<f64>()));
}
void PPUTranslator::LFDU(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm);
SetFpr(op.frd, ReadMemory(addr, GetType<f64>()));
SetGpr(op.ra, addr);
}
void PPUTranslator::STFS(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetFpr(op.frs, 32));
}
void PPUTranslator::STFSU(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm);
WriteMemory(addr, GetFpr(op.frs, 32));
SetGpr(op.ra, addr);
}
void PPUTranslator::STFD(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetFpr(op.frs));
}
void PPUTranslator::STFDU(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.simm16);
if (m_rel && (m_rel->type >= 4u && m_rel->type <= 6u))
{
imm = SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>());
m_rel = nullptr;
}
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm);
WriteMemory(addr, GetFpr(op.frs));
SetGpr(op.ra, addr);
}
void PPUTranslator::LD(ppu_opcode_t op)
{
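// DS-form displacement: if the instruction is targeted by relocation type 57, read the patched half-word
// from the instruction image and clear its two low bits.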
Value* imm = m_ir->getInt64(op.ds << 2);
if (m_rel && m_rel->type == 57)
{
imm = m_ir->CreateAnd(SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>()), ~3);
m_rel = nullptr;
}
SetGpr(op.rd, ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetType<u64>()));
}
void PPUTranslator::LDU(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.ds << 2);
if (m_rel && m_rel->type == 57)
{
imm = m_ir->CreateAnd(SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>()), ~3);
m_rel = nullptr;
}
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm);
SetGpr(op.rd, ReadMemory(addr, GetType<u64>()));
SetGpr(op.ra, addr);
}
void PPUTranslator::LWA(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.ds << 2);
if (m_rel && m_rel->type == 57)
{
imm = m_ir->CreateAnd(SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>()), ~3);
m_rel = nullptr;
}
SetGpr(op.rd, SExt(ReadMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetType<s32>())));
}
void PPUTranslator::STD(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.ds << 2);
if (m_rel && m_rel->type == 57)
{
imm = m_ir->CreateAnd(SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>()), ~3);
m_rel = nullptr;
}
WriteMemory(op.ra ? m_ir->CreateAdd(GetGpr(op.ra), imm) : imm, GetGpr(op.rs));
}
void PPUTranslator::STDU(ppu_opcode_t op)
{
Value* imm = m_ir->getInt64(op.ds << 2);
if (m_rel && m_rel->type == 57)
{
imm = m_ir->CreateAnd(SExt(ReadMemory(GetAddr(+2), GetType<u16>()), GetType<u64>()), ~3);
m_rel = nullptr;
}
const auto addr = m_ir->CreateAdd(GetGpr(op.ra), imm);
WriteMemory(addr, GetGpr(op.rs));
SetGpr(op.ra, addr);
}
void PPUTranslator::FDIVS(ppu_opcode_t op)
{
const auto a = GetFpr(op.fra);
const auto b = GetFpr(op.frb);
const auto result = m_ir->CreateFPTrunc(m_ir->CreateFDiv(a, b), GetType<f32>());
SetFpr(op.frd, result);
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fdivs_get_fr", a, b));
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fdivs_get_fi", a, b));
//SetFPSCRException(m_fpscr_ox, Call(GetType<bool>(), m_pure_attr, "__fdivs_get_ox", a, b));
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__fdivs_get_ux", a, b));
//SetFPSCRException(m_fpscr_zx, Call(GetType<bool>(), m_pure_attr, "__fdivs_get_zx", a, b));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fdivs_get_vxsnan", a, b));
//SetFPSCRException(m_fpscr_vxidi, Call(GetType<bool>(), m_pure_attr, "__fdivs_get_vxidi", a, b));
//SetFPSCRException(m_fpscr_vxzdz, Call(GetType<bool>(), m_pure_attr, "__fdivs_get_vxzdz", a, b));
SetFPRF(result, op.rc != 0);
}
void PPUTranslator::FSUBS(ppu_opcode_t op)
{
const auto a = GetFpr(op.fra);
const auto b = GetFpr(op.frb);
const auto result = m_ir->CreateFPTrunc(m_ir->CreateFSub(a, b), GetType<f32>());
SetFpr(op.frd, result);
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fsubs_get_fr", a, b));
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fsubs_get_fi", a, b));
//SetFPSCRException(m_fpscr_ox, Call(GetType<bool>(), m_pure_attr, "__fsubs_get_ox", a, b));
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__fsubs_get_ux", a, b));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fsubs_get_vxsnan", a, b));
//SetFPSCRException(m_fpscr_vxisi, Call(GetType<bool>(), m_pure_attr, "__fsubs_get_vxisi", a, b));
SetFPRF(result, op.rc != 0);
}
void PPUTranslator::FADDS(ppu_opcode_t op)
{
const auto a = GetFpr(op.fra);
const auto b = GetFpr(op.frb);
const auto result = m_ir->CreateFPTrunc(m_ir->CreateFAdd(a, b), GetType<f32>());
SetFpr(op.frd, result);
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fadds_get_fr", a, b));
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fadds_get_fi", a, b));
//SetFPSCRException(m_fpscr_ox, Call(GetType<bool>(), m_pure_attr, "__fadds_get_ox", a, b));
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__fadds_get_ux", a, b));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fadds_get_vxsnan", a, b));
//SetFPSCRException(m_fpscr_vxisi, Call(GetType<bool>(), m_pure_attr, "__fadds_get_vxisi", a, b));
SetFPRF(result, op.rc != 0);
}
void PPUTranslator::FSQRTS(ppu_opcode_t op)
{
const auto b = GetFpr(op.frb);
const auto result = m_ir->CreateFPTrunc(Call(GetType<f64>(), "llvm.sqrt.f64", b), GetType<f32>());
SetFpr(op.frd, result);
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fsqrts_get_fr", b));
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fsqrts_get_fi", b));
//SetFPSCRException(m_fpscr_ox, Call(GetType<bool>(), m_pure_attr, "__fsqrts_get_ox", b));
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__fsqrts_get_ux", b));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fsqrts_get_vxsnan", b));
//SetFPSCRException(m_fpscr_vxsqrt, Call(GetType<bool>(), m_pure_attr, "__fsqrts_get_vxsqrt", b));
SetFPRF(result, op.rc != 0);
}
void PPUTranslator::FRES(ppu_opcode_t op)
{
if (!m_fres_table)
{
m_fres_table = new GlobalVariable(*m_module, ArrayType::get(GetType<u32>(), 128), true, GlobalValue::PrivateLinkage, ConstantDataArray::get(m_context, ppu_fres_mantissas));
}
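// Reciprocal estimate via a 128-entry mantissa table indexed by the top 7 mantissa bits; the result exponent becomes 0x7fd - e.
// NaN inputs are quieted, and inputs with |x| >= 2^128 produce a zero of the same sign.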
const auto a = GetFpr(op.frb);
const auto b = bitcast<u64>(a);
const auto n = m_ir->CreateFCmpUNO(a, a); // test for NaN
const auto e = m_ir->CreateAnd(m_ir->CreateLShr(b, 52), 0x7ff); // double exp
const auto i = m_ir->CreateAnd(m_ir->CreateLShr(b, 45), 0x7f); // mantissa LUT index
const auto ptr = dyn_cast<GetElementPtrInst>(m_ir->CreateGEP(dyn_cast<GlobalVariable>(m_fres_table)->getValueType(), m_fres_table, {m_ir->getInt64(0), i}));
assert(ptr->getResultElementType() == get_type<u32>());
const auto m = m_ir->CreateShl(ZExt(m_ir->CreateLoad(ptr->getResultElementType(), ptr)), 29);
const auto c = m_ir->CreateICmpUGE(e, m_ir->getInt64(0x3ff + 0x80)); // test for INF
const auto x = m_ir->CreateShl(m_ir->CreateSub(m_ir->getInt64(0x7ff - 2), e), 52);
const auto s = m_ir->CreateSelect(c, m_ir->getInt64(0), m_ir->CreateOr(x, m));
const auto r = bitcast<f64>(m_ir->CreateSelect(n, m_ir->CreateOr(b, 0x8'0000'0000'0000), m_ir->CreateOr(s, m_ir->CreateAnd(b, 0x8000'0000'0000'0000))));
SetFpr(op.frd, m_ir->CreateFPTrunc(r, GetType<f32>()));
//m_ir->CreateStore(GetUndef<bool>(), m_fpscr_fr);
//m_ir->CreateStore(GetUndef<bool>(), m_fpscr_fi);
//m_ir->CreateStore(GetUndef<bool>(), m_fpscr_xx);
//SetFPSCRException(m_fpscr_ox, Call(GetType<bool>(), m_pure_attr, "__fres_get_ox", b));
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__fres_get_ux", b));
//SetFPSCRException(m_fpscr_zx, Call(GetType<bool>(), m_pure_attr, "__fres_get_zx", b));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fres_get_vxsnan", b));
SetFPRF(r, op.rc != 0);
}
void PPUTranslator::FMULS(ppu_opcode_t op)
{
const auto a = GetFpr(op.fra);
const auto c = GetFpr(op.frc);
const auto result = m_ir->CreateFPTrunc(m_ir->CreateFMul(a, c), GetType<f32>());
SetFpr(op.frd, result);
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fmuls_get_fr", a, c));
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fmuls_get_fi", a, c));
//SetFPSCRException(m_fpscr_ox, Call(GetType<bool>(), m_pure_attr, "__fmuls_get_ox", a, c));
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__fmuls_get_ux", a, c));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fmuls_get_vxsnan", a, c));
//SetFPSCRException(m_fpscr_vximz, Call(GetType<bool>(), m_pure_attr, "__fmuls_get_vximz", a, c));
SetFPRF(result, op.rc != 0);
}
void PPUTranslator::FMADDS(ppu_opcode_t op)
{
const auto a = GetFpr(op.fra);
const auto b = GetFpr(op.frb);
const auto c = GetFpr(op.frc);
llvm::Value* result;
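// With accurate DFMA the fused fma intrinsic is used (single rounding); otherwise a separate multiply and add is emitted, which is faster but rounds twice.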
if (g_cfg.core.use_accurate_dfma)
{
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, b});
}
else
{
result = m_ir->CreateFAdd(m_ir->CreateFMul(a, c), b);
}
SetFpr(op.frd, m_ir->CreateFPTrunc(result, GetType<f32>()));
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fmadds_get_fr", a, b, c));
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fmadds_get_fi", a, b, c));
//SetFPSCRException(m_fpscr_ox, Call(GetType<bool>(), m_pure_attr, "__fmadds_get_ox", a, b, c));
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__fmadds_get_ux", a, b, c));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fmadds_get_vxsnan", a, b, c));
//SetFPSCRException(m_fpscr_vxisi, Call(GetType<bool>(), m_pure_attr, "__fmadds_get_vxisi", a, b, c));
//SetFPSCRException(m_fpscr_vximz, Call(GetType<bool>(), m_pure_attr, "__fmadds_get_vximz", a, b, c));
SetFPRF(result, op.rc != 0);
}
void PPUTranslator::FMSUBS(ppu_opcode_t op)
{
const auto a = GetFpr(op.fra);
const auto b = GetFpr(op.frb);
const auto c = GetFpr(op.frc);
llvm::Value* result;
if (g_cfg.core.use_accurate_dfma)
{
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)});
}
else
{
result = m_ir->CreateFSub(m_ir->CreateFMul(a, c), b);
}
SetFpr(op.frd, m_ir->CreateFPTrunc(result, GetType<f32>()));
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fmadds_get_fr", a, b, c)); // TODO ???
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fmadds_get_fi", a, b, c));
//SetFPSCRException(m_fpscr_ox, Call(GetType<bool>(), m_pure_attr, "__fmadds_get_ox", a, b, c));
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__fmadds_get_ux", a, b, c));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fmadds_get_vxsnan", a, b, c));
//SetFPSCRException(m_fpscr_vxisi, Call(GetType<bool>(), m_pure_attr, "__fmadds_get_vxisi", a, b, c));
//SetFPSCRException(m_fpscr_vximz, Call(GetType<bool>(), m_pure_attr, "__fmadds_get_vximz", a, b, c));
SetFPRF(result, op.rc != 0);
}
void PPUTranslator::FNMSUBS(ppu_opcode_t op)
{
const auto a = GetFpr(op.fra);
const auto b = GetFpr(op.frb);
const auto c = GetFpr(op.frc);
llvm::Value* result;
if (g_cfg.core.use_accurate_dfma)
{
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)});
}
else
{
result = m_ir->CreateFSub(m_ir->CreateFMul(a, c), b);
}
SetFpr(op.frd, m_ir->CreateFPTrunc(m_ir->CreateFNeg(result), GetType<f32>()));
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fmadds_get_fr", a, b, c)); // TODO ???
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fmadds_get_fi", a, b, c));
//SetFPSCRException(m_fpscr_ox, Call(GetType<bool>(), m_pure_attr, "__fmadds_get_ox", a, b, c));
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__fmadds_get_ux", a, b, c));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fmadds_get_vxsnan", a, b, c));
//SetFPSCRException(m_fpscr_vxisi, Call(GetType<bool>(), m_pure_attr, "__fmadds_get_vxisi", a, b, c));
//SetFPSCRException(m_fpscr_vximz, Call(GetType<bool>(), m_pure_attr, "__fmadds_get_vximz", a, b, c));
SetFPRF(result, op.rc != 0);
}
void PPUTranslator::FNMADDS(ppu_opcode_t op)
{
const auto a = GetFpr(op.fra);
const auto b = GetFpr(op.frb);
const auto c = GetFpr(op.frc);
llvm::Value* result;
if (g_cfg.core.use_accurate_dfma)
{
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, b});
}
else
{
result = m_ir->CreateFAdd(m_ir->CreateFMul(a, c), b);
}
SetFpr(op.frd, m_ir->CreateFPTrunc(m_ir->CreateFNeg(result), GetType<f32>()));
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fmadds_get_fr", a, b, c)); // TODO ???
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fmadds_get_fi", a, b, c));
//SetFPSCRException(m_fpscr_ox, Call(GetType<bool>(), m_pure_attr, "__fmadds_get_ox", a, b, c));
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__fmadds_get_ux", a, b, c));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fmadds_get_vxsnan", a, b, c));
//SetFPSCRException(m_fpscr_vxisi, Call(GetType<bool>(), m_pure_attr, "__fmadds_get_vxisi", a, b, c));
//SetFPSCRException(m_fpscr_vximz, Call(GetType<bool>(), m_pure_attr, "__fmadds_get_vximz", a, b, c));
SetFPRF(result, op.rc != 0);
}
void PPUTranslator::MTFSB1(ppu_opcode_t op)
{
SetFPSCRBit(op.crbd, m_ir->getTrue(), true);
if (op.rc) SetCrFieldFPCC(1);
}
void PPUTranslator::MCRFS(ppu_opcode_t op)
{
const auto lt = GetFPSCRBit(op.crfs * 4 + 0);
const auto gt = GetFPSCRBit(op.crfs * 4 + 1);
const auto eq = GetFPSCRBit(op.crfs * 4 + 2);
const auto un = GetFPSCRBit(op.crfs * 4 + 3);
SetCrField(op.crfd, lt, gt, eq, un);
}
void PPUTranslator::MTFSB0(ppu_opcode_t op)
{
SetFPSCRBit(op.crbd, m_ir->getFalse(), false);
if (op.rc) SetCrFieldFPCC(1);
}
void PPUTranslator::MTFSFI(ppu_opcode_t op)
{
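// Field 0 is special: bits 1 (FEX) and 2 (VX) are not directly writable, so only the FX and OX bits are set when crfd == 0.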
SetFPSCRBit(op.crfd * 4 + 0, m_ir->getInt1((op.i & 8) != 0), false);
if (op.crfd != 0)
{
SetFPSCRBit(op.crfd * 4 + 1, m_ir->getInt1((op.i & 4) != 0), false);
SetFPSCRBit(op.crfd * 4 + 2, m_ir->getInt1((op.i & 2) != 0), false);
}
SetFPSCRBit(op.crfd * 4 + 3, m_ir->getInt1((op.i & 1) != 0), false);
if (op.rc) SetCrFieldFPCC(1);
}
void PPUTranslator::MFFS(ppu_opcode_t op)
{
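// Only the FPCC bits (FPSCR bits 16..19) are modeled by the translator; compose them at their architectural positions (bit 31 - i), all other FPSCR bits read as zero.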
Value* result = m_ir->getInt64(0);
for (u32 i = 16; i < 20; i++)
{
result = m_ir->CreateOr(result, m_ir->CreateShl(ZExt(RegLoad(m_fc[i]), GetType<u64>()), i ^ 31));
}
SetFpr(op.frd, result);
if (op.rc) SetCrFieldFPCC(1);
}
void PPUTranslator::MTFSF(ppu_opcode_t op)
{
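// Only the FPCC bits (FPSCR bits 16..19) are modeled; each bit is written only when its field is selected by FLM.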
const auto value = GetFpr(op.frb, 32, true);
for (u32 i = 16; i < 20; i++)
{
if (i != 1 && i != 2 && (op.flm & (128 >> (i / 4))) != 0)
{
SetFPSCRBit(i, Trunc(m_ir->CreateLShr(value, i ^ 31), GetType<bool>()), false);
}
}
if (op.rc) SetCrFieldFPCC(1);
}
void PPUTranslator::FCMPU(ppu_opcode_t op)
{
const auto a = GetFpr(op.fra);
const auto b = GetFpr(op.frb);
const auto lt = m_ir->CreateFCmpOLT(a, b);
const auto gt = m_ir->CreateFCmpOGT(a, b);
const auto eq = m_ir->CreateFCmpOEQ(a, b);
const auto un = m_ir->CreateFCmpUNO(a, b);
SetCrField(op.crfd, lt, gt, eq, un);
SetFPCC(lt, gt, eq, un);
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fcmpu_get_vxsnan", a, b));
}
void PPUTranslator::FRSP(ppu_opcode_t op)
{
const auto b = GetFpr(op.frb);
const auto result = m_ir->CreateFPTrunc(b, GetType<f32>());
SetFpr(op.frd, result);
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__frsp_get_fr", b));
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__frsp_get_fi", b));
//SetFPSCRException(m_fpscr_ox, Call(GetType<bool>(), m_pure_attr, "__frsp_get_ox", b));
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__frsp_get_ux", b));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__frsp_get_vxsnan", b));
SetFPRF(result, op.rc != 0);
}
void PPUTranslator::FCTIW(ppu_opcode_t op)
{
const auto b = GetFpr(op.frb);
const auto xormask = m_ir->CreateSExt(m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType<f64>(), std::exp2l(31.))), GetType<s32>());
// fix result saturation (0x80000000 -> 0x7fffffff)
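// On x86, cvtsd2si returns 0x80000000 for out-of-range inputs; XORing with a mask that is all ones when the source is >= 2^31
// turns that into 0x7fffffff, so positive overflow saturates correctly while NaN and negative overflow keep 0x80000000.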
#if defined(ARCH_X64)
SetFpr(op.frd, m_ir->CreateXor(xormask, Call(GetType<s32>(), "llvm.x86.sse2.cvtsd2si", m_ir->CreateInsertElement(GetUndef<f64[2]>(), b, u64{0}))));
#elif defined(ARCH_ARM64)
SetFpr(op.frd, m_ir->CreateXor(xormask, Call(GetType<s32>(), "llvm.aarch64.neon.fcvtns.i32.f64", b)));
#endif
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fctiw_get_fr", b));
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fctiw_get_fi", b));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fctiw_get_vxsnan", b));
//SetFPSCRException(m_fpscr_vxcvi, m_ir->CreateOr(sat_l, sat_h));
//m_ir->CreateStore(GetUndef<bool>(), m_fpscr_c);
//SetFPCC(GetUndef<bool>(), GetUndef<bool>(), GetUndef<bool>(), GetUndef<bool>(), op.rc != 0);
}
void PPUTranslator::FCTIWZ(ppu_opcode_t op)
{
const auto b = GetFpr(op.frb);
const auto xormask = m_ir->CreateSExt(m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType<f64>(), std::exp2l(31.))), GetType<s32>());
// fix result saturation (0x80000000 -> 0x7fffffff)
#if defined(ARCH_X64)
SetFpr(op.frd, m_ir->CreateXor(xormask, Call(GetType<s32>(), "llvm.x86.sse2.cvttsd2si", m_ir->CreateInsertElement(GetUndef<f64[2]>(), b, u64{0}))));
#elif defined(ARCH_ARM64)
SetFpr(op.frd, m_ir->CreateXor(xormask, Call(GetType<s32>(), "llvm.aarch64.neon.fcvtzs.i32.f64", b)));
#endif
}
void PPUTranslator::FDIV(ppu_opcode_t op)
{
const auto a = GetFpr(op.fra);
const auto b = GetFpr(op.frb);
const auto result = m_ir->CreateFDiv(a, b);
SetFpr(op.frd, result);
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fdiv_get_fr", a, b));
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fdiv_get_fi", a, b));
//SetFPSCRException(m_fpscr_ox, Call(GetType<bool>(), m_pure_attr, "__fdiv_get_ox", a, b));
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__fdiv_get_ux", a, b));
//SetFPSCRException(m_fpscr_zx, Call(GetType<bool>(), m_pure_attr, "__fdiv_get_zx", a, b));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fdiv_get_vxsnan", a, b));
//SetFPSCRException(m_fpscr_vxidi, Call(GetType<bool>(), m_pure_attr, "__fdiv_get_vxidi", a, b));
//SetFPSCRException(m_fpscr_vxzdz, Call(GetType<bool>(), m_pure_attr, "__fdiv_get_vxzdz", a, b));
SetFPRF(result, op.rc != 0);
}
void PPUTranslator::FSUB(ppu_opcode_t op)
{
const auto a = GetFpr(op.fra);
const auto b = GetFpr(op.frb);
const auto result = m_ir->CreateFSub(a, b);
SetFpr(op.frd, result);
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fsub_get_fr", a, b));
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fsub_get_fi", a, b));
//SetFPSCRException(m_fpscr_ox, Call(GetType<bool>(), m_pure_attr, "__fsub_get_ox", a, b));
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__fsub_get_ux", a, b));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fsub_get_vxsnan", a, b));
//SetFPSCRException(m_fpscr_vxisi, Call(GetType<bool>(), m_pure_attr, "__fsub_get_vxisi", a, b));
SetFPRF(result, op.rc != 0);
}
void PPUTranslator::FADD(ppu_opcode_t op)
{
const auto a = GetFpr(op.fra);
const auto b = GetFpr(op.frb);
const auto result = m_ir->CreateFAdd(a, b);
SetFpr(op.frd, result);
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fadd_get_fr", a, b));
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fadd_get_fi", a, b));
//SetFPSCRException(m_fpscr_ox, Call(GetType<bool>(), m_pure_attr, "__fadd_get_ox", a, b));
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__fadd_get_ux", a, b));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fadd_get_vxsnan", a, b));
//SetFPSCRException(m_fpscr_vxisi, Call(GetType<bool>(), m_pure_attr, "__fadd_get_vxisi", a, b));
SetFPRF(result, op.rc != 0);
}
void PPUTranslator::FSQRT(ppu_opcode_t op)
{
const auto b = GetFpr(op.frb);
const auto result = Call(GetType<f64>(), "llvm.sqrt.f64", b);
SetFpr(op.frd, result);
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fsqrt_get_fr", b));
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fsqrt_get_fi", b));
//SetFPSCRException(m_fpscr_ox, Call(GetType<bool>(), m_pure_attr, "__fsqrt_get_ox", b));
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__fsqrt_get_ux", b));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fsqrt_get_vxsnan", b));
//SetFPSCRException(m_fpscr_vxsqrt, Call(GetType<bool>(), m_pure_attr, "__fsqrt_get_vxsqrt", b));
SetFPRF(result, op.rc != 0);
}
void PPUTranslator::FSEL(ppu_opcode_t op)
{
const auto a = GetFpr(op.fra);
const auto b = GetFpr(op.frb);
const auto c = GetFpr(op.frc);
SetFpr(op.frd, m_ir->CreateSelect(m_ir->CreateFCmpOGE(a, ConstantFP::get(GetType<f64>(), 0.0)), c, b));
if (op.rc) SetCrFieldFPCC(1);
}
void PPUTranslator::FMUL(ppu_opcode_t op)
{
const auto a = GetFpr(op.fra);
const auto c = GetFpr(op.frc);
const auto result = m_ir->CreateFMul(a, c);
SetFpr(op.frd, result);
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fmul_get_fr", a, c));
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fmul_get_fi", a, c));
//SetFPSCRException(m_fpscr_ox, Call(GetType<bool>(), m_pure_attr, "__fmul_get_ox", a, c));
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__fmul_get_ux", a, c));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fmul_get_vxsnan", a, c));
//SetFPSCRException(m_fpscr_vximz, Call(GetType<bool>(), m_pure_attr, "__fmul_get_vximz", a, c));
SetFPRF(result, op.rc != 0);
}
void PPUTranslator::FRSQRTE(ppu_opcode_t op)
{
if (!m_frsqrte_table)
{
m_frsqrte_table = new GlobalVariable(*m_module, ArrayType::get(GetType<u32>(), 0x8000), true, GlobalValue::PrivateLinkage, ConstantDataArray::get(m_context, ppu_frqrte_lut.data));
}
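// Reciprocal square root estimate via a 32768-entry table indexed by the sign, exponent and top 3 mantissa bits (b >> 49);
// the table entry supplies the upper 32 bits of the result.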
const auto b = m_ir->CreateBitCast(GetFpr(op.frb), GetType<u64>());
const auto ptr = dyn_cast<GetElementPtrInst>(m_ir->CreateGEP(dyn_cast<GlobalVariable>(m_frsqrte_table)->getValueType(), m_frsqrte_table, {m_ir->getInt64(0), m_ir->CreateLShr(b, 49)}));
assert(ptr->getResultElementType() == get_type<u32>());
const auto v = m_ir->CreateLoad(ptr->getResultElementType(), ptr);
const auto result = m_ir->CreateBitCast(m_ir->CreateShl(ZExt(v), 32), GetType<f64>());
SetFpr(op.frd, result);
//m_ir->CreateStore(GetUndef<bool>(), m_fpscr_fr);
//m_ir->CreateStore(GetUndef<bool>(), m_fpscr_fi);
//m_ir->CreateStore(GetUndef<bool>(), m_fpscr_xx);
//SetFPSCRException(m_fpscr_zx, Call(GetType<bool>(), m_pure_attr, "__frsqrte_get_zx", b));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__frsqrte_get_vxsnan", b));
//SetFPSCRException(m_fpscr_vxsqrt, Call(GetType<bool>(), m_pure_attr, "__frsqrte_get_vxsqrt", b));
SetFPRF(result, op.rc != 0);
}
void PPUTranslator::FMSUB(ppu_opcode_t op)
{
const auto a = GetFpr(op.fra);
const auto b = GetFpr(op.frb);
const auto c = GetFpr(op.frc);
llvm::Value* result;
if (g_cfg.core.use_accurate_dfma)
{
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)});
}
else
{
result = m_ir->CreateFSub(m_ir->CreateFMul(a, c), b);
}
SetFpr(op.frd, result);
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fmadd_get_fr", a, b, c)); // TODO ???
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fmadd_get_fi", a, b, c));
//SetFPSCRException(m_fpscr_ox, Call(GetType<bool>(), m_pure_attr, "__fmadd_get_ox", a, b, c));
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__fmadd_get_ux", a, b, c));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fmadd_get_vxsnan", a, b, c));
//SetFPSCRException(m_fpscr_vxisi, Call(GetType<bool>(), m_pure_attr, "__fmadd_get_vxisi", a, b, c));
//SetFPSCRException(m_fpscr_vximz, Call(GetType<bool>(), m_pure_attr, "__fmadd_get_vximz", a, b, c));
SetFPRF(result, op.rc != 0);
}
void PPUTranslator::FMADD(ppu_opcode_t op)
{
const auto a = GetFpr(op.fra);
const auto b = GetFpr(op.frb);
const auto c = GetFpr(op.frc);
llvm::Value* result;
if (g_cfg.core.use_accurate_dfma)
{
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), { a, c, b });
}
else
{
result = m_ir->CreateFAdd(m_ir->CreateFMul(a, c), b);
}
SetFpr(op.frd, result);
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fmadd_get_fr", a, b, c));
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fmadd_get_fi", a, b, c));
//SetFPSCRException(m_fpscr_ox, Call(GetType<bool>(), m_pure_attr, "__fmadd_get_ox", a, b, c));
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__fmadd_get_ux", a, b, c));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fmadd_get_vxsnan", a, b, c));
//SetFPSCRException(m_fpscr_vxisi, Call(GetType<bool>(), m_pure_attr, "__fmadd_get_vxisi", a, b, c));
//SetFPSCRException(m_fpscr_vximz, Call(GetType<bool>(), m_pure_attr, "__fmadd_get_vximz", a, b, c));
SetFPRF(result, op.rc != 0);
}
void PPUTranslator::FNMSUB(ppu_opcode_t op)
{
const auto a = GetFpr(op.fra);
const auto b = GetFpr(op.frb);
const auto c = GetFpr(op.frc);
llvm::Value* result;
if (g_cfg.core.use_accurate_dfma)
{
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, m_ir->CreateFNeg(b)});
}
else
{
result = m_ir->CreateFSub(m_ir->CreateFMul(a, c), b);
}
SetFpr(op.frd, m_ir->CreateFNeg(result));
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fmadd_get_fr", a, b, c)); // TODO ???
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fmadd_get_fi", a, b, c));
//SetFPSCRException(m_fpscr_ox, Call(GetType<bool>(), m_pure_attr, "__fmadd_get_ox", a, b, c));
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__fmadd_get_ux", a, b, c));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fmadd_get_vxsnan", a, b, c));
//SetFPSCRException(m_fpscr_vxisi, Call(GetType<bool>(), m_pure_attr, "__fmadd_get_vxisi", a, b, c));
//SetFPSCRException(m_fpscr_vximz, Call(GetType<bool>(), m_pure_attr, "__fmadd_get_vximz", a, b, c));
SetFPRF(result, op.rc != 0);
}
void PPUTranslator::FNMADD(ppu_opcode_t op)
{
const auto a = GetFpr(op.fra);
const auto b = GetFpr(op.frb);
const auto c = GetFpr(op.frc);
llvm::Value* result;
if (g_cfg.core.use_accurate_dfma)
{
result = m_ir->CreateCall(get_intrinsic<f64>(llvm::Intrinsic::fma), {a, c, b});
}
else
{
result = m_ir->CreateFAdd(m_ir->CreateFMul(a, c), b);
}
SetFpr(op.frd, m_ir->CreateFNeg(result));
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fmadd_get_fr", a, b, c)); // TODO ???
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fmadd_get_fi", a, b, c));
//SetFPSCRException(m_fpscr_ox, Call(GetType<bool>(), m_pure_attr, "__fmadd_get_ox", a, b, c));
//SetFPSCRException(m_fpscr_ux, Call(GetType<bool>(), m_pure_attr, "__fmadd_get_ux", a, b, c));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fmadd_get_vxsnan", a, b, c));
//SetFPSCRException(m_fpscr_vxisi, Call(GetType<bool>(), m_pure_attr, "__fmadd_get_vxisi", a, b, c));
//SetFPSCRException(m_fpscr_vximz, Call(GetType<bool>(), m_pure_attr, "__fmadd_get_vximz", a, b, c));
SetFPRF(result, op.rc != 0);
}
void PPUTranslator::FCMPO(ppu_opcode_t op)
{
const auto a = GetFpr(op.fra);
const auto b = GetFpr(op.frb);
const auto lt = m_ir->CreateFCmpOLT(a, b);
const auto gt = m_ir->CreateFCmpOGT(a, b);
const auto eq = m_ir->CreateFCmpOEQ(a, b);
const auto un = m_ir->CreateFCmpUNO(a, b);
SetCrField(op.crfd, lt, gt, eq, un);
SetFPCC(lt, gt, eq, un);
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fcmpo_get_vxsnan", a, b));
//SetFPSCRException(m_fpscr_vxvc, Call(GetType<bool>(), m_pure_attr, "__fcmpo_get_vxvc", a, b));
}
void PPUTranslator::FNEG(ppu_opcode_t op)
{
const auto b = GetFpr(op.frb);
SetFpr(op.frd, m_ir->CreateFNeg(b));
if (op.rc) SetCrFieldFPCC(1);
}
void PPUTranslator::FMR(ppu_opcode_t op)
{
SetFpr(op.frd, GetFpr(op.frb));
if (op.rc) SetCrFieldFPCC(1);
}
void PPUTranslator::FNABS(ppu_opcode_t op)
{
SetFpr(op.frd, m_ir->CreateFNeg(Call(GetType<f64>(), "llvm.fabs.f64", GetFpr(op.frb))));
if (op.rc) SetCrFieldFPCC(1);
}
void PPUTranslator::FABS(ppu_opcode_t op)
{
SetFpr(op.frd, Call(GetType<f64>(), "llvm.fabs.f64", GetFpr(op.frb)));
if (op.rc) SetCrFieldFPCC(1);
}
void PPUTranslator::FCTID(ppu_opcode_t op)
{
const auto b = GetFpr(op.frb);
const auto xormask = m_ir->CreateSExt(m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType<f64>(), std::exp2l(63.))), GetType<s64>());
// fix result saturation (0x8000000000000000 -> 0x7fffffffffffffff)
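// Same saturation fix as FCTIW, but for the 64-bit conversion: the mask is all ones when the source is >= 2^63,
// turning 0x8000000000000000 into 0x7fffffffffffffff.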
#if defined(ARCH_X64)
SetFpr(op.frd, m_ir->CreateXor(xormask, Call(GetType<s64>(), "llvm.x86.sse2.cvtsd2si64", m_ir->CreateInsertElement(GetUndef<f64[2]>(), b, u64{0}))));
#elif defined(ARCH_ARM64)
SetFpr(op.frd, m_ir->CreateXor(xormask, Call(GetType<s64>(), "llvm.aarch64.neon.fcvtns.i64.f64", b)));
#endif
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fctid_get_fr", b));
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fctid_get_fi", b));
//SetFPSCRException(m_fpscr_vxsnan, Call(GetType<bool>(), m_pure_attr, "__fctid_get_vxsnan", b));
//SetFPSCRException(m_fpscr_vxcvi, m_ir->CreateOr(sat_l, sat_h));
//m_ir->CreateStore(GetUndef<bool>(), m_fpscr_c);
//SetFPCC(GetUndef<bool>(), GetUndef<bool>(), GetUndef<bool>(), GetUndef<bool>(), op.rc != 0);
}
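// FCTIDZ: convert the f64 in FRB to a signed 64-bit integer, rounding toward zero, and place the result in FRD.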
void PPUTranslator::FCTIDZ(ppu_opcode_t op)
{
const auto b = GetFpr(op.frb);
const auto xormask = m_ir->CreateSExt(m_ir->CreateFCmpOGE(b, ConstantFP::get(GetType<f64>(), std::exp2l(63.))), GetType<s64>());
// fix result saturation (0x8000000000000000 -> 0x7fffffffffffffff)
#if defined(ARCH_X64)
SetFpr(op.frd, m_ir->CreateXor(xormask, Call(GetType<s64>(), "llvm.x86.sse2.cvttsd2si64", m_ir->CreateInsertElement(GetUndef<f64[2]>(), b, u64{0}))));
#elif defined(ARCH_ARM64)
SetFpr(op.frd, m_ir->CreateXor(xormask, Call(GetType<s64>(), "llvm.aarch64.neon.fcvtzs.i64.f64", b)));
#endif
}
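// FCFID: convert the signed 64-bit integer in FRB to f64 (signed doubleword -> double) and store it in FRD.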
void PPUTranslator::FCFID(ppu_opcode_t op)
{
const auto b = GetFpr(op.frb, 64, true);
const auto result = m_ir->CreateSIToFP(b, GetType<f64>());
SetFpr(op.frd, result);
//SetFPSCR_FR(Call(GetType<bool>(), m_pure_attr, "__fcfid_get_fr", b));
//SetFPSCR_FI(Call(GetType<bool>(), m_pure_attr, "__fcfid_get_fi", b));
SetFPRF(result, op.rc != 0);
}
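// UNK: unrecognized or unimplemented opcode; flush register state, report it via __error and terminate the block.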
void PPUTranslator::UNK(ppu_opcode_t op)
{
FlushRegisters();
Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(op.opcode));
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid();
}
Value* PPUTranslator::GetGpr(u32 r, u32 num_bits)
{
return Trunc(RegLoad(m_gpr[r]), m_ir->getIntNTy(num_bits));
}
void PPUTranslator::SetGpr(u32 r, Value* value)
{
RegStore(ZExt(value, GetType<u64>()), m_gpr[r]);
}
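// Load FPR[r]: returned as f64 (bits == 64), truncated to f32 (bits == 32), or bitcast and truncated to an integer of the requested width when as_int is set.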
Value* PPUTranslator::GetFpr(u32 r, u32 bits, bool as_int)
{
const auto value = RegLoad(m_fpr[r]);
if (!as_int && bits == 64)
{
return value;
}
else if (!as_int && bits == 32)
{
return m_ir->CreateFPTrunc(value, GetType<f32>());
}
else
{
return Trunc(bitcast(value, GetType<u64>()), m_ir->getIntNTy(bits));
}
}
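// Store into FPR[r], normalizing to f64 first: s32 is sign-extended and bitcast, s64 is bitcast, f32 is FP-extended; anything else is stored as-is.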
void PPUTranslator::SetFpr(u32 r, Value* val)
{
const auto f64_val =
val->getType() == GetType<s32>() ? bitcast(SExt(val), GetType<f64>()) :
val->getType() == GetType<s64>() ? bitcast(val, GetType<f64>()) :
val->getType() == GetType<f32>() ? m_ir->CreateFPExt(val, GetType<f64>()) : val;
RegStore(f64_val, m_fpr[r]);
}
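// Load VR[vr] and reinterpret it as the requested 128-bit element type.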
Value* PPUTranslator::GetVr(u32 vr, VrType type)
{
const auto value = RegLoad(m_vr[vr]);
llvm::Type* _type{};
switch (type)
{
case VrType::vi32: _type = GetType<u32[4]>(); break;
case VrType::vi8 : _type = GetType<u8[16]>(); break;
case VrType::vi16: _type = GetType<u16[8]>(); break;
case VrType::vf : _type = GetType<f32[4]>(); break;
case VrType::i128: _type = GetType<u128>(); break;
default: ensure(false);
}
return bitcast(value, _type);
}
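// Store into VR[vr], normalizing the value to 128 bits: bool vectors are sign-extended to full lanes, 256/512-bit vectors have their lanes truncated.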
void PPUTranslator::SetVr(u32 vr, Value* value)
{
const auto type = value->getType();
const auto size = type->getPrimitiveSizeInBits();
if (type->isVectorTy() && size != 128)
{
if (type->getScalarType()->isIntegerTy(1))
{
// Sign-extend bool values
value = SExt(value, ScaleType(type, 7 - s32(std::log2(+size))));
}
else if (size == 256 || size == 512)
{
// Truncate big vectors
value = Trunc(value, ScaleType(type, 7 - s32(std::log2(+size))));
}
}
ensure(value->getType()->getPrimitiveSizeInBits() == 128);
RegStore(value, m_vr[vr]);
}
Value* PPUTranslator::GetCrb(u32 crb)
{
return RegLoad(m_cr[crb]);
}
void PPUTranslator::SetCrb(u32 crb, Value* value)
{
RegStore(value, m_cr[crb]);
}
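// Write one 4-bit CR field; null operands become undef, and a null SO falls back to the sticky summary-overflow bit.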
void PPUTranslator::SetCrField(u32 group, Value* lt, Value* gt, Value* eq, Value* so)
{
SetCrb(group * 4 + 0, lt ? lt : GetUndef<bool>());
SetCrb(group * 4 + 1, gt ? gt : GetUndef<bool>());
SetCrb(group * 4 + 2, eq ? eq : GetUndef<bool>());
SetCrb(group * 4 + 3, so ? so : RegLoad(m_so));
}
void PPUTranslator::SetCrFieldSignedCmp(u32 n, Value* a, Value* b)
{
const auto lt = m_ir->CreateICmpSLT(a, b);
const auto gt = m_ir->CreateICmpSGT(a, b);
const auto eq = m_ir->CreateICmpEQ(a, b);
SetCrField(n, lt, gt, eq);
}
void PPUTranslator::SetCrFieldUnsignedCmp(u32 n, Value* a, Value* b)
{
const auto lt = m_ir->CreateICmpULT(a, b);
const auto gt = m_ir->CreateICmpUGT(a, b);
const auto eq = m_ir->CreateICmpEQ(a, b);
SetCrField(n, lt, gt, eq);
}
void PPUTranslator::SetCrFieldFPCC(u32 n)
{
SetCrField(n, GetFPSCRBit(16), GetFPSCRBit(17), GetFPSCRBit(18), GetFPSCRBit(19));
}
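// Update the FPCC bits (FPSCR 16-19) and, if requested, mirror them into CR field 1.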
void PPUTranslator::SetFPCC(Value* lt, Value* gt, Value* eq, Value* un, bool set_cr)
{
SetFPSCRBit(16, lt, false);
SetFPSCRBit(17, gt, false);
SetFPSCRBit(18, eq, false);
SetFPSCRBit(19, un, false);
if (set_cr) SetCrField(1, lt, gt, eq, un);
}
void PPUTranslator::SetFPRF(Value* value, bool /*set_cr*/)
{
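// Note: the ternary below is a live statement that effectively serves as a type check
// (ensure() fires if the value is neither f32 nor f64); the FPRF/FPCC update itself is stubbed out.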
//const bool is32 =
value->getType()->isFloatTy() ? true :
value->getType()->isDoubleTy() ? false : ensure(false);
//const auto zero = ConstantFP::get(value->getType(), 0.0);
//const auto is_nan = m_ir->CreateFCmpUNO(value, zero);
//const auto is_inf = Call(GetType<bool>(), m_pure_attr, is32 ? "__is_inf32" : "__is_inf", value); // TODO
//const auto is_denorm = Call(GetType<bool>(), m_pure_attr, is32 ? "__is_denorm32" : "__is_denorm", value); // TODO
//const auto is_neg_zero = Call(GetType<bool>(), m_pure_attr, is32 ? "__is_neg_zero32" : "__is_neg_zero", value); // TODO
//const auto cc = m_ir->CreateOr(is_nan, m_ir->CreateOr(is_denorm, is_neg_zero));
//const auto lt = m_ir->CreateFCmpOLT(value, zero);
//const auto gt = m_ir->CreateFCmpOGT(value, zero);
//const auto eq = m_ir->CreateFCmpOEQ(value, zero);
//const auto un = m_ir->CreateOr(is_nan, is_inf);
//m_ir->CreateStore(cc, m_fpscr_c);
//SetFPCC(lt, gt, eq, un, set_cr);
}
void PPUTranslator::SetFPSCR_FR(Value* /*value*/)
{
//m_ir->CreateStore(value, m_fpscr_fr);
}
void PPUTranslator::SetFPSCR_FI(Value* /*value*/)
{
//m_ir->CreateStore(value, m_fpscr_fi);
//SetFPSCRException(m_fpscr_xx, value);
}
void PPUTranslator::SetFPSCRException(Value* /*ptr*/, Value* /*value*/)
{
//m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(ptr), value), ptr);
//m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(m_fpscr_fx), value), m_fpscr_fx);
}
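// Only the FPCC bits (16-19) are currently backed by storage; other FPSCR bits are not modelled here.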
Value* PPUTranslator::GetFPSCRBit(u32 n)
{
//if (n == 1 && m_fpscr[24])
//{
// // Floating-Point Enabled Exception Summary (FEX) 24-29
// Value* value = m_ir->CreateLoad(m_fpscr[24]);
// for (u32 i = 25; i <= 29; i++) value = m_ir->CreateOr(value, m_ir->CreateLoad(m_fpscr[i]));
// return value;
//}
//if (n == 2 && m_fpscr[7])
//{
// // Floating-Point Invalid Operation Exception Summary (VX) 7-12, 21-23
// Value* value = m_ir->CreateLoad(m_fpscr[7]);
// for (u32 i = 8; i <= 12; i++) value = m_ir->CreateOr(value, m_ir->CreateLoad(m_fpscr[i]));
// for (u32 i = 21; i <= 23; i++) value = m_ir->CreateOr(value, m_ir->CreateLoad(m_fpscr[i]));
// return value;
//}
if (n < 16 || n > 19)
{
return nullptr; // ???
}
// Get bit
const auto value = RegLoad(m_fc[n]);
//if (n == 0 || (n >= 3 && n <= 12) || (n >= 21 && n <= 23))
//{
// // Clear FX or exception bits
// m_ir->CreateStore(m_ir->getFalse(), m_fpscr[n]);
//}
return value;
}
void PPUTranslator::SetFPSCRBit(u32 n, Value* value, bool /*update_fx*/)
{
if (n < 16 || n > 19)
{
//CompilationError("SetFPSCRBit(): inaccessible bit " + std::to_string(n));
return; // ???
}
//if (update_fx)
//{
// if ((n >= 3 && n <= 12) || (n >= 21 && n <= 23))
// {
// // Update FX bit if necessary
// m_ir->CreateStore(m_ir->CreateOr(m_ir->CreateLoad(m_fpscr_fx), value), m_fpscr_fx);
// }
//}
//if (n >= 24 && n <= 28) CompilationError("SetFPSCRBit: exception enable bit " + std::to_string(n));
//if (n == 29) CompilationError("SetFPSCRBit: NI bit");
//if (n >= 30) CompilationError("SetFPSCRBit: RN bit");
// Store the bit
RegStore(value, m_fc[n]);
}
Value* PPUTranslator::GetCarry()
{
return RegLoad(m_ca);
}
void PPUTranslator::SetCarry(Value* bit)
{
RegStore(bit, m_ca);
}
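// Set OV and OR it into the sticky SO bit.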
void PPUTranslator::SetOverflow(Value* bit)
{
RegStore(bit, m_ov);
RegStore(m_ir->CreateOr(RegLoad(m_so), bit), m_so);
}
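// Build the trap condition from the TO field: 0x10 = signed less-than, 0x08 = signed greater-than, 0x04 = equal, 0x02 = unsigned less-than, 0x01 = unsigned greater-than.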
Value* PPUTranslator::CheckTrapCondition(u32 to, Value* left, Value* right)
{
Value* trap_condition = m_ir->getFalse();
if (to & 0x10) trap_condition = m_ir->CreateOr(trap_condition, m_ir->CreateICmpSLT(left, right));
if (to & 0x8) trap_condition = m_ir->CreateOr(trap_condition, m_ir->CreateICmpSGT(left, right));
if (to & 0x4) trap_condition = m_ir->CreateOr(trap_condition, m_ir->CreateICmpEQ(left, right));
if (to & 0x2) trap_condition = m_ir->CreateOr(trap_condition, m_ir->CreateICmpULT(left, right));
if (to & 0x1) trap_condition = m_ir->CreateOr(trap_condition, m_ir->CreateICmpUGT(left, right));
return trap_condition;
}
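// Unconditional trap: call the __trap handler with the current address and end the translated block.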
void PPUTranslator::Trap()
2016-06-07 22:24:20 +02:00
{
Call(GetType<void>(), "__trap", m_thread, GetAddr());
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid();
}
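// Evaluate the BO/BI branch condition: unless BO & 0x04 is set, CTR is decremented and tested
// (taken on CTR == 0 if BO & 0x02, on CTR != 0 otherwise); unless BO & 0x10 is set, CR bit BI is tested
// (must be set if BO & 0x08, clear otherwise). Both tests are ANDed when present.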
Value* PPUTranslator::CheckBranchCondition(u32 bo, u32 bi)
{
const bool bo0 = (bo & 0x10) != 0;
const bool bo1 = (bo & 0x08) != 0;
const bool bo2 = (bo & 0x04) != 0;
const bool bo3 = (bo & 0x02) != 0;
// Decrement counter if necessary
const auto ctr = bo2 ? nullptr : m_ir->CreateSub(RegLoad(m_ctr), m_ir->getInt64(1));
// Store counter if necessary
if (ctr) RegStore(ctr, m_ctr);
// Generate counter condition
const auto use_ctr = bo2 ? nullptr : m_ir->CreateICmp(bo3 ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, ctr, m_ir->getInt64(0));
// Generate condition bit access
const auto use_cond = bo0 ? nullptr : bo1 ? GetCrb(bi) : m_ir->CreateNot(GetCrb(bi));
if (use_ctr && use_cond)
{
// Combine conditions if necessary
return m_ir->CreateAnd(use_ctr, use_cond);
}
return use_ctr ? use_ctr : use_cond;
}
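// Attach likely/unlikely branch-weight metadata based on the static prediction hint bit (BO & 0x01)
// for the BO encodings where such a hint applies; otherwise return no metadata.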
MDNode* PPUTranslator::CheckBranchProbability(u32 bo)
{
const bool bo0 = (bo & 0x10) != 0;
const bool bo1 = (bo & 0x08) != 0;
const bool bo2 = (bo & 0x04) != 0;
const bool bo3 = (bo & 0x02) != 0;
const bool bo4 = (bo & 0x01) != 0;
if ((bo0 && bo1) || (bo2 && bo3))
{
return bo4 ? m_md_likely : m_md_unlikely;
}
return nullptr;
}
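// Build one standalone IR function per AltiVec instruction ("op_<NAME>"), each taking a context_t*
// and operating on fixed vector registers (vd=0, va=1, vb=2, vc=3), presumably for use as the
// LLVM-built vector instruction handlers of the interpreter.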
void PPUTranslator::build_interpreter()
{
#define BUILD_VEC_INST(i) { \
m_function = llvm::cast<llvm::Function>(m_module->getOrInsertFunction("op_" #i, get_type<void>(), m_thread_type->getPointerTo()).getCallee()); \
std::fill(std::begin(m_globals), std::end(m_globals), nullptr); \
std::fill(std::begin(m_locals), std::end(m_locals), nullptr); \
IRBuilder<> irb(BasicBlock::Create(m_context, "__entry", m_function)); \
m_ir = &irb; \
m_thread = m_function->getArg(0); \
ppu_opcode_t op{}; \
op.vd = 0; \
op.va = 1; \
op.vb = 2; \
op.vc = 3; \
this->i(op); \
FlushRegisters(); \
m_ir->CreateRetVoid(); \
replace_intrinsics(*m_function); \
}
BUILD_VEC_INST(VADDCUW);
BUILD_VEC_INST(VADDFP);
BUILD_VEC_INST(VADDSBS);
BUILD_VEC_INST(VADDSHS);
BUILD_VEC_INST(VADDSWS);
BUILD_VEC_INST(VADDUBM);
BUILD_VEC_INST(VADDUBS);
BUILD_VEC_INST(VADDUHM);
BUILD_VEC_INST(VADDUHS);
BUILD_VEC_INST(VADDUWM);
BUILD_VEC_INST(VADDUWS);
BUILD_VEC_INST(VAND);
BUILD_VEC_INST(VANDC);
BUILD_VEC_INST(VAVGSB);
BUILD_VEC_INST(VAVGSH);
BUILD_VEC_INST(VAVGSW);
BUILD_VEC_INST(VAVGUB);
BUILD_VEC_INST(VAVGUH);
BUILD_VEC_INST(VAVGUW);
BUILD_VEC_INST(VCFSX);
BUILD_VEC_INST(VCFUX);
BUILD_VEC_INST(VCMPBFP);
BUILD_VEC_INST(VCMPBFP_);
BUILD_VEC_INST(VCMPEQFP);
BUILD_VEC_INST(VCMPEQFP_);
BUILD_VEC_INST(VCMPEQUB);
BUILD_VEC_INST(VCMPEQUB_);
BUILD_VEC_INST(VCMPEQUH);
BUILD_VEC_INST(VCMPEQUH_);
BUILD_VEC_INST(VCMPEQUW);
BUILD_VEC_INST(VCMPEQUW_);
BUILD_VEC_INST(VCMPGEFP);
BUILD_VEC_INST(VCMPGEFP_);
BUILD_VEC_INST(VCMPGTFP);
BUILD_VEC_INST(VCMPGTFP_);
BUILD_VEC_INST(VCMPGTSB);
BUILD_VEC_INST(VCMPGTSB_);
BUILD_VEC_INST(VCMPGTSH);
BUILD_VEC_INST(VCMPGTSH_);
BUILD_VEC_INST(VCMPGTSW);
BUILD_VEC_INST(VCMPGTSW_);
BUILD_VEC_INST(VCMPGTUB);
BUILD_VEC_INST(VCMPGTUB_);
BUILD_VEC_INST(VCMPGTUH);
BUILD_VEC_INST(VCMPGTUH_);
BUILD_VEC_INST(VCMPGTUW);
BUILD_VEC_INST(VCMPGTUW_);
BUILD_VEC_INST(VCTSXS);
BUILD_VEC_INST(VCTUXS);
BUILD_VEC_INST(VEXPTEFP);
BUILD_VEC_INST(VLOGEFP);
BUILD_VEC_INST(VMADDFP);
BUILD_VEC_INST(VMAXFP);
BUILD_VEC_INST(VMAXSB);
BUILD_VEC_INST(VMAXSH);
BUILD_VEC_INST(VMAXSW);
BUILD_VEC_INST(VMAXUB);
BUILD_VEC_INST(VMAXUH);
BUILD_VEC_INST(VMAXUW);
BUILD_VEC_INST(VMHADDSHS);
BUILD_VEC_INST(VMHRADDSHS);
BUILD_VEC_INST(VMINFP);
BUILD_VEC_INST(VMINSB);
BUILD_VEC_INST(VMINSH);
BUILD_VEC_INST(VMINSW);
BUILD_VEC_INST(VMINUB);
BUILD_VEC_INST(VMINUH);
BUILD_VEC_INST(VMINUW);
BUILD_VEC_INST(VMLADDUHM);
BUILD_VEC_INST(VMRGHB);
BUILD_VEC_INST(VMRGHH);
BUILD_VEC_INST(VMRGHW);
BUILD_VEC_INST(VMRGLB);
BUILD_VEC_INST(VMRGLH);
BUILD_VEC_INST(VMRGLW);
BUILD_VEC_INST(VMSUMMBM);
BUILD_VEC_INST(VMSUMSHM);
BUILD_VEC_INST(VMSUMSHS);
BUILD_VEC_INST(VMSUMUBM);
BUILD_VEC_INST(VMSUMUHM);
BUILD_VEC_INST(VMSUMUHS);
BUILD_VEC_INST(VMULESB);
BUILD_VEC_INST(VMULESH);
BUILD_VEC_INST(VMULEUB);
BUILD_VEC_INST(VMULEUH);
BUILD_VEC_INST(VMULOSB);
BUILD_VEC_INST(VMULOSH);
BUILD_VEC_INST(VMULOUB);
BUILD_VEC_INST(VMULOUH);
BUILD_VEC_INST(VNMSUBFP);
BUILD_VEC_INST(VNOR);
BUILD_VEC_INST(VOR);
BUILD_VEC_INST(VPERM);
BUILD_VEC_INST(VPKPX);
BUILD_VEC_INST(VPKSHSS);
BUILD_VEC_INST(VPKSHUS);
BUILD_VEC_INST(VPKSWSS);
BUILD_VEC_INST(VPKSWUS);
BUILD_VEC_INST(VPKUHUM);
BUILD_VEC_INST(VPKUHUS);
BUILD_VEC_INST(VPKUWUM);
BUILD_VEC_INST(VPKUWUS);
BUILD_VEC_INST(VREFP);
BUILD_VEC_INST(VRFIM);
BUILD_VEC_INST(VRFIN);
BUILD_VEC_INST(VRFIP);
BUILD_VEC_INST(VRFIZ);
BUILD_VEC_INST(VRLB);
BUILD_VEC_INST(VRLH);
BUILD_VEC_INST(VRLW);
BUILD_VEC_INST(VRSQRTEFP);
BUILD_VEC_INST(VSEL);
BUILD_VEC_INST(VSL);
BUILD_VEC_INST(VSLB);
BUILD_VEC_INST(VSLDOI);
BUILD_VEC_INST(VSLH);
BUILD_VEC_INST(VSLO);
BUILD_VEC_INST(VSLW);
BUILD_VEC_INST(VSPLTB);
BUILD_VEC_INST(VSPLTH);
BUILD_VEC_INST(VSPLTISB);
BUILD_VEC_INST(VSPLTISH);
BUILD_VEC_INST(VSPLTISW);
BUILD_VEC_INST(VSPLTW);
BUILD_VEC_INST(VSR);
BUILD_VEC_INST(VSRAB);
BUILD_VEC_INST(VSRAH);
BUILD_VEC_INST(VSRAW);
BUILD_VEC_INST(VSRB);
BUILD_VEC_INST(VSRH);
BUILD_VEC_INST(VSRO);
BUILD_VEC_INST(VSRW);
BUILD_VEC_INST(VSUBCUW);
BUILD_VEC_INST(VSUBFP);
BUILD_VEC_INST(VSUBSBS);
BUILD_VEC_INST(VSUBSHS);
BUILD_VEC_INST(VSUBSWS);
BUILD_VEC_INST(VSUBUBM);
BUILD_VEC_INST(VSUBUBS);
BUILD_VEC_INST(VSUBUHM);
BUILD_VEC_INST(VSUBUHS);
BUILD_VEC_INST(VSUBUWM);
BUILD_VEC_INST(VSUBUWS);
BUILD_VEC_INST(VSUMSWS);
BUILD_VEC_INST(VSUM2SWS);
BUILD_VEC_INST(VSUM4SBS);
BUILD_VEC_INST(VSUM4SHS);
BUILD_VEC_INST(VSUM4UBS);
BUILD_VEC_INST(VUPKHPX);
BUILD_VEC_INST(VUPKHSB);
BUILD_VEC_INST(VUPKHSH);
BUILD_VEC_INST(VUPKLPX);
BUILD_VEC_INST(VUPKLSB);
BUILD_VEC_INST(VUPKLSH);
BUILD_VEC_INST(VXOR);
#undef BUILD_VEC_INST
}
#endif