Use LLVM 9

Use new add/sub with saturation intrinsics
This commit is contained in:
Nekotekina 2019-03-29 16:35:00 +03:00
parent 7e0b941e9f
commit d873802b9c
9 changed files with 148 additions and 92 deletions

View file

@ -267,7 +267,7 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
return;
}
indirect = m_module->getOrInsertFunction(fmt::format("__0x%llx", target), type);
indirect = m_module->getOrInsertFunction(fmt::format("__0x%llx", target), type).getCallee();
}
else
{
@ -597,33 +597,27 @@ void PPUTranslator::VADDSBS(ppu_opcode_t op)
{
	// Saturating add of va and vb viewed as 16 signed 8-bit lanes; result goes to vd.
	const auto a = get_vr<s8[16]>(op.va);
	const auto b = get_vr<s8[16]>(op.vb);
	const auto r = add_sat(a, b);
	set_vr(op.vd, r);

	// A lane saturated exactly when the saturated result differs from the plain sum
	// (a + b is assumed to wrap); the comparison is passed on to SetSat.
	SetSat(IsNotZero(eval(r != (a + b)).value));
}
// Saturating add of va and vb viewed as 8 signed 16-bit lanes; result goes to vd.
void PPUTranslator::VADDSHS(ppu_opcode_t op)
{
	const auto a = get_vr<s16[8]>(op.va);
	const auto b = get_vr<s16[8]>(op.vb);
	const auto r = add_sat(a, b);
	set_vr(op.vd, r);

	// A lane saturated exactly when the saturated result differs from the plain sum
	// (a + b is assumed to wrap); the comparison is passed on to SetSat.
	SetSat(IsNotZero(eval(r != (a + b)).value));
}
// Saturating add of va and vb viewed as 4 signed 32-bit lanes; result goes to vd.
void PPUTranslator::VADDSWS(ppu_opcode_t op)
{
	const auto a = get_vr<s32[4]>(op.va);
	const auto b = get_vr<s32[4]>(op.vb);
	const auto r = add_sat(a, b);
	set_vr(op.vd, r);

	// A lane saturated exactly when the saturated result differs from the plain sum
	// (a + b is assumed to wrap); the comparison is passed on to SetSat.
	SetSat(IsNotZero(eval(r != (a + b)).value));
}
void PPUTranslator::VADDUBM(ppu_opcode_t op)
@ -637,10 +631,9 @@ void PPUTranslator::VADDUBS(ppu_opcode_t op)
{
	// Saturating add of va and vb viewed as 16 unsigned 8-bit lanes; result goes to vd.
	const auto a = get_vr<u8[16]>(op.va);
	const auto b = get_vr<u8[16]>(op.vb);
	const auto r = add_sat(a, b);
	set_vr(op.vd, r);

	// A lane saturated exactly when the saturated result differs from the plain sum
	// (a + b is assumed to wrap); the comparison is passed on to SetSat.
	SetSat(IsNotZero(eval(r != (a + b)).value));
}
void PPUTranslator::VADDUHM(ppu_opcode_t op)
@ -654,10 +647,9 @@ void PPUTranslator::VADDUHS(ppu_opcode_t op)
{
	// Saturating add of va and vb viewed as 8 unsigned 16-bit lanes; result goes to vd.
	const auto a = get_vr<u16[8]>(op.va);
	const auto b = get_vr<u16[8]>(op.vb);
	const auto r = add_sat(a, b);
	set_vr(op.vd, r);

	// A lane saturated exactly when the saturated result differs from the plain sum
	// (a + b is assumed to wrap); the comparison is passed on to SetSat.
	SetSat(IsNotZero(eval(r != (a + b)).value));
}
void PPUTranslator::VADDUWM(ppu_opcode_t op)
@ -671,10 +663,9 @@ void PPUTranslator::VADDUWS(ppu_opcode_t op)
{
	// Saturating add of va and vb viewed as 4 unsigned 32-bit lanes; result goes to vd.
	const auto a = get_vr<u32[4]>(op.va);
	const auto b = get_vr<u32[4]>(op.vb);
	const auto r = add_sat(a, b);
	set_vr(op.vd, r);

	// A lane saturated exactly when the saturated result differs from the plain sum
	// (a + b is assumed to wrap); the comparison is passed on to SetSat.
	SetSat(IsNotZero(eval(r != (a + b)).value));
}
void PPUTranslator::VAND(ppu_opcode_t op)
@ -1491,33 +1482,27 @@ void PPUTranslator::VSUBSBS(ppu_opcode_t op)
{
	// Saturating subtract (va - vb) over 16 signed 8-bit lanes; result goes to vd.
	const auto a = get_vr<s8[16]>(op.va);
	const auto b = get_vr<s8[16]>(op.vb);
	const auto r = sub_sat(a, b);
	set_vr(op.vd, r);

	// A lane saturated exactly when the saturated result differs from the plain
	// difference (a - b is assumed to wrap); the comparison is passed on to SetSat.
	SetSat(IsNotZero(eval(r != (a - b)).value));
}
// Saturating subtract (va - vb) over 8 signed 16-bit lanes; result goes to vd.
void PPUTranslator::VSUBSHS(ppu_opcode_t op)
{
	const auto a = get_vr<s16[8]>(op.va);
	const auto b = get_vr<s16[8]>(op.vb);
	const auto r = sub_sat(a, b);
	set_vr(op.vd, r);

	// A lane saturated exactly when the saturated result differs from the plain
	// difference (a - b is assumed to wrap); the comparison is passed on to SetSat.
	SetSat(IsNotZero(eval(r != (a - b)).value));
}
// Saturating subtract (va - vb) over 4 signed 32-bit lanes; result goes to vd.
void PPUTranslator::VSUBSWS(ppu_opcode_t op)
{
	const auto a = get_vr<s32[4]>(op.va);
	const auto b = get_vr<s32[4]>(op.vb);
	const auto r = sub_sat(a, b);
	set_vr(op.vd, r);

	// A lane saturated exactly when the saturated result differs from the plain
	// difference (a - b is assumed to wrap); the comparison is passed on to SetSat.
	SetSat(IsNotZero(eval(r != (a - b)).value));
}
void PPUTranslator::VSUBUBM(ppu_opcode_t op)
@ -1531,10 +1516,9 @@ void PPUTranslator::VSUBUBS(ppu_opcode_t op)
{
	// Saturating subtract (va - vb) over 16 unsigned 8-bit lanes; result goes to vd.
	const auto a = get_vr<u8[16]>(op.va);
	const auto b = get_vr<u8[16]>(op.vb);
	const auto r = sub_sat(a, b);
	set_vr(op.vd, r);

	// A lane saturated exactly when the saturated result differs from the plain
	// difference (a - b is assumed to wrap); the comparison is passed on to SetSat.
	SetSat(IsNotZero(eval(r != (a - b)).value));
}
void PPUTranslator::VSUBUHM(ppu_opcode_t op)
@ -1548,10 +1532,9 @@ void PPUTranslator::VSUBUHS(ppu_opcode_t op)
{
	// Saturating subtract (va - vb) over 8 unsigned 16-bit lanes; result goes to vd.
	const auto a = get_vr<u16[8]>(op.va);
	const auto b = get_vr<u16[8]>(op.vb);
	const auto r = sub_sat(a, b);
	set_vr(op.vd, r);

	// A lane saturated exactly when the saturated result differs from the plain
	// difference (a - b is assumed to wrap); the comparison is passed on to SetSat.
	SetSat(IsNotZero(eval(r != (a - b)).value));
}
void PPUTranslator::VSUBUWM(ppu_opcode_t op)
@ -1565,10 +1548,9 @@ void PPUTranslator::VSUBUWS(ppu_opcode_t op)
{
	// Saturating subtract (va - vb) over 4 unsigned 32-bit lanes; result goes to vd.
	const auto a = get_vr<u32[4]>(op.va);
	const auto b = get_vr<u32[4]>(op.vb);
	const auto r = sub_sat(a, b);
	set_vr(op.vd, r);

	// A lane saturated exactly when the saturated result differs from the plain
	// difference (a - b is assumed to wrap); the comparison is passed on to SetSat.
	SetSat(IsNotZero(eval(r != (a - b)).value));
}
void PPUTranslator::VSUMSWS(ppu_opcode_t op)