Merge xfloat options

This commit is contained in:
Megamouse 2023-07-23 09:09:24 +02:00 committed by Elad Ashkenazi
parent e6009794ee
commit 343ba8733b
7 changed files with 70 additions and 62 deletions

View file

@ -5316,7 +5316,7 @@ public:
if (src > 0x40000)
{
// Use the xfloat hint to create 256-bit (4x double) PHI
llvm::Type* type = g_cfg.core.spu_accurate_xfloat && bb.reg_maybe_xf[i] ? get_type<f64[4]>() : get_reg_type(i);
llvm::Type* type = g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate && bb.reg_maybe_xf[i] ? get_type<f64[4]>() : get_reg_type(i);
const auto _phi = m_ir->CreatePHI(type, ::size32(bb.preds), fmt::format("phi0x%05x_r%u", baddr, i));
m_block->phi[i] = _phi;
@ -8876,7 +8876,7 @@ public:
void FREST(spu_opcode_t op)
{
// TODO
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
const auto a = get_vr<f32[4]>(op.ra);
const auto mask_ov = sext<s32[4]>(bitcast<s32[4]>(fabs(a)) > splat<s32[4]>(0x7e7fffff));
@ -8885,7 +8885,7 @@ public:
return;
}
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
register_intrinsic("spu_frest", [&](llvm::CallInst* ci)
{
@ -8918,13 +8918,13 @@ public:
void FRSQEST(spu_opcode_t op)
{
// TODO
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, fsplat<f64[4]>(1.0) / fsqrt(fabs(get_vr<f64[4]>(op.ra))));
return;
}
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
register_intrinsic("spu_frsqest", [&](llvm::CallInst* ci)
{
@ -8956,7 +8956,7 @@ public:
void FCGT(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, sext<s32[4]>(fcmp_ord(get_vr<f64[4]>(op.ra) > get_vr<f64[4]>(op.rb))));
return;
@ -9003,7 +9003,7 @@ public:
return eval(sext<s32[4]>(bitcast<s32[4]>(a) > bitcast<s32[4]>(b)));
}
if (g_cfg.core.spu_approx_xfloat || g_cfg.core.spu_relaxed_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate || g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::relaxed)
{
const auto ai = eval(bitcast<s32[4]>(a));
const auto bi = eval(bitcast<s32[4]>(b));
@ -9034,7 +9034,7 @@ public:
void FCMGT(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, sext<s32[4]>(fcmp_ord(fabs(get_vr<f64[4]>(op.ra)) > fabs(get_vr<f64[4]>(op.rb)))));
return;
@ -9080,7 +9080,7 @@ public:
return eval(sext<s32[4]>(mai > mbi));
}
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
return eval(sext<s32[4]>(fcmp_uno(ma > mb) & (mai > mbi)));
}
@ -9101,7 +9101,7 @@ public:
void FA(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, get_vr<f64[4]>(op.ra) + get_vr<f64[4]>(op.rb));
return;
@ -9126,7 +9126,7 @@ public:
void FS(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, get_vr<f64[4]>(op.ra) - get_vr<f64[4]>(op.rb));
return;
@ -9137,7 +9137,7 @@ public:
const auto a = value<f32[4]>(ci->getOperand(0));
const auto b = value<f32[4]>(ci->getOperand(1));
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
const auto bc = clamp_smax(b); // for #4478
return eval(a - bc);
@ -9159,7 +9159,7 @@ public:
void FM(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, get_vr<f64[4]>(op.ra) * get_vr<f64[4]>(op.rb));
return;
@ -9170,7 +9170,7 @@ public:
const auto a = value<f32[4]>(ci->getOperand(0));
const auto b = value<f32[4]>(ci->getOperand(1));
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
if (a.value == b.value)
{
@ -9206,7 +9206,7 @@ public:
void FESD(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
const auto r = zshuffle(get_vr<f64[4]>(op.ra), 1, 3);
const auto d = bitcast<s64[2]>(r);
@ -9236,7 +9236,7 @@ public:
void FRDS(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
const auto r = get_vr<f64[2]>(op.ra);
const auto d = bitcast<s64[2]>(r);
@ -9267,7 +9267,7 @@ public:
void FCEQ(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, sext<s32[4]>(fcmp_ord(get_vr<f64[4]>(op.ra) == get_vr<f64[4]>(op.rb))));
return;
@ -9320,7 +9320,7 @@ public:
return eval(sext<s32[4]>(bitcast<s32[4]>(a) == bitcast<s32[4]>(b)));
}
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
return eval(sext<s32[4]>(fcmp_ord(a == b)) | sext<s32[4]>(bitcast<s32[4]>(a) == bitcast<s32[4]>(b)));
}
@ -9341,7 +9341,7 @@ public:
void FCMEQ(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, sext<s32[4]>(fcmp_ord(fabs(get_vr<f64[4]>(op.ra)) == fabs(get_vr<f64[4]>(op.rb)))));
return;
@ -9397,7 +9397,7 @@ public:
return eval(sext<s32[4]>(bitcast<s32[4]>(fa) == bitcast<s32[4]>(fb)));
}
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
return eval(sext<s32[4]>(fcmp_ord(fa == fb)) | sext<s32[4]>(bitcast<s32[4]>(fa) == bitcast<s32[4]>(fb)));
}
@ -9490,7 +9490,7 @@ public:
void FNMS(spu_opcode_t op)
{
// See FMA.
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
const auto [a, b, c] = get_vrs<f64[4]>(op.ra, op.rb, op.rc);
set_vr(op.rt4, fmuladd(-a, b, c));
@ -9503,7 +9503,7 @@ public:
const auto b = value<f32[4]>(ci->getOperand(1));
const auto c = value<f32[4]>(ci->getOperand(2));
if (g_cfg.core.spu_approx_xfloat || g_cfg.core.spu_relaxed_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate || g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::relaxed)
{
return fma32x4(eval(-clamp_smax(a)), clamp_smax(b), c);
}
@ -9525,7 +9525,7 @@ public:
void FMA(spu_opcode_t op)
{
// Hardware FMA produces the same result as multiply + add on the limited double range (xfloat).
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
const auto [a, b, c] = get_vrs<f64[4]>(op.ra, op.rb, op.rc);
set_vr(op.rt4, fmuladd(a, b, c));
@ -9538,7 +9538,7 @@ public:
const auto b = value<f32[4]>(ci->getOperand(1));
const auto c = value<f32[4]>(ci->getOperand(2));
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
const auto ma = sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.)));
const auto mb = sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.)));
@ -9599,7 +9599,7 @@ public:
void FMS(spu_opcode_t op)
{
// See FMA.
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
const auto [a, b, c] = get_vrs<f64[4]>(op.ra, op.rb, op.rc);
set_vr(op.rt4, fmuladd(a, b, -c));
@ -9612,7 +9612,7 @@ public:
const auto b = value<f32[4]>(ci->getOperand(1));
const auto c = value<f32[4]>(ci->getOperand(2));
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
return fma32x4(clamp_smax(a), clamp_smax(b), eval(-c));
}
@ -9646,7 +9646,7 @@ public:
void FI(spu_opcode_t op)
{
// TODO
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
set_vr(op.rt, get_vr<f64[4]>(op.rb));
// const auto [a, b] = get_vrs<f64[4]>(op.ra, op.rb);
@ -9674,7 +9674,7 @@ public:
return bitcast<f32[4]>((b & 0xff800000u) | (bitcast<u32[4]>(fpcast<f32[4]>(bnew)) & ~0xff800000u)); // Inject old sign and exponent
});
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
{
register_intrinsic("spu_re", [&](llvm::CallInst* ci)
{
@ -9733,7 +9733,7 @@ public:
void CFLTS(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
value_t<f64[4]> a = get_vr<f64[4]>(op.ra);
value_t<f64[4]> s;
@ -9807,7 +9807,7 @@ public:
void CFLTU(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
value_t<f64[4]> a = get_vr<f64[4]>(op.ra);
value_t<f64[4]> s;
@ -9890,7 +9890,7 @@ public:
void CSFLT(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
value_t<s32[4]> a = get_vr<s32[4]>(op.ra);
value_t<f64[4]> r;
@ -9930,7 +9930,7 @@ public:
void CUFLT(spu_opcode_t op)
{
if (g_cfg.core.spu_accurate_xfloat)
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
{
value_t<s32[4]> a = get_vr<s32[4]>(op.ra);
value_t<f64[4]> r;