mirror of
https://github.com/RPCSX/rpcsx.git
synced 2026-04-20 22:05:12 +00:00
Merge xfloat options
This commit is contained in:
parent
e6009794ee
commit
343ba8733b
7 changed files with 70 additions and 62 deletions
|
|
@ -5316,7 +5316,7 @@ public:
|
|||
if (src > 0x40000)
|
||||
{
|
||||
// Use the xfloat hint to create 256-bit (4x double) PHI
|
||||
llvm::Type* type = g_cfg.core.spu_accurate_xfloat && bb.reg_maybe_xf[i] ? get_type<f64[4]>() : get_reg_type(i);
|
||||
llvm::Type* type = g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate && bb.reg_maybe_xf[i] ? get_type<f64[4]>() : get_reg_type(i);
|
||||
|
||||
const auto _phi = m_ir->CreatePHI(type, ::size32(bb.preds), fmt::format("phi0x%05x_r%u", baddr, i));
|
||||
m_block->phi[i] = _phi;
|
||||
|
|
@ -8876,7 +8876,7 @@ public:
|
|||
void FREST(spu_opcode_t op)
|
||||
{
|
||||
// TODO
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
|
||||
{
|
||||
const auto a = get_vr<f32[4]>(op.ra);
|
||||
const auto mask_ov = sext<s32[4]>(bitcast<s32[4]>(fabs(a)) > splat<s32[4]>(0x7e7fffff));
|
||||
|
|
@ -8885,7 +8885,7 @@ public:
|
|||
return;
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_approx_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
|
||||
{
|
||||
register_intrinsic("spu_frest", [&](llvm::CallInst* ci)
|
||||
{
|
||||
|
|
@ -8918,13 +8918,13 @@ public:
|
|||
void FRSQEST(spu_opcode_t op)
|
||||
{
|
||||
// TODO
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
|
||||
{
|
||||
set_vr(op.rt, fsplat<f64[4]>(1.0) / fsqrt(fabs(get_vr<f64[4]>(op.ra))));
|
||||
return;
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_approx_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
|
||||
{
|
||||
register_intrinsic("spu_frsqest", [&](llvm::CallInst* ci)
|
||||
{
|
||||
|
|
@ -8956,7 +8956,7 @@ public:
|
|||
|
||||
void FCGT(spu_opcode_t op)
|
||||
{
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
|
||||
{
|
||||
set_vr(op.rt, sext<s32[4]>(fcmp_ord(get_vr<f64[4]>(op.ra) > get_vr<f64[4]>(op.rb))));
|
||||
return;
|
||||
|
|
@ -9003,7 +9003,7 @@ public:
|
|||
return eval(sext<s32[4]>(bitcast<s32[4]>(a) > bitcast<s32[4]>(b)));
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_approx_xfloat || g_cfg.core.spu_relaxed_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate || g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::relaxed)
|
||||
{
|
||||
const auto ai = eval(bitcast<s32[4]>(a));
|
||||
const auto bi = eval(bitcast<s32[4]>(b));
|
||||
|
|
@ -9034,7 +9034,7 @@ public:
|
|||
|
||||
void FCMGT(spu_opcode_t op)
|
||||
{
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
|
||||
{
|
||||
set_vr(op.rt, sext<s32[4]>(fcmp_ord(fabs(get_vr<f64[4]>(op.ra)) > fabs(get_vr<f64[4]>(op.rb)))));
|
||||
return;
|
||||
|
|
@ -9080,7 +9080,7 @@ public:
|
|||
return eval(sext<s32[4]>(mai > mbi));
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_approx_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
|
||||
{
|
||||
return eval(sext<s32[4]>(fcmp_uno(ma > mb) & (mai > mbi)));
|
||||
}
|
||||
|
|
@ -9101,7 +9101,7 @@ public:
|
|||
|
||||
void FA(spu_opcode_t op)
|
||||
{
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
|
||||
{
|
||||
set_vr(op.rt, get_vr<f64[4]>(op.ra) + get_vr<f64[4]>(op.rb));
|
||||
return;
|
||||
|
|
@ -9126,7 +9126,7 @@ public:
|
|||
|
||||
void FS(spu_opcode_t op)
|
||||
{
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
|
||||
{
|
||||
set_vr(op.rt, get_vr<f64[4]>(op.ra) - get_vr<f64[4]>(op.rb));
|
||||
return;
|
||||
|
|
@ -9137,7 +9137,7 @@ public:
|
|||
const auto a = value<f32[4]>(ci->getOperand(0));
|
||||
const auto b = value<f32[4]>(ci->getOperand(1));
|
||||
|
||||
if (g_cfg.core.spu_approx_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
|
||||
{
|
||||
const auto bc = clamp_smax(b); // for #4478
|
||||
return eval(a - bc);
|
||||
|
|
@ -9159,7 +9159,7 @@ public:
|
|||
|
||||
void FM(spu_opcode_t op)
|
||||
{
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
|
||||
{
|
||||
set_vr(op.rt, get_vr<f64[4]>(op.ra) * get_vr<f64[4]>(op.rb));
|
||||
return;
|
||||
|
|
@ -9170,7 +9170,7 @@ public:
|
|||
const auto a = value<f32[4]>(ci->getOperand(0));
|
||||
const auto b = value<f32[4]>(ci->getOperand(1));
|
||||
|
||||
if (g_cfg.core.spu_approx_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
|
||||
{
|
||||
if (a.value == b.value)
|
||||
{
|
||||
|
|
@ -9206,7 +9206,7 @@ public:
|
|||
|
||||
void FESD(spu_opcode_t op)
|
||||
{
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
|
||||
{
|
||||
const auto r = zshuffle(get_vr<f64[4]>(op.ra), 1, 3);
|
||||
const auto d = bitcast<s64[2]>(r);
|
||||
|
|
@ -9236,7 +9236,7 @@ public:
|
|||
|
||||
void FRDS(spu_opcode_t op)
|
||||
{
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
|
||||
{
|
||||
const auto r = get_vr<f64[2]>(op.ra);
|
||||
const auto d = bitcast<s64[2]>(r);
|
||||
|
|
@ -9267,7 +9267,7 @@ public:
|
|||
|
||||
void FCEQ(spu_opcode_t op)
|
||||
{
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
|
||||
{
|
||||
set_vr(op.rt, sext<s32[4]>(fcmp_ord(get_vr<f64[4]>(op.ra) == get_vr<f64[4]>(op.rb))));
|
||||
return;
|
||||
|
|
@ -9320,7 +9320,7 @@ public:
|
|||
return eval(sext<s32[4]>(bitcast<s32[4]>(a) == bitcast<s32[4]>(b)));
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_approx_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
|
||||
{
|
||||
return eval(sext<s32[4]>(fcmp_ord(a == b)) | sext<s32[4]>(bitcast<s32[4]>(a) == bitcast<s32[4]>(b)));
|
||||
}
|
||||
|
|
@ -9341,7 +9341,7 @@ public:
|
|||
|
||||
void FCMEQ(spu_opcode_t op)
|
||||
{
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
|
||||
{
|
||||
set_vr(op.rt, sext<s32[4]>(fcmp_ord(fabs(get_vr<f64[4]>(op.ra)) == fabs(get_vr<f64[4]>(op.rb)))));
|
||||
return;
|
||||
|
|
@ -9397,7 +9397,7 @@ public:
|
|||
return eval(sext<s32[4]>(bitcast<s32[4]>(fa) == bitcast<s32[4]>(fb)));
|
||||
}
|
||||
|
||||
if (g_cfg.core.spu_approx_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
|
||||
{
|
||||
return eval(sext<s32[4]>(fcmp_ord(fa == fb)) | sext<s32[4]>(bitcast<s32[4]>(fa) == bitcast<s32[4]>(fb)));
|
||||
}
|
||||
|
|
@ -9490,7 +9490,7 @@ public:
|
|||
void FNMS(spu_opcode_t op)
|
||||
{
|
||||
// See FMA.
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
|
||||
{
|
||||
const auto [a, b, c] = get_vrs<f64[4]>(op.ra, op.rb, op.rc);
|
||||
set_vr(op.rt4, fmuladd(-a, b, c));
|
||||
|
|
@ -9503,7 +9503,7 @@ public:
|
|||
const auto b = value<f32[4]>(ci->getOperand(1));
|
||||
const auto c = value<f32[4]>(ci->getOperand(2));
|
||||
|
||||
if (g_cfg.core.spu_approx_xfloat || g_cfg.core.spu_relaxed_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate || g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::relaxed)
|
||||
{
|
||||
return fma32x4(eval(-clamp_smax(a)), clamp_smax(b), c);
|
||||
}
|
||||
|
|
@ -9525,7 +9525,7 @@ public:
|
|||
void FMA(spu_opcode_t op)
|
||||
{
|
||||
// Hardware FMA produces the same result as multiple + add on the limited double range (xfloat).
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
|
||||
{
|
||||
const auto [a, b, c] = get_vrs<f64[4]>(op.ra, op.rb, op.rc);
|
||||
set_vr(op.rt4, fmuladd(a, b, c));
|
||||
|
|
@ -9538,7 +9538,7 @@ public:
|
|||
const auto b = value<f32[4]>(ci->getOperand(1));
|
||||
const auto c = value<f32[4]>(ci->getOperand(2));
|
||||
|
||||
if (g_cfg.core.spu_approx_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
|
||||
{
|
||||
const auto ma = sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.)));
|
||||
const auto mb = sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.)));
|
||||
|
|
@ -9599,7 +9599,7 @@ public:
|
|||
void FMS(spu_opcode_t op)
|
||||
{
|
||||
// See FMA.
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
|
||||
{
|
||||
const auto [a, b, c] = get_vrs<f64[4]>(op.ra, op.rb, op.rc);
|
||||
set_vr(op.rt4, fmuladd(a, b, -c));
|
||||
|
|
@ -9612,7 +9612,7 @@ public:
|
|||
const auto b = value<f32[4]>(ci->getOperand(1));
|
||||
const auto c = value<f32[4]>(ci->getOperand(2));
|
||||
|
||||
if (g_cfg.core.spu_approx_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
|
||||
{
|
||||
return fma32x4(clamp_smax(a), clamp_smax(b), eval(-c));
|
||||
}
|
||||
|
|
@ -9646,7 +9646,7 @@ public:
|
|||
void FI(spu_opcode_t op)
|
||||
{
|
||||
// TODO
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
|
||||
{
|
||||
set_vr(op.rt, get_vr<f64[4]>(op.rb));
|
||||
// const auto [a, b] = get_vrs<f64[4]>(op.ra, op.rb);
|
||||
|
|
@ -9674,7 +9674,7 @@ public:
|
|||
return bitcast<f32[4]>((b & 0xff800000u) | (bitcast<u32[4]>(fpcast<f32[4]>(bnew)) & ~0xff800000u)); // Inject old sign and exponent
|
||||
});
|
||||
|
||||
if (g_cfg.core.spu_approx_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate)
|
||||
{
|
||||
register_intrinsic("spu_re", [&](llvm::CallInst* ci)
|
||||
{
|
||||
|
|
@ -9733,7 +9733,7 @@ public:
|
|||
|
||||
void CFLTS(spu_opcode_t op)
|
||||
{
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
|
||||
{
|
||||
value_t<f64[4]> a = get_vr<f64[4]>(op.ra);
|
||||
value_t<f64[4]> s;
|
||||
|
|
@ -9807,7 +9807,7 @@ public:
|
|||
|
||||
void CFLTU(spu_opcode_t op)
|
||||
{
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
|
||||
{
|
||||
value_t<f64[4]> a = get_vr<f64[4]>(op.ra);
|
||||
value_t<f64[4]> s;
|
||||
|
|
@ -9890,7 +9890,7 @@ public:
|
|||
|
||||
void CSFLT(spu_opcode_t op)
|
||||
{
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
|
||||
{
|
||||
value_t<s32[4]> a = get_vr<s32[4]>(op.ra);
|
||||
value_t<f64[4]> r;
|
||||
|
|
@ -9930,7 +9930,7 @@ public:
|
|||
|
||||
void CUFLT(spu_opcode_t op)
|
||||
{
|
||||
if (g_cfg.core.spu_accurate_xfloat)
|
||||
if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate)
|
||||
{
|
||||
value_t<s32[4]> a = get_vr<s32[4]>(op.ra);
|
||||
value_t<f64[4]> r;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue