mirror of
https://github.com/RPCSX/rpcsx.git
synced 2026-04-04 14:08:37 +00:00
spu: Fixes (#3526)
* spu: Rewrite interpreter fast FM - Partially implement accurate FM - Fix FMA/FMS/FNMS by removing an optimization that does not work for INF (cmpunord) - cmpunord does not catch all cases of an extended result/overflow - NOTE: FM still does not handle corner cases well (e.g. inf * 1.2, because the SPU has no concept of inf)
This commit is contained in:
parent
81b3e7e449
commit
b338c81907
2 changed files with 99 additions and 27 deletions
|
|
@ -772,7 +772,31 @@ void spu_interpreter_fast::FS(SPUThread& spu, spu_opcode_t op)
|
|||
|
||||
// FM: per-lane single-precision multiply of gpr[ra] by gpr[rb], written to gpr[rt].
//
// The host product is post-processed lane by lane:
//  - a lane is forced to +0 when either operand has a zero exponent field
//    (zero or denormal input);
//  - a lane whose product has a zero exponent field is also forced to +0;
//  - a lane whose product has an all-ones exponent field (host inf/NaN encoding)
//    keeps the product's exponent/mantissa bits but takes its sign from the XOR
//    of the operand signs.
//
// Both operands are loaded into locals before anything is stored to gpr[rt], so
// the result is still correct when rt names the same register as ra or rb.
// NOTE(review): per the accompanying commit message this is only a partial
// approximation of accurate FM; corner cases such as inf * 1.2 are not handled.
void spu_interpreter_fast::FM(SPUThread& spu, spu_opcode_t op)
{
	const auto zero = _mm_set1_ps(0.f);
	const auto sign_bits = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); // IEEE-754 single sign bit
	const auto all_exp_bits = _mm_castsi128_ps(_mm_set1_epi32(0x7f800000)); // IEEE-754 single exponent field

	// Snapshot the operands once; every later step works on these copies.
	const auto a = spu.gpr[op.ra].vf;
	const auto b = spu.gpr[op.rb].vf;

	// Lanes where either operand's exponent field is zero (zero or denormal input).
	const auto denorm_check_a = _mm_cmpeq_ps(zero, _mm_and_ps(all_exp_bits, a));
	const auto denorm_check_b = _mm_cmpeq_ps(zero, _mm_and_ps(all_exp_bits, b));
	const auto denorm_operand_mask = _mm_or_ps(denorm_check_a, denorm_check_b);

	// Host product and its isolated exponent field (reused by both checks below).
	const auto primary_result = _mm_mul_ps(a, b);
	const auto result_exp = _mm_and_ps(all_exp_bits, primary_result);

	// Zero out lanes with a zero/denormal operand or a zero/denormal product.
	const auto denorm_result_mask = _mm_cmpeq_ps(zero, result_exp);
	const auto flushed_result = _mm_andnot_ps(_mm_or_ps(denorm_result_mask, denorm_operand_mask), primary_result);

	// Lanes whose product exponent is all ones: result_exp then compares equal to
	// the +inf bit pattern held in all_exp_bits.
	const auto nan_check = _mm_cmpeq_ps(result_exp, all_exp_bits);

	// For those lanes keep the product's exponent/mantissa bits, but derive the sign
	// from the operands' signs; still force +0 if an operand had a zero exponent.
	const auto sign_mask = _mm_xor_ps(_mm_and_ps(sign_bits, a), _mm_and_ps(sign_bits, b));
	const auto extended_result = _mm_or_ps(sign_mask, _mm_andnot_ps(sign_bits, primary_result));
	const auto final_extended = _mm_andnot_ps(denorm_operand_mask, extended_result);

	// Per-lane select: nan_check ? final_extended : flushed_result.
	const auto set1 = _mm_andnot_ps(nan_check, flushed_result);
	const auto set2 = _mm_and_ps(nan_check, final_extended);

	spu.gpr[op.rt].vf = _mm_or_ps(set1, set2);
}
|
||||
|
||||
void spu_interpreter::CLGTH(SPUThread& spu, spu_opcode_t op)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue