PPU/x64: Fix gv_cvtu32_tofs
Some checks are pending
Generate Translation Template / Generate Translation Template (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux-aarch64.sh, gcc, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux.sh, gcc, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (a1d35836e8d45bfc6f63c26f0a3e5d46ef622fe1, rpcs3/rpcs3-binaries-linux-arm64, /rpcs3/.ci/build-linux-aarch64.sh, clang, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (d812f1254a1157c80fd402f94446310560f54e5f, rpcs3/rpcs3-binaries-linux, /rpcs3/.ci/build-linux.sh, clang, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (51ae32f468089a8169aaf1567de355ff4a3e0842, rpcs3/rpcs3-binaries-mac, .ci/build-mac.sh, Intel) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (8e21bdbc40711a3fccd18fbf17b742348b0f4281, rpcs3/rpcs3-binaries-mac-arm64, .ci/build-mac-arm64.sh, Apple Silicon) (push) Waiting to run
Build RPCS3 / RPCS3 Windows (push) Waiting to run
Build RPCS3 / RPCS3 Windows Clang (win64, clang, clang64) (push) Waiting to run
Build RPCS3 / RPCS3 FreeBSD (push) Waiting to run

This commit is contained in:
Elad 2025-11-14 14:27:06 +02:00
parent e48ba283d8
commit 92e24a5779
2 changed files with 23 additions and 3 deletions

View file

@ -1115,7 +1115,24 @@ void PPUTranslator::VCFSX(ppu_opcode_t op)
void PPUTranslator::VCFUX(ppu_opcode_t op)
{
const auto b = get_vr<u32[4]>(op.vb);
set_vr(op.vd, fpcast<f32[4]>(b) * fsplat<f32[4]>(std::pow(2, -static_cast<int>(op.vuimm))));
#ifdef ARCH_ARM64
return set_vr(op.vd, fpcast<f32[4]>(b) * fsplat<f32[4]>(std::pow(2, -static_cast<int>(op.vuimm))));
#else
if (m_use_avx512)
{
return set_vr(op.vd, fpcast<f32[4]>(b) * fsplat<f32[4]>(std::pow(2, -static_cast<int>(op.vuimm))));
}
constexpr int bit_shift = 9;
const auto shifted = (b >> bit_shift);
const auto cleared = shifted << bit_shift;
const auto low_bits = b - cleared;
const auto high_part = fpcast<f32[4]>(noncast<s32[4]>(shifted)) * fsplat<f32[4]>(static_cast<f32>(1u << bit_shift));
const auto low_part = fpcast<f32[4]>(noncast<s32[4]>(low_bits));
const auto temp = high_part + low_part;
set_vr(op.vd, temp * fsplat<f32[4]>(std::pow(2, -static_cast<int>(op.vuimm))));
#endif
}
void PPUTranslator::VCMPBFP(ppu_opcode_t op)

View file

@ -2213,8 +2213,11 @@ inline v128 gv_cvtu32_tofs(const v128& src)
#if defined(__AVX512VL__)
return _mm_cvtepu32_ps(src);
#elif defined(ARCH_X64)
const auto fix = _mm_and_ps(_mm_castsi128_ps(_mm_srai_epi32(src, 31)), _mm_set1_ps(0x80000000));
return _mm_add_ps(_mm_cvtepi32_ps(_mm_and_si128(src, _mm_set1_epi32(0x7fffffff))), fix);
constexpr u64 bit_shift = 9;
const auto shifted = _mm_srli_epi32(src, bit_shift);
const auto cleared = _mm_slli_epi32(shifted, bit_shift);
const auto low_bits = _mm_sub_epi32(src, cleared);
return _mm_add_ps(_mm_cvtepi32_ps(low_bits), _mm_mul_ps(_mm_cvtepi32_ps(shifted), _mm_set_ps1(1u << bit_shift)));
#elif defined(ARCH_ARM64)
return vcvtq_f32_u32(src);
#endif