asm.hpp: replace custom functions with C++20 functions

Author: oltolm, 2025-06-06 19:34:51 +02:00 (committed by Elad)
Parent: b8031f4510
Commit: a93197cdcb
10 changed files with 46 additions and 163 deletions

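All of the hunks below make the same substitution: RPCS3's hand-written rotate, popcount and bit-scan helpers give way to their C++20 <bit> equivalents. As a reference point, here is a minimal sketch of the equivalence (rol64 is a simplified stand-in for the removed utils helper, not the verbatim original):

#include <bit>
#include <cstdint>

// Simplified stand-in for the removed utils::rol64 (illustration only).
constexpr std::uint64_t rol64(std::uint64_t x, std::uint64_t n)
{
    return (x << (n & 63)) | (x >> ((0 - n) & 63));
}

static_assert(rol64(0x8000000000000001ull, 1) == 3);
static_assert(std::rotl<std::uint64_t>(0x8000000000000001ull, 1) == 3);
static_assert(std::popcount(0xF0F0F0F0u) == 16);
static_assert(std::countr_zero(0x100u) == 8);

std::rotl takes an int count and deduces the operand type, which is presumably why most call sites below spell the width explicitly (std::rotl<u32>, std::rotl<u64>) rather than relying on deduction.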

@@ -221,7 +221,7 @@ std::pair<PPUDisAsm::const_op, u64> PPUDisAsm::try_get_const_op_gpr_value(u32 re
GET_CONST_REG(reg_rs, op.rs);
return { form, utils::rol64(reg_rs, op.sh64) & (~0ull << (op.mbe64 ^ 63)) };
return {form, std::rotl<u64>(reg_rs, op.sh64) & (~0ull << (op.mbe64 ^ 63))};
}
case ppu_itype::OR:
{

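The folded constant above is an RLDICR-style rotate-then-clear: rotate rs left by sh64, then keep only the bits at and above position mbe64 ^ 63. A worked sketch of that mask construction, using a hypothetical rldicr helper (PowerPC numbers bits from the MSB, so clearing right of bit me keeps the top me + 1 bits):

#include <bit>
#include <cstdint>

// Keep PowerPC bits 0..me after the rotate; in LSB-0 terms that is the top
// (me + 1) bits, i.e. the mask ~0ull << (me ^ 63).
constexpr std::uint64_t rldicr(std::uint64_t rs, unsigned sh, unsigned me)
{
    return std::rotl(rs, static_cast<int>(sh)) & (~0ull << (me ^ 63));
}

// rldicr rs, 8, 55 is the canonical "shift left by 8" form: the rotate wraps
// the top byte around and the mask then clears it.
static_assert(rldicr(0x1122334455667788ull, 8, 55) == 0x2233445566778800ull);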

@@ -3483,7 +3483,7 @@ auto RLWIMI()
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
const u64 mask = ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & mask);
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (dup32(std::rotl<u32>(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & mask);
if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
};
@@ -3497,7 +3497,7 @@ auto RLWINM()
return ppu_exec_select<Flags...>::template select<>();
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
ppu.gpr[op.ra] = dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
ppu.gpr[op.ra] = dup32(std::rotl<u32>(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
};
@@ -3511,7 +3511,7 @@ auto RLWNM()
return ppu_exec_select<Flags...>::template select<>();
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
ppu.gpr[op.ra] = dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), ppu.gpr[op.rb] & 0x1f)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
ppu.gpr[op.ra] = dup32(std::rotl<u32>(static_cast<u32>(ppu.gpr[op.rs]), ppu.gpr[op.rb] & 0x1f)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
};
@@ -3599,7 +3599,7 @@ auto RLDICL()
return ppu_exec_select<Flags...>::template select<>();
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull >> op.mbe64);
ppu.gpr[op.ra] = std::rotl<u64>(ppu.gpr[op.rs], op.sh64) & (~0ull >> op.mbe64);
if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
};
@@ -3613,7 +3613,7 @@ auto RLDICR()
return ppu_exec_select<Flags...>::template select<>();
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull << (op.mbe64 ^ 63));
ppu.gpr[op.ra] = std::rotl<u64>(ppu.gpr[op.rs], op.sh64) & (~0ull << (op.mbe64 ^ 63));
if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
};
@@ -3627,7 +3627,7 @@ auto RLDIC()
return ppu_exec_select<Flags...>::template select<>();
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
ppu.gpr[op.ra] = std::rotl<u64>(ppu.gpr[op.rs], op.sh64) & ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
};
@@ -3642,7 +3642,7 @@ auto RLDIMI()
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
const u64 mask = ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (utils::rol64(ppu.gpr[op.rs], op.sh64) & mask);
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (std::rotl<u64>(ppu.gpr[op.rs], op.sh64) & mask);
if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
};
@@ -3656,7 +3656,7 @@ auto RLDCL()
return ppu_exec_select<Flags...>::template select<>();
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull >> op.mbe64);
ppu.gpr[op.ra] = std::rotl<u64>(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull >> op.mbe64);
if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
};
@@ -3670,7 +3670,7 @@ auto RLDCR()
return ppu_exec_select<Flags...>::template select<>();
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull << (op.mbe64 ^ 63));
ppu.gpr[op.ra] = std::rotl<u64>(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull << (op.mbe64 ^ 63));
if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
};

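The 32-bit forms (RLWIMI/RLWINM/RLWNM) first rotate the low word, then replicate it into both halves of the 64-bit GPR before masking, which is what the dup32 call does. A standalone sketch of that pattern, with dup32 written out locally since it is an RPCS3-internal helper:

#include <bit>
#include <cstdint>

// Local stand-in for RPCS3's dup32: replicate a 32-bit value into both halves
// of a 64-bit word, matching how the rlwinm family defines its rotate result.
constexpr std::uint64_t dup32(std::uint32_t x)
{
    return static_cast<std::uint64_t>(x) << 32 | x;
}

constexpr std::uint64_t rlwinm(std::uint64_t rs, unsigned sh, std::uint64_t mask)
{
    return dup32(std::rotl(static_cast<std::uint32_t>(rs), static_cast<int>(sh))) & mask;
}

static_assert(rlwinm(0x80000001ull, 1, ~0ull) == 0x0000000300000003ull);

In the real code, ppu_rotate_mask supplies the mb..me mask; ~0ull stands in for it here.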

@@ -3212,7 +3212,7 @@ void spu_recompiler::ROTQBYI(spu_opcode_t op)
}
else if (s == 4 || s == 8 || s == 12)
{
c->pshufd(va, va, utils::rol8(0xE4, s / 2));
c->pshufd(va, va, std::rotl<u8>(0xE4, s / 2));
}
else if (utils::has_ssse3())
{

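0xE4 is the identity pshufd control byte (the four 2-bit selectors 3,2,1,0 packed as 0b11'10'01'00), so rotating it by s / 2 bits turns a byte rotation by 4, 8 or 12 into the equivalent dword shuffle, each 4-byte step moving the control byte by one 2-bit selector. The control bytes this produces, checked standalone (outside asmjit):

#include <bit>
#include <cstdint>

constexpr std::uint8_t identity_shuffle = 0xE4; // selectors 3,2,1,0

static_assert(std::rotl(identity_shuffle, 4 / 2) == 0x93);  // dword-granular rotate for s == 4
static_assert(std::rotl(identity_shuffle, 8 / 2) == 0x4E);  // s == 8: swaps the two 64-bit halves
static_assert(std::rotl(identity_shuffle, 12 / 2) == 0x39); // s == 12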

@@ -2333,7 +2333,7 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
// Search for BRSL LR and BRASL LR or BR
// TODO: BISL
const v128 inst = read_from_ptr<be_t<v128>>(ls.data(), i - base_addr);
const v128 cleared_i16 = gv_and32(inst, v128::from32p(utils::rol32(~0xffff, 7)));
const v128 cleared_i16 = gv_and32(inst, v128::from32p(std::rotl<u32>(~0xffff, 7)));
const v128 eq_brsl = gv_eq32(cleared_i16, v128::from32p(0x66u << 23));
const v128 eq_brasl = gv_eq32(cleared_i16, brasl_mask);
const v128 eq_br = gv_eq32(cleared_i16, v128::from32p(0x64u << 23));
@@ -5396,7 +5396,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
const usz block_tail = duplicate_positions[it_begin - it_tail];
// Check if the distance is precisely two times from the end
if (reg_state_it.size() - block_start != utils::rol64(reg_state_it.size() - block_tail, 1))
if (reg_state_it.size() - block_start != std::rotl<u64>(reg_state_it.size() - block_tail, 1))
{
continue;
}

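std::rotl<u32>(~0xffff, 7) builds the mask that clears an SPU branch instruction's 16-bit immediate field (bits 22..7) while keeping the 9-bit opcode (bits 31..23) and RT (bits 6..0), so BRSL, BRASL and BR can be matched against their opcode patterns regardless of branch target. Worked out:

#include <bit>
#include <cstdint>

// ~0xffff == 0xFFFF0000; rotating left by 7 moves the sixteen set bits onto
// positions 31..23 and 6..0, leaving the I16 field (bits 22..7) clear.
static_assert(std::rotl<std::uint32_t>(~0xffffu, 7) == 0xFF80007Fu);

// Masking a BRSL encoding keeps only the opcode bits compared above.
static_assert(((0x66u << 23) & 0xFF80007Fu) == (0x66u << 23));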

@@ -291,7 +291,7 @@ bool ROT(spu_thread& spu, spu_opcode_t op)
for (u32 i = 0; i < 4; i++)
{
spu.gpr[op.rt]._u32[i] = utils::rol32(a._u32[i], b._u32[i]);
spu.gpr[op.rt]._u32[i] = std::rotl<u32>(a._u32[i], b._u32[i]);
}
return true;
}
@@ -346,7 +346,7 @@ bool ROTH(spu_thread& spu, spu_opcode_t op)
for (u32 i = 0; i < 8; i++)
{
spu.gpr[op.rt]._u16[i] = utils::rol16(a._u16[i], b._u16[i]);
spu.gpr[op.rt]._u16[i] = std::rotl<u16>(a._u16[i], b._u16[i]);
}
return true;
}

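One behavioural detail: the removed utils::rol16/rol32 masked the rotate count themselves (n & 15, n & 31), while std::rotl is specified to reduce its count modulo the operand width, so the unmasked per-lane counts coming from the b register still rotate the same way. Two sanity checks:

#include <bit>
#include <cstdint>

// [bit.rotate]: the effective count is s % N, so 37 on a 16-bit lane == 5.
static_assert(std::rotl<std::uint16_t>(0x1234, 37) == std::rotl<std::uint16_t>(0x1234, 5));
static_assert(std::rotl<std::uint32_t>(1, 32) == 1u);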

@@ -3220,7 +3220,7 @@ plain_access:
bool spu_thread::do_dma_check(const spu_mfc_cmd& args)
{
const u32 mask = utils::rol32(1, args.tag);
const u32 mask = std::rotl<u32>(1, args.tag);
if (mfc_barrier & mask || (args.cmd & (MFC_BARRIER_MASK | MFC_FENCE_MASK) && mfc_fence & mask)) [[unlikely]]
{
@@ -3236,13 +3236,13 @@ bool spu_thread::do_dma_check(const spu_mfc_cmd& args)
if ((mfc_queue[i].cmd & ~0xc) == MFC_BARRIER_CMD)
{
mfc_barrier |= -1;
mfc_fence |= utils::rol32(1, mfc_queue[i].tag);
mfc_fence |= std::rotl<u32>(1, mfc_queue[i].tag);
continue;
}
if (true)
{
const u32 _mask = utils::rol32(1u, mfc_queue[i].tag);
const u32 _mask = std::rotl<u32>(1u, mfc_queue[i].tag);
// A command with barrier hard blocks that tag until it's been dealt with
if (mfc_queue[i].cmd & MFC_BARRIER_MASK)
@@ -3805,14 +3805,14 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
{
range_lock->release(0);
ch_stall_mask |= utils::rol32(1, args.tag);
ch_stall_mask |= std::rotl<u32>(1, args.tag);
if (!ch_stall_stat.get_count())
{
set_events(SPU_EVENT_SN);
}
ch_stall_stat.set_value(utils::rol32(1, args.tag) | ch_stall_stat.get_value());
ch_stall_stat.set_value(std::rotl<u32>(1, args.tag) | ch_stall_stat.get_value());
args.tag |= 0x80; // Set stalled status
args.eal = ::narrow<u32>(reinterpret_cast<const u8*>(item_ptr) - this->ls);
@@ -4271,7 +4271,7 @@ bool spu_thread::do_mfc(bool can_escape, bool must_finish)
auto process_command = [&](spu_mfc_cmd& args)
{
// Select tag bit in the tag mask or the stall mask
const u32 mask = utils::rol32(1, args.tag);
const u32 mask = std::rotl<u32>(1, args.tag);
if ((args.cmd & ~0xc) == MFC_BARRIER_CMD)
{
@@ -5373,7 +5373,7 @@ bool spu_thread::process_mfc_cmd()
std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.lsa & 0x3ff80), 128);
}
const u32 mask = utils::rol32(1, ch_mfc_cmd.tag);
const u32 mask = std::rotl<u32>(1, ch_mfc_cmd.tag);
if ((mfc_barrier | mfc_fence) & mask) [[unlikely]]
{
@@ -5428,11 +5428,11 @@ bool spu_thread::process_mfc_cmd()
}
mfc_queue[mfc_size++] = ch_mfc_cmd;
mfc_fence |= utils::rol32(1, ch_mfc_cmd.tag);
mfc_fence |= std::rotl<u32>(1, ch_mfc_cmd.tag);
if (ch_mfc_cmd.cmd & MFC_BARRIER_MASK)
{
mfc_barrier |= utils::rol32(1, ch_mfc_cmd.tag);
mfc_barrier |= std::rotl<u32>(1, ch_mfc_cmd.tag);
}
return true;
@@ -5481,11 +5481,11 @@ bool spu_thread::process_mfc_cmd()
}
mfc_size++;
mfc_fence |= utils::rol32(1, cmd.tag);
mfc_fence |= std::rotl<u32>(1, cmd.tag);
if (cmd.cmd & MFC_BARRIER_MASK)
{
mfc_barrier |= utils::rol32(1, cmd.tag);
mfc_barrier |= std::rotl<u32>(1, cmd.tag);
}
if (check_mfc_interrupts(pc + 4))
@@ -5511,7 +5511,7 @@ bool spu_thread::process_mfc_cmd()
{
mfc_queue[mfc_size++] = ch_mfc_cmd;
mfc_barrier |= -1;
mfc_fence |= utils::rol32(1, ch_mfc_cmd.tag);
mfc_fence |= std::rotl<u32>(1, ch_mfc_cmd.tag);
}
return true;
@@ -6872,7 +6872,7 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
value &= 0x1f;
// Reset stall status for specified tag
const u32 tag_mask = utils::rol32(1, value);
const u32 tag_mask = std::rotl<u32>(1, value);
if (ch_stall_mask & tag_mask)
{

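Throughout the MFC code, std::rotl<u32>(1, tag) selects the bit for a tag in a 32-bit mask; for tags 0..31 it is exactly 1u << tag, but written as a rotate it stays well defined even if a count of 32 ever appears. A small sketch of how the fence/barrier masks compose (hypothetical tag_bit helper, not an RPCS3 function):

#include <bit>
#include <cstdint>

constexpr std::uint32_t tag_bit(unsigned tag)
{
    return std::rotl<std::uint32_t>(1, static_cast<int>(tag)); // == 1u << tag for tag < 32
}

static_assert(tag_bit(0) == 0x00000001u);
static_assert(tag_bit(31) == 0x80000000u);

// e.g. a fence mask accumulating the tags of two queued commands:
static_assert((tag_bit(3) | tag_bit(7)) == 0x00000088u);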

@@ -139,7 +139,7 @@ namespace rsx
u32 bytes_read = 0;
// Find the next set bit after every iteration
for (int i = 0;; i = (std::countr_zero<u32>(utils::rol8(to_fetch, 0 - i - 1)) + i + 1) % 8)
for (int i = 0;; i = (std::countr_zero<u32>(std::rotl<u8>(to_fetch, 0 - i - 1)) + i + 1) % 8)
{
// If a reservation is being updated, try to load another
const auto& res = vm::reservation_acquire(addr1 + i * 128);

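The loop header above scans the set bits of the 8-bit to_fetch mask: rotating the mask so that the bit after the current index lands at position 0 and counting trailing zeros yields the next set bit, modulo 8 (std::rotl with the negative count 0 - i - 1 is the same as std::rotr by i + 1). A standalone version of that scan step, assuming a plain, non-zero byte mask:

#include <bit>
#include <cstdint>

// Next set bit of an 8-bit mask strictly after position i, wrapping around.
// Assumes mask has at least one bit set.
constexpr int next_set_bit(std::uint8_t mask, int i)
{
    return (std::countr_zero(std::rotr(mask, i + 1)) + i + 1) % 8;
}

static_assert(next_set_bit(0b0010'0100, 2) == 5); // after bit 2 comes bit 5
static_assert(next_set_bit(0b0010'0100, 5) == 2); // and then it wraps back to bit 2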

@@ -2856,7 +2856,7 @@ namespace rsx
for (u32 ea = address >> 20, end = ea + (size >> 20); ea < end; ea++)
{
const u32 io = utils::rol32(iomap_table.io[ea], 32 - 20);
const u32 io = std::rotl<u32>(iomap_table.io[ea], 32 - 20);
if (io + 1)
{
@@ -2886,7 +2886,7 @@ namespace rsx
while (to_unmap)
{
bit = (std::countr_zero<u64>(utils::rol64(to_unmap, 0 - bit)) + bit);
bit = (std::countr_zero<u64>(std::rotl<u64>(to_unmap, 0 - bit)) + bit);
to_unmap &= ~(1ull << bit);
constexpr u16 null_entry = 0xFFFF;

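Two <bit> idioms meet in this file: std::rotl by 32 - 20 is effectively a 20-bit right rotate of the io table entry (the table is indexed per megabyte, as the ea loop above shows), and the second hunk reuses the rotate-plus-countr_zero scan from the previous file, this time over a 64-bit unmap mask. The rotate identity, for reference:

#include <bit>
#include <cstdint>

// Rotating left by (width - n) equals rotating right by n.
static_assert(std::rotl(0xABCDE123u, 32 - 20) == std::rotr(0xABCDE123u, 20));

// Unlike a shift, the rotate keeps an all-ones entry all-ones,
// so the io + 1 test above still filters it out.
static_assert(std::rotl(0xFFFFFFFFu, 32 - 20) == 0xFFFFFFFFu);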

@@ -8,30 +8,12 @@
extern bool g_use_rtm;
extern u64 g_rtm_tx_limit1;
#ifdef _M_X64
#ifdef ARCH_X64
#ifdef _MSC_VER
extern "C"
{
u32 _xbegin();
void _xend();
void _mm_pause();
void _mm_prefetch(const char*, int);
void _m_prefetchw(const volatile void*);
uchar _rotl8(uchar, uchar);
ushort _rotl16(ushort, uchar);
u64 __popcnt64(u64);
s64 __mulh(s64, s64);
u64 __umulh(u64, u64);
s64 _div128(s64, s64, s64, s64*);
u64 _udiv128(u64, u64, u64, u64*);
void __debugbreak();
}
#include <intrin.h>
#else
#include <immintrin.h>
#include <x86intrin.h>
#endif
#endif
@@ -113,7 +95,7 @@ namespace utils
const u64 value = reinterpret_cast<u64>(func);
const void* ptr = reinterpret_cast<const void*>(value);
#ifdef _M_X64
#ifdef ARCH_X64
return _mm_prefetch(static_cast<const char*>(ptr), _MM_HINT_T1);
#else
return __builtin_prefetch(ptr, 0, 2);
@@ -128,7 +110,7 @@ namespace utils
return;
}
#ifdef _M_X64
#ifdef ARCH_X64
return _mm_prefetch(static_cast<const char*>(ptr), _MM_HINT_T0);
#else
return __builtin_prefetch(ptr, 0, 3);
@@ -142,110 +124,19 @@ namespace utils
return;
}
#if defined(_M_X64) && !defined(__clang__)
return _m_prefetchw(ptr);
#if defined(ARCH_X64)
return _m_prefetchw(const_cast<void*>(ptr));
#else
return __builtin_prefetch(ptr, 1, 0);
#endif
}
constexpr u8 rol8(u8 x, u8 n)
{
if (std::is_constant_evaluated())
{
return (x << (n & 7)) | (x >> ((-n & 7)));
}
#ifdef _MSC_VER
return _rotl8(x, n);
#elif defined(__clang__)
return __builtin_rotateleft8(x, n);
#elif defined(ARCH_X64)
return __builtin_ia32_rolqi(x, n);
#else
return (x << (n & 7)) | (x >> ((-n & 7)));
#endif
}
constexpr u16 rol16(u16 x, u16 n)
{
if (std::is_constant_evaluated())
{
return (x << (n & 15)) | (x >> ((-n & 15)));
}
#ifdef _MSC_VER
return _rotl16(x, static_cast<uchar>(n));
#elif defined(__clang__)
return __builtin_rotateleft16(x, n);
#elif defined(ARCH_X64)
return __builtin_ia32_rolhi(x, n);
#else
return (x << (n & 15)) | (x >> ((-n & 15)));
#endif
}
constexpr u32 rol32(u32 x, u32 n)
{
if (std::is_constant_evaluated())
{
return (x << (n & 31)) | (x >> (((0 - n) & 31)));
}
#ifdef _MSC_VER
return _rotl(x, n);
#elif defined(__clang__)
return __builtin_rotateleft32(x, n);
#else
return (x << (n & 31)) | (x >> (((0 - n) & 31)));
#endif
}
constexpr u64 rol64(u64 x, u64 n)
{
if (std::is_constant_evaluated())
{
return (x << (n & 63)) | (x >> (((0 - n) & 63)));
}
#ifdef _MSC_VER
return _rotl64(x, static_cast<int>(n));
#elif defined(__clang__)
return __builtin_rotateleft64(x, n);
#else
return (x << (n & 63)) | (x >> (((0 - n) & 63)));
#endif
}
constexpr u32 popcnt64(u64 v)
{
#if !defined(_MSC_VER) || defined(__SSE4_2__)
if (std::is_constant_evaluated())
#endif
{
v = (v & 0xaaaaaaaaaaaaaaaa) / 2 + (v & 0x5555555555555555);
v = (v & 0xcccccccccccccccc) / 4 + (v & 0x3333333333333333);
v = (v & 0xf0f0f0f0f0f0f0f0) / 16 + (v & 0x0f0f0f0f0f0f0f0f);
v = (v & 0xff00ff00ff00ff00) / 256 + (v & 0x00ff00ff00ff00ff);
v = ((v & 0xffff0000ffff0000) >> 16) + (v & 0x0000ffff0000ffff);
return static_cast<u32>((v >> 32) + v);
}
#if !defined(_MSC_VER) || defined(__SSE4_2__)
#ifdef _MSC_VER
return static_cast<u32>(__popcnt64(v));
#else
return __builtin_popcountll(v);
#endif
#endif
}
constexpr u32 popcnt128(const u128& v)
{
#ifdef _MSC_VER
return popcnt64(v.lo) + popcnt64(v.hi);
return std::popcount(v.lo) + std::popcount(v.hi);
#else
return popcnt64(v) + popcnt64(v >> 64);
return std::popcount(v);
#endif
}
@@ -332,10 +223,7 @@ namespace utils
else
return std::countr_zero(arg.lo);
#else
if (u64 lo = static_cast<u64>(arg))
return std::countr_zero<u64>(lo);
else
return std::countr_zero<u64>(arg >> 64) + 64;
return std::countr_zero(arg);
#endif
}
@@ -347,10 +235,7 @@ namespace utils
else
return std::countl_zero(arg.lo) + 64;
#else
if (u64 hi = static_cast<u64>(arg >> 64))
return std::countl_zero<u64>(hi);
else
return std::countl_zero<u64>(arg) + 64;
return std::countl_zero(arg);
#endif
}
@@ -358,10 +243,8 @@ namespace utils
{
#if defined(ARCH_ARM64)
__asm__ volatile("yield");
#elif defined(_M_X64)
_mm_pause();
#elif defined(ARCH_X64)
__builtin_ia32_pause();
_mm_pause();
#else
#error "Missing utils::pause() implementation"
#endif

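The asm.hpp hunks are where the custom helpers actually disappear: the MSVC block of hand-declared intrinsics is replaced by <intrin.h>, rol8/rol16/rol32/rol64 and popcnt64 are deleted in favour of std::rotl and std::popcount, and the u128 bit scans on non-MSVC builds collapse onto std::countr_zero / std::countl_zero now that the argument is the compiler's unsigned __int128. A condensed, GCC/Clang-only sketch of what the u128 paths reduce to (hypothetical wrapper names; it relies on the standard library accepting unsigned __int128, as the non-MSVC branch in the diff does):

#include <bit>

using u128_t = unsigned __int128; // stand-in for RPCS3's u128 on GCC/Clang

constexpr int ctz128(u128_t x) { return std::countr_zero(x); }
constexpr int clz128(u128_t x) { return std::countl_zero(x); }
constexpr int popcnt128(u128_t x) { return std::popcount(x); }

static_assert(ctz128(u128_t{1} << 70) == 70);
static_assert(clz128(u128_t{1}) == 127);
static_assert(popcnt128(~u128_t{0}) == 128);

On MSVC, where u128 is a two-member struct, popcnt128 becomes std::popcount(v.lo) + std::popcount(v.hi) as the diff shows, and the bit scans keep their lo/hi split.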

@@ -2986,7 +2986,7 @@ inline v128 gv_rol16(const v128& a, const v128& b)
#else
v128 r;
for (u32 i = 0; i < 8; i++)
r._u16[i] = utils::rol16(a._u16[i], b._u16[i]);
r._u16[i] = std::rotl<u16>(a._u16[i], b._u16[i]);
return r;
#endif
}
@@ -3020,7 +3020,7 @@ inline v128 gv_rol32(const v128& a, const v128& b)
#else
v128 r;
for (u32 i = 0; i < 4; i++)
r._u32[i] = utils::rol32(a._u32[i], b._u32[i]);
r._u32[i] = std::rotl<u32>(a._u32[i], b._u32[i]);
return r;
#endif
}
@@ -3039,7 +3039,7 @@ inline v128 gv_rol32(const v128& a)
#else
v128 r;
for (u32 i = 0; i < 4; i++)
r._u32[i] = utils::rol32(a._u32[i], count);
r._u32[i] = std::rotl<u32>(a._u32[i], count);
return r;
#endif
}
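These gv_rol fallbacks are the scalar path taken when no SIMD rotate is available; after the change they simply apply std::rotl lane by lane. A minimal standalone version of such a per-lane fallback (hypothetical vec4u32 type, not RPCS3's v128):

#include <array>
#include <bit>
#include <cstdint>

using vec4u32 = std::array<std::uint32_t, 4>; // stand-in for a 4-lane vector

// Per-lane variable rotate, the same shape as the gv_rol32 fallback loop.
constexpr vec4u32 rol32_lanes(const vec4u32& a, const vec4u32& b)
{
    vec4u32 r{};
    for (std::size_t i = 0; i < 4; i++)
        r[i] = std::rotl(a[i], static_cast<int>(b[i] & 31));
    return r;
}

static_assert(rol32_lanes({1u, 2u, 0x80000000u, 0xFu}, {1u, 4u, 1u, 0u})[2] == 1u);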