asm.hpp: replace custom functions with C++20 functions
Some checks are pending
Generate Translation Template / Generate Translation Template (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux-aarch64.sh, gcc, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux.sh, gcc, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (a1d35836e8d45bfc6f63c26f0a3e5d46ef622fe1, rpcs3/rpcs3-binaries-linux-arm64, /rpcs3/.ci/build-linux-aarch64.sh, clang, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (d812f1254a1157c80fd402f94446310560f54e5f, rpcs3/rpcs3-binaries-linux, /rpcs3/.ci/build-linux.sh, clang, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (51ae32f468089a8169aaf1567de355ff4a3e0842, rpcs3/rpcs3-binaries-mac, .ci/build-mac.sh, Intel) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (8e21bdbc40711a3fccd18fbf17b742348b0f4281, rpcs3/rpcs3-binaries-mac-arm64, .ci/build-mac-arm64.sh, Apple Silicon) (push) Waiting to run
Build RPCS3 / RPCS3 Windows (push) Waiting to run
Build RPCS3 / RPCS3 Windows Clang (win64, clang, clang64) (push) Waiting to run
Build RPCS3 / RPCS3 FreeBSD (push) Waiting to run

This commit is contained in:
oltolm 2025-06-06 19:34:51 +02:00 committed by Elad
parent b8031f4510
commit a93197cdcb
10 changed files with 46 additions and 163 deletions

View file

@ -221,7 +221,7 @@ std::pair<PPUDisAsm::const_op, u64> PPUDisAsm::try_get_const_op_gpr_value(u32 re
GET_CONST_REG(reg_rs, op.rs); GET_CONST_REG(reg_rs, op.rs);
return { form, utils::rol64(reg_rs, op.sh64) & (~0ull << (op.mbe64 ^ 63)) }; return {form, std::rotl<u64>(reg_rs, op.sh64) & (~0ull << (op.mbe64 ^ 63))};
} }
case ppu_itype::OR: case ppu_itype::OR:
{ {

View file

@ -3483,7 +3483,7 @@ auto RLWIMI()
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) { static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
const u64 mask = ppu_rotate_mask(32 + op.mb32, 32 + op.me32); const u64 mask = ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & mask); ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (dup32(std::rotl<u32>(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & mask);
if constexpr (((Flags == has_rc) || ...)) if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0); ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
}; };
@ -3497,7 +3497,7 @@ auto RLWINM()
return ppu_exec_select<Flags...>::template select<>(); return ppu_exec_select<Flags...>::template select<>();
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) { static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
ppu.gpr[op.ra] = dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32); ppu.gpr[op.ra] = dup32(std::rotl<u32>(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
if constexpr (((Flags == has_rc) || ...)) if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0); ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
}; };
@ -3511,7 +3511,7 @@ auto RLWNM()
return ppu_exec_select<Flags...>::template select<>(); return ppu_exec_select<Flags...>::template select<>();
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) { static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
ppu.gpr[op.ra] = dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), ppu.gpr[op.rb] & 0x1f)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32); ppu.gpr[op.ra] = dup32(std::rotl<u32>(static_cast<u32>(ppu.gpr[op.rs]), ppu.gpr[op.rb] & 0x1f)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
if constexpr (((Flags == has_rc) || ...)) if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0); ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
}; };
@ -3599,7 +3599,7 @@ auto RLDICL()
return ppu_exec_select<Flags...>::template select<>(); return ppu_exec_select<Flags...>::template select<>();
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) { static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull >> op.mbe64); ppu.gpr[op.ra] = std::rotl<u64>(ppu.gpr[op.rs], op.sh64) & (~0ull >> op.mbe64);
if constexpr (((Flags == has_rc) || ...)) if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0); ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
}; };
@ -3613,7 +3613,7 @@ auto RLDICR()
return ppu_exec_select<Flags...>::template select<>(); return ppu_exec_select<Flags...>::template select<>();
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) { static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull << (op.mbe64 ^ 63)); ppu.gpr[op.ra] = std::rotl<u64>(ppu.gpr[op.rs], op.sh64) & (~0ull << (op.mbe64 ^ 63));
if constexpr (((Flags == has_rc) || ...)) if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0); ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
}; };
@ -3627,7 +3627,7 @@ auto RLDIC()
return ppu_exec_select<Flags...>::template select<>(); return ppu_exec_select<Flags...>::template select<>();
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) { static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & ppu_rotate_mask(op.mbe64, op.sh64 ^ 63); ppu.gpr[op.ra] = std::rotl<u64>(ppu.gpr[op.rs], op.sh64) & ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
if constexpr (((Flags == has_rc) || ...)) if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0); ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
}; };
@ -3642,7 +3642,7 @@ auto RLDIMI()
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) { static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
const u64 mask = ppu_rotate_mask(op.mbe64, op.sh64 ^ 63); const u64 mask = ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (utils::rol64(ppu.gpr[op.rs], op.sh64) & mask); ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (std::rotl<u64>(ppu.gpr[op.rs], op.sh64) & mask);
if constexpr (((Flags == has_rc) || ...)) if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0); ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
}; };
@ -3656,7 +3656,7 @@ auto RLDCL()
return ppu_exec_select<Flags...>::template select<>(); return ppu_exec_select<Flags...>::template select<>();
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) { static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull >> op.mbe64); ppu.gpr[op.ra] = std::rotl<u64>(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull >> op.mbe64);
if constexpr (((Flags == has_rc) || ...)) if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0); ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
}; };
@ -3670,7 +3670,7 @@ auto RLDCR()
return ppu_exec_select<Flags...>::template select<>(); return ppu_exec_select<Flags...>::template select<>();
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) { static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull << (op.mbe64 ^ 63)); ppu.gpr[op.ra] = std::rotl<u64>(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull << (op.mbe64 ^ 63));
if constexpr (((Flags == has_rc) || ...)) if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0); ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
}; };

View file

@ -3212,7 +3212,7 @@ void spu_recompiler::ROTQBYI(spu_opcode_t op)
} }
else if (s == 4 || s == 8 || s == 12) else if (s == 4 || s == 8 || s == 12)
{ {
c->pshufd(va, va, utils::rol8(0xE4, s / 2)); c->pshufd(va, va, std::rotl<u8>(0xE4, s / 2));
} }
else if (utils::has_ssse3()) else if (utils::has_ssse3())
{ {

View file

@ -2333,7 +2333,7 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
// Search for BRSL LR and BRASL LR or BR // Search for BRSL LR and BRASL LR or BR
// TODO: BISL // TODO: BISL
const v128 inst = read_from_ptr<be_t<v128>>(ls.data(), i - base_addr); const v128 inst = read_from_ptr<be_t<v128>>(ls.data(), i - base_addr);
const v128 cleared_i16 = gv_and32(inst, v128::from32p(utils::rol32(~0xffff, 7))); const v128 cleared_i16 = gv_and32(inst, v128::from32p(std::rotl<u32>(~0xffff, 7)));
const v128 eq_brsl = gv_eq32(cleared_i16, v128::from32p(0x66u << 23)); const v128 eq_brsl = gv_eq32(cleared_i16, v128::from32p(0x66u << 23));
const v128 eq_brasl = gv_eq32(cleared_i16, brasl_mask); const v128 eq_brasl = gv_eq32(cleared_i16, brasl_mask);
const v128 eq_br = gv_eq32(cleared_i16, v128::from32p(0x64u << 23)); const v128 eq_br = gv_eq32(cleared_i16, v128::from32p(0x64u << 23));
@ -5396,7 +5396,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
const usz block_tail = duplicate_positions[it_begin - it_tail]; const usz block_tail = duplicate_positions[it_begin - it_tail];
// Check if the distance is precisely two times from the end // Check if the distance is precisely two times from the end
if (reg_state_it.size() - block_start != utils::rol64(reg_state_it.size() - block_tail, 1)) if (reg_state_it.size() - block_start != std::rotl<u64>(reg_state_it.size() - block_tail, 1))
{ {
continue; continue;
} }

View file

@ -291,7 +291,7 @@ bool ROT(spu_thread& spu, spu_opcode_t op)
for (u32 i = 0; i < 4; i++) for (u32 i = 0; i < 4; i++)
{ {
spu.gpr[op.rt]._u32[i] = utils::rol32(a._u32[i], b._u32[i]); spu.gpr[op.rt]._u32[i] = std::rotl<u32>(a._u32[i], b._u32[i]);
} }
return true; return true;
} }
@ -346,7 +346,7 @@ bool ROTH(spu_thread& spu, spu_opcode_t op)
for (u32 i = 0; i < 8; i++) for (u32 i = 0; i < 8; i++)
{ {
spu.gpr[op.rt]._u16[i] = utils::rol16(a._u16[i], b._u16[i]); spu.gpr[op.rt]._u16[i] = std::rotl<u16>(a._u16[i], b._u16[i]);
} }
return true; return true;
} }

View file

@ -3220,7 +3220,7 @@ plain_access:
bool spu_thread::do_dma_check(const spu_mfc_cmd& args) bool spu_thread::do_dma_check(const spu_mfc_cmd& args)
{ {
const u32 mask = utils::rol32(1, args.tag); const u32 mask = std::rotl<u32>(1, args.tag);
if (mfc_barrier & mask || (args.cmd & (MFC_BARRIER_MASK | MFC_FENCE_MASK) && mfc_fence & mask)) [[unlikely]] if (mfc_barrier & mask || (args.cmd & (MFC_BARRIER_MASK | MFC_FENCE_MASK) && mfc_fence & mask)) [[unlikely]]
{ {
@ -3236,13 +3236,13 @@ bool spu_thread::do_dma_check(const spu_mfc_cmd& args)
if ((mfc_queue[i].cmd & ~0xc) == MFC_BARRIER_CMD) if ((mfc_queue[i].cmd & ~0xc) == MFC_BARRIER_CMD)
{ {
mfc_barrier |= -1; mfc_barrier |= -1;
mfc_fence |= utils::rol32(1, mfc_queue[i].tag); mfc_fence |= std::rotl<u32>(1, mfc_queue[i].tag);
continue; continue;
} }
if (true) if (true)
{ {
const u32 _mask = utils::rol32(1u, mfc_queue[i].tag); const u32 _mask = std::rotl<u32>(1u, mfc_queue[i].tag);
// A command with barrier hard blocks that tag until it's been dealt with // A command with barrier hard blocks that tag until it's been dealt with
if (mfc_queue[i].cmd & MFC_BARRIER_MASK) if (mfc_queue[i].cmd & MFC_BARRIER_MASK)
@ -3805,14 +3805,14 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
{ {
range_lock->release(0); range_lock->release(0);
ch_stall_mask |= utils::rol32(1, args.tag); ch_stall_mask |= std::rotl<u32>(1, args.tag);
if (!ch_stall_stat.get_count()) if (!ch_stall_stat.get_count())
{ {
set_events(SPU_EVENT_SN); set_events(SPU_EVENT_SN);
} }
ch_stall_stat.set_value(utils::rol32(1, args.tag) | ch_stall_stat.get_value()); ch_stall_stat.set_value(std::rotl<u32>(1, args.tag) | ch_stall_stat.get_value());
args.tag |= 0x80; // Set stalled status args.tag |= 0x80; // Set stalled status
args.eal = ::narrow<u32>(reinterpret_cast<const u8*>(item_ptr) - this->ls); args.eal = ::narrow<u32>(reinterpret_cast<const u8*>(item_ptr) - this->ls);
@ -4271,7 +4271,7 @@ bool spu_thread::do_mfc(bool can_escape, bool must_finish)
auto process_command = [&](spu_mfc_cmd& args) auto process_command = [&](spu_mfc_cmd& args)
{ {
// Select tag bit in the tag mask or the stall mask // Select tag bit in the tag mask or the stall mask
const u32 mask = utils::rol32(1, args.tag); const u32 mask = std::rotl<u32>(1, args.tag);
if ((args.cmd & ~0xc) == MFC_BARRIER_CMD) if ((args.cmd & ~0xc) == MFC_BARRIER_CMD)
{ {
@ -5373,7 +5373,7 @@ bool spu_thread::process_mfc_cmd()
std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.lsa & 0x3ff80), 128); std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.lsa & 0x3ff80), 128);
} }
const u32 mask = utils::rol32(1, ch_mfc_cmd.tag); const u32 mask = std::rotl<u32>(1, ch_mfc_cmd.tag);
if ((mfc_barrier | mfc_fence) & mask) [[unlikely]] if ((mfc_barrier | mfc_fence) & mask) [[unlikely]]
{ {
@ -5428,11 +5428,11 @@ bool spu_thread::process_mfc_cmd()
} }
mfc_queue[mfc_size++] = ch_mfc_cmd; mfc_queue[mfc_size++] = ch_mfc_cmd;
mfc_fence |= utils::rol32(1, ch_mfc_cmd.tag); mfc_fence |= std::rotl<u32>(1, ch_mfc_cmd.tag);
if (ch_mfc_cmd.cmd & MFC_BARRIER_MASK) if (ch_mfc_cmd.cmd & MFC_BARRIER_MASK)
{ {
mfc_barrier |= utils::rol32(1, ch_mfc_cmd.tag); mfc_barrier |= std::rotl<u32>(1, ch_mfc_cmd.tag);
} }
return true; return true;
@ -5481,11 +5481,11 @@ bool spu_thread::process_mfc_cmd()
} }
mfc_size++; mfc_size++;
mfc_fence |= utils::rol32(1, cmd.tag); mfc_fence |= std::rotl<u32>(1, cmd.tag);
if (cmd.cmd & MFC_BARRIER_MASK) if (cmd.cmd & MFC_BARRIER_MASK)
{ {
mfc_barrier |= utils::rol32(1, cmd.tag); mfc_barrier |= std::rotl<u32>(1, cmd.tag);
} }
if (check_mfc_interrupts(pc + 4)) if (check_mfc_interrupts(pc + 4))
@ -5511,7 +5511,7 @@ bool spu_thread::process_mfc_cmd()
{ {
mfc_queue[mfc_size++] = ch_mfc_cmd; mfc_queue[mfc_size++] = ch_mfc_cmd;
mfc_barrier |= -1; mfc_barrier |= -1;
mfc_fence |= utils::rol32(1, ch_mfc_cmd.tag); mfc_fence |= std::rotl<u32>(1, ch_mfc_cmd.tag);
} }
return true; return true;
@ -6872,7 +6872,7 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
value &= 0x1f; value &= 0x1f;
// Reset stall status for specified tag // Reset stall status for specified tag
const u32 tag_mask = utils::rol32(1, value); const u32 tag_mask = std::rotl<u32>(1, value);
if (ch_stall_mask & tag_mask) if (ch_stall_mask & tag_mask)
{ {

View file

@ -139,7 +139,7 @@ namespace rsx
u32 bytes_read = 0; u32 bytes_read = 0;
// Find the next set bit after every iteration // Find the next set bit after every iteration
for (int i = 0;; i = (std::countr_zero<u32>(utils::rol8(to_fetch, 0 - i - 1)) + i + 1) % 8) for (int i = 0;; i = (std::countr_zero<u32>(std::rotl<u8>(to_fetch, 0 - i - 1)) + i + 1) % 8)
{ {
// If a reservation is being updated, try to load another // If a reservation is being updated, try to load another
const auto& res = vm::reservation_acquire(addr1 + i * 128); const auto& res = vm::reservation_acquire(addr1 + i * 128);

View file

@ -2856,7 +2856,7 @@ namespace rsx
for (u32 ea = address >> 20, end = ea + (size >> 20); ea < end; ea++) for (u32 ea = address >> 20, end = ea + (size >> 20); ea < end; ea++)
{ {
const u32 io = utils::rol32(iomap_table.io[ea], 32 - 20); const u32 io = std::rotl<u32>(iomap_table.io[ea], 32 - 20);
if (io + 1) if (io + 1)
{ {
@ -2886,7 +2886,7 @@ namespace rsx
while (to_unmap) while (to_unmap)
{ {
bit = (std::countr_zero<u64>(utils::rol64(to_unmap, 0 - bit)) + bit); bit = (std::countr_zero<u64>(std::rotl<u64>(to_unmap, 0 - bit)) + bit);
to_unmap &= ~(1ull << bit); to_unmap &= ~(1ull << bit);
constexpr u16 null_entry = 0xFFFF; constexpr u16 null_entry = 0xFFFF;

View file

@ -8,30 +8,12 @@
extern bool g_use_rtm; extern bool g_use_rtm;
extern u64 g_rtm_tx_limit1; extern u64 g_rtm_tx_limit1;
#ifdef _M_X64 #ifdef ARCH_X64
#ifdef _MSC_VER #ifdef _MSC_VER
extern "C"
{
u32 _xbegin();
void _xend();
void _mm_pause();
void _mm_prefetch(const char*, int);
void _m_prefetchw(const volatile void*);
uchar _rotl8(uchar, uchar);
ushort _rotl16(ushort, uchar);
u64 __popcnt64(u64);
s64 __mulh(s64, s64);
u64 __umulh(u64, u64);
s64 _div128(s64, s64, s64, s64*);
u64 _udiv128(u64, u64, u64, u64*);
void __debugbreak();
}
#include <intrin.h> #include <intrin.h>
#else #else
#include <immintrin.h> #include <immintrin.h>
#include <x86intrin.h>
#endif #endif
#endif #endif
@ -113,7 +95,7 @@ namespace utils
const u64 value = reinterpret_cast<u64>(func); const u64 value = reinterpret_cast<u64>(func);
const void* ptr = reinterpret_cast<const void*>(value); const void* ptr = reinterpret_cast<const void*>(value);
#ifdef _M_X64 #ifdef ARCH_X64
return _mm_prefetch(static_cast<const char*>(ptr), _MM_HINT_T1); return _mm_prefetch(static_cast<const char*>(ptr), _MM_HINT_T1);
#else #else
return __builtin_prefetch(ptr, 0, 2); return __builtin_prefetch(ptr, 0, 2);
@ -128,7 +110,7 @@ namespace utils
return; return;
} }
#ifdef _M_X64 #ifdef ARCH_X64
return _mm_prefetch(static_cast<const char*>(ptr), _MM_HINT_T0); return _mm_prefetch(static_cast<const char*>(ptr), _MM_HINT_T0);
#else #else
return __builtin_prefetch(ptr, 0, 3); return __builtin_prefetch(ptr, 0, 3);
@ -142,110 +124,19 @@ namespace utils
return; return;
} }
#if defined(_M_X64) && !defined(__clang__) #if defined(ARCH_X64)
return _m_prefetchw(ptr); return _m_prefetchw(const_cast<void*>(ptr));
#else #else
return __builtin_prefetch(ptr, 1, 0); return __builtin_prefetch(ptr, 1, 0);
#endif #endif
} }
constexpr u8 rol8(u8 x, u8 n)
{
if (std::is_constant_evaluated())
{
return (x << (n & 7)) | (x >> ((-n & 7)));
}
#ifdef _MSC_VER
return _rotl8(x, n);
#elif defined(__clang__)
return __builtin_rotateleft8(x, n);
#elif defined(ARCH_X64)
return __builtin_ia32_rolqi(x, n);
#else
return (x << (n & 7)) | (x >> ((-n & 7)));
#endif
}
constexpr u16 rol16(u16 x, u16 n)
{
if (std::is_constant_evaluated())
{
return (x << (n & 15)) | (x >> ((-n & 15)));
}
#ifdef _MSC_VER
return _rotl16(x, static_cast<uchar>(n));
#elif defined(__clang__)
return __builtin_rotateleft16(x, n);
#elif defined(ARCH_X64)
return __builtin_ia32_rolhi(x, n);
#else
return (x << (n & 15)) | (x >> ((-n & 15)));
#endif
}
constexpr u32 rol32(u32 x, u32 n)
{
if (std::is_constant_evaluated())
{
return (x << (n & 31)) | (x >> (((0 - n) & 31)));
}
#ifdef _MSC_VER
return _rotl(x, n);
#elif defined(__clang__)
return __builtin_rotateleft32(x, n);
#else
return (x << (n & 31)) | (x >> (((0 - n) & 31)));
#endif
}
constexpr u64 rol64(u64 x, u64 n)
{
if (std::is_constant_evaluated())
{
return (x << (n & 63)) | (x >> (((0 - n) & 63)));
}
#ifdef _MSC_VER
return _rotl64(x, static_cast<int>(n));
#elif defined(__clang__)
return __builtin_rotateleft64(x, n);
#else
return (x << (n & 63)) | (x >> (((0 - n) & 63)));
#endif
}
constexpr u32 popcnt64(u64 v)
{
#if !defined(_MSC_VER) || defined(__SSE4_2__)
if (std::is_constant_evaluated())
#endif
{
v = (v & 0xaaaaaaaaaaaaaaaa) / 2 + (v & 0x5555555555555555);
v = (v & 0xcccccccccccccccc) / 4 + (v & 0x3333333333333333);
v = (v & 0xf0f0f0f0f0f0f0f0) / 16 + (v & 0x0f0f0f0f0f0f0f0f);
v = (v & 0xff00ff00ff00ff00) / 256 + (v & 0x00ff00ff00ff00ff);
v = ((v & 0xffff0000ffff0000) >> 16) + (v & 0x0000ffff0000ffff);
return static_cast<u32>((v >> 32) + v);
}
#if !defined(_MSC_VER) || defined(__SSE4_2__)
#ifdef _MSC_VER
return static_cast<u32>(__popcnt64(v));
#else
return __builtin_popcountll(v);
#endif
#endif
}
constexpr u32 popcnt128(const u128& v) constexpr u32 popcnt128(const u128& v)
{ {
#ifdef _MSC_VER #ifdef _MSC_VER
return popcnt64(v.lo) + popcnt64(v.hi); return std::popcount(v.lo) + std::popcount(v.hi);
#else #else
return popcnt64(v) + popcnt64(v >> 64); return std::popcount(v);
#endif #endif
} }
@ -332,10 +223,7 @@ namespace utils
else else
return std::countr_zero(arg.lo); return std::countr_zero(arg.lo);
#else #else
if (u64 lo = static_cast<u64>(arg)) return std::countr_zero(arg);
return std::countr_zero<u64>(lo);
else
return std::countr_zero<u64>(arg >> 64) + 64;
#endif #endif
} }
@ -347,10 +235,7 @@ namespace utils
else else
return std::countl_zero(arg.lo) + 64; return std::countl_zero(arg.lo) + 64;
#else #else
if (u64 hi = static_cast<u64>(arg >> 64)) return std::countl_zero(arg);
return std::countl_zero<u64>(hi);
else
return std::countl_zero<u64>(arg) + 64;
#endif #endif
} }
@ -358,10 +243,8 @@ namespace utils
{ {
#if defined(ARCH_ARM64) #if defined(ARCH_ARM64)
__asm__ volatile("yield"); __asm__ volatile("yield");
#elif defined(_M_X64)
_mm_pause();
#elif defined(ARCH_X64) #elif defined(ARCH_X64)
__builtin_ia32_pause(); _mm_pause();
#else #else
#error "Missing utils::pause() implementation" #error "Missing utils::pause() implementation"
#endif #endif

View file

@ -2986,7 +2986,7 @@ inline v128 gv_rol16(const v128& a, const v128& b)
#else #else
v128 r; v128 r;
for (u32 i = 0; i < 8; i++) for (u32 i = 0; i < 8; i++)
r._u16[i] = utils::rol16(a._u16[i], b._u16[i]); r._u16[i] = std::rotl<u16>(a._u16[i], b._u16[i]);
return r; return r;
#endif #endif
} }
@ -3020,7 +3020,7 @@ inline v128 gv_rol32(const v128& a, const v128& b)
#else #else
v128 r; v128 r;
for (u32 i = 0; i < 4; i++) for (u32 i = 0; i < 4; i++)
r._u32[i] = utils::rol32(a._u32[i], b._u32[i]); r._u32[i] = std::rotl<u32>(a._u32[i], b._u32[i]);
return r; return r;
#endif #endif
} }
@ -3039,7 +3039,7 @@ inline v128 gv_rol32(const v128& a)
#else #else
v128 r; v128 r;
for (u32 i = 0; i < 4; i++) for (u32 i = 0; i < 4; i++)
r._u32[i] = utils::rol32(a._u32[i], count); r._u32[i] = std::rotl<u32>(a._u32[i], count);
return r; return r;
#endif #endif
} }