mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-12-06 07:12:28 +01:00
asm.hpp: replace custom functions with C++20 functions
Some checks are pending
Generate Translation Template / Generate Translation Template (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux-aarch64.sh, gcc, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux.sh, gcc, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (a1d35836e8d45bfc6f63c26f0a3e5d46ef622fe1, rpcs3/rpcs3-binaries-linux-arm64, /rpcs3/.ci/build-linux-aarch64.sh, clang, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (d812f1254a1157c80fd402f94446310560f54e5f, rpcs3/rpcs3-binaries-linux, /rpcs3/.ci/build-linux.sh, clang, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (51ae32f468089a8169aaf1567de355ff4a3e0842, rpcs3/rpcs3-binaries-mac, .ci/build-mac.sh, Intel) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (8e21bdbc40711a3fccd18fbf17b742348b0f4281, rpcs3/rpcs3-binaries-mac-arm64, .ci/build-mac-arm64.sh, Apple Silicon) (push) Waiting to run
Build RPCS3 / RPCS3 Windows (push) Waiting to run
Build RPCS3 / RPCS3 Windows Clang (win64, clang, clang64) (push) Waiting to run
Build RPCS3 / RPCS3 FreeBSD (push) Waiting to run
Some checks are pending
Generate Translation Template / Generate Translation Template (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux-aarch64.sh, gcc, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux.sh, gcc, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (a1d35836e8d45bfc6f63c26f0a3e5d46ef622fe1, rpcs3/rpcs3-binaries-linux-arm64, /rpcs3/.ci/build-linux-aarch64.sh, clang, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (d812f1254a1157c80fd402f94446310560f54e5f, rpcs3/rpcs3-binaries-linux, /rpcs3/.ci/build-linux.sh, clang, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (51ae32f468089a8169aaf1567de355ff4a3e0842, rpcs3/rpcs3-binaries-mac, .ci/build-mac.sh, Intel) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (8e21bdbc40711a3fccd18fbf17b742348b0f4281, rpcs3/rpcs3-binaries-mac-arm64, .ci/build-mac-arm64.sh, Apple Silicon) (push) Waiting to run
Build RPCS3 / RPCS3 Windows (push) Waiting to run
Build RPCS3 / RPCS3 Windows Clang (win64, clang, clang64) (push) Waiting to run
Build RPCS3 / RPCS3 FreeBSD (push) Waiting to run
This commit is contained in:
parent
b8031f4510
commit
a93197cdcb
|
|
@ -221,7 +221,7 @@ std::pair<PPUDisAsm::const_op, u64> PPUDisAsm::try_get_const_op_gpr_value(u32 re
|
|||
|
||||
GET_CONST_REG(reg_rs, op.rs);
|
||||
|
||||
return { form, utils::rol64(reg_rs, op.sh64) & (~0ull << (op.mbe64 ^ 63)) };
|
||||
return {form, std::rotl<u64>(reg_rs, op.sh64) & (~0ull << (op.mbe64 ^ 63))};
|
||||
}
|
||||
case ppu_itype::OR:
|
||||
{
|
||||
|
|
|
|||
|
|
@ -3483,7 +3483,7 @@ auto RLWIMI()
|
|||
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
||||
const u64 mask = ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
|
||||
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & mask);
|
||||
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (dup32(std::rotl<u32>(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & mask);
|
||||
if constexpr (((Flags == has_rc) || ...))
|
||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||
};
|
||||
|
|
@ -3497,7 +3497,7 @@ auto RLWINM()
|
|||
return ppu_exec_select<Flags...>::template select<>();
|
||||
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
||||
ppu.gpr[op.ra] = dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
|
||||
ppu.gpr[op.ra] = dup32(std::rotl<u32>(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
|
||||
if constexpr (((Flags == has_rc) || ...))
|
||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||
};
|
||||
|
|
@ -3511,7 +3511,7 @@ auto RLWNM()
|
|||
return ppu_exec_select<Flags...>::template select<>();
|
||||
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
||||
ppu.gpr[op.ra] = dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), ppu.gpr[op.rb] & 0x1f)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
|
||||
ppu.gpr[op.ra] = dup32(std::rotl<u32>(static_cast<u32>(ppu.gpr[op.rs]), ppu.gpr[op.rb] & 0x1f)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
|
||||
if constexpr (((Flags == has_rc) || ...))
|
||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||
};
|
||||
|
|
@ -3599,7 +3599,7 @@ auto RLDICL()
|
|||
return ppu_exec_select<Flags...>::template select<>();
|
||||
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
||||
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull >> op.mbe64);
|
||||
ppu.gpr[op.ra] = std::rotl<u64>(ppu.gpr[op.rs], op.sh64) & (~0ull >> op.mbe64);
|
||||
if constexpr (((Flags == has_rc) || ...))
|
||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||
};
|
||||
|
|
@ -3613,7 +3613,7 @@ auto RLDICR()
|
|||
return ppu_exec_select<Flags...>::template select<>();
|
||||
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
||||
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull << (op.mbe64 ^ 63));
|
||||
ppu.gpr[op.ra] = std::rotl<u64>(ppu.gpr[op.rs], op.sh64) & (~0ull << (op.mbe64 ^ 63));
|
||||
if constexpr (((Flags == has_rc) || ...))
|
||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||
};
|
||||
|
|
@ -3627,7 +3627,7 @@ auto RLDIC()
|
|||
return ppu_exec_select<Flags...>::template select<>();
|
||||
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
||||
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
|
||||
ppu.gpr[op.ra] = std::rotl<u64>(ppu.gpr[op.rs], op.sh64) & ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
|
||||
if constexpr (((Flags == has_rc) || ...))
|
||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||
};
|
||||
|
|
@ -3642,7 +3642,7 @@ auto RLDIMI()
|
|||
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
||||
const u64 mask = ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
|
||||
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (utils::rol64(ppu.gpr[op.rs], op.sh64) & mask);
|
||||
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (std::rotl<u64>(ppu.gpr[op.rs], op.sh64) & mask);
|
||||
if constexpr (((Flags == has_rc) || ...))
|
||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||
};
|
||||
|
|
@ -3656,7 +3656,7 @@ auto RLDCL()
|
|||
return ppu_exec_select<Flags...>::template select<>();
|
||||
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
||||
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull >> op.mbe64);
|
||||
ppu.gpr[op.ra] = std::rotl<u64>(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull >> op.mbe64);
|
||||
if constexpr (((Flags == has_rc) || ...))
|
||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||
};
|
||||
|
|
@ -3670,7 +3670,7 @@ auto RLDCR()
|
|||
return ppu_exec_select<Flags...>::template select<>();
|
||||
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
||||
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull << (op.mbe64 ^ 63));
|
||||
ppu.gpr[op.ra] = std::rotl<u64>(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull << (op.mbe64 ^ 63));
|
||||
if constexpr (((Flags == has_rc) || ...))
|
||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -3212,7 +3212,7 @@ void spu_recompiler::ROTQBYI(spu_opcode_t op)
|
|||
}
|
||||
else if (s == 4 || s == 8 || s == 12)
|
||||
{
|
||||
c->pshufd(va, va, utils::rol8(0xE4, s / 2));
|
||||
c->pshufd(va, va, std::rotl<u8>(0xE4, s / 2));
|
||||
}
|
||||
else if (utils::has_ssse3())
|
||||
{
|
||||
|
|
|
|||
|
|
@ -2333,7 +2333,7 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
|
|||
// Search for BRSL LR and BRASL LR or BR
|
||||
// TODO: BISL
|
||||
const v128 inst = read_from_ptr<be_t<v128>>(ls.data(), i - base_addr);
|
||||
const v128 cleared_i16 = gv_and32(inst, v128::from32p(utils::rol32(~0xffff, 7)));
|
||||
const v128 cleared_i16 = gv_and32(inst, v128::from32p(std::rotl<u32>(~0xffff, 7)));
|
||||
const v128 eq_brsl = gv_eq32(cleared_i16, v128::from32p(0x66u << 23));
|
||||
const v128 eq_brasl = gv_eq32(cleared_i16, brasl_mask);
|
||||
const v128 eq_br = gv_eq32(cleared_i16, v128::from32p(0x64u << 23));
|
||||
|
|
@ -5396,7 +5396,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
const usz block_tail = duplicate_positions[it_begin - it_tail];
|
||||
|
||||
// Check if the distance is precisely two times from the end
|
||||
if (reg_state_it.size() - block_start != utils::rol64(reg_state_it.size() - block_tail, 1))
|
||||
if (reg_state_it.size() - block_start != std::rotl<u64>(reg_state_it.size() - block_tail, 1))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -291,7 +291,7 @@ bool ROT(spu_thread& spu, spu_opcode_t op)
|
|||
|
||||
for (u32 i = 0; i < 4; i++)
|
||||
{
|
||||
spu.gpr[op.rt]._u32[i] = utils::rol32(a._u32[i], b._u32[i]);
|
||||
spu.gpr[op.rt]._u32[i] = std::rotl<u32>(a._u32[i], b._u32[i]);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
@ -346,7 +346,7 @@ bool ROTH(spu_thread& spu, spu_opcode_t op)
|
|||
|
||||
for (u32 i = 0; i < 8; i++)
|
||||
{
|
||||
spu.gpr[op.rt]._u16[i] = utils::rol16(a._u16[i], b._u16[i]);
|
||||
spu.gpr[op.rt]._u16[i] = std::rotl<u16>(a._u16[i], b._u16[i]);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3220,7 +3220,7 @@ plain_access:
|
|||
|
||||
bool spu_thread::do_dma_check(const spu_mfc_cmd& args)
|
||||
{
|
||||
const u32 mask = utils::rol32(1, args.tag);
|
||||
const u32 mask = std::rotl<u32>(1, args.tag);
|
||||
|
||||
if (mfc_barrier & mask || (args.cmd & (MFC_BARRIER_MASK | MFC_FENCE_MASK) && mfc_fence & mask)) [[unlikely]]
|
||||
{
|
||||
|
|
@ -3236,13 +3236,13 @@ bool spu_thread::do_dma_check(const spu_mfc_cmd& args)
|
|||
if ((mfc_queue[i].cmd & ~0xc) == MFC_BARRIER_CMD)
|
||||
{
|
||||
mfc_barrier |= -1;
|
||||
mfc_fence |= utils::rol32(1, mfc_queue[i].tag);
|
||||
mfc_fence |= std::rotl<u32>(1, mfc_queue[i].tag);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (true)
|
||||
{
|
||||
const u32 _mask = utils::rol32(1u, mfc_queue[i].tag);
|
||||
const u32 _mask = std::rotl<u32>(1u, mfc_queue[i].tag);
|
||||
|
||||
// A command with barrier hard blocks that tag until it's been dealt with
|
||||
if (mfc_queue[i].cmd & MFC_BARRIER_MASK)
|
||||
|
|
@ -3805,14 +3805,14 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
|
|||
{
|
||||
range_lock->release(0);
|
||||
|
||||
ch_stall_mask |= utils::rol32(1, args.tag);
|
||||
ch_stall_mask |= std::rotl<u32>(1, args.tag);
|
||||
|
||||
if (!ch_stall_stat.get_count())
|
||||
{
|
||||
set_events(SPU_EVENT_SN);
|
||||
}
|
||||
|
||||
ch_stall_stat.set_value(utils::rol32(1, args.tag) | ch_stall_stat.get_value());
|
||||
ch_stall_stat.set_value(std::rotl<u32>(1, args.tag) | ch_stall_stat.get_value());
|
||||
|
||||
args.tag |= 0x80; // Set stalled status
|
||||
args.eal = ::narrow<u32>(reinterpret_cast<const u8*>(item_ptr) - this->ls);
|
||||
|
|
@ -4271,7 +4271,7 @@ bool spu_thread::do_mfc(bool can_escape, bool must_finish)
|
|||
auto process_command = [&](spu_mfc_cmd& args)
|
||||
{
|
||||
// Select tag bit in the tag mask or the stall mask
|
||||
const u32 mask = utils::rol32(1, args.tag);
|
||||
const u32 mask = std::rotl<u32>(1, args.tag);
|
||||
|
||||
if ((args.cmd & ~0xc) == MFC_BARRIER_CMD)
|
||||
{
|
||||
|
|
@ -5373,7 +5373,7 @@ bool spu_thread::process_mfc_cmd()
|
|||
std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.lsa & 0x3ff80), 128);
|
||||
}
|
||||
|
||||
const u32 mask = utils::rol32(1, ch_mfc_cmd.tag);
|
||||
const u32 mask = std::rotl<u32>(1, ch_mfc_cmd.tag);
|
||||
|
||||
if ((mfc_barrier | mfc_fence) & mask) [[unlikely]]
|
||||
{
|
||||
|
|
@ -5428,11 +5428,11 @@ bool spu_thread::process_mfc_cmd()
|
|||
}
|
||||
|
||||
mfc_queue[mfc_size++] = ch_mfc_cmd;
|
||||
mfc_fence |= utils::rol32(1, ch_mfc_cmd.tag);
|
||||
mfc_fence |= std::rotl<u32>(1, ch_mfc_cmd.tag);
|
||||
|
||||
if (ch_mfc_cmd.cmd & MFC_BARRIER_MASK)
|
||||
{
|
||||
mfc_barrier |= utils::rol32(1, ch_mfc_cmd.tag);
|
||||
mfc_barrier |= std::rotl<u32>(1, ch_mfc_cmd.tag);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
@ -5481,11 +5481,11 @@ bool spu_thread::process_mfc_cmd()
|
|||
}
|
||||
|
||||
mfc_size++;
|
||||
mfc_fence |= utils::rol32(1, cmd.tag);
|
||||
mfc_fence |= std::rotl<u32>(1, cmd.tag);
|
||||
|
||||
if (cmd.cmd & MFC_BARRIER_MASK)
|
||||
{
|
||||
mfc_barrier |= utils::rol32(1, cmd.tag);
|
||||
mfc_barrier |= std::rotl<u32>(1, cmd.tag);
|
||||
}
|
||||
|
||||
if (check_mfc_interrupts(pc + 4))
|
||||
|
|
@ -5511,7 +5511,7 @@ bool spu_thread::process_mfc_cmd()
|
|||
{
|
||||
mfc_queue[mfc_size++] = ch_mfc_cmd;
|
||||
mfc_barrier |= -1;
|
||||
mfc_fence |= utils::rol32(1, ch_mfc_cmd.tag);
|
||||
mfc_fence |= std::rotl<u32>(1, ch_mfc_cmd.tag);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
@ -6872,7 +6872,7 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
|
|||
value &= 0x1f;
|
||||
|
||||
// Reset stall status for specified tag
|
||||
const u32 tag_mask = utils::rol32(1, value);
|
||||
const u32 tag_mask = std::rotl<u32>(1, value);
|
||||
|
||||
if (ch_stall_mask & tag_mask)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -139,7 +139,7 @@ namespace rsx
|
|||
u32 bytes_read = 0;
|
||||
|
||||
// Find the next set bit after every iteration
|
||||
for (int i = 0;; i = (std::countr_zero<u32>(utils::rol8(to_fetch, 0 - i - 1)) + i + 1) % 8)
|
||||
for (int i = 0;; i = (std::countr_zero<u32>(std::rotl<u8>(to_fetch, 0 - i - 1)) + i + 1) % 8)
|
||||
{
|
||||
// If a reservation is being updated, try to load another
|
||||
const auto& res = vm::reservation_acquire(addr1 + i * 128);
|
||||
|
|
|
|||
|
|
@ -2856,7 +2856,7 @@ namespace rsx
|
|||
|
||||
for (u32 ea = address >> 20, end = ea + (size >> 20); ea < end; ea++)
|
||||
{
|
||||
const u32 io = utils::rol32(iomap_table.io[ea], 32 - 20);
|
||||
const u32 io = std::rotl<u32>(iomap_table.io[ea], 32 - 20);
|
||||
|
||||
if (io + 1)
|
||||
{
|
||||
|
|
@ -2886,7 +2886,7 @@ namespace rsx
|
|||
|
||||
while (to_unmap)
|
||||
{
|
||||
bit = (std::countr_zero<u64>(utils::rol64(to_unmap, 0 - bit)) + bit);
|
||||
bit = (std::countr_zero<u64>(std::rotl<u64>(to_unmap, 0 - bit)) + bit);
|
||||
to_unmap &= ~(1ull << bit);
|
||||
|
||||
constexpr u16 null_entry = 0xFFFF;
|
||||
|
|
|
|||
|
|
@ -8,30 +8,12 @@
|
|||
extern bool g_use_rtm;
|
||||
extern u64 g_rtm_tx_limit1;
|
||||
|
||||
#ifdef _M_X64
|
||||
#ifdef ARCH_X64
|
||||
#ifdef _MSC_VER
|
||||
extern "C"
|
||||
{
|
||||
u32 _xbegin();
|
||||
void _xend();
|
||||
void _mm_pause();
|
||||
void _mm_prefetch(const char*, int);
|
||||
void _m_prefetchw(const volatile void*);
|
||||
|
||||
uchar _rotl8(uchar, uchar);
|
||||
ushort _rotl16(ushort, uchar);
|
||||
u64 __popcnt64(u64);
|
||||
|
||||
s64 __mulh(s64, s64);
|
||||
u64 __umulh(u64, u64);
|
||||
|
||||
s64 _div128(s64, s64, s64, s64*);
|
||||
u64 _udiv128(u64, u64, u64, u64*);
|
||||
void __debugbreak();
|
||||
}
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <immintrin.h>
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
@ -113,7 +95,7 @@ namespace utils
|
|||
const u64 value = reinterpret_cast<u64>(func);
|
||||
const void* ptr = reinterpret_cast<const void*>(value);
|
||||
|
||||
#ifdef _M_X64
|
||||
#ifdef ARCH_X64
|
||||
return _mm_prefetch(static_cast<const char*>(ptr), _MM_HINT_T1);
|
||||
#else
|
||||
return __builtin_prefetch(ptr, 0, 2);
|
||||
|
|
@ -128,7 +110,7 @@ namespace utils
|
|||
return;
|
||||
}
|
||||
|
||||
#ifdef _M_X64
|
||||
#ifdef ARCH_X64
|
||||
return _mm_prefetch(static_cast<const char*>(ptr), _MM_HINT_T0);
|
||||
#else
|
||||
return __builtin_prefetch(ptr, 0, 3);
|
||||
|
|
@ -142,110 +124,19 @@ namespace utils
|
|||
return;
|
||||
}
|
||||
|
||||
#if defined(_M_X64) && !defined(__clang__)
|
||||
return _m_prefetchw(ptr);
|
||||
#if defined(ARCH_X64)
|
||||
return _m_prefetchw(const_cast<void*>(ptr));
|
||||
#else
|
||||
return __builtin_prefetch(ptr, 1, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
constexpr u8 rol8(u8 x, u8 n)
|
||||
{
|
||||
if (std::is_constant_evaluated())
|
||||
{
|
||||
return (x << (n & 7)) | (x >> ((-n & 7)));
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
return _rotl8(x, n);
|
||||
#elif defined(__clang__)
|
||||
return __builtin_rotateleft8(x, n);
|
||||
#elif defined(ARCH_X64)
|
||||
return __builtin_ia32_rolqi(x, n);
|
||||
#else
|
||||
return (x << (n & 7)) | (x >> ((-n & 7)));
|
||||
#endif
|
||||
}
|
||||
|
||||
constexpr u16 rol16(u16 x, u16 n)
|
||||
{
|
||||
if (std::is_constant_evaluated())
|
||||
{
|
||||
return (x << (n & 15)) | (x >> ((-n & 15)));
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
return _rotl16(x, static_cast<uchar>(n));
|
||||
#elif defined(__clang__)
|
||||
return __builtin_rotateleft16(x, n);
|
||||
#elif defined(ARCH_X64)
|
||||
return __builtin_ia32_rolhi(x, n);
|
||||
#else
|
||||
return (x << (n & 15)) | (x >> ((-n & 15)));
|
||||
#endif
|
||||
}
|
||||
|
||||
constexpr u32 rol32(u32 x, u32 n)
|
||||
{
|
||||
if (std::is_constant_evaluated())
|
||||
{
|
||||
return (x << (n & 31)) | (x >> (((0 - n) & 31)));
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
return _rotl(x, n);
|
||||
#elif defined(__clang__)
|
||||
return __builtin_rotateleft32(x, n);
|
||||
#else
|
||||
return (x << (n & 31)) | (x >> (((0 - n) & 31)));
|
||||
#endif
|
||||
}
|
||||
|
||||
constexpr u64 rol64(u64 x, u64 n)
|
||||
{
|
||||
if (std::is_constant_evaluated())
|
||||
{
|
||||
return (x << (n & 63)) | (x >> (((0 - n) & 63)));
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
return _rotl64(x, static_cast<int>(n));
|
||||
#elif defined(__clang__)
|
||||
return __builtin_rotateleft64(x, n);
|
||||
#else
|
||||
return (x << (n & 63)) | (x >> (((0 - n) & 63)));
|
||||
#endif
|
||||
}
|
||||
|
||||
constexpr u32 popcnt64(u64 v)
|
||||
{
|
||||
#if !defined(_MSC_VER) || defined(__SSE4_2__)
|
||||
if (std::is_constant_evaluated())
|
||||
#endif
|
||||
{
|
||||
v = (v & 0xaaaaaaaaaaaaaaaa) / 2 + (v & 0x5555555555555555);
|
||||
v = (v & 0xcccccccccccccccc) / 4 + (v & 0x3333333333333333);
|
||||
v = (v & 0xf0f0f0f0f0f0f0f0) / 16 + (v & 0x0f0f0f0f0f0f0f0f);
|
||||
v = (v & 0xff00ff00ff00ff00) / 256 + (v & 0x00ff00ff00ff00ff);
|
||||
v = ((v & 0xffff0000ffff0000) >> 16) + (v & 0x0000ffff0000ffff);
|
||||
return static_cast<u32>((v >> 32) + v);
|
||||
}
|
||||
|
||||
#if !defined(_MSC_VER) || defined(__SSE4_2__)
|
||||
#ifdef _MSC_VER
|
||||
return static_cast<u32>(__popcnt64(v));
|
||||
#else
|
||||
return __builtin_popcountll(v);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
constexpr u32 popcnt128(const u128& v)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
return popcnt64(v.lo) + popcnt64(v.hi);
|
||||
return std::popcount(v.lo) + std::popcount(v.hi);
|
||||
#else
|
||||
return popcnt64(v) + popcnt64(v >> 64);
|
||||
return std::popcount(v);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
@ -332,10 +223,7 @@ namespace utils
|
|||
else
|
||||
return std::countr_zero(arg.lo);
|
||||
#else
|
||||
if (u64 lo = static_cast<u64>(arg))
|
||||
return std::countr_zero<u64>(lo);
|
||||
else
|
||||
return std::countr_zero<u64>(arg >> 64) + 64;
|
||||
return std::countr_zero(arg);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
@ -347,10 +235,7 @@ namespace utils
|
|||
else
|
||||
return std::countl_zero(arg.lo) + 64;
|
||||
#else
|
||||
if (u64 hi = static_cast<u64>(arg >> 64))
|
||||
return std::countl_zero<u64>(hi);
|
||||
else
|
||||
return std::countl_zero<u64>(arg) + 64;
|
||||
return std::countl_zero(arg);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
@ -358,10 +243,8 @@ namespace utils
|
|||
{
|
||||
#if defined(ARCH_ARM64)
|
||||
__asm__ volatile("yield");
|
||||
#elif defined(_M_X64)
|
||||
_mm_pause();
|
||||
#elif defined(ARCH_X64)
|
||||
__builtin_ia32_pause();
|
||||
_mm_pause();
|
||||
#else
|
||||
#error "Missing utils::pause() implementation"
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -2986,7 +2986,7 @@ inline v128 gv_rol16(const v128& a, const v128& b)
|
|||
#else
|
||||
v128 r;
|
||||
for (u32 i = 0; i < 8; i++)
|
||||
r._u16[i] = utils::rol16(a._u16[i], b._u16[i]);
|
||||
r._u16[i] = std::rotl<u16>(a._u16[i], b._u16[i]);
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
|
|
@ -3020,7 +3020,7 @@ inline v128 gv_rol32(const v128& a, const v128& b)
|
|||
#else
|
||||
v128 r;
|
||||
for (u32 i = 0; i < 4; i++)
|
||||
r._u32[i] = utils::rol32(a._u32[i], b._u32[i]);
|
||||
r._u32[i] = std::rotl<u32>(a._u32[i], b._u32[i]);
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
|
|
@ -3039,7 +3039,7 @@ inline v128 gv_rol32(const v128& a)
|
|||
#else
|
||||
v128 r;
|
||||
for (u32 i = 0; i < 4; i++)
|
||||
r._u32[i] = utils::rol32(a._u32[i], count);
|
||||
r._u32[i] = std::rotl<u32>(a._u32[i], count);
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue