#pragma once

#include "Utilities/types.h"
#include <functional>
#include <mutex>

#ifdef _MSC_VER
#include <atomic>
#endif

// Wait timeout extension (in nanoseconds)
enum class atomic_wait_timeout : u64
{
	inf = 0xffffffffffffffff,
};

// Helper for waitable atomics (as in C++20 std::atomic)
struct atomic_storage_futex
{
private:
	template <typename T, std::size_t Align>
	friend class atomic_t;

	static void
#ifdef _WIN32
	__vectorcall
#endif
	wait(const void* data, std::size_t size, __m128i old128, u64 timeout, __m128i mask128);
	static void notify_one(const void* data);
	static void notify_all(const void* data);

public:
	static void set_wait_callback(bool(*cb)(const void* data));
	static void set_notify_callback(void(*cb)(const void* data, u64 progress));
	static void raw_notify(const void* data);
};
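// Added usage sketch (not part of the original header): the emulator can hook the futex helper
// with process-wide callbacks. Only the signatures come from the declarations above; the lambda
// bodies and the exact callback semantics are assumptions.
//
//   atomic_storage_futex::set_wait_callback(+[](const void* data) -> bool
//   {
//       return data != nullptr; // assumed: return whether the wait should actually be performed
//   });
//
//   atomic_storage_futex::set_notify_callback(+[](const void* data, u64 progress)
//   {
//       // assumed: observe wake-up progress for the object at 'data'
//   });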
// Helper class, provides access to compiler-specific atomic intrinsics
template <typename T, std::size_t Size = sizeof(T)>
struct atomic_storage
{
	static_assert(sizeof(T) <= 16 && sizeof(T) == alignof(T), "atomic_storage<> error: invalid type");

	/* First part: Non-MSVC intrinsics */

#ifndef _MSC_VER

#if defined(__ATOMIC_HLE_ACQUIRE) && defined(__ATOMIC_HLE_RELEASE)
	static constexpr int s_hle_ack = __ATOMIC_SEQ_CST | __ATOMIC_HLE_ACQUIRE;
	static constexpr int s_hle_rel = __ATOMIC_SEQ_CST | __ATOMIC_HLE_RELEASE;
#else
	static constexpr int s_hle_ack = __ATOMIC_SEQ_CST;
	static constexpr int s_hle_rel = __ATOMIC_SEQ_CST;
#endif

	static inline bool compare_exchange(T& dest, T& comp, T exch)
	{
		return __atomic_compare_exchange(&dest, &comp, &exch, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
	}

	static inline bool compare_exchange_hle_acq(T& dest, T& comp, T exch)
	{
		static_assert(sizeof(T) == 4 || sizeof(T) == 8);
		return __atomic_compare_exchange(&dest, &comp, &exch, false, s_hle_ack, s_hle_ack);
	}

	static inline T load(const T& dest)
	{
		T result;
		__atomic_load(&dest, &result, __ATOMIC_SEQ_CST);
		return result;
	}

	static inline void store(T& dest, T value)
	{
		__atomic_store(&dest, &value, __ATOMIC_SEQ_CST);
	}

	static inline void release(T& dest, T value)
	{
		__atomic_store(&dest, &value, __ATOMIC_RELEASE);
	}

	static inline T exchange(T& dest, T value)
	{
		T result;
		__atomic_exchange(&dest, &value, &result, __ATOMIC_SEQ_CST);
		return result;
	}

	static inline T fetch_add(T& dest, T value)
	{
		return __atomic_fetch_add(&dest, value, __ATOMIC_SEQ_CST);
	}

	static inline T fetch_add_hle_rel(T& dest, T value)
	{
		static_assert(sizeof(T) == 4 || sizeof(T) == 8);
		return __atomic_fetch_add(&dest, value, s_hle_rel);
	}

	static inline T add_fetch(T& dest, T value)
	{
		return __atomic_add_fetch(&dest, value, __ATOMIC_SEQ_CST);
	}

	static inline T fetch_sub(T& dest, T value)
	{
		return __atomic_fetch_sub(&dest, value, __ATOMIC_SEQ_CST);
	}

	static inline T sub_fetch(T& dest, T value)
	{
		return __atomic_sub_fetch(&dest, value, __ATOMIC_SEQ_CST);
	}

	static inline T fetch_and(T& dest, T value)
	{
		return __atomic_fetch_and(&dest, value, __ATOMIC_SEQ_CST);
	}

	static inline T and_fetch(T& dest, T value)
	{
		return __atomic_and_fetch(&dest, value, __ATOMIC_SEQ_CST);
	}

	static inline T fetch_xor(T& dest, T value)
	{
		return __atomic_fetch_xor(&dest, value, __ATOMIC_SEQ_CST);
	}

	static inline T xor_fetch(T& dest, T value)
	{
		return __atomic_xor_fetch(&dest, value, __ATOMIC_SEQ_CST);
	}

	static inline T fetch_or(T& dest, T value)
	{
		return __atomic_fetch_or(&dest, value, __ATOMIC_SEQ_CST);
	}

	static inline T or_fetch(T& dest, T value)
	{
		return __atomic_or_fetch(&dest, value, __ATOMIC_SEQ_CST);
	}
#endif

	/* Second part: MSVC-specific */

#ifdef _MSC_VER
	static inline T add_fetch(T& dest, T value)
	{
		return atomic_storage<T>::fetch_add(dest, value) + value;
	}

	static inline T fetch_sub(T& dest, T value)
	{
		return atomic_storage<T>::fetch_add(dest, 0 - value);
	}

	static inline T sub_fetch(T& dest, T value)
	{
		return atomic_storage<T>::fetch_add(dest, 0 - value) - value;
	}

	static inline T and_fetch(T& dest, T value)
	{
		return atomic_storage<T>::fetch_and(dest, value) & value;
	}

	static inline T or_fetch(T& dest, T value)
	{
		return atomic_storage<T>::fetch_or(dest, value) | value;
	}

	static inline T xor_fetch(T& dest, T value)
	{
		return atomic_storage<T>::fetch_xor(dest, value) ^ value;
	}
#endif

	/* Third part: fallbacks, may be hidden by subsequent atomic_storage<> specializations */

	static inline T fetch_inc(T& dest)
	{
		return atomic_storage<T>::fetch_add(dest, 1);
	}

	static inline T inc_fetch(T& dest)
	{
		return atomic_storage<T>::add_fetch(dest, 1);
	}

	static inline T fetch_dec(T& dest)
	{
		return atomic_storage<T>::fetch_sub(dest, 1);
	}

	static inline T dec_fetch(T& dest)
	{
		return atomic_storage<T>::sub_fetch(dest, 1);
	}

	static inline bool test_and_set(T& dest, T mask)
	{
		return (atomic_storage<T>::fetch_or(dest, mask) & mask) != 0;
	}

	static inline bool test_and_reset(T& dest, T mask)
	{
		return (atomic_storage<T>::fetch_and(dest, ~mask) & mask) != 0;
	}

	static inline bool test_and_complement(T& dest, T mask)
	{
		return (atomic_storage<T>::fetch_xor(dest, mask) & mask) != 0;
	}

	static inline bool bts(T& dest, uint bit)
	{
		return atomic_storage<T>::test_and_set(dest, static_cast<T>(1) << bit);
	}

	static inline bool btr(T& dest, uint bit)
	{
		return atomic_storage<T>::test_and_reset(dest, static_cast<T>(1) << bit);
	}

	static inline bool btc(T& dest, uint bit)
	{
		return atomic_storage<T>::test_and_complement(dest, static_cast<T>(1) << bit);
	}
};
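// Added note with a minimal sketch (not in the original source): atomic_storage<> gives one
// spelling to the GCC/Clang __atomic builtins above and the MSVC intrinsics in the
// specializations below, so higher-level code can be written once per operation:
//
//   u64 counter = 0;
//   atomic_storage<u64>::store(counter, 1);                      // seq-cst store
//   const u64 prev = atomic_storage<u64>::fetch_add(counter, 2); // prev == 1
//   const bool was_set = atomic_storage<u64>::bts(counter, 7);   // set bit 7, return its old value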
/* The rest: ugly MSVC intrinsics + inline asm implementations */

template <typename T>
struct atomic_storage<T, 1> : atomic_storage<T, 0>
{
#ifdef _MSC_VER
	static inline bool compare_exchange(T& dest, T& comp, T exch)
	{
		const char v = std::bit_cast<char>(comp);
		const char r = _InterlockedCompareExchange8(reinterpret_cast<volatile char*>(&dest), std::bit_cast<char>(exch), v);
		comp = std::bit_cast<T>(r);
		return r == v;
	}

	static inline T load(const T& dest)
	{
		const char value = *reinterpret_cast<const volatile char*>(&dest);
		std::atomic_thread_fence(std::memory_order_acquire);
		return std::bit_cast<T>(value);
	}

	static inline void release(T& dest, T value)
	{
		std::atomic_thread_fence(std::memory_order_release);
		*reinterpret_cast<volatile char*>(&dest) = std::bit_cast<char>(value);
	}

	static inline T exchange(T& dest, T value)
	{
		const char r = _InterlockedExchange8(reinterpret_cast<volatile char*>(&dest), std::bit_cast<char>(value));
		return std::bit_cast<T>(r);
	}

	static inline void store(T& dest, T value)
	{
		exchange(dest, value);
	}

	static inline T fetch_add(T& dest, T value)
	{
		const char r = _InterlockedExchangeAdd8(reinterpret_cast<volatile char*>(&dest), std::bit_cast<char>(value));
		return std::bit_cast<T>(r);
	}

	static inline T fetch_and(T& dest, T value)
	{
		const char r = _InterlockedAnd8(reinterpret_cast<volatile char*>(&dest), std::bit_cast<char>(value));
		return std::bit_cast<T>(r);
	}

	static inline T fetch_or(T& dest, T value)
	{
		const char r = _InterlockedOr8(reinterpret_cast<volatile char*>(&dest), std::bit_cast<char>(value));
		return std::bit_cast<T>(r);
	}

	static inline T fetch_xor(T& dest, T value)
	{
		const char r = _InterlockedXor8(reinterpret_cast<volatile char*>(&dest), std::bit_cast<char>(value));
		return std::bit_cast<T>(r);
	}
#endif
};
template <typename T>
struct atomic_storage<T, 2> : atomic_storage<T, 0>
{
#ifdef _MSC_VER
	static inline bool compare_exchange(T& dest, T& comp, T exch)
	{
		const short v = std::bit_cast<short>(comp);
		const short r = _InterlockedCompareExchange16(reinterpret_cast<volatile short*>(&dest), std::bit_cast<short>(exch), v);
		comp = std::bit_cast<T>(r);
		return r == v;
	}

	static inline T load(const T& dest)
	{
		const short value = *reinterpret_cast<const volatile short*>(&dest);
		std::atomic_thread_fence(std::memory_order_acquire);
		return std::bit_cast<T>(value);
	}

	static inline void release(T& dest, T value)
	{
		std::atomic_thread_fence(std::memory_order_release);
		*reinterpret_cast<volatile short*>(&dest) = std::bit_cast<short>(value);
	}

	static inline T exchange(T& dest, T value)
	{
		const short r = _InterlockedExchange16(reinterpret_cast<volatile short*>(&dest), std::bit_cast<short>(value));
		return std::bit_cast<T>(r);
	}

	static inline void store(T& dest, T value)
	{
		exchange(dest, value);
	}

	static inline T fetch_add(T& dest, T value)
	{
		const short r = _InterlockedExchangeAdd16(reinterpret_cast<volatile short*>(&dest), std::bit_cast<short>(value));
		return std::bit_cast<T>(r);
	}

	static inline T fetch_and(T& dest, T value)
	{
		const short r = _InterlockedAnd16(reinterpret_cast<volatile short*>(&dest), std::bit_cast<short>(value));
		return std::bit_cast<T>(r);
	}

	static inline T fetch_or(T& dest, T value)
	{
		const short r = _InterlockedOr16(reinterpret_cast<volatile short*>(&dest), std::bit_cast<short>(value));
		return std::bit_cast<T>(r);
	}

	static inline T fetch_xor(T& dest, T value)
	{
		const short r = _InterlockedXor16(reinterpret_cast<volatile short*>(&dest), std::bit_cast<short>(value));
		return std::bit_cast<T>(r);
	}

	static inline T inc_fetch(T& dest)
	{
		const short r = _InterlockedIncrement16(reinterpret_cast<volatile short*>(&dest));
		return std::bit_cast<T>(r);
	}

	static inline T dec_fetch(T& dest)
	{
		const short r = _InterlockedDecrement16(reinterpret_cast<volatile short*>(&dest));
		return std::bit_cast<T>(r);
	}
#else
	static inline bool bts(T& dest, uint bit)
	{
		bool result;
		ushort _bit = static_cast<ushort>(bit);
		__asm__("lock btsw %2, %0\n" : "+m" (dest), "=@ccc" (result) : "Ir" (_bit) : "cc");
		return result;
	}

	static inline bool btr(T& dest, uint bit)
	{
		bool result;
		ushort _bit = static_cast<ushort>(bit);
		__asm__("lock btrw %2, %0\n" : "+m" (dest), "=@ccc" (result) : "Ir" (_bit) : "cc");
		return result;
	}

	static inline bool btc(T& dest, uint bit)
	{
		bool result;
		ushort _bit = static_cast<ushort>(bit);
		__asm__("lock btcw %2, %0\n" : "+m" (dest), "=@ccc" (result) : "Ir" (_bit) : "cc");
		return result;
	}
#endif
};
template <typename T>
struct atomic_storage<T, 4> : atomic_storage<T, 0>
{
#ifdef _MSC_VER
	static inline bool compare_exchange(T& dest, T& comp, T exch)
	{
		const long v = std::bit_cast<long>(comp);
		const long r = _InterlockedCompareExchange(reinterpret_cast<volatile long*>(&dest), std::bit_cast<long>(exch), v);
		comp = std::bit_cast<T>(r);
		return r == v;
	}

	static inline bool compare_exchange_hle_acq(T& dest, T& comp, T exch)
	{
		const long v = std::bit_cast<long>(comp);
		const long r = _InterlockedCompareExchange_HLEAcquire(reinterpret_cast<volatile long*>(&dest), std::bit_cast<long>(exch), v);
		comp = std::bit_cast<T>(r);
		return r == v;
	}

	static inline T load(const T& dest)
	{
		const long value = *reinterpret_cast<const volatile long*>(&dest);
		std::atomic_thread_fence(std::memory_order_acquire);
		return std::bit_cast<T>(value);
	}

	static inline void release(T& dest, T value)
	{
		std::atomic_thread_fence(std::memory_order_release);
		*reinterpret_cast<volatile long*>(&dest) = std::bit_cast<long>(value);
	}

	static inline T exchange(T& dest, T value)
	{
		const long r = _InterlockedExchange(reinterpret_cast<volatile long*>(&dest), std::bit_cast<long>(value));
		return std::bit_cast<T>(r);
	}

	static inline void store(T& dest, T value)
	{
		exchange(dest, value);
	}

	static inline T fetch_add(T& dest, T value)
	{
		const long r = _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(&dest), std::bit_cast<long>(value));
		return std::bit_cast<T>(r);
	}

	static inline T fetch_add_hle_rel(T& dest, T value)
	{
		const long r = _InterlockedExchangeAdd_HLERelease(reinterpret_cast<volatile long*>(&dest), std::bit_cast<long>(value));
		return std::bit_cast<T>(r);
	}

	static inline T fetch_and(T& dest, T value)
	{
		const long r = _InterlockedAnd(reinterpret_cast<volatile long*>(&dest), std::bit_cast<long>(value));
		return std::bit_cast<T>(r);
	}

	static inline T fetch_or(T& dest, T value)
	{
		const long r = _InterlockedOr(reinterpret_cast<volatile long*>(&dest), std::bit_cast<long>(value));
		return std::bit_cast<T>(r);
	}

	static inline T fetch_xor(T& dest, T value)
	{
		const long r = _InterlockedXor(reinterpret_cast<volatile long*>(&dest), std::bit_cast<long>(value));
		return std::bit_cast<T>(r);
	}

	static inline T inc_fetch(T& dest)
	{
		const long r = _InterlockedIncrement(reinterpret_cast<volatile long*>(&dest));
		return std::bit_cast<T>(r);
	}

	static inline T dec_fetch(T& dest)
	{
		const long r = _InterlockedDecrement(reinterpret_cast<volatile long*>(&dest));
		return std::bit_cast<T>(r);
	}

	static inline bool bts(T& dest, uint bit)
	{
		return _interlockedbittestandset(reinterpret_cast<volatile long*>(&dest), bit) != 0;
	}

	static inline bool btr(T& dest, uint bit)
	{
		return _interlockedbittestandreset(reinterpret_cast<volatile long*>(&dest), bit) != 0;
	}
#else
	static inline bool bts(T& dest, uint bit)
	{
		bool result;
		__asm__("lock btsl %2, %0\n" : "+m" (dest), "=@ccc" (result) : "Ir" (bit) : "cc");
		return result;
	}

	static inline bool btr(T& dest, uint bit)
	{
		bool result;
		__asm__("lock btrl %2, %0\n" : "+m" (dest), "=@ccc" (result) : "Ir" (bit) : "cc");
		return result;
	}

	static inline bool btc(T& dest, uint bit)
	{
		bool result;
		__asm__("lock btcl %2, %0\n" : "+m" (dest), "=@ccc" (result) : "Ir" (bit) : "cc");
		return result;
	}
#endif
};
template <typename T>
struct atomic_storage<T, 8> : atomic_storage<T, 0>
{
#ifdef _MSC_VER
	static inline bool compare_exchange(T& dest, T& comp, T exch)
	{
		const llong v = std::bit_cast<llong>(comp);
		const llong r = _InterlockedCompareExchange64(reinterpret_cast<volatile llong*>(&dest), std::bit_cast<llong>(exch), v);
		comp = std::bit_cast<T>(r);
		return r == v;
	}

	static inline bool compare_exchange_hle_acq(T& dest, T& comp, T exch)
	{
		const llong v = std::bit_cast<llong>(comp);
		const llong r = _InterlockedCompareExchange64_HLEAcquire(reinterpret_cast<volatile llong*>(&dest), std::bit_cast<llong>(exch), v);
		comp = std::bit_cast<T>(r);
		return r == v;
	}

	static inline T load(const T& dest)
	{
		const llong value = *reinterpret_cast<const volatile llong*>(&dest);
		std::atomic_thread_fence(std::memory_order_acquire);
		return std::bit_cast<T>(value);
	}

	static inline void release(T& dest, T value)
	{
		std::atomic_thread_fence(std::memory_order_release);
		*reinterpret_cast<volatile llong*>(&dest) = std::bit_cast<llong>(value);
	}

	static inline T exchange(T& dest, T value)
	{
		const llong r = _InterlockedExchange64(reinterpret_cast<volatile llong*>(&dest), std::bit_cast<llong>(value));
		return std::bit_cast<T>(r);
	}

	static inline void store(T& dest, T value)
	{
		exchange(dest, value);
	}

	static inline T fetch_add(T& dest, T value)
	{
		const llong r = _InterlockedExchangeAdd64(reinterpret_cast<volatile llong*>(&dest), std::bit_cast<llong>(value));
		return std::bit_cast<T>(r);
	}

	static inline T fetch_add_hle_rel(T& dest, T value)
	{
		const llong r = _InterlockedExchangeAdd64_HLERelease(reinterpret_cast<volatile llong*>(&dest), std::bit_cast<llong>(value));
		return std::bit_cast<T>(r);
	}

	static inline T fetch_and(T& dest, T value)
	{
		const llong r = _InterlockedAnd64(reinterpret_cast<volatile llong*>(&dest), std::bit_cast<llong>(value));
		return std::bit_cast<T>(r);
	}

	static inline T fetch_or(T& dest, T value)
	{
		const llong r = _InterlockedOr64(reinterpret_cast<volatile llong*>(&dest), std::bit_cast<llong>(value));
		return std::bit_cast<T>(r);
	}

	static inline T fetch_xor(T& dest, T value)
	{
		const llong r = _InterlockedXor64(reinterpret_cast<volatile llong*>(&dest), std::bit_cast<llong>(value));
		return std::bit_cast<T>(r);
	}

	static inline T inc_fetch(T& dest)
	{
		const llong r = _InterlockedIncrement64(reinterpret_cast<volatile llong*>(&dest));
		return std::bit_cast<T>(r);
	}

	static inline T dec_fetch(T& dest)
	{
		const llong r = _InterlockedDecrement64(reinterpret_cast<volatile llong*>(&dest));
		return std::bit_cast<T>(r);
	}

	static inline bool bts(T& dest, uint bit)
	{
		return _interlockedbittestandset64(reinterpret_cast<volatile llong*>(&dest), bit) != 0;
	}

	static inline bool btr(T& dest, uint bit)
	{
		return _interlockedbittestandreset64(reinterpret_cast<volatile llong*>(&dest), bit) != 0;
	}
#else
	static inline bool bts(T& dest, uint bit)
	{
		bool result;
		ullong _bit = bit;
		__asm__("lock btsq %2, %0\n" : "+m" (dest), "=@ccc" (result) : "Ir" (_bit) : "cc");
		return result;
	}

	static inline bool btr(T& dest, uint bit)
	{
		bool result;
		ullong _bit = bit;
		__asm__("lock btrq %2, %0\n" : "+m" (dest), "=@ccc" (result) : "Ir" (_bit) : "cc");
		return result;
	}

	static inline bool btc(T& dest, uint bit)
	{
		bool result;
		ullong _bit = bit;
		__asm__("lock btcq %2, %0\n" : "+m" (dest), "=@ccc" (result) : "Ir" (_bit) : "cc");
		return result;
	}
#endif
};
template <typename T>
struct atomic_storage<T, 16> : atomic_storage<T, 0>
{
#ifdef _MSC_VER
	static inline T load(const T& dest)
	{
		__m128i val = _mm_load_si128(reinterpret_cast<const __m128i*>(&dest));
		std::atomic_thread_fence(std::memory_order_acquire);
		return std::bit_cast<T>(val);
	}

	static inline bool compare_exchange(T& dest, T& comp, T exch)
	{
		struct alignas(16) llong2 { llong ll[2]; };
		const llong2 _exch = std::bit_cast<llong2>(exch);
		return _InterlockedCompareExchange128(reinterpret_cast<volatile llong*>(&dest), _exch.ll[1], _exch.ll[0], reinterpret_cast<llong*>(&comp)) != 0;
	}

	static inline T exchange(T& dest, T value)
	{
		struct alignas(16) llong2 { llong ll[2]; };
		const llong2 _value = std::bit_cast<llong2>(value);

		const auto llptr = reinterpret_cast<volatile llong*>(&dest);
		llong2 cmp{ llptr[0], llptr[1] };
		while (!_InterlockedCompareExchange128(llptr, _value.ll[1], _value.ll[0], cmp.ll));
		return std::bit_cast<T>(cmp);
	}

	static inline void store(T& dest, T value)
	{
		exchange(dest, value);
	}

	static inline void release(T& dest, T value)
	{
		std::atomic_thread_fence(std::memory_order_release);
		_mm_store_si128(reinterpret_cast<__m128i*>(&dest), std::bit_cast<__m128i>(value));
	}
#else
	static inline T load(const T& dest)
	{
		__m128i val = _mm_load_si128(reinterpret_cast<const __m128i*>(&dest));
		__atomic_thread_fence(__ATOMIC_ACQUIRE);
		return std::bit_cast<T>(val);
	}

	static inline bool compare_exchange(T& dest, T& comp, T exch)
	{
		bool result;
		ullong cmp_lo = 0;
		ullong cmp_hi = 0;
		ullong exc_lo = 0;
		ullong exc_hi = 0;

		if constexpr (std::is_same_v<T, u128> || std::is_same_v<T, s128>)
		{
			cmp_lo = comp;
			cmp_hi = comp >> 64;
			exc_lo = exch;
			exc_hi = exch >> 64;
		}
		else
		{
			std::memcpy(&cmp_lo, reinterpret_cast<char*>(&comp) + 0, 8);
			std::memcpy(&cmp_hi, reinterpret_cast<char*>(&comp) + 8, 8);
			std::memcpy(&exc_lo, reinterpret_cast<char*>(&exch) + 0, 8);
			std::memcpy(&exc_hi, reinterpret_cast<char*>(&exch) + 8, 8);
		}

		__asm__ volatile("lock cmpxchg16b %1;"
			: "=@ccz" (result)
			, "+m" (dest)
			, "+d" (cmp_hi)
			, "+a" (cmp_lo)
			: "c" (exc_hi)
			, "b" (exc_lo)
			: "cc");

		if constexpr (std::is_same_v<T, u128> || std::is_same_v<T, s128>)
		{
			comp = T{cmp_hi} << 64 | cmp_lo;
		}
		else
		{
			std::memcpy(reinterpret_cast<char*>(&comp) + 0, &cmp_lo, 8);
			std::memcpy(reinterpret_cast<char*>(&comp) + 8, &cmp_hi, 8);
		}

		return result;
	}

	static inline T exchange(T& dest, T value)
	{
		return std::bit_cast<T>(__sync_lock_test_and_set(reinterpret_cast<u128*>(&dest), std::bit_cast<u128>(value)));
	}

	static inline void store(T& dest, T value)
	{
		exchange(dest, value);
	}

	static inline void release(T& dest, T value)
	{
		__atomic_thread_fence(__ATOMIC_RELEASE);
		_mm_store_si128(reinterpret_cast<__m128i*>(&dest), std::bit_cast<__m128i>(value));
	}
#endif

	// TODO
};
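// Added illustration (not part of the original source): the 16-byte specialization above is what
// backs lock-free 128-bit values such as u128. A manual double-width CAS loop, assuming u128
// supports the arithmetic shown, could look like:
//
//   alignas(16) u128 value{};
//   u128 expected = atomic_storage<u128>::load(value);
//   while (!atomic_storage<u128>::compare_exchange(value, expected, expected + 1))
//   {
//       // on failure, 'expected' has been refreshed with the current contents; just retry
//   }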
// Atomic type with lock-free and standard layout guarantees (and appropriate limitations)
template <typename T, std::size_t Align = alignof(T)>
class atomic_t
{
protected:
	using type = typename std::remove_cv<T>::type;

	using ptr_rt = std::conditional_t<std::is_pointer_v<type>, ullong, type>;

	static_assert(alignof(type) == sizeof(type), "atomic_t<> error: unexpected alignment, use alignas() if necessary");

	alignas(Align) type m_data;

public:
	atomic_t() noexcept = default;

	atomic_t(const atomic_t&) = delete;

	atomic_t& operator =(const atomic_t&) = delete;

	// Define simple type
	using simple_type = simple_t<T>;

	constexpr atomic_t(const type& value) noexcept
		: m_data(value)
	{
	}

	// Unsafe direct access
	type& raw()
	{
		return m_data;
	}

	// Unsafe direct access
	const type& raw() const
	{
		return m_data;
	}

	// Atomically compare data with cmp, replace with exch if equal, return previous data value anyway
	type compare_and_swap(const type& cmp, const type& exch)
	{
		type old = cmp;
		atomic_storage<type>::compare_exchange(m_data, old, exch);
		return old;
	}

	// Atomically compare data with cmp, replace with exch if equal, return true if data was replaced
	bool compare_and_swap_test(const type& cmp, const type& exch)
	{
		type old = cmp;
		return atomic_storage<type>::compare_exchange(m_data, old, exch);
	}

	// As in std::atomic
	bool compare_exchange(type& cmp_and_old, const type& exch)
	{
		return atomic_storage<type>::compare_exchange(m_data, cmp_and_old, exch);
	}

	// Atomic operation; returns old value, or pair of old value and return value (cancel op if evaluates to false)
	template <typename F, typename RT = std::invoke_result_t<F, T&>>
	std::conditional_t<std::is_void_v<RT>, type, std::pair<type, RT>> fetch_op(F func)
	{
		type _new, old = atomic_storage<type>::load(m_data);

		while (true)
		{
			_new = old;

			if constexpr (std::is_void_v<RT>)
			{
				std::invoke(func, _new);

				if (atomic_storage<type>::compare_exchange(m_data, old, _new)) [[likely]]
				{
					return old;
				}
			}
			else
			{
				RT ret = std::invoke(func, _new);

				if (!ret || atomic_storage<type>::compare_exchange(m_data, old, _new)) [[likely]]
				{
					return {old, std::move(ret)};
				}
			}
		}
	}

	// Atomic operation; returns function result value, function is the lambda
	template <typename F, typename RT = std::invoke_result_t<F, T&>>
	RT atomic_op(F func)
	{
		type _new, old = atomic_storage<type>::load(m_data);

		while (true)
		{
			_new = old;

			if constexpr (std::is_void_v<RT>)
			{
				std::invoke(func, _new);

				if (atomic_storage<type>::compare_exchange(m_data, old, _new)) [[likely]]
				{
					return;
				}
			}
			else
			{
				RT result = std::invoke(func, _new);

				if (atomic_storage<type>::compare_exchange(m_data, old, _new)) [[likely]]
				{
					return result;
				}
			}
		}
	}

	// Atomically read data
	type load() const
	{
		return atomic_storage<type>::load(m_data);
	}

	// Atomically read data
	operator simple_type() const
	{
		return atomic_storage<type>::load(m_data);
	}

	// Atomically write data
	void store(const type& rhs)
	{
		atomic_storage<type>::store(m_data, rhs);
	}

	type operator =(const type& rhs)
	{
		atomic_storage<type>::store(m_data, rhs);
		return rhs;
	}

	// Atomically write data with release memory order (faster on x86)
	void release(const type& rhs)
	{
		atomic_storage<type>::release(m_data, rhs);
	}

	// Atomically replace data with value, return previous data value
	type exchange(const type& rhs)
	{
		return atomic_storage<type>::exchange(m_data, rhs);
	}

	auto fetch_add(const ptr_rt& rhs)
	{
		if constexpr(std::is_integral<type>::value)
		{
			return atomic_storage<type>::fetch_add(m_data, rhs);
		}

		return fetch_op([&](T& v)
		{
			v += rhs;
		});
	}

	auto add_fetch(const ptr_rt& rhs)
	{
		if constexpr(std::is_integral<type>::value)
		{
			return atomic_storage<type>::add_fetch(m_data, rhs);
		}

		return atomic_op([&](T& v)
		{
			v += rhs;
			return v;
		});
	}

	auto operator +=(const ptr_rt& rhs)
	{
		if constexpr(std::is_integral<type>::value)
		{
			return atomic_storage<type>::add_fetch(m_data, rhs);
		}

		return atomic_op([&](T& v)
		{
			return v += rhs;
		});
	}

	auto fetch_sub(const ptr_rt& rhs)
	{
		if constexpr(std::is_integral<type>::value)
		{
			return atomic_storage<type>::fetch_sub(m_data, rhs);
		}

		return fetch_op([&](T& v)
		{
			v -= rhs;
		});
	}

	auto sub_fetch(const ptr_rt& rhs)
	{
		if constexpr(std::is_integral<type>::value)
		{
			return atomic_storage<type>::sub_fetch(m_data, rhs);
		}

		return atomic_op([&](T& v)
		{
			v -= rhs;
			return v;
		});
	}

	auto operator -=(const ptr_rt& rhs)
	{
		if constexpr(std::is_integral<type>::value)
		{
			return atomic_storage<type>::sub_fetch(m_data, rhs);
		}

		return atomic_op([&](T& v)
		{
			return v -= rhs;
		});
	}

	auto fetch_and(const type& rhs)
	{
		if constexpr(std::is_integral<type>::value)
		{
			return atomic_storage<type>::fetch_and(m_data, rhs);
		}

		return fetch_op([&](T& v)
		{
			v &= rhs;
		});
	}

	auto and_fetch(const type& rhs)
	{
		if constexpr(std::is_integral<type>::value)
		{
			return atomic_storage<type>::and_fetch(m_data, rhs);
		}

		return atomic_op([&](T& v)
		{
			v &= rhs;
			return v;
		});
	}

	auto operator &=(const type& rhs)
	{
		if constexpr(std::is_integral<type>::value)
		{
			return atomic_storage<type>::and_fetch(m_data, rhs);
		}

		return atomic_op([&](T& v)
		{
			return v &= rhs;
		});
	}

	auto fetch_or(const type& rhs)
	{
		if constexpr(std::is_integral<type>::value)
		{
			return atomic_storage<type>::fetch_or(m_data, rhs);
		}

		return fetch_op([&](T& v)
		{
			v |= rhs;
		});
	}

	auto or_fetch(const type& rhs)
	{
		if constexpr(std::is_integral<type>::value)
		{
			return atomic_storage<type>::or_fetch(m_data, rhs);
		}

		return atomic_op([&](T& v)
		{
			v |= rhs;
			return v;
		});
	}

	auto operator |=(const type& rhs)
	{
		if constexpr(std::is_integral<type>::value)
		{
			return atomic_storage<type>::or_fetch(m_data, rhs);
		}

		return atomic_op([&](T& v)
		{
			return v |= rhs;
		});
	}

	auto fetch_xor(const type& rhs)
	{
		if constexpr(std::is_integral<type>::value)
		{
			return atomic_storage<type>::fetch_xor(m_data, rhs);
		}

		return fetch_op([&](T& v)
		{
			v ^= rhs;
		});
	}

	auto xor_fetch(const type& rhs)
	{
		if constexpr(std::is_integral<type>::value)
		{
			return atomic_storage<type>::xor_fetch(m_data, rhs);
		}

		return atomic_op([&](T& v)
		{
			v ^= rhs;
			return v;
		});
	}

	auto operator ^=(const type& rhs)
	{
		if constexpr(std::is_integral<type>::value)
		{
			return atomic_storage<type>::xor_fetch(m_data, rhs);
		}

		return atomic_op([&](T& v)
		{
			return v ^= rhs;
		});
	}

	auto operator ++()
	{
		if constexpr(std::is_integral<type>::value)
		{
			return atomic_storage<type>::inc_fetch(m_data);
		}

		return atomic_op([](T& v)
		{
			return ++v;
		});
	}

	auto operator --()
	{
		if constexpr(std::is_integral<type>::value)
		{
			return atomic_storage<type>::dec_fetch(m_data);
		}

		return atomic_op([](T& v)
		{
			return --v;
		});
	}

	auto operator ++(int)
	{
		if constexpr(std::is_integral<type>::value)
		{
			return atomic_storage<type>::fetch_inc(m_data);
		}

		return atomic_op([](T& v)
		{
			return v++;
		});
	}

	auto operator --(int)
	{
		if constexpr(std::is_integral<type>::value)
		{
			return atomic_storage<type>::fetch_dec(m_data);
		}

		return atomic_op([](T& v)
		{
			return v--;
		});
	}

	// Conditionally decrement
	bool try_dec(simple_type greater_than = std::numeric_limits<simple_type>::min())
	{
		type _new, old = atomic_storage<type>::load(m_data);

		while (true)
		{
			_new = old;

			if (!(_new > greater_than))
			{
				return false;
			}

			_new -= 1;

			if (atomic_storage<type>::compare_exchange(m_data, old, _new)) [[likely]]
			{
				return true;
			}
		}
	}

	// Conditionally increment
	bool try_inc(simple_type less_than = std::numeric_limits<simple_type>::max())
	{
		type _new, old = atomic_storage<type>::load(m_data);

		while (true)
		{
			_new = old;

			if (!(_new < less_than))
			{
				return false;
			}

			_new += 1;

			if (atomic_storage<type>::compare_exchange(m_data, old, _new)) [[likely]]
			{
				return true;
			}
		}
	}

	bool bts(uint bit)
	{
		return atomic_storage<type>::bts(m_data, bit);
	}

	bool btr(uint bit)
	{
		return atomic_storage<type>::btr(m_data, bit);
	}

	// Timeout is discouraged
	void wait(type old_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const noexcept
	{
		if constexpr (sizeof(T) <= 8)
		{
			const __m128i old = _mm_cvtsi64_si128(std::bit_cast<get_uint_t<sizeof(T)>>(old_value));
			atomic_storage_futex::wait(&m_data, sizeof(T), old, static_cast<u64>(timeout), _mm_set1_epi64x(-1));
		}
		else if constexpr (sizeof(T) == 16)
		{
			const __m128i old = std::bit_cast<__m128i>(old_value);
			atomic_storage_futex::wait(&m_data, sizeof(T), old, static_cast<u64>(timeout), _mm_set1_epi64x(-1));
		}
	}

	// Overload with mask (only selected bits are checked), timeout is discouraged
	void wait(type old_value, type mask_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf)
	{
		if constexpr (sizeof(T) <= 8)
		{
			const __m128i old = _mm_cvtsi64_si128(std::bit_cast<get_uint_t<sizeof(T)>>(old_value));
			const __m128i mask = _mm_cvtsi64_si128(std::bit_cast<get_uint_t<sizeof(T)>>(mask_value));
			atomic_storage_futex::wait(&m_data, sizeof(T), old, static_cast<u64>(timeout), mask);
		}
		else if constexpr (sizeof(T) == 16)
		{
			const __m128i old = std::bit_cast<__m128i>(old_value);
			const __m128i mask = std::bit_cast<__m128i>(mask_value);
			atomic_storage_futex::wait(&m_data, sizeof(T), old, static_cast<u64>(timeout), mask);
		}
	}

	void notify_one() noexcept
	{
		atomic_storage_futex::notify_one(&m_data);
	}

	void notify_all() noexcept
	{
		atomic_storage_futex::notify_all(&m_data);
	}
};
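// Added usage sketch (illustrative only; the variable names are hypothetical):
//
//   atomic_t<u32> flags = 0;
//
//   flags |= 1;                         // lock-free RMW mapped onto atomic_storage<u32>
//   const u32 prev = flags.fetch_or(2); // returns the previous value
//
//   // Arbitrary read-modify-write through a lambda, retried with CAS until it applies cleanly:
//   flags.atomic_op([](u32& v)
//   {
//       v = (v << 1) | 1;
//   });
//
//   // C++20-style blocking: sleep while the value is still 0, wake waiters after a change.
//   flags.wait(0);
//   flags.notify_one();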
template <typename T, unsigned BitWidth = 0>
class atomic_with_lock_bit
{
	// Simply internal type
	using type = std::conditional_t<std::is_pointer_v<T>, std::uintptr_t, T>;

	// Used for pointer arithmetics
	using ptr_rt = std::conditional_t<std::is_pointer_v<T>, ullong, T>;

	static constexpr auto c_lock_bit = BitWidth + 1;
	static constexpr auto c_dirty = type{1} << BitWidth;

	// Check space for lock bit
	static_assert(BitWidth <= sizeof(T) * 8 - 2, "No space for lock bit");
	static_assert(sizeof(T) <= 8 || (!std::is_pointer_v<T> && !std::is_integral_v<T>), "Not supported");
	static_assert(!std::is_same_v<std::decay_t<T>, bool>, "Bool not supported, use integral with size 1.");
	static_assert(std::is_pointer_v<T> == (BitWidth == 0), "BitWidth should be 0 for pointers");
	static_assert(!std::is_pointer_v<T> || (alignof(std::remove_pointer_t<T>) >= 4), "Pointer type should have align 4 or more");

	atomic_t<type> m_data;

public:
	using base_type = T;

	static bool is_locked(type old_val)
	{
		if constexpr (std::is_signed_v<type> && BitWidth == sizeof(T) * 8 - 2)
		{
			return old_val < 0;
		}
		else if constexpr (std::is_pointer_v<T>)
		{
			return (old_val & 2) != 0;
		}
		else
		{
			return (old_val & (type{2} << BitWidth)) != 0;
		}
	}

	static type clamp_value(type old_val)
	{
		if constexpr (std::is_pointer_v<T>)
		{
			return old_val & (~type{0} << 2);
		}
		else
		{
			return old_val & ((type{1} << BitWidth) - type{1});
		}
	}

	// Define simple type
	using simple_type = simple_t<T>;

	atomic_with_lock_bit() noexcept = default;

	atomic_with_lock_bit(const atomic_with_lock_bit&) = delete;

	atomic_with_lock_bit& operator =(const atomic_with_lock_bit&) = delete;

	constexpr atomic_with_lock_bit(T value) noexcept
		: m_data(clamp_value(reinterpret_cast<type>(value)))
	{
	}

	// Unsafe read
	type raw_load() const
	{
		return clamp_value(m_data.load());
	}

	// Unsafe write and unlock
	void raw_release(type value)
	{
		m_data.release(clamp_value(value));

		// TODO: test dirty bit for notification
		if (true)
		{
			m_data.notify_all();
		}
	}

	void lock()
	{
		while (m_data.bts(c_lock_bit)) [[unlikely]]
		{
			type old_val = m_data.load();

			if (is_locked(old_val)) [[likely]]
			{
				if ((old_val & c_dirty) == 0)
				{
					// Try to set dirty bit if not set already
					if (!m_data.compare_and_swap_test(old_val, old_val | c_dirty))
					{
						continue;
					}
				}

				m_data.wait(old_val | c_dirty);
				old_val = m_data.load();
			}
		}
	}

	bool try_lock()
	{
		return !m_data.bts(c_lock_bit);
	}

	void unlock()
	{
		type old_val = m_data.load();

		if constexpr (std::is_pointer_v<T>)
		{
			m_data.and_fetch(~type{0} << 2);
		}
		else
		{
			m_data.and_fetch((type{1} << BitWidth) - type{1});
		}

		// Test dirty bit for notification
		if (old_val & c_dirty)
		{
			m_data.notify_all();
		}
	}

	T load()
	{
		type old_val = m_data.load();

		while (is_locked(old_val)) [[unlikely]]
		{
			if ((old_val & c_dirty) == 0)
			{
				if (!m_data.compare_and_swap_test(old_val, old_val | c_dirty))
				{
					old_val = m_data.load();
					continue;
				}
			}

			m_data.wait(old_val | c_dirty);
			old_val = m_data.load();
		}

		return reinterpret_cast<T>(clamp_value(old_val));
	}

	void store(T value)
	{
		static_cast<void>(exchange(value));
	}

	T exchange(T value)
	{
		type old_val = m_data.load();

		while (is_locked(old_val) || !m_data.compare_and_swap_test(old_val, clamp_value(reinterpret_cast<type>(value)))) [[unlikely]]
		{
			if ((old_val & c_dirty) == 0)
			{
				if (!m_data.compare_and_swap_test(old_val, old_val | c_dirty))
				{
					old_val = m_data.load();
					continue;
				}
			}

			m_data.wait(old_val);
			old_val = m_data.load();
		}

		return reinterpret_cast<T>(clamp_value(old_val));
	}

	T compare_and_swap(T cmp, T exch)
	{
		static_cast<void>(compare_exchange(cmp, exch));
		return cmp;
	}

	bool compare_and_swap_test(T cmp, T exch)
	{
		return compare_exchange(cmp, exch);
	}

	bool compare_exchange(T& cmp_and_old, T exch)
	{
		type old_val = m_data.load();
		type expected = clamp_value(reinterpret_cast<type>(cmp_and_old));
		type new_val = clamp_value(reinterpret_cast<type>(exch));

		while (is_locked(old_val) || (old_val == expected && !m_data.compare_and_swap_test(expected, new_val))) [[unlikely]]
		{
			if (old_val == expected)
			{
				old_val = m_data.load();
				continue;
			}

			if ((old_val & c_dirty) == 0)
			{
				if (!m_data.compare_and_swap_test(old_val, old_val | c_dirty))
				{
					old_val = m_data.load();
					continue;
				}
			}

			m_data.wait(old_val);
			old_val = m_data.load();
		}

		cmp_and_old = reinterpret_cast<T>(clamp_value(old_val));

		return clamp_value(old_val) == expected;
	}

	template <typename F, typename RT = std::invoke_result_t<F, T&>>
	RT atomic_op(F func)
	{
		type _new, old;
		old = m_data.load();

		while (true)
		{
			if (is_locked(old)) [[unlikely]]
			{
				if ((old & c_dirty) == 0)
				{
					if (!m_data.compare_and_swap_test(old, old | c_dirty))
					{
						old = m_data.load();
						continue;
					}
				}

				m_data.wait(old);
				old = m_data.load();
				continue;
			}

			_new = old;

			if constexpr (std::is_void_v<RT>)
			{
				std::invoke(func, reinterpret_cast<T&>(_new));

				if (atomic_storage<type>::compare_exchange(m_data.raw(), old, clamp_value(_new))) [[likely]]
				{
					return;
				}
			}
			else
			{
				RT result = std::invoke(func, reinterpret_cast<T&>(_new));

				if (atomic_storage<type>::compare_exchange(m_data.raw(), old, clamp_value(_new))) [[likely]]
				{
					return result;
				}
			}
		}
	}
	auto fetch_add(const ptr_rt& rhs)
	{
		return atomic_op([&](T& v)
		{
			// Compute the new value without modifying v first, so the previous value is returned
			return std::exchange(v, v + rhs);
		});
	}

	auto operator +=(const ptr_rt& rhs)
	{
		return atomic_op([&](T& v)
		{
			return v += rhs;
		});
	}

	auto fetch_sub(const ptr_rt& rhs)
	{
		return atomic_op([&](T& v)
		{
			return std::exchange(v, v - rhs);
		});
	}

	auto operator -=(const ptr_rt& rhs)
	{
		return atomic_op([&](T& v)
		{
			return v -= rhs;
		});
	}

	auto fetch_and(const T& rhs)
	{
		return atomic_op([&](T& v)
		{
			return std::exchange(v, v & rhs);
		});
	}

	auto operator &=(const T& rhs)
	{
		return atomic_op([&](T& v)
		{
			return v &= rhs;
		});
	}

	auto fetch_or(const T& rhs)
	{
		return atomic_op([&](T& v)
		{
			return std::exchange(v, v | rhs);
		});
	}

	auto operator |=(const T& rhs)
	{
		return atomic_op([&](T& v)
		{
			return v |= rhs;
		});
	}

	auto fetch_xor(const T& rhs)
	{
		return atomic_op([&](T& v)
		{
			return std::exchange(v, v ^ rhs);
		});
	}
	auto operator ^=(const T& rhs)
	{
		return atomic_op([&](T& v)
		{
			return v ^= rhs;
		});
	}

	auto operator ++()
	{
		return atomic_op([](T& v)
		{
			return ++v;
		});
	}

	auto operator --()
	{
		return atomic_op([](T& v)
		{
			return --v;
		});
	}

	auto operator ++(int)
	{
		return atomic_op([](T& v)
		{
			return v++;
		});
	}

	auto operator --(int)
	{
		return atomic_op([](T& v)
		{
			return v--;
		});
	}
};
using fat_atomic_u1 = atomic_with_lock_bit<u8, 1>;
using fat_atomic_u6 = atomic_with_lock_bit<u8, 6>;
using fat_atomic_s6 = atomic_with_lock_bit<s8, 6>;
using fat_atomic_u8 = atomic_with_lock_bit<u16, 8>;
using fat_atomic_s8 = atomic_with_lock_bit<s16, 8>;

using fat_atomic_u14 = atomic_with_lock_bit<u16, 14>;
using fat_atomic_s14 = atomic_with_lock_bit<s16, 14>;
using fat_atomic_u16 = atomic_with_lock_bit<u32, 16>;
using fat_atomic_s16 = atomic_with_lock_bit<s32, 16>;

using fat_atomic_u30 = atomic_with_lock_bit<u32, 30>;
using fat_atomic_s30 = atomic_with_lock_bit<s32, 30>;
using fat_atomic_u32 = atomic_with_lock_bit<u64, 32>;
using fat_atomic_s32 = atomic_with_lock_bit<s64, 32>;
using fat_atomic_u62 = atomic_with_lock_bit<u64, 62>;
using fat_atomic_s62 = atomic_with_lock_bit<s64, 62>;

template <typename Ptr>
using fat_atomic_ptr = atomic_with_lock_bit<Ptr*, 0>;
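// Added usage sketch (not part of the original header): a "fat" atomic packs the payload plus a
// dirty/lock bit pair into one machine word, so one object doubles as a tiny lock and a value.
// Names below are hypothetical.
//
//   fat_atomic_u32 counter = 0;          // stored in a u64: 32 value bits + dirty/lock bits
//
//   counter += 1;                        // normal atomic use; blocks while another thread holds the lock
//
//   counter.lock();                      // take the embedded lock (lock/try_lock/unlock, usable with std::lock)
//   const auto v = counter.raw_load();   // unsafe accessors are fine while the lock is held
//   counter.raw_release(v + 1);          // write back, clear the lock bit and wake any waiters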
namespace detail
{
	template <typename Arg, typename... Args>
	struct mao_func_t
	{
		template <typename... TArgs>
		using RT = typename mao_func_t<Args...>::template RT<TArgs..., Arg>;
	};

	template <typename Arg>
	struct mao_func_t<Arg>
	{
		template <typename... TArgs>
		using RT = std::invoke_result_t<Arg, simple_t<TArgs>&...>;
	};

	template <typename... Args>
	using mao_result = typename mao_func_t<std::decay_t<Args>...>::template RT<>;

	template <typename RT, typename... Args, std::size_t... I>
	RT multi_atomic_op(std::index_sequence<I...>, Args&&... args)
	{
		// Tie all arguments (the function comes last)
		auto vars = std::tie(args...);

		// Lock all variables
		std::lock(std::get<I>(vars)...);

		// Load initial values
		auto values = std::make_tuple(std::get<I>(vars).raw_load()...);

		if constexpr (std::is_void_v<RT>)
		{
			std::invoke(std::get<(sizeof...(Args) - 1)>(vars), reinterpret_cast<typename std::remove_reference_t<decltype(std::get<I>(vars))>::base_type&>(std::get<I>(values))...);

			// Unlock and return
			(std::get<I>(vars).raw_release(std::get<I>(values)), ...);
		}
		else
		{
			RT result = std::invoke(std::get<(sizeof...(Args) - 1)>(vars), reinterpret_cast<typename std::remove_reference_t<decltype(std::get<I>(vars))>::base_type&>(std::get<I>(values))...);

			// Unlock and return the result
			(std::get<I>(vars).raw_release(std::get<I>(values)), ...);

			return result;
		}
	}
}

// Atomic operation; returns function result value, function is the lambda
template <typename... Args, typename RT = detail::mao_result<Args...>>
RT multi_atomic_op(Args&&... args)
{
	return detail::multi_atomic_op<RT>(std::make_index_sequence<(sizeof...(Args) - 1)>(), std::forward<Args>(args)...);
}
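// Added usage sketch (illustrative; assumes simple_t maps the fat atomics to their underlying
// integer types here): multi_atomic_op locks every fat atomic passed to it, runs the lambda on
// plain references to the stored values, then releases all locks and wakes waiters.
//
//   fat_atomic_u32 from = 10, to = 0;
//
//   multi_atomic_op(from, to, [](u32& a, u32& b)
//   {
//       b += a; // both updates become visible together with respect to other multi_atomic_op users
//       a = 0;
//   });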