#include "atomic.hpp"

#if defined(__linux__)
#define USE_FUTEX
#elif !defined(_WIN32)
#define USE_STD
#endif

#include "Utilities/sync.h"

#include <utility>
#include <mutex>
#include <condition_variable>
#include <chrono>
#include <iterator>
#include <memory>
#include <cstdlib>

// Hashtable size factor (can be set to 0 to stress-test collisions)
static constexpr uint s_hashtable_power = 16;

// Total number of entries, should be a power of 2.
static constexpr std::uintptr_t s_hashtable_size = 1u << s_hashtable_power;

// Pointer mask without bits used as hash, assuming signed 48-bit pointers.
static constexpr u64 s_pointer_mask = s_hashtable_power > 7 ? 0xffff'ffff'ffff & ~(s_hashtable_size - 1) : 0xffff'ffff'ffff;

// Max number of waiters is 32767.
static constexpr u64 s_waiter_mask = s_hashtable_power > 7 ? 0x7fff'0000'0000'0000 : 0x7f00'0000'0000'0000;

// Bit indicates that more than one distinct address hashed to this slot (collision).
static constexpr u64 s_collision_bit = 0x8000'0000'0000'0000;

// Allocated slot with secondary table.
static constexpr u64 s_slot_mask = ~(s_waiter_mask | s_pointer_mask | s_collision_bit);
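
// Layout of a slot's 64-bit control word under the default s_hashtable_power == 16
// (a sketch derived from the masks above, for orientation only):
//
//   [63]      collision bit (s_collision_bit)
//   [62:48]   waiter counter (s_waiter_mask, up to 32767 waiters)
//   [47:16]   pointer bits not consumed by the hash (s_pointer_mask)
//   [15:0]    secondary-table slot index (s_slot_mask)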

// Helper to get least significant set bit from 64-bit masks
template <u64 Mask>
static constexpr u64 one_v = Mask & (0 - Mask);
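// For example, one_v<s_waiter_mask> is 0x0001'0000'0000'0000 when s_hashtable_power > 7,
// so adding or subtracting it adjusts the packed waiter counter by exactly one.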

// Callback for wait() function, returns false if wait should return
static thread_local bool(*s_tls_wait_cb)(const void* data) = [](const void*){ return true; };

// Callback for notification functions for optimizations
static thread_local void(*s_tls_notify_cb)(const void* data, u64 progress) = [](const void*, u64){};

// Compare data in memory with old value, and return true if they are equal
template <bool CheckCb = true>
static NEVER_INLINE bool
#ifdef _WIN32
__vectorcall
#endif
ptr_cmp(const void* data, u32 size, __m128i old128, __m128i mask128)
{
	if constexpr (CheckCb)
	{
		if (!s_tls_wait_cb(data))
		{
			return false;
		}
	}

	const u64 old_value = _mm_cvtsi128_si64(old128);
	const u64 mask = _mm_cvtsi128_si64(mask128);

	switch (size)
	{
	case 1: return (reinterpret_cast<const atomic_t<u8>*>(data)->load() & mask) == (old_value & mask);
	case 2: return (reinterpret_cast<const atomic_t<u16>*>(data)->load() & mask) == (old_value & mask);
	case 4: return (reinterpret_cast<const atomic_t<u32>*>(data)->load() & mask) == (old_value & mask);
	case 8: return (reinterpret_cast<const atomic_t<u64>*>(data)->load() & mask) == (old_value & mask);
	case 16:
	{
		const auto v0 = std::bit_cast<__m128i>(atomic_storage<u128>::load(*reinterpret_cast<const u128*>(data)));
		const auto v1 = _mm_xor_si128(v0, old128);
		const auto v2 = _mm_and_si128(v1, mask128);
		const auto v3 = _mm_packs_epi16(v2, v2);

		if (_mm_cvtsi128_si64(v3) == 0)
		{
			return true;
		}

		break;
	}
	default:
	{
		fprintf(stderr, "ptr_cmp(): bad size (size=%u)" HERE "\n", size);
		std::abort();
	}
	}

	return false;
}
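// Note on the 16-byte path above: _mm_packs_epi16 narrows each 16-bit lane with signed
// saturation, and only a zero lane packs to zero. Packing (v2, v2) folds all eight
// lanes of the masked XOR into the low 64 bits, so a zero result means the full
// 128-bit value matched under the mask.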

// Returns true if the masked values differ (or the masks merely overlap, in mask-only mode), or if an argument is invalid
static bool
#ifdef _WIN32
__vectorcall
#endif
cmp_mask(u32 size1, __m128i mask1, __m128i val1, u32 size2, __m128i mask2, __m128i val2)
{
	// In force wake up, one of the size arguments is zero
	const u32 size = std::min(size1, size2);

	if (!size) [[unlikely]]
	{
		return true;
	}

	// Compare only masks, new value is not available in this mode
	if ((size1 | size2) == umax)
	{
		// Simple mask overlap
		const auto v0 = _mm_and_si128(mask1, mask2);
		const auto v1 = _mm_packs_epi16(v0, v0);
		return _mm_cvtsi128_si64(v1) != 0;
	}

	// Generate masked value inequality bits
	const auto v0 = _mm_and_si128(_mm_and_si128(mask1, mask2), _mm_xor_si128(val1, val2));

	if (size <= 8)
	{
		// Generate sized mask
		const u64 mask = UINT64_MAX >> ((64 - size * 8) & 63);

		if (!(_mm_cvtsi128_si64(v0) & mask))
		{
			return false;
		}
	}
	else if (size == 16)
	{
		if (!_mm_cvtsi128_si64(_mm_packs_epi16(v0, v0)))
		{
			return false;
		}
	}
	else
	{
		fprintf(stderr, "cmp_mask(): bad size (size1=%u, size2=%u)" HERE "\n", size1, size2);
		std::abort();
	}

	return true;
}
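// Rough semantics (an illustrative reading, not an exhaustive spec): a notifier that
// wrote new_value over bytes selected by its mask wakes a waiter iff the two masks
// intersect and, when both values are known, new_value differs from the waiter's
// expected old value under the combined mask; equal masked values mean the waiter
// would just observe its expected value again, so it is skipped.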

namespace atomic_wait
{
	// Essentially a fat semaphore
	struct alignas(64) cond_handle
	{
#ifdef _WIN32
		u64 tid = GetCurrentThreadId();
#else
		u64 tid = reinterpret_cast<u64>(pthread_self());
#endif
		atomic_t<u32> sync{};
		u32 size{};
		u64 tsc0{};
		const void* ptr{};
		__m128i mask{};
		__m128i oldv{};

#ifdef USE_STD
		// Standard CV/mutex pair (often contains pthread_cond_t/pthread_mutex_t)
		std::condition_variable cond;
		std::mutex mtx;
#endif
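
		// States of the "sync" field as used below (a summary, not an enum in the code):
		//   0 - handle not armed yet (or freed)
		//   1 - armed, waiter is (or is about to go) sleeping
		//   2 - notified normally by a notify function
		//   3 - woken forcibly via forced_wakeup()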

		bool forced_wakeup()
		{
			const auto [_old, ok] = sync.fetch_op([](u32& val)
			{
				if (val == 1 || val == 2)
				{
					val = 3;
					return true;
				}

				return false;
			});

			// Prevent collision between normal wake-up and forced one
			return ok && _old == 1;
		}

		void alert_native()
		{
#ifdef USE_FUTEX
			// Use "wake all" arg for robustness, only 1 thread is expected
			futex(&sync, FUTEX_WAKE_PRIVATE, 0x7fff'ffff);
#elif defined(USE_STD)
			// Not super efficient: locking is required to avoid lost notifications
			mtx.lock();
			mtx.unlock();
			cond.notify_all();
#elif defined(_WIN32)
			if (NtWaitForAlertByThreadId)
			{
				// Sets some sticky alert bit, at least I believe so
				NtAlertThreadByThreadId(tid);
			}
			else
			{
				// Can wait in rare cases, which is its annoying weakness
				NtReleaseKeyedEvent(nullptr, &sync, 1, nullptr);
			}
#endif
		}

		bool try_alert_native()
		{
#if defined(USE_FUTEX)
			return false;
#elif defined(USE_STD)
			// Optimistic non-blocking path
			if (mtx.try_lock())
			{
				mtx.unlock();
				cond.notify_all();
				return true;
			}

			return false;
#elif defined(_WIN32)
			if (NtAlertThreadByThreadId)
			{
				// Don't notify prematurely with this API
				return false;
			}

			static LARGE_INTEGER instant{};

			if (NtReleaseKeyedEvent(nullptr, &sync, 1, &instant) != NTSTATUS_SUCCESS)
			{
				// Failed to notify immediately
				return false;
			}

			return true;
#endif
		}
	};

#ifndef USE_STD
	static_assert(sizeof(cond_handle) == 64);
#endif
}

// Max allowed thread number is chosen to fit in 16 bits
static std::aligned_storage_t<sizeof(atomic_wait::cond_handle), alignof(atomic_wait::cond_handle)> s_cond_list[UINT16_MAX + 1]{};
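// Index 0 of s_cond_list is never handed out (cond_alloc skips it), so a zero value
// stored in a slot's semaphore can unambiguously mean "empty".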

// Used to allow concurrent notifying
static atomic_t<u32> s_cond_refs[UINT16_MAX + 1]{};

// Allocation bits
static atomic_t<u64, 64> s_cond_bits[(UINT16_MAX + 1) / 64]{};

// Allocation semaphore
static atomic_t<u32, 64> s_cond_sema{0};

static u32 cond_alloc()
{
	// Determine whether there is a free slot or not
	if (!s_cond_sema.try_inc(UINT16_MAX + 1))
	{
		return 0;
	}

	// Diversify search start points to reduce contention and increase immediate success chance
#ifdef _WIN32
	const u32 start = GetCurrentProcessorNumber();
#elif __linux__
	const u32 start = sched_getcpu();
#else
	const u32 start = __rdtsc();
#endif

	for (u32 i = start;; i++)
	{
		const u32 group = i % ::size32(s_cond_bits);

		const auto [bits, ok] = s_cond_bits[group].fetch_op([](u64& bits)
		{
			if (~bits)
			{
				// Set lowest clear bit
				bits |= bits + 1;
				return true;
			}

			return false;
		});

		if (ok) [[likely]]
		{
			// Find lowest clear bit
			const u32 id = group * 64 + std::countr_one(bits);

			if (id == 0) [[unlikely]]
			{
				// Special case, set bit and continue
				continue;
			}

			// Construct inplace before it can be used
			new (s_cond_list + id) atomic_wait::cond_handle();

			// Add first reference
			verify(HERE), !s_cond_refs[id]++;

			return id;
		}
	}

	// Unreachable
	std::abort();
	return 0;
}

static atomic_wait::cond_handle* cond_get(u32 cond_id)
{
	if (cond_id - 1 < u32{UINT16_MAX}) [[likely]]
	{
		return std::launder(reinterpret_cast<atomic_wait::cond_handle*>(s_cond_list + cond_id));
	}

	return nullptr;
}

static void cond_free(u32 cond_id)
{
	if (cond_id - 1 >= u32{UINT16_MAX})
	{
		fprintf(stderr, "cond_free(): bad id %u" HERE "\n", cond_id);
		std::abort();
	}

	// Dereference, destroy on last ref
	if (--s_cond_refs[cond_id])
	{
		return;
	}

	// Call the destructor
	cond_get(cond_id)->~cond_handle();

	// Remove the allocation bit
	s_cond_bits[cond_id / 64] &= ~(1ull << (cond_id % 64));

	// Release the semaphore
	s_cond_sema--;
}

static u32 cond_lock(atomic_t<u16>* sema)
{
	while (const u32 cond_id = sema->load())
	{
		const auto [old, ok] = s_cond_refs[cond_id].fetch_op([](u32& ref)
		{
			if (!ref || ref == UINT32_MAX)
			{
				// Don't reference already deallocated semaphore
				return false;
			}

			ref++;
			return true;
		});

		if (ok)
		{
			return cond_id;
		}

		if (old == UINT32_MAX)
		{
			fmt::raw_error("Thread limit " STRINGIZE(UINT32_MAX) " for a single address reached in atomic notifier.");
		}

		if (sema->load() != cond_id)
		{
			// Try again if it changed
			continue;
		}
		else
		{
			break;
		}
	}

	return 0;
}
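// Note: cond_lock pins the cond_handle by bumping its reference count before the
// notifier inspects it, so a concurrent cond_free() from the exiting waiter cannot
// destroy the handle mid-notification; the notifier's matching cond_free() performs
// the actual destruction if it happens to drop the last reference.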

namespace atomic_wait
{
#define MAX_THREADS (56)

	struct alignas(128) sync_var
	{
		constexpr sync_var() noexcept = default;

		// Reference counter, owning pointer, collision bit and optionally selected slot
		atomic_t<u64> addr_ref{};

	private:
		// Semaphores (allocated in reverse order), empty are zeros
		atomic_t<u16> sema_data[MAX_THREADS]{};

		// Allocated semaphore bits (to make total size 128)
		atomic_t<u64> sema_bits{};

	public:
		atomic_t<u16>* sema_alloc()
		{
			const auto [bits, ok] = sema_bits.fetch_op([](u64& bits)
			{
				if (bits + 1 < (1ull << MAX_THREADS))
				{
					// Set lowest clear bit
					bits |= bits + 1;
					return true;
				}

				return false;
			});

			if (ok) [[likely]]
			{
				// Find lowest clear bit
				return get_sema(std::countr_one(bits));
			}

			// TODO: support extension if reached
			fmt::raw_error("Thread limit " STRINGIZE(MAX_THREADS) " for a single address reached in atomic wait.");
			return nullptr;
		}

		atomic_t<u16>* get_sema(u32 id)
		{
			verify(HERE), id < MAX_THREADS;

			return &sema_data[(MAX_THREADS - 1) - id];
		}

		u64 get_sema_bits() const
		{
			return sema_bits & ((1ull << MAX_THREADS) - 1);
		}

		void reset_sema_bit(atomic_t<u16>* sema)
		{
			verify(HERE), sema >= sema_data && sema < std::end(sema_data);

			sema_bits &= ~(1ull << ((MAX_THREADS - 1) - (sema - sema_data)));
		}

		void sema_free(atomic_t<u16>* sema)
		{
			if (sema < sema_data || sema >= std::end(sema_data))
			{
				fprintf(stderr, "sema_free(): bad sema ptr %p" HERE "\n", sema);
				std::abort();
			}

			// Try to deallocate semaphore (may be delegated to a notifier)
			cond_free(sema->exchange(0));

			// Clear sema bit
			reset_sema_bit(sema);
		}
	};

	static_assert(sizeof(sync_var) == 128);

#undef MAX_THREADS
}

// Main hashtable for atomic wait.
alignas(128) static atomic_wait::sync_var s_hashtable[s_hashtable_size]{};
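// How an address maps into the table (a walkthrough of the constants above): the low
// s_hashtable_power bits of the pointer pick the top-level bucket via
// iptr % s_hashtable_size, while the remaining pointer bits are stored in the bucket's
// addr_ref word (under s_pointer_mask) so colliding addresses can be told apart.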

namespace atomic_wait
{
	struct slot_info
	{
		constexpr slot_info() noexcept = default;

		// Branch extension
		atomic_wait::sync_var branch[48 - s_hashtable_power]{};
	};
}

// Number of search groups (defines max slot branch count as gcount * 64)
#define MAX_SLOTS (4096)

// Array of slot branch objects
alignas(128) static atomic_wait::slot_info s_slot_list[MAX_SLOTS]{};

// Allocation bits
static atomic_t<u64, 64> s_slot_bits[MAX_SLOTS / 64]{};

// Allocation semaphore
static atomic_t<u32, 64> s_slot_sema{0};

static_assert(MAX_SLOTS % 64 == 0);

static u64 slot_alloc()
{
	// Determine whether there is a free slot or not
	if (!s_slot_sema.try_inc(MAX_SLOTS + 1))
	{
		fmt::raw_error("Hashtable extension slot limit " STRINGIZE(MAX_SLOTS) " reached in atomic wait.");
		return 0;
	}

	// Diversify search start points to reduce contention and increase immediate success chance
#ifdef _WIN32
	const u32 start = GetCurrentProcessorNumber();
#elif __linux__
	const u32 start = sched_getcpu();
#else
	const u32 start = __rdtsc();
#endif

	for (u32 i = start;; i++)
	{
		const u32 group = i % ::size32(s_slot_bits);

		const auto [bits, ok] = s_slot_bits[group].fetch_op([](u64& bits)
		{
			if (~bits)
			{
				// Set lowest clear bit
				bits |= bits + 1;
				return true;
			}

			return false;
		});

		if (ok)
		{
			// Find lowest clear bit
			return group * 64 + std::countr_one(bits);
		}
	}

	// Unreachable
	std::abort();
	return 0;
}

#undef MAX_SLOTS

static atomic_wait::sync_var* slot_get(std::uintptr_t iptr, atomic_wait::sync_var* loc, u64 lv = 0)
{
	if (!loc)
	{
		return nullptr;
	}

	const u64 value = loc->addr_ref.load();

	if ((value & s_waiter_mask) == 0)
	{
		return nullptr;
	}

	if ((value & s_pointer_mask) == (iptr & s_pointer_mask))
	{
		return loc;
	}

	if ((value & s_collision_bit) == 0)
	{
		return nullptr;
	}

	// Get the number of leading equal bits to determine subslot
	const u64 eq_bits = std::countl_zero<u64>((((iptr ^ value) & (s_pointer_mask >> lv)) | ~s_pointer_mask) << 16);

	// Proceed recursively, increment level
	return slot_get(iptr, s_slot_list[(value & s_slot_mask) / one_v<s_slot_mask>].branch + eq_bits, eq_bits + 1);
}
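// In effect, colliding addresses form a small radix-style tree hanging off the bucket:
// each level compares the still-undecided pointer bits of the searched address against
// the slot owner's, and the count of leading equal bits selects which branch entry of
// the owner's secondary slot_info to descend into (a reading of the code above).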

static void slot_free(u64 id)
{
	// Reset allocation bit
	id = (id & s_slot_mask) / one_v<s_slot_mask>;
	s_slot_bits[id / 64] &= ~(1ull << (id % 64));

	// Reset semaphore
	s_slot_sema--;
}

static void slot_free(std::uintptr_t iptr, atomic_wait::sync_var* loc, u64 lv = 0)
{
	const u64 value = loc->addr_ref.load();

	if ((value & s_pointer_mask) != (iptr & s_pointer_mask))
	{
		ASSERT(value & s_waiter_mask);
		ASSERT(value & s_collision_bit);

		// Get the number of leading equal bits to determine subslot
		const u64 eq_bits = std::countl_zero<u64>((((iptr ^ value) & (s_pointer_mask >> lv)) | ~s_pointer_mask) << 16);

		// Proceed recursively, to deallocate deepest branch first
		slot_free(iptr, s_slot_list[(value & s_slot_mask) / one_v<s_slot_mask>].branch + eq_bits, eq_bits + 1);
	}

	// Actual cleanup in reverse order
	auto [_old, ok] = loc->addr_ref.fetch_op([&](u64& value)
	{
		ASSERT(value & s_waiter_mask);

		value -= one_v<s_waiter_mask>;

		if (!(value & s_waiter_mask))
		{
			// Reset on last waiter
			value = 0;
			return 2;
		}

		return 1;
	});

	if (ok > 1 && _old & s_collision_bit)
	{
		// Deallocate slot on last waiter
		slot_free(_old);
	}
}

SAFE_BUFFERS void
#ifdef _WIN32
__vectorcall
#endif
atomic_wait_engine::wait(const void* data, u32 size, __m128i old_value, u64 timeout, __m128i mask)
{
	const std::uintptr_t iptr = reinterpret_cast<std::uintptr_t>(data);

	// Allocated slot index
	u64 slot_a = -1;

	// Found slot object
	atomic_wait::sync_var* slot = nullptr;

	auto install_op = [&](u64& value) -> u64
	{
		if ((value & s_waiter_mask) == s_waiter_mask)
		{
			// Return immediately on waiter overflow
			return 0;
		}

		if (!value || (value & s_pointer_mask) == (iptr & s_pointer_mask))
		{
			// Store pointer bits
			value |= (iptr & s_pointer_mask);
		}
		else
		{
			if ((value & s_collision_bit) == 0)
			{
				if (slot_a + 1 == 0)
				{
					// Second waiter: allocate slot and install it
					slot_a = slot_alloc() * one_v<s_slot_mask>;
				}

				value |= slot_a;
			}

			// Set collision bit
			value |= s_collision_bit;
		}

		// Add waiter
		value += one_v<s_waiter_mask>;
		return value;
	};
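
	// install_op returns 0 to signal that nothing was installed (waiter counter
	// saturated); any other return value is the updated word, which fetch_op hands
	// back so the caller can inspect the collision bit and slot index it published.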

	// Search detail
	u64 lv = 0;

	for (atomic_wait::sync_var* ptr = &s_hashtable[iptr % s_hashtable_size];;)
	{
		auto [_old, ok] = ptr->addr_ref.fetch_op(install_op);

		if (slot_a + 1)
		{
			if ((_old & s_collision_bit) == 0 && (ok & s_collision_bit) && (ok & s_slot_mask) == slot_a)
			{
				// Slot set successfully
				slot_a = -1;
			}
		}

		if (!ok)
		{
			// Expected only on top level
			if (timeout + 1 || ptr_cmp<false>(data, size, old_value, mask))
			{
				return;
			}

			// TODO
			busy_wait(30000);
			continue;
		}

		if (!_old || (_old & s_pointer_mask) == (iptr & s_pointer_mask))
		{
			// Success
			if (slot_a + 1)
			{
				// Cleanup slot if unused
				slot_free(slot_a);
				slot_a = -1;
			}

			slot = ptr;
			break;
		}

		// Get the number of leading equal bits (between iptr and slot owner)
		const u64 eq_bits = std::countl_zero<u64>((((iptr ^ ok) & (s_pointer_mask >> lv)) | ~s_pointer_mask) << 16);

		// Collision; need to go deeper
		ptr = s_slot_list[(ok & s_slot_mask) / one_v<s_slot_mask>].branch + eq_bits;

		lv = eq_bits + 1;
	}

	const u32 cond_id = cond_alloc();

	if (cond_id == 0)
	{
		fmt::raw_error("Thread limit " STRINGIZE(UINT16_MAX) " reached in atomic wait.");
	}

	auto sema = slot->sema_alloc();

	while (!sema)
	{
		if (timeout + 1 || ptr_cmp<false>(data, size, old_value, mask))
		{
			cond_free(cond_id);
			slot_free(iptr, &s_hashtable[iptr % s_hashtable_size]);
			return;
		}

		// TODO
		busy_wait(30000);
		sema = slot->sema_alloc();
	}

	// Save for notifiers
	const auto cond = cond_get(cond_id);

	// Store some info for notifiers (some may be unused)
	cond->size = size;
	cond->mask = mask;
	cond->oldv = old_value;
	cond->ptr = data;
	cond->tsc0 = __rdtsc();

	cond->sync = 1;
	sema->store(static_cast<u16>(cond_id));

#ifdef USE_STD
	// Lock mutex
	std::unique_lock lock(cond->mtx);
#endif

	// Can skip unqueue process if true
#if defined(USE_FUTEX) || defined(USE_STD)
	constexpr bool fallback = true;
#else
	bool fallback = false;
#endif

	while (ptr_cmp(data, size, old_value, mask))
	{
#ifdef USE_FUTEX
		struct timespec ts;
		ts.tv_sec = timeout / 1'000'000'000;
		ts.tv_nsec = timeout % 1'000'000'000;

		if (cond->sync.load() > 1) [[unlikely]]
		{
			// Signaled prematurely
			if (cond->sync.load() == 3 || !cond->sync.compare_and_swap_test(2, 1))
			{
				break;
			}
		}
		else
		{
			futex(&cond->sync, FUTEX_WAIT_PRIVATE, 1, timeout + 1 ? &ts : nullptr);
		}
#elif defined(USE_STD)
		if (cond->sync.load() > 1) [[unlikely]]
		{
			if (cond->sync.load() == 3 || !cond->sync.compare_and_swap_test(2, 1))
			{
				break;
			}
		}

		if (timeout + 1)
		{
			cond->cond.wait_for(lock, std::chrono::nanoseconds(timeout));
		}
		else
		{
			cond->cond.wait(lock);
		}
#elif defined(_WIN32)
		LARGE_INTEGER qw;
		qw.QuadPart = -static_cast<s64>(timeout / 100);

		if (timeout % 100)
		{
			// Round up to closest 100ns unit
			qw.QuadPart -= 1;
		}

		if (fallback) [[unlikely]]
		{
			if (cond->sync.load() == 3 || !cond->sync.compare_and_swap_test(2, 1))
			{
				fallback = false;
				break;
			}

			fallback = false;
		}

		if (NtWaitForAlertByThreadId)
		{
			switch (DWORD status = NtWaitForAlertByThreadId(cond, timeout + 1 ? &qw : nullptr))
			{
			case NTSTATUS_ALERTED: fallback = true; break;
			case NTSTATUS_TIMEOUT: break;
			default:
			{
				SetLastError(status);
				fmt::raw_verify_error("Unexpected NtWaitForAlertByThreadId result.", nullptr, 0);
			}
			}
		}
		else
		{
			if (NtWaitForKeyedEvent(nullptr, &cond->sync, false, timeout + 1 ? &qw : nullptr) == NTSTATUS_SUCCESS)
			{
				// Error code assumed to be timeout
				fallback = true;
			}
		}
#endif

		if (timeout + 1)
		{
			// TODO: reduce timeout instead
			break;
		}
	}
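
	// If no notification was observed (fallback == false on the Win32 path), consume
	// any alert that may still be in flight so it cannot spuriously wake a later wait;
	// on the futex/std paths fallback is constexpr true and this loop never runs.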

	while (!fallback)
	{
#if defined(_WIN32)
		static LARGE_INTEGER instant{};

		if (cond->sync.compare_and_swap_test(1, 2))
		{
			// Succeeded in self-notifying
			break;
		}

		if (NtWaitForAlertByThreadId)
		{
			if (NtWaitForAlertByThreadId(cond, &instant) == NTSTATUS_ALERTED)
			{
				break;
			}

			continue;
		}

		if (!NtWaitForKeyedEvent(nullptr, &cond->sync, false, &instant))
		{
			// Succeeded in obtaining an event without waiting
			break;
		}

		continue;
#endif
	}

#ifdef USE_STD
	if (lock)
	{
		lock.unlock();
	}
#endif

	slot->sema_free(sema);

	slot_free(iptr, &s_hashtable[iptr % s_hashtable_size]);

	s_tls_wait_cb(nullptr);
}

// Platform specific wake-up function
static NEVER_INLINE bool
#ifdef _WIN32
__vectorcall
#endif
alert_sema(atomic_t<u16>* sema, const void* data, u64 info, u32 size, __m128i mask, __m128i new_value)
{
	const u32 cond_id = cond_lock(sema);

	if (!cond_id)
	{
		return false;
	}

	const auto cond = cond_get(cond_id);

	verify(HERE), cond;

	bool ok = false;

	if (cond->sync && (!size ? (!info || cond->tid == info) : cond->ptr == data && cmp_mask(size, mask, new_value, cond->size, cond->mask, cond->oldv)))
	{
		if ((!size && cond->forced_wakeup()) || (size && cond->sync.load() == 1 && cond->sync.compare_and_swap_test(1, 2)))
		{
			ok = true;
			cond->alert_native();
		}
	}

	// Remove lock, possibly deallocate cond
	cond_free(cond_id);
	return ok;
}
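// Calling convention of alert_sema (as used by the notify functions below): size == 0
// requests a forced wake-up, with "info" naming the target thread id (0 matches any
// waiter); a non-zero size performs a value/mask-filtered normal notification instead.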

void atomic_wait_engine::set_wait_callback(bool(*cb)(const void* data))
{
	if (cb)
	{
		s_tls_wait_cb = cb;
	}
	else
	{
		s_tls_wait_cb = [](const void*){ return true; };
	}
}

void atomic_wait_engine::set_notify_callback(void(*cb)(const void*, u64))
{
	if (cb)
	{
		s_tls_notify_cb = cb;
	}
	else
	{
		s_tls_notify_cb = [](const void*, u64){};
	}
}

bool atomic_wait_engine::raw_notify(const void* data, u64 thread_id)
{
	const std::uintptr_t iptr = reinterpret_cast<std::uintptr_t>(data);

	const auto slot = slot_get(iptr, &s_hashtable[(iptr) % s_hashtable_size]);

	if (!slot)
	{
		return false;
	}

	s_tls_notify_cb(data, 0);

	u64 progress = 0;

	for (u64 bits = slot->get_sema_bits(); bits; bits &= bits - 1)
	{
		const auto sema = slot->get_sema(std::countr_zero(bits));

		// Forced notification
		if (alert_sema(sema, data, thread_id, 0, _mm_setzero_si128(), _mm_setzero_si128()))
		{
			s_tls_notify_cb(data, ++progress);

			if (thread_id == 0)
			{
				// Works like notify_all in this case
				continue;
			}

			break;
		}
	}

	s_tls_notify_cb(data, -1);
	return progress != 0;
}

void
#ifdef _WIN32
__vectorcall
#endif
atomic_wait_engine::notify_one(const void* data, u32 size, __m128i mask, __m128i new_value)
{
	const std::uintptr_t iptr = reinterpret_cast<std::uintptr_t>(data);

	const auto slot = slot_get(iptr, &s_hashtable[(iptr) % s_hashtable_size]);

	if (!slot)
	{
		return;
	}

	s_tls_notify_cb(data, 0);

	u64 progress = 0;

	for (u64 bits = slot->get_sema_bits(); bits; bits &= bits - 1)
	{
		const auto sema = slot->get_sema(std::countr_zero(bits));

		if (alert_sema(sema, data, progress, size, mask, new_value))
		{
			s_tls_notify_cb(data, ++progress);
			break;
		}
	}

	s_tls_notify_cb(data, -1);
}

SAFE_BUFFERS void
#ifdef _WIN32
__vectorcall
#endif
atomic_wait_engine::notify_all(const void* data, u32 size, __m128i mask, __m128i new_value)
{
	const std::uintptr_t iptr = reinterpret_cast<std::uintptr_t>(data);

	const auto slot = slot_get(iptr, &s_hashtable[(iptr) % s_hashtable_size]);

	if (!slot)
	{
		return;
	}

	s_tls_notify_cb(data, 0);

	u64 progress = 0;

	{
		// Make a copy to filter out waiters that fail some checks
		u64 copy = slot->get_sema_bits();
		u64 lock = 0;
		u32 lock_ids[64]{};

		for (u64 bits = copy; bits; bits &= bits - 1)
		{
			const u32 id = std::countr_zero(bits);

			const auto sema = slot->get_sema(id);

			if (const u32 cond_id = cond_lock(sema))
			{
				// Add lock bit for cleanup
				lock |= 1ull << id;
				lock_ids[id] = cond_id;

				const auto cond = cond_get(cond_id);

				verify(HERE), cond;

				if (cond->sync && cond->ptr == data && cmp_mask(size, mask, new_value, cond->size, cond->mask, cond->oldv))
				{
					if (cond->sync.load() == 1 && cond->sync.compare_and_swap_test(1, 2))
					{
						// Ok.
						continue;
					}
				}
			}

			// Remove the bit from next stage
			copy &= ~(1ull << id);
		}

		// If only one waiter exists, there is no point in trying to optimize
		if (copy & (copy - 1))
		{
			for (u64 bits = copy; bits; bits &= bits - 1)
			{
				const u32 id = std::countr_zero(bits);

				if (cond_get(lock_ids[id])->try_alert_native())
				{
					s_tls_notify_cb(data, ++progress);

					// Remove the bit from next stage
					copy &= ~(1ull << id);
				}
			}
		}

		// Proceed with remaining bits using "normal", potentially blocking notification
		for (u64 bits = copy; bits; bits &= bits - 1)
		{
			cond_get(lock_ids[std::countr_zero(bits)])->alert_native();

			s_tls_notify_cb(data, ++progress);
		}

		// Cleanup locked notifiers
		for (u64 bits = lock; bits; bits &= bits - 1)
		{
			cond_free(lock_ids[std::countr_zero(bits)]);
		}

		s_tls_notify_cb(data, -1);
		return;
	}

	// Unused, let's keep for reference
	for (u64 bits = slot->get_sema_bits(); bits; bits &= bits - 1)
	{
		const auto sema = slot->get_sema(std::countr_zero(bits));

		if (alert_sema(sema, data, progress, size, mask, new_value))
		{
			s_tls_notify_cb(data, ++progress);
			continue;
		}
	}

	s_tls_notify_cb(data, -1);
}