2019-07-27 00:34:10 +02:00
|
|
|
|
#include "atomic.hpp"
|
|
|
|
|
|
|
2020-10-25 02:17:54 +02:00
|
|
|
|
#if defined(__linux__)
|
2019-09-22 00:31:23 +02:00
|
|
|
|
#define USE_FUTEX
|
2020-10-25 02:17:54 +02:00
|
|
|
|
#elif !defined(_WIN32)
|
|
|
|
|
|
#define USE_STD
|
2019-09-22 00:31:23 +02:00
|
|
|
|
#endif
|
|
|
|
|
|
|
2019-07-27 00:34:10 +02:00
|
|
|
|
#include "Utilities/sync.h"
|
|
|
|
|
|
|
2019-10-20 21:01:10 +02:00
|
|
|
|
#include <utility>
|
2019-08-02 00:23:26 +02:00
|
|
|
|
#include <mutex>
|
|
|
|
|
|
#include <condition_variable>
|
2020-10-25 02:17:54 +02:00
|
|
|
|
#include <chrono>
|
2019-09-22 00:31:23 +02:00
|
|
|
|
#include <iterator>
|
|
|
|
|
|
#include <memory>
|
2019-10-20 21:01:10 +02:00
|
|
|
|
#include <cstdlib>
|
2019-10-20 01:41:19 +02:00
|
|
|
|
|
|
|
|
|
|
// Hashtable size factor (can be set to 0 to stress-test collisions)
|
2019-10-20 19:08:09 +02:00
|
|
|
|
static constexpr uint s_hashtable_power = 16;
|
2019-08-02 00:23:26 +02:00
|
|
|
|
|
2019-09-12 16:14:26 +02:00
|
|
|
|
// Total number of entries, should be a power of 2.
|
2019-10-20 01:41:19 +02:00
|
|
|
|
static constexpr std::uintptr_t s_hashtable_size = 1u << s_hashtable_power;
|
|
|
|
|
|
|
|
|
|
|
|
// Pointer mask without bits used as hash, assuming signed 48-bit pointers.
|
2019-10-20 21:01:10 +02:00
|
|
|
|
static constexpr u64 s_pointer_mask = s_hashtable_power > 7 ? 0xffff'ffff'ffff & ~((s_hashtable_size - 1)) : 0xffff'ffff'ffff;
|
2019-10-20 01:41:19 +02:00
|
|
|
|
|
|
|
|
|
|
// Max number of waiters is 32767.
|
2019-10-20 21:01:10 +02:00
|
|
|
|
static constexpr u64 s_waiter_mask = s_hashtable_power > 7 ? 0x7fff'0000'0000'0000 : 0x7f00'0000'0000'0000;
|
2019-10-20 01:41:19 +02:00
|
|
|
|
|
|
|
|
|
|
// Bit indicates that more than one.
|
|
|
|
|
|
static constexpr u64 s_collision_bit = 0x8000'0000'0000'0000;
|
2019-07-27 00:34:10 +02:00
|
|
|
|
|
2019-10-20 01:41:19 +02:00
|
|
|
|
// Allocated slot with secondary table.
|
|
|
|
|
|
static constexpr u64 s_slot_mask = ~(s_waiter_mask | s_pointer_mask | s_collision_bit);
|
|
|
|
|
|
|
|
|
|
|
|
// Helper to get least significant set bit from 64-bit masks
|
|
|
|
|
|
template <u64 Mask>
|
|
|
|
|
|
static constexpr u64 one_v = Mask & (0 - Mask);
|
2019-07-29 19:28:20 +02:00
|
|
|
|
|
2020-10-23 17:20:57 +02:00
|
|
|
|
// Callback for wait() function, returns false if wait should return
|
|
|
|
|
|
static thread_local bool(*s_tls_wait_cb)(const void* data) = [](const void*){ return true; };
|
|
|
|
|
|
|
|
|
|
|
|
// Compare data in memory with old value, and return true if they are equal
|
|
|
|
|
|
template <bool CheckCb = true, bool CheckData = true>
|
|
|
|
|
|
static inline bool ptr_cmp(const void* data, std::size_t size, u64 old_value, u64 mask)
|
|
|
|
|
|
{
|
|
|
|
|
|
if constexpr (CheckCb)
|
|
|
|
|
|
{
|
|
|
|
|
|
if (!s_tls_wait_cb(data))
|
|
|
|
|
|
{
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if constexpr (CheckData)
|
|
|
|
|
|
{
|
|
|
|
|
|
if (!data)
|
|
|
|
|
|
{
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
switch (size)
|
|
|
|
|
|
{
|
|
|
|
|
|
case 1: return (reinterpret_cast<const atomic_t<u8>*>(data)->load() & mask) == (old_value & mask);
|
|
|
|
|
|
case 2: return (reinterpret_cast<const atomic_t<u16>*>(data)->load() & mask) == (old_value & mask);
|
|
|
|
|
|
case 4: return (reinterpret_cast<const atomic_t<u32>*>(data)->load() & mask) == (old_value & mask);
|
|
|
|
|
|
case 8: return (reinterpret_cast<const atomic_t<u64>*>(data)->load() & mask) == (old_value & mask);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2020-10-25 02:17:54 +02:00
|
|
|
|
#ifdef USE_STD
|
|
|
|
|
|
namespace
{
	// Condition variable together with the mutex it is waited on (USE_STD backend)
	struct cond_handle
	{
		std::condition_variable cond;
		std::mutex mtx;

		// The mutex is taken during construction, so the constructing thread owns it
		// right away; wait() later adopts this ownership with std::adopt_lock.
		cond_handle() noexcept { mtx.lock(); }
	};
}
|
|
|
|
|
|
|
|
|
|
|
|
// Arbitrary max allowed thread number
|
|
|
|
|
|
static constexpr u32 s_max_conds = 512 * 64;
|
|
|
|
|
|
|
|
|
|
|
|
static std::aligned_storage_t<sizeof(cond_handle), alignof(cond_handle)> s_cond_list[s_max_conds]{};
|
|
|
|
|
|
|
|
|
|
|
|
alignas(64) atomic_t<u64> s_cond_bits[s_max_conds / 64];
|
|
|
|
|
|
|
|
|
|
|
|
alignas(64) atomic_t<u32> s_cond_sema{0};
|
|
|
|
|
|
|
|
|
|
|
|
// Take one cond_handle from the pool and construct it in place.
// Returns the 1-based id of the handle, or 0 if s_cond_sema refuses the increment.
// NOTE(review): the limit passed to try_inc is s_max_conds + 1 - confirm against
// try_inc's comparison that this cannot admit more allocations than there are bits.
static u32 cond_alloc()
{
	// Reserve a unit of capacity before touching the bitmap
	if (!s_cond_sema.try_inc(s_max_conds + 1))
	{
		return 0;
	}

	// Spread the starting group across callers to reduce contention
	// and improve the chance of succeeding on the first group
#ifdef _WIN32
	const u32 start = GetCurrentProcessorNumber();
#elif __linux__
	const u32 start = sched_getcpu();
#else
	const u32 start = __rdtsc();
#endif

	u32 i = start * 8;

	while (true)
	{
		const u32 group = i % (s_max_conds / 64);

		// Atomically claim the lowest clear bit of this group, if there is one
		const auto [old_mask, claimed] = s_cond_bits[group].fetch_op([](u64& mask)
		{
			if (!~mask)
			{
				// Group is full
				return false;
			}

			mask |= mask + 1;
			return true;
		});

		if (claimed)
		{
			// The claimed bit is the lowest clear bit of the previous value
			const u32 id = group * 64 + std::countr_one(old_mask);

			// Construct the object before its id is handed out
			new (s_cond_list + id) cond_handle();

			return id + 1;
		}

		i++;
	}

	// TODO: unreachable
	std::abort();
	return 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Translate a 1-based id returned by cond_alloc() into a pointer to the pooled object.
// Returns nullptr for 0 and for any id outside the pool (unsigned wrap-around makes
// id 0 compare as out of range).
static cond_handle* cond_get(u32 cond_id)
{
	const u32 index = cond_id - 1;

	if (index >= s_max_conds) [[unlikely]]
	{
		return nullptr;
	}

	return std::launder(reinterpret_cast<cond_handle*>(s_cond_list + index));
}
|
|
|
|
|
|
|
|
|
|
|
|
// Destroy the pooled cond_handle with the given 1-based id and return it to the pool.
// Ids outside the pool range are silently ignored.
static void cond_free(u32 cond_id)
{
	const u32 index = cond_id - 1;

	if (index < s_max_conds)
	{
		// Run the destructor of the in-place constructed object
		cond_get(cond_id)->~cond_handle();

		// Clear the allocation bit
		const u64 bit = 1ull << (index % 64);
		s_cond_bits[index / 64] &= ~bit;

		// Give the capacity unit back
		s_cond_sema--;
	}

	// Otherwise: bad id is ignored because it may contain the notifier lock
}
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
2019-10-20 01:41:19 +02:00
|
|
|
|
namespace
|
|
|
|
|
|
{
|
2019-10-20 19:08:09 +02:00
|
|
|
|
struct sync_var
|
2019-10-20 01:41:19 +02:00
|
|
|
|
{
|
2019-10-20 19:08:09 +02:00
|
|
|
|
constexpr sync_var() noexcept = default;
|
2019-09-09 11:28:21 +02:00
|
|
|
|
|
2019-10-20 19:08:09 +02:00
|
|
|
|
// Reference counter, owning pointer, collision bit and optionally selected slot
|
|
|
|
|
|
atomic_t<u64> addr_ref{};
|
|
|
|
|
|
|
2020-10-23 18:31:29 +02:00
|
|
|
|
// Allocated semaphore bits (max 60)
|
|
|
|
|
|
atomic_t<u64> sema_bits{};
|
|
|
|
|
|
|
|
|
|
|
|
// Semaphores (one per thread), data is platform-specific but 0 means empty
|
|
|
|
|
|
atomic_t<u32> sema_data[60]{};
|
|
|
|
|
|
|
|
|
|
|
|
atomic_t<u32>* sema_alloc()
|
|
|
|
|
|
{
|
2020-10-25 02:17:54 +02:00
|
|
|
|
#ifdef USE_STD
|
|
|
|
|
|
const u32 cond_id = cond_alloc();
|
|
|
|
|
|
|
|
|
|
|
|
if (cond_id == 0)
|
|
|
|
|
|
{
|
|
|
|
|
|
// Too many threads
|
|
|
|
|
|
return nullptr;
|
|
|
|
|
|
}
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
2020-10-23 18:31:29 +02:00
|
|
|
|
const auto [bits, ok] = sema_bits.fetch_op([](u64& bits)
|
|
|
|
|
|
{
|
|
|
|
|
|
if (bits + 1 < (1ull << 60))
|
|
|
|
|
|
{
|
|
|
|
|
|
// Set lowest clear bit
|
|
|
|
|
|
bits |= bits + 1;
|
|
|
|
|
|
return true;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
|
|
if (ok) [[likely]]
|
|
|
|
|
|
{
|
|
|
|
|
|
// Find lowest clear bit
|
|
|
|
|
|
const auto sema = &sema_data[std::countr_one(bits)];
|
|
|
|
|
|
|
2020-10-25 02:17:54 +02:00
|
|
|
|
#if defined(USE_STD)
|
|
|
|
|
|
sema->release(cond_id);
|
|
|
|
|
|
#elif defined(USE_FUTEX)
|
2020-10-23 18:31:29 +02:00
|
|
|
|
sema->release(1);
|
2020-10-24 03:40:12 +02:00
|
|
|
|
#elif defined(_WIN32)
|
|
|
|
|
|
if (NtWaitForAlertByThreadId)
|
|
|
|
|
|
{
|
|
|
|
|
|
sema->release(GetCurrentThreadId());
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
sema->release(1);
|
|
|
|
|
|
}
|
2020-10-23 18:31:29 +02:00
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
return sema;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return nullptr;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void sema_free(atomic_t<u32>* sema)
|
|
|
|
|
|
{
|
|
|
|
|
|
if (sema < sema_data || sema >= std::end(sema_data))
|
|
|
|
|
|
{
|
|
|
|
|
|
std::abort();
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Clear sema
|
2020-10-25 02:17:54 +02:00
|
|
|
|
#ifdef USE_STD
|
|
|
|
|
|
cond_free(sema->exchange(0));
|
|
|
|
|
|
#else
|
2020-10-23 18:31:29 +02:00
|
|
|
|
sema->release(0);
|
2020-10-25 02:17:54 +02:00
|
|
|
|
#endif
|
2020-10-23 18:31:29 +02:00
|
|
|
|
// Clear sema bit
|
|
|
|
|
|
sema_bits &= ~(1ull << (sema - sema_data));
|
|
|
|
|
|
}
|
2019-10-20 19:08:09 +02:00
|
|
|
|
};
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Main hashtable for atomic wait.
|
2020-10-23 18:31:29 +02:00
|
|
|
|
alignas(64) static sync_var s_hashtable[s_hashtable_size]{};
|
2019-10-20 19:08:09 +02:00
|
|
|
|
|
|
|
|
|
|
namespace
|
|
|
|
|
|
{
|
|
|
|
|
|
struct slot_info
|
|
|
|
|
|
{
|
|
|
|
|
|
constexpr slot_info() noexcept = default;
|
2019-09-09 03:32:30 +02:00
|
|
|
|
|
2019-10-20 19:08:09 +02:00
|
|
|
|
// Branch extension
|
|
|
|
|
|
sync_var branch[48 - s_hashtable_power]{};
|
2019-10-20 01:41:19 +02:00
|
|
|
|
};
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Number of search groups (defines max slot branch count as gcount * 64)
|
2019-10-20 21:01:10 +02:00
|
|
|
|
static constexpr u32 s_slot_gcount = (s_hashtable_power > 7 ? 4096 : 256) / 64;
|
2019-10-20 01:41:19 +02:00
|
|
|
|
|
|
|
|
|
|
// Array of slot branch objects
|
2020-10-23 18:31:29 +02:00
|
|
|
|
alignas(64) static slot_info s_slot_list[s_slot_gcount * 64]{};
|
2019-10-20 01:41:19 +02:00
|
|
|
|
|
|
|
|
|
|
// Allocation bits
|
2020-10-23 18:31:29 +02:00
|
|
|
|
alignas(64) static atomic_t<u64> s_slot_bits[s_slot_gcount]{};
|
2019-09-22 00:31:23 +02:00
|
|
|
|
|
2019-10-20 01:41:19 +02:00
|
|
|
|
// Reserve one element of s_slot_list and return its index.
// Loops over the groups until a free bit is found.
static u64 slot_alloc()
{
	// Spread the starting group across callers to reduce contention
	// and improve the chance of succeeding on the first group
#ifdef _WIN32
	const u32 start = GetCurrentProcessorNumber();
#elif __linux__
	const u32 start = sched_getcpu();
#else
	const u32 start = __rdtsc();
#endif

	// Counter starts at the diversified offset; unsigned wrap-around is harmless here
	for (u32 pos = start * 8;; pos++)
	{
		const u32 group = pos % s_slot_gcount;

		// Atomically claim the lowest clear bit of this group, if there is one
		const auto [old_mask, claimed] = s_slot_bits[group].fetch_op([](u64& mask)
		{
			if (!~mask)
			{
				// Group is full
				return false;
			}

			mask |= mask + 1;
			return true;
		});

		if (!claimed)
		{
			continue;
		}

		// The claimed bit is the lowest clear bit of the previous value
		return group * 64 + std::countr_one(old_mask);
	}

	// TODO: unreachable
	std::abort();
	return 0;
}
|
|
|
|
|
|
|
2019-10-20 19:08:09 +02:00
|
|
|
|
// Find the entry that currently tracks waiters for pointer `iptr`.
// `loc` is the entry to start from and `lv` the number of leading pointer bits already
// resolved by previous levels. Returns nullptr if no waiter entry for the pointer exists.
static sync_var* slot_get(std::uintptr_t iptr, sync_var* loc, u64 lv = 0)
{
	// Walk down the branch tables one level per iteration
	while (loc)
	{
		const u64 value = loc->addr_ref.load();

		if ((value & s_waiter_mask) == 0)
		{
			// Nobody is waiting on this entry
			return nullptr;
		}

		if ((value & s_pointer_mask) == (iptr & s_pointer_mask))
		{
			// Entry is owned by the requested pointer
			return loc;
		}

		if ((value & s_collision_bit) == 0)
		{
			// Different owner and no branch table attached
			return nullptr;
		}

		// Get the number of leading equal bits to determine subslot
		const u64 eq_bits = std::countl_zero<u64>((((iptr ^ value) & (s_pointer_mask >> lv)) | ~s_pointer_mask) << 16);

		// Descend into the branch table and increment level
		loc = s_slot_list[(value & s_slot_mask) / one_v<s_slot_mask>].branch + eq_bits;
		lv = eq_bits + 1;
	}

	return nullptr;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Return a branch table to the pool. `id` carries the slot index inside the
// s_slot_mask bit field (other bits are ignored).
static void slot_free(u64 id)
{
	// Extract the index from the bit field
	const u64 index = (id & s_slot_mask) / one_v<s_slot_mask>;

	// Reset allocation bit
	const u64 bit = 1ull << (index % 64);
	s_slot_bits[index / 64] &= ~bit;
}
|
|
|
|
|
|
|
2019-10-20 21:52:18 +02:00
|
|
|
|
static void slot_free(std::uintptr_t iptr, sync_var* loc, u64 lv = 0)
|
|
|
|
|
|
{
|
|
|
|
|
|
const u64 value = loc->addr_ref.load();
|
|
|
|
|
|
|
|
|
|
|
|
if ((value & s_pointer_mask) != (iptr & s_pointer_mask))
|
|
|
|
|
|
{
|
|
|
|
|
|
if ((value & s_waiter_mask) == 0 || (value & s_collision_bit) == 0)
|
|
|
|
|
|
{
|
|
|
|
|
|
std::abort();
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Get the number of leading equal bits to determine subslot
|
2020-04-13 20:57:16 +02:00
|
|
|
|
const u64 eq_bits = std::countl_zero<u64>((((iptr ^ value) & (s_pointer_mask >> lv)) | ~s_pointer_mask) << 16);
|
2019-10-20 21:52:18 +02:00
|
|
|
|
|
|
|
|
|
|
// Proceed recursively, to deallocate deepest branch first
|
|
|
|
|
|
slot_free(iptr, s_slot_list[(value & s_slot_mask) / one_v<s_slot_mask>].branch + eq_bits, eq_bits + 1);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Actual cleanup in reverse order
|
|
|
|
|
|
auto [_old, ok] = loc->addr_ref.fetch_op([&](u64& value)
|
|
|
|
|
|
{
|
|
|
|
|
|
if (value & s_waiter_mask)
|
|
|
|
|
|
{
|
|
|
|
|
|
value -= one_v<s_waiter_mask>;
|
|
|
|
|
|
|
|
|
|
|
|
if (!(value & s_waiter_mask))
|
|
|
|
|
|
{
|
|
|
|
|
|
// Reset on last waiter
|
|
|
|
|
|
value = 0;
|
|
|
|
|
|
return 2;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
std::abort();
|
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
|
|
if (ok > 1 && _old & s_collision_bit)
|
|
|
|
|
|
{
|
2020-10-23 18:31:29 +02:00
|
|
|
|
if (loc->sema_bits)
|
|
|
|
|
|
std::abort();
|
|
|
|
|
|
|
2019-10-20 21:52:18 +02:00
|
|
|
|
// Deallocate slot on last waiter
|
|
|
|
|
|
slot_free(_old);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2020-10-24 03:40:12 +02:00
|
|
|
|
// Block the calling thread while the masked value at `data` equals the masked `old_value`.
//  data      - address being waited on (also the key used by notify_one/notify_all)
//  size      - operand size in bytes (1, 2, 4 or 8)
//  old_value - value to compare against
//  timeout   - nanoseconds; the all-ones value (-1) means no timeout
//  mask      - bits taking part in the comparison
// Steps: register as a waiter in the hashtable (descending into branch tables on
// collisions), allocate a per-thread semaphore, wait on it with the platform backend,
// then release the semaphore and the hashtable references.
SAFE_BUFFERS void atomic_storage_futex::wait(const void* data, std::size_t size, u64 old_value, u64 timeout, u64 mask)
{
	const std::uintptr_t iptr = reinterpret_cast<std::uintptr_t>(data);

	// Allocated slot index (already shifted into the s_slot_mask field), -1 if none
	u64 slot_a = -1;

	// Found slot object
	sync_var* slot = nullptr;

	// Atomic update applied to an entry's addr_ref; returns 0 to reject (no store),
	// otherwise the new value.
	auto install_op = [&](u64& value) -> u64
	{
		if ((value & s_waiter_mask) == s_waiter_mask)
		{
			// Return immediately on waiter overflow
			return 0;
		}

		if (!value || (value & s_pointer_mask) == (iptr & s_pointer_mask))
		{
			// Store pointer bits
			value |= (iptr & s_pointer_mask);
		}
		else
		{
			if ((value & s_collision_bit) == 0)
			{
				if (slot_a + 1 == 0)
				{
					// Second waiter: allocate slot and install it
					slot_a = slot_alloc() * one_v<s_slot_mask>;
				}

				value |= slot_a;
			}

			// Set collision bit
			value |= s_collision_bit;
		}

		// Add waiter
		value += one_v<s_waiter_mask>;
		return value;
	};

	// Search detail: number of leading pointer bits resolved so far
	u64 lv = 0;

	for (sync_var* ptr = &s_hashtable[iptr % s_hashtable_size];;)
	{
		auto [_old, ok] = ptr->addr_ref.fetch_op(install_op);

		if (slot_a + 1)
		{
			if ((_old & s_collision_bit) == 0 && (ok & s_collision_bit) && (ok & s_slot_mask) == slot_a)
			{
				// Slot set successfully: ownership moved to the entry
				slot_a = -1;
			}
		}

		if (!ok)
		{
			// Waiter overflow. Expected only on top level
			// NOTE(review): ptr_cmp returns true while the value still matches old_value,
			// so this returns early in that case - confirm this is intended.
			if (timeout + 1 || ptr_cmp<false>(data, size, old_value, mask))
			{
				if (slot_a + 1)
				{
					// A slot reserved by install_op was never installed: release it
					slot_free(slot_a);
				}

				return;
			}

			// TODO
			busy_wait(30000);
			continue;
		}

		if (!_old || (_old & s_pointer_mask) == (iptr & s_pointer_mask))
		{
			// Success
			if (slot_a + 1)
			{
				// Cleanup slot if unused
				slot_free(slot_a);
				slot_a = -1;
			}

			slot = ptr;
			break;
		}

		// Get the number of leading equal bits (between iptr and slot owner)
		const u64 eq_bits = std::countl_zero<u64>((((iptr ^ ok) & (s_pointer_mask >> lv)) | ~s_pointer_mask) << 16);

		// Collision; need to go deeper
		ptr = s_slot_list[(ok & s_slot_mask) / one_v<s_slot_mask>].branch + eq_bits;

		lv = eq_bits + 1;
	}

#ifdef _WIN32
	// May be used by NtWaitForAlertByThreadId
	u32 thread_id[16]{GetCurrentThreadId()};
#endif

	auto sema = slot->sema_alloc();

	while (!sema)
	{
		// No semaphore available: give up for timed waits, otherwise retry after a pause
		if (timeout + 1 || ptr_cmp<false>(data, size, old_value, mask))
		{
			slot_free(iptr, &s_hashtable[iptr % s_hashtable_size]);
			return;
		}

		// TODO
		busy_wait(30000);
		sema = slot->sema_alloc();
	}

#ifdef USE_STD
	// Adopt the mutex of the condition variable (already locked by its constructor)
	std::unique_lock lock(cond_get(sema->load() & 0x7fffffff)->mtx, std::adopt_lock);
#endif

	// Can skip unqueue process if true
#if defined(USE_FUTEX) || defined(USE_STD)
	constexpr bool fallback = true;
#else
	bool fallback = false;
#endif

	while (ptr_cmp(data, size, old_value, mask))
	{
#ifdef USE_FUTEX
		struct timespec ts;
		ts.tv_sec = timeout / 1'000'000'000;
		ts.tv_nsec = timeout % 1'000'000'000;

		if (sema->load() > 1) [[unlikely]]
		{
			// Signaled prematurely: rearm the semaphore
			sema->release(1);
		}
		else
		{
			futex(sema, FUTEX_WAIT_PRIVATE, 1, timeout + 1 ? &ts : nullptr);
		}
#elif defined(USE_STD)
		const u32 val = sema->load();

		if (val >> 31)
		{
			// Locked by notifier
			if (!ptr_cmp(data, size, old_value, mask))
			{
				break;
			}
		}
		else if (timeout + 1)
		{
			cond_get(val)->cond.wait_for(lock, std::chrono::nanoseconds(timeout));
		}
		else
		{
			cond_get(val)->cond.wait(lock);
		}
#elif defined(_WIN32)
		LARGE_INTEGER qw;
		qw.QuadPart = -static_cast<s64>(timeout / 100);

		if (timeout % 100)
		{
			// Round up to closest 100ns unit
			qw.QuadPart -= 1;
		}

		if (NtWaitForAlertByThreadId)
		{
			if (fallback) [[unlikely]]
			{
				// Restart waiting
				if (sema->load() == umax)
				{
					sema->release(thread_id[0]);
				}

				fallback = false;
			}

			// Let's assume it can return spuriously
			switch (DWORD status = NtWaitForAlertByThreadId(thread_id, timeout + 1 ? &qw : nullptr))
			{
			case NTSTATUS_ALERTED: fallback = true; break;
			case NTSTATUS_TIMEOUT: break;
			default:
			{
				SetLastError(status);
				fmt::raw_verify_error("Unexpected NtWaitForAlertByThreadId result.", nullptr, 0);
			}
			}
		}
		else
		{
			if (fallback)
			{
				// Restart waiting
				verify(HERE), sema->load() == 2;
				sema->release(1);
				fallback = false;
			}

			if (!NtWaitForKeyedEvent(nullptr, sema, false, timeout + 1 ? &qw : nullptr))
			{
				// Zero status: the event was received (any error code is assumed to be timeout)
				fallback = true;
			}
		}
#endif

		if (timeout + 1)
		{
			// TODO: reduce timeout instead
			break;
		}
	}

	// Unqueue: make sure no notification is still in flight for this semaphore
	while (!fallback)
	{
#if defined(_WIN32)
		static LARGE_INTEGER instant{};

		if (NtWaitForAlertByThreadId)
		{
			if (sema->compare_and_swap_test(thread_id[0], -1))
			{
				break;
			}

			if (NtWaitForAlertByThreadId(thread_id, &instant) == NTSTATUS_ALERTED)
			{
				break;
			}

			continue;
		}

		if (sema->compare_and_swap_test(1, 2))
		{
			// Succeeded in self-notifying
			break;
		}

		if (!NtWaitForKeyedEvent(nullptr, sema, false, &instant))
		{
			// Succeeded in obtaining an event without waiting
			break;
		}
#endif
	}

#ifdef _WIN32
	verify(HERE), thread_id[0] == GetCurrentThreadId();
#endif

#ifdef USE_STD
	// The mutex must not be held when the cond_handle is destroyed by sema_free()
	lock.unlock();
#endif

	slot->sema_free(sema);

	slot_free(iptr, &s_hashtable[iptr % s_hashtable_size]);

	// Invoke the callback with a null pointer once the wait is over
	s_tls_wait_cb(nullptr);
}
|
2019-09-08 21:48:26 +02:00
|
|
|
|
|
2020-10-23 18:31:29 +02:00
|
|
|
|
// Platform specific wake-up function
|
|
|
|
|
|
static inline bool alert_sema(atomic_t<u32>* sema)
|
|
|
|
|
|
{
|
|
|
|
|
|
#ifdef USE_FUTEX
|
|
|
|
|
|
if (sema->load() == 1 && sema->compare_and_swap_test(1, 2))
|
|
|
|
|
|
{
|
|
|
|
|
|
// Use "wake all" arg for robustness, only 1 thread is expected
|
|
|
|
|
|
futex(sema, FUTEX_WAKE_PRIVATE, 0x7fff'ffff);
|
|
|
|
|
|
return true;
|
|
|
|
|
|
}
|
2020-10-25 02:17:54 +02:00
|
|
|
|
#elif defined(USE_STD)
|
|
|
|
|
|
// Check if not zero and not locked
|
|
|
|
|
|
u32 old_val = sema->load();
|
|
|
|
|
|
|
|
|
|
|
|
if (((old_val - 1) >> 31) == 0)
|
|
|
|
|
|
{
|
|
|
|
|
|
const auto [cond_id, ok] = sema->fetch_op([](u32& id)
|
|
|
|
|
|
{
|
|
|
|
|
|
if ((id - 1) >> 31)
|
|
|
|
|
|
{
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Set notify lock
|
|
|
|
|
|
id |= 1u << 31;
|
|
|
|
|
|
return true;
|
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
|
|
if (ok)
|
|
|
|
|
|
{
|
|
|
|
|
|
if (auto cond = cond_get(cond_id))
|
|
|
|
|
|
{
|
|
|
|
|
|
// Not super efficient: locking is required to avoid lost notifications
|
|
|
|
|
|
cond->mtx.lock();
|
|
|
|
|
|
cond->mtx.unlock();
|
|
|
|
|
|
cond->cond.notify_all();
|
|
|
|
|
|
|
|
|
|
|
|
// Try to remove notifier lock gracefully
|
|
|
|
|
|
if (!sema->compare_and_swap_test(cond_id | (1u << 31), cond_id)) [[unlikely]]
|
|
|
|
|
|
{
|
|
|
|
|
|
// Cleanup helping
|
|
|
|
|
|
cond_free(cond_id);
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2020-10-23 18:31:29 +02:00
|
|
|
|
#elif defined(_WIN32)
|
2020-10-24 03:40:12 +02:00
|
|
|
|
if (NtWaitForAlertByThreadId)
|
|
|
|
|
|
{
|
|
|
|
|
|
u32 tid = sema->load();
|
|
|
|
|
|
|
|
|
|
|
|
// Check if tid is neither 0 nor -1
|
|
|
|
|
|
if (tid + 1 > 1 && sema->compare_and_swap_test(tid, -1))
|
|
|
|
|
|
{
|
|
|
|
|
|
if (NtAlertThreadByThreadId(tid) == NTSTATUS_SUCCESS)
|
|
|
|
|
|
{
|
|
|
|
|
|
return true;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2020-10-23 18:31:29 +02:00
|
|
|
|
if (sema->load() == 1 && sema->compare_and_swap_test(1, 2))
|
|
|
|
|
|
{
|
|
|
|
|
|
// Can wait in rare cases, which is its annoying weakness
|
|
|
|
|
|
NtReleaseKeyedEvent(nullptr, sema, 1, nullptr);
|
|
|
|
|
|
return true;
|
|
|
|
|
|
}
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2019-09-08 21:48:26 +02:00
|
|
|
|
void atomic_storage_futex::set_wait_callback(bool(*cb)(const void* data))
|
|
|
|
|
|
{
|
|
|
|
|
|
if (cb)
|
|
|
|
|
|
{
|
|
|
|
|
|
s_tls_wait_cb = cb;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void atomic_storage_futex::raw_notify(const void* data)
|
|
|
|
|
|
{
|
|
|
|
|
|
if (data)
|
|
|
|
|
|
{
|
|
|
|
|
|
notify_all(data);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2019-09-22 00:31:23 +02:00
|
|
|
|
|
|
|
|
|
|
void atomic_storage_futex::notify_one(const void* data)
|
|
|
|
|
|
{
|
|
|
|
|
|
const std::uintptr_t iptr = reinterpret_cast<std::uintptr_t>(data);
|
|
|
|
|
|
|
2019-10-20 01:41:19 +02:00
|
|
|
|
const auto slot = slot_get(iptr, &s_hashtable[(iptr) % s_hashtable_size]);
|
2019-09-22 00:31:23 +02:00
|
|
|
|
|
2019-10-20 01:41:19 +02:00
|
|
|
|
if (!slot)
|
2019-09-22 00:31:23 +02:00
|
|
|
|
{
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
2019-10-20 01:41:19 +02:00
|
|
|
|
|
2020-10-23 18:31:29 +02:00
|
|
|
|
for (u64 bits = slot->sema_bits; bits; bits &= bits - 1)
|
2019-09-22 00:31:23 +02:00
|
|
|
|
{
|
2020-10-23 18:31:29 +02:00
|
|
|
|
const auto sema = &slot->sema_data[std::countr_zero(bits)];
|
2019-09-22 00:31:23 +02:00
|
|
|
|
|
2020-10-23 18:31:29 +02:00
|
|
|
|
if (alert_sema(sema))
|
2019-09-22 00:31:23 +02:00
|
|
|
|
{
|
2020-10-23 18:31:29 +02:00
|
|
|
|
break;
|
2019-09-22 00:31:23 +02:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void atomic_storage_futex::notify_all(const void* data)
|
|
|
|
|
|
{
|
|
|
|
|
|
const std::uintptr_t iptr = reinterpret_cast<std::uintptr_t>(data);
|
|
|
|
|
|
|
2019-10-20 01:41:19 +02:00
|
|
|
|
const auto slot = slot_get(iptr, &s_hashtable[(iptr) % s_hashtable_size]);
|
2019-09-22 00:31:23 +02:00
|
|
|
|
|
2019-10-20 01:41:19 +02:00
|
|
|
|
if (!slot)
|
2019-09-22 00:31:23 +02:00
|
|
|
|
{
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
2019-10-20 01:41:19 +02:00
|
|
|
|
|
2020-10-23 21:43:27 +02:00
|
|
|
|
#if defined(_WIN32) && !defined(USE_FUTEX)
|
2020-10-24 03:40:12 +02:00
|
|
|
|
if (!NtAlertThreadByThreadId)
|
2020-10-23 21:43:27 +02:00
|
|
|
|
{
|
|
|
|
|
|
// Make a copy to filter out waiters that fail some checks
|
|
|
|
|
|
u64 copy = slot->sema_bits.load();
|
|
|
|
|
|
|
|
|
|
|
|
// Used for making non-blocking syscall
|
2020-10-24 03:40:12 +02:00
|
|
|
|
static LARGE_INTEGER instant{};
|
2020-10-23 21:43:27 +02:00
|
|
|
|
|
|
|
|
|
|
for (u64 bits = copy; bits; bits &= bits - 1)
|
|
|
|
|
|
{
|
|
|
|
|
|
const u32 id = std::countr_zero(bits);
|
|
|
|
|
|
|
|
|
|
|
|
const auto sema = &slot->sema_data[id];
|
|
|
|
|
|
|
|
|
|
|
|
if (sema->load() == 1 && sema->compare_and_swap_test(1, 2))
|
|
|
|
|
|
{
|
|
|
|
|
|
// Waiters locked for notification
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Remove the bit from next stage
|
|
|
|
|
|
copy &= ~(1ull << id);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// If only one waiter exists, there is no point in trying to optimize
|
|
|
|
|
|
if (copy & (copy - 1))
|
|
|
|
|
|
{
|
|
|
|
|
|
for (u64 bits = copy; bits; bits &= bits - 1)
|
|
|
|
|
|
{
|
|
|
|
|
|
const u32 id = std::countr_zero(bits);
|
|
|
|
|
|
|
|
|
|
|
|
const auto sema = &slot->sema_data[id];
|
|
|
|
|
|
|
|
|
|
|
|
if (NtReleaseKeyedEvent(nullptr, sema, 1, &instant))
|
|
|
|
|
|
{
|
|
|
|
|
|
// Failed to notify immediately
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Remove the bit from next stage
|
|
|
|
|
|
copy &= ~(1ull << id);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Proceed with remaining bits using "normal" blocking waiting
|
|
|
|
|
|
for (u64 bits = copy; bits; bits &= bits - 1)
|
|
|
|
|
|
{
|
|
|
|
|
|
NtReleaseKeyedEvent(nullptr, &slot->sema_data[std::countr_zero(bits)], 1, nullptr);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
for (u64 bits = slot->sema_bits.load(); bits; bits &= bits - 1)
|
2019-09-22 00:31:23 +02:00
|
|
|
|
{
|
2020-10-23 18:31:29 +02:00
|
|
|
|
const auto sema = &slot->sema_data[std::countr_zero(bits)];
|
2019-09-22 00:31:23 +02:00
|
|
|
|
|
2020-10-23 18:31:29 +02:00
|
|
|
|
if (alert_sema(sema))
|
2019-09-22 00:31:23 +02:00
|
|
|
|
{
|
2020-10-23 18:31:29 +02:00
|
|
|
|
continue;
|
2019-09-22 00:31:23 +02:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|