mirror of
https://github.com/RPCSX/rpcsx.git
synced 2026-04-04 22:19:02 +00:00
moved tsc and asm utilities to rx
This commit is contained in:
parent
bd215fab92
commit
640df36c48
121 changed files with 706 additions and 1225 deletions
|
|
@ -9,7 +9,8 @@
|
|||
#include <map>
|
||||
#include <iostream>
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "util/coro.hpp"
|
||||
|
||||
using namespace std::literals::string_literals;
|
||||
|
|
@ -2386,7 +2387,7 @@ u64 fs::get_dir_size(const std::string& path, u64 rounding_alignment, atomic_t<b
|
|||
|
||||
if (!entry.is_directory)
|
||||
{
|
||||
result += utils::align(entry.size, rounding_alignment);
|
||||
result += rx::alignUp(entry.size, rounding_alignment);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
|||
|
|
@ -5,9 +5,10 @@
|
|||
#include "File.h"
|
||||
#include "util/logs.hpp"
|
||||
#include "util/vm.hpp"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "util/v128.hpp"
|
||||
#include "util/simd.hpp"
|
||||
#include "rx/align.hpp"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
|
|
@ -158,8 +159,8 @@ static u8* add_jit_memory(usz size, usz align)
|
|||
// Simple allocation by incrementing pointer to the next free data
|
||||
const u64 pos = Ctr.atomic_op([&](u64& ctr) -> u64
|
||||
{
|
||||
const u64 _pos = utils::align(ctr & 0xffff'ffff, align);
|
||||
const u64 _new = utils::align(_pos + size, align);
|
||||
const u64 _pos = rx::alignUp(ctr & 0xffff'ffff, align);
|
||||
const u64 _new = rx::alignUp(_pos + size, align);
|
||||
|
||||
if (_new > 0x40000000) [[unlikely]]
|
||||
{
|
||||
|
|
@ -175,7 +176,7 @@ static u8* add_jit_memory(usz size, usz align)
|
|||
// Check the necessity to commit more memory
|
||||
if (_new > olda) [[unlikely]]
|
||||
{
|
||||
newa = utils::align(_new, 0x200000);
|
||||
newa = rx::alignUp(_new, 0x200000);
|
||||
}
|
||||
|
||||
ctr += _new - (ctr & 0xffff'ffff);
|
||||
|
|
@ -237,9 +238,9 @@ void* jit_runtime_base::_add(asmjit::CodeHolder* code, usz align) noexcept
|
|||
|
||||
for (asmjit::Section* section : code->_sections)
|
||||
{
|
||||
if (section->offset() + section->bufferSize() > utils::align<usz>(codeSize, align))
|
||||
if (section->offset() + section->bufferSize() > rx::alignUp<usz>(codeSize, align))
|
||||
{
|
||||
fmt::throw_exception("CodeHolder section exceeds range: Section->offset: 0x%x, Section->bufferSize: 0x%x, alloted-memory=0x%x", section->offset(), section->bufferSize(), utils::align<usz>(codeSize, align));
|
||||
fmt::throw_exception("CodeHolder section exceeds range: Section->offset: 0x%x, Section->bufferSize: 0x%x, alloted-memory=0x%x", section->offset(), section->bufferSize(), rx::alignUp<usz>(codeSize, align));
|
||||
}
|
||||
|
||||
std::memcpy(p + section->offset(), section->data(), section->bufferSize());
|
||||
|
|
@ -365,7 +366,7 @@ jit_runtime_base& asmjit::get_global_runtime()
|
|||
{
|
||||
return m_pos.atomic_op([&](uchar*& pos) -> uchar*
|
||||
{
|
||||
const auto r = reinterpret_cast<uchar*>(utils::align(uptr(pos), align));
|
||||
const auto r = reinterpret_cast<uchar*>(rx::alignUp(uptr(pos), align));
|
||||
|
||||
if (r >= pos && r + size > pos && r + size <= m_max)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -7,7 +7,8 @@
|
|||
#include "util/logs.hpp"
|
||||
#include "mutex.h"
|
||||
#include "util/vm.hpp"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "Crypto/unzip.h"
|
||||
|
||||
#include <charconv>
|
||||
|
|
@ -216,7 +217,7 @@ struct MemoryManager1 : llvm::RTDyldMemoryManager
|
|||
~MemoryManager1() override
|
||||
{
|
||||
// Hack: don't release to prevent reuse of address space, see jit_announce
|
||||
// constexpr auto how_much = [](u64 pos) { return utils::align(pos, pos < c_page_size ? c_page_size / 4 : c_page_size); };
|
||||
// constexpr auto how_much = [](u64 pos) { return rx::alignUp(pos, pos < c_page_size ? c_page_size / 4 : c_page_size); };
|
||||
// utils::memory_decommit(m_code_mems, how_much(code_ptr));
|
||||
// utils::memory_decommit(m_data_ro_mems, how_much(data_ro_ptr));
|
||||
// utils::memory_decommit(m_data_rw_mems, how_much(data_rw_ptr));
|
||||
|
|
@ -249,7 +250,7 @@ struct MemoryManager1 : llvm::RTDyldMemoryManager
|
|||
{
|
||||
align = align ? align : 16;
|
||||
|
||||
const u64 sizea = utils::align(size, align);
|
||||
const u64 sizea = rx::alignUp(size, align);
|
||||
|
||||
if (!size || align > c_page_size || sizea > c_max_size || sizea < size)
|
||||
{
|
||||
|
|
@ -259,7 +260,7 @@ struct MemoryManager1 : llvm::RTDyldMemoryManager
|
|||
|
||||
u64 oldp = alloc_pos;
|
||||
|
||||
u64 olda = utils::align(oldp, align);
|
||||
u64 olda = rx::alignUp(oldp, align);
|
||||
|
||||
ensure(olda >= oldp);
|
||||
ensure(olda < ~sizea);
|
||||
|
|
@ -285,8 +286,8 @@ struct MemoryManager1 : llvm::RTDyldMemoryManager
|
|||
// Optimization: split the first allocation to 512 KiB for single-module compilers
|
||||
if (oldp < c_page_size && align < page_quarter && (std::min(newp, c_page_size) - 1) / page_quarter != (oldp - 1) / page_quarter)
|
||||
{
|
||||
const u64 pagea = utils::align(oldp, page_quarter);
|
||||
const u64 psize = utils::align(std::min(newp, c_page_size) - pagea, page_quarter);
|
||||
const u64 pagea = rx::alignUp(oldp, page_quarter);
|
||||
const u64 psize = rx::alignUp(std::min(newp, c_page_size) - pagea, page_quarter);
|
||||
utils::memory_commit(reinterpret_cast<u8*>(block) + (pagea % c_max_size), psize, prot);
|
||||
|
||||
// Advance
|
||||
|
|
@ -296,8 +297,8 @@ struct MemoryManager1 : llvm::RTDyldMemoryManager
|
|||
if ((newp - 1) / c_page_size != (oldp - 1) / c_page_size)
|
||||
{
|
||||
// Allocate pages on demand
|
||||
const u64 pagea = utils::align(oldp, c_page_size);
|
||||
const u64 psize = utils::align(newp - pagea, c_page_size);
|
||||
const u64 pagea = rx::alignUp(oldp, c_page_size);
|
||||
const u64 psize = rx::alignUp(newp - pagea, c_page_size);
|
||||
utils::memory_commit(reinterpret_cast<u8*>(block) + (pagea % c_max_size), psize, prot);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
#include "stdafx.h"
|
||||
|
||||
#include "rx/debug.hpp"
|
||||
#include "Emu/Cell/timers.hpp"
|
||||
#include "Emu/System.h"
|
||||
#include "Emu/Cell/SPUThread.h"
|
||||
|
|
@ -88,7 +90,7 @@ DYNAMIC_IMPORT_RENAME("Kernel32.dll", SetThreadDescriptionImport, "SetThreadDesc
|
|||
|
||||
#include "util/vm.hpp"
|
||||
#include "util/logs.hpp"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "util/v128.hpp"
|
||||
#include "util/simd.hpp"
|
||||
#include "util/sysinfo.hpp"
|
||||
|
|
@ -141,74 +143,11 @@ std::string dump_useful_thread_info()
|
|||
return result;
|
||||
}
|
||||
|
||||
#ifndef _WIN32
|
||||
bool IsDebuggerPresent()
|
||||
{
|
||||
#if defined(__APPLE__) || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
|
||||
int mib[] = {
|
||||
CTL_KERN,
|
||||
KERN_PROC,
|
||||
KERN_PROC_PID,
|
||||
getpid(),
|
||||
#if defined(__NetBSD__) || defined(__OpenBSD__)
|
||||
sizeof(struct kinfo_proc),
|
||||
1,
|
||||
#endif
|
||||
};
|
||||
u_int miblen = std::size(mib);
|
||||
struct kinfo_proc info;
|
||||
usz size = sizeof(info);
|
||||
|
||||
if (sysctl(mib, miblen, &info, &size, NULL, 0))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return info.KP_FLAGS & P_TRACED;
|
||||
#else
|
||||
char buf[4096];
|
||||
fs::file status_fd("/proc/self/status");
|
||||
if (!status_fd)
|
||||
{
|
||||
std::fprintf(stderr, "Failed to open /proc/self/status\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto num_read = status_fd.read(buf, sizeof(buf) - 1);
|
||||
if (num_read == 0 || num_read == umax)
|
||||
{
|
||||
std::fprintf(stderr, "Failed to read /proc/self/status (%d)\n", errno);
|
||||
return false;
|
||||
}
|
||||
|
||||
buf[num_read] = '\0';
|
||||
std::string_view status = buf;
|
||||
|
||||
const auto found = status.find("TracerPid:");
|
||||
if (found == umax)
|
||||
{
|
||||
std::fprintf(stderr, "Failed to find 'TracerPid:' in /proc/self/status\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
for (const char* cp = status.data() + found + 10; cp <= status.data() + num_read; ++cp)
|
||||
{
|
||||
if (!std::isspace(*cp))
|
||||
{
|
||||
return std::isdigit(*cp) != 0 && *cp != '0';
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
bool is_debugger_present()
|
||||
{
|
||||
if (g_cfg.core.external_debugger)
|
||||
return true;
|
||||
return IsDebuggerPresent();
|
||||
return rx::isDebuggerPresent();
|
||||
}
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
|
|
@ -2071,7 +2010,7 @@ static void signal_handler(int /*sig*/, siginfo_t* info, void* uct) noexcept
|
|||
sys_log.notice("\n%s", dump_useful_thread_info());
|
||||
logs::listener::sync_all();
|
||||
|
||||
if (IsDebuggerPresent())
|
||||
if (rx::isDebuggerPresent())
|
||||
{
|
||||
// Convert to SIGTRAP
|
||||
raise(SIGTRAP);
|
||||
|
|
@ -2091,7 +2030,7 @@ static void sigill_handler(int /*sig*/, siginfo_t* info, void* /*uct*/) noexcept
|
|||
sys_log.notice("\n%s", dump_useful_thread_info());
|
||||
logs::listener::sync_all();
|
||||
|
||||
if (IsDebuggerPresent())
|
||||
if (rx::isDebuggerPresent())
|
||||
{
|
||||
// Convert to SIGTRAP
|
||||
raise(SIGTRAP);
|
||||
|
|
@ -2140,7 +2079,7 @@ const bool s_exception_handler_set = []() -> bool
|
|||
std::abort();
|
||||
}
|
||||
|
||||
std::printf("Debugger: %d\n", +IsDebuggerPresent());
|
||||
std::printf("Debugger: %d\n", +rx::isDebuggerPresent());
|
||||
return true;
|
||||
}();
|
||||
|
||||
|
|
@ -2150,10 +2089,10 @@ const bool s_terminate_handler_set = []() -> bool
|
|||
{
|
||||
std::set_terminate([]()
|
||||
{
|
||||
if (IsDebuggerPresent())
|
||||
if (rx::isDebuggerPresent())
|
||||
{
|
||||
logs::listener::sync_all();
|
||||
utils::trap();
|
||||
rx::breakpoint();
|
||||
}
|
||||
|
||||
report_fatal_error("RPCS3 has abnormally terminated.");
|
||||
|
|
@ -2214,7 +2153,7 @@ void thread_base::initialize(void (*error_cb)())
|
|||
{
|
||||
if (attempts == umax)
|
||||
{
|
||||
g_tls_wait_time += utils::get_tsc() - stamp0;
|
||||
g_tls_wait_time += rx::get_tsc() - stamp0;
|
||||
}
|
||||
else if (attempts > 1)
|
||||
{
|
||||
|
|
@ -2246,7 +2185,7 @@ void thread_base::set_name(std::string name)
|
|||
};
|
||||
|
||||
// Set thread name for VS debugger
|
||||
if (IsDebuggerPresent())
|
||||
if (rx::isDebuggerPresent())
|
||||
[&]() NEVER_INLINE
|
||||
{
|
||||
THREADNAME_INFO info;
|
||||
|
|
@ -2527,7 +2466,7 @@ void thread_ctrl::wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
|
|||
|
||||
void thread_ctrl::wait_until(u64* wait_time, u64 add_time, u64 min_wait, bool update_to_current_time)
|
||||
{
|
||||
*wait_time = utils::add_saturate<u64>(*wait_time, add_time);
|
||||
*wait_time = rx::add_saturate<u64>(*wait_time, add_time);
|
||||
|
||||
// TODO: Implement proper support for "waiting until" inside atomic wait engine
|
||||
const u64 current_time = get_system_time();
|
||||
|
|
@ -2546,7 +2485,7 @@ void thread_ctrl::wait_until(u64* wait_time, u64 add_time, u64 min_wait, bool up
|
|||
|
||||
if (min_wait)
|
||||
{
|
||||
*wait_time = std::max<u64>(*wait_time, utils::add_saturate<u64>(current_time, min_wait));
|
||||
*wait_time = std::max<u64>(*wait_time, rx::add_saturate<u64>(current_time, min_wait));
|
||||
}
|
||||
|
||||
wait_for(*wait_time - current_time);
|
||||
|
|
@ -2588,7 +2527,7 @@ void thread_ctrl::wait_for_accurate(u64 usec)
|
|||
}
|
||||
else
|
||||
{
|
||||
busy_wait(100);
|
||||
rx::busy_wait(100);
|
||||
}
|
||||
|
||||
const auto current = std::chrono::steady_clock::now();
|
||||
|
|
@ -2663,7 +2602,7 @@ bool thread_base::join(bool dtor) const
|
|||
// Hacked for too sleepy threads (1ms) TODO: make sure it's unneeded and remove
|
||||
const auto timeout = dtor && Emu.IsStopped() ? atomic_wait_timeout{1'000'000} : atomic_wait_timeout::inf;
|
||||
|
||||
auto stamp0 = utils::get_tsc();
|
||||
auto stamp0 = rx::get_tsc();
|
||||
|
||||
for (u64 i = 0; (m_sync & 3) <= 1; i++)
|
||||
{
|
||||
|
|
@ -2676,7 +2615,7 @@ bool thread_base::join(bool dtor) const
|
|||
|
||||
if (i >= 16 && !(i & (i - 1)) && timeout != atomic_wait_timeout::inf)
|
||||
{
|
||||
sig_log.error("Thread [%s] is too sleepy. Waiting for it %.3fus already!", *m_tname.load(), (utils::get_tsc() - stamp0) / (utils::get_tsc_freq() / 1000000.));
|
||||
sig_log.error("Thread [%s] is too sleepy. Waiting for it %.3fus already!", *m_tname.load(), (rx::get_tsc() - stamp0) / (utils::get_tsc_freq() / 1000000.));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2764,7 +2703,7 @@ void thread_base::exec()
|
|||
|
||||
for (thread_future* prev{};;)
|
||||
{
|
||||
utils::prefetch_exec(prev_head->exec.load());
|
||||
rx::prefetch_exec(prev_head->exec.load());
|
||||
|
||||
if (auto next = prev_head->next.get())
|
||||
{
|
||||
|
|
@ -2836,7 +2775,7 @@ void thread_base::exec()
|
|||
|
||||
logs::listener::sync_all();
|
||||
|
||||
if (IsDebuggerPresent())
|
||||
if (rx::isDebuggerPresent())
|
||||
{
|
||||
// Prevent repeatedly halting the debugger in case multiple threads crashed at once
|
||||
static atomic_t<u64> s_last_break = 0;
|
||||
|
|
@ -2861,7 +2800,7 @@ void thread_base::exec()
|
|||
})
|
||||
.second)
|
||||
{
|
||||
utils::trap();
|
||||
rx::breakpoint();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,476 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "util/types.hpp"
|
||||
#include "util/tsc.hpp"
|
||||
#include "util/atomic.hpp"
|
||||
#include <functional>
|
||||
|
||||
extern bool g_use_rtm;
|
||||
extern u64 g_rtm_tx_limit1;
|
||||
|
||||
#ifdef _M_X64
|
||||
#ifdef _MSC_VER
|
||||
extern "C"
|
||||
{
|
||||
u32 _xbegin();
|
||||
void _xend();
|
||||
void _mm_pause();
|
||||
void _mm_prefetch(const char*, int);
|
||||
void _m_prefetchw(const volatile void*);
|
||||
|
||||
uchar _rotl8(uchar, uchar);
|
||||
ushort _rotl16(ushort, uchar);
|
||||
u64 __popcnt64(u64);
|
||||
|
||||
s64 __mulh(s64, s64);
|
||||
u64 __umulh(u64, u64);
|
||||
|
||||
s64 _div128(s64, s64, s64, s64*);
|
||||
u64 _udiv128(u64, u64, u64, u64*);
|
||||
void __debugbreak();
|
||||
}
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace utils
|
||||
{
|
||||
// Transaction helper (result = pair of success and op result, or just bool)
|
||||
template <typename F, typename R = std::invoke_result_t<F>>
|
||||
inline auto tx_start(F op)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
uint status = -1;
|
||||
|
||||
for (auto stamp0 = get_tsc(), stamp1 = stamp0; g_use_rtm && stamp1 - stamp0 <= g_rtm_tx_limit1; stamp1 = get_tsc())
|
||||
{
|
||||
#ifndef _MSC_VER
|
||||
__asm__ goto("xbegin %l[retry];" ::: "memory" : retry);
|
||||
#else
|
||||
status = _xbegin();
|
||||
|
||||
if (status != _XBEGIN_STARTED) [[unlikely]]
|
||||
{
|
||||
goto retry;
|
||||
}
|
||||
#endif
|
||||
|
||||
if constexpr (std::is_void_v<R>)
|
||||
{
|
||||
std::invoke(op);
|
||||
#ifndef _MSC_VER
|
||||
__asm__ volatile("xend;" ::: "memory");
|
||||
#else
|
||||
_xend();
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto result = std::invoke(op);
|
||||
#ifndef _MSC_VER
|
||||
__asm__ volatile("xend;" ::: "memory");
|
||||
#else
|
||||
_xend();
|
||||
#endif
|
||||
return std::make_pair(true, std::move(result));
|
||||
}
|
||||
|
||||
retry:
|
||||
#ifndef _MSC_VER
|
||||
__asm__ volatile("movl %%eax, %0;" : "=r"(status)::"memory");
|
||||
#endif
|
||||
if (!status) [[unlikely]]
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
#else
|
||||
static_cast<void>(op);
|
||||
#endif
|
||||
|
||||
if constexpr (std::is_void_v<R>)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
return std::make_pair(false, R());
|
||||
}
|
||||
};
|
||||
|
||||
// Try to prefetch to Level 2 cache since it's not split to data/code on most processors
|
||||
template <typename T>
|
||||
constexpr void prefetch_exec(T func)
|
||||
{
|
||||
if (std::is_constant_evaluated())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
const u64 value = reinterpret_cast<u64>(func);
|
||||
const void* ptr = reinterpret_cast<const void*>(value);
|
||||
|
||||
#ifdef _M_X64
|
||||
return _mm_prefetch(static_cast<const char*>(ptr), _MM_HINT_T1);
|
||||
#else
|
||||
return __builtin_prefetch(ptr, 0, 2);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Try to prefetch to Level 1 cache
|
||||
constexpr void prefetch_read(const void* ptr)
|
||||
{
|
||||
if (std::is_constant_evaluated())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef _M_X64
|
||||
return _mm_prefetch(static_cast<const char*>(ptr), _MM_HINT_T0);
|
||||
#else
|
||||
return __builtin_prefetch(ptr, 0, 3);
|
||||
#endif
|
||||
}
|
||||
|
||||
constexpr void prefetch_write(void* ptr)
|
||||
{
|
||||
if (std::is_constant_evaluated())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
#if defined(_M_X64) && !defined(__clang__)
|
||||
return _m_prefetchw(ptr);
|
||||
#else
|
||||
return __builtin_prefetch(ptr, 1, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
constexpr u8 rol8(u8 x, u8 n)
|
||||
{
|
||||
if (std::is_constant_evaluated())
|
||||
{
|
||||
return (x << (n & 7)) | (x >> ((-n & 7)));
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
return _rotl8(x, n);
|
||||
#elif defined(__clang__)
|
||||
return __builtin_rotateleft8(x, n);
|
||||
#elif defined(ARCH_X64)
|
||||
return __builtin_ia32_rolqi(x, n);
|
||||
#else
|
||||
return (x << (n & 7)) | (x >> ((-n & 7)));
|
||||
#endif
|
||||
}
|
||||
|
||||
constexpr u16 rol16(u16 x, u16 n)
|
||||
{
|
||||
if (std::is_constant_evaluated())
|
||||
{
|
||||
return (x << (n & 15)) | (x >> ((-n & 15)));
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
return _rotl16(x, static_cast<uchar>(n));
|
||||
#elif defined(__clang__)
|
||||
return __builtin_rotateleft16(x, n);
|
||||
#elif defined(ARCH_X64)
|
||||
return __builtin_ia32_rolhi(x, n);
|
||||
#else
|
||||
return (x << (n & 15)) | (x >> ((-n & 15)));
|
||||
#endif
|
||||
}
|
||||
|
||||
constexpr u32 rol32(u32 x, u32 n)
|
||||
{
|
||||
if (std::is_constant_evaluated())
|
||||
{
|
||||
return (x << (n & 31)) | (x >> (((0 - n) & 31)));
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
return _rotl(x, n);
|
||||
#elif defined(__clang__)
|
||||
return __builtin_rotateleft32(x, n);
|
||||
#else
|
||||
return (x << (n & 31)) | (x >> (((0 - n) & 31)));
|
||||
#endif
|
||||
}
|
||||
|
||||
constexpr u64 rol64(u64 x, u64 n)
|
||||
{
|
||||
if (std::is_constant_evaluated())
|
||||
{
|
||||
return (x << (n & 63)) | (x >> (((0 - n) & 63)));
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
return _rotl64(x, static_cast<int>(n));
|
||||
#elif defined(__clang__)
|
||||
return __builtin_rotateleft64(x, n);
|
||||
#else
|
||||
return (x << (n & 63)) | (x >> (((0 - n) & 63)));
|
||||
#endif
|
||||
}
|
||||
|
||||
constexpr u32 popcnt64(u64 v)
|
||||
{
|
||||
#if !defined(_MSC_VER) || defined(__SSE4_2__)
|
||||
if (std::is_constant_evaluated())
|
||||
#endif
|
||||
{
|
||||
v = (v & 0xaaaaaaaaaaaaaaaa) / 2 + (v & 0x5555555555555555);
|
||||
v = (v & 0xcccccccccccccccc) / 4 + (v & 0x3333333333333333);
|
||||
v = (v & 0xf0f0f0f0f0f0f0f0) / 16 + (v & 0x0f0f0f0f0f0f0f0f);
|
||||
v = (v & 0xff00ff00ff00ff00) / 256 + (v & 0x00ff00ff00ff00ff);
|
||||
v = ((v & 0xffff0000ffff0000) >> 16) + (v & 0x0000ffff0000ffff);
|
||||
return static_cast<u32>((v >> 32) + v);
|
||||
}
|
||||
|
||||
#if !defined(_MSC_VER) || defined(__SSE4_2__)
|
||||
#ifdef _MSC_VER
|
||||
return static_cast<u32>(__popcnt64(v));
|
||||
#else
|
||||
return __builtin_popcountll(v);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
constexpr u32 popcnt128(const u128& v)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
return popcnt64(v.lo) + popcnt64(v.hi);
|
||||
#else
|
||||
return popcnt64(v) + popcnt64(v >> 64);
|
||||
#endif
|
||||
}
|
||||
|
||||
constexpr u64 umulh64(u64 x, u64 y)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
if (std::is_constant_evaluated())
|
||||
#endif
|
||||
{
|
||||
return static_cast<u64>((u128{x} * u128{y}) >> 64);
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
return __umulh(x, y);
|
||||
#endif
|
||||
}
|
||||
|
||||
inline s64 mulh64(s64 x, s64 y)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
return __mulh(x, y);
|
||||
#else
|
||||
return (s128{x} * s128{y}) >> 64;
|
||||
#endif
|
||||
}
|
||||
|
||||
inline s64 div128(s64 high, s64 low, s64 divisor, s64* remainder = nullptr)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
s64 rem = 0;
|
||||
s64 r = _div128(high, low, divisor, &rem);
|
||||
|
||||
if (remainder)
|
||||
{
|
||||
*remainder = rem;
|
||||
}
|
||||
#else
|
||||
const s128 x = (u128{static_cast<u64>(high)} << 64) | u64(low);
|
||||
const s128 r = x / divisor;
|
||||
|
||||
if (remainder)
|
||||
{
|
||||
*remainder = x % divisor;
|
||||
}
|
||||
#endif
|
||||
return r;
|
||||
}
|
||||
|
||||
inline u64 udiv128(u64 high, u64 low, u64 divisor, u64* remainder = nullptr)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
u64 rem = 0;
|
||||
u64 r = _udiv128(high, low, divisor, &rem);
|
||||
|
||||
if (remainder)
|
||||
{
|
||||
*remainder = rem;
|
||||
}
|
||||
#else
|
||||
const u128 x = (u128{high} << 64) | low;
|
||||
const u128 r = x / divisor;
|
||||
|
||||
if (remainder)
|
||||
{
|
||||
*remainder = x % divisor;
|
||||
}
|
||||
#endif
|
||||
return r;
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
inline u128 operator/(u128 lhs, u64 rhs)
|
||||
{
|
||||
u64 rem = 0;
|
||||
return _udiv128(lhs.hi, lhs.lo, rhs, &rem);
|
||||
}
|
||||
#endif
|
||||
|
||||
constexpr u32 ctz128(u128 arg)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
if (!arg.lo)
|
||||
return std::countr_zero(arg.hi) + 64u;
|
||||
else
|
||||
return std::countr_zero(arg.lo);
|
||||
#else
|
||||
if (u64 lo = static_cast<u64>(arg))
|
||||
return std::countr_zero<u64>(lo);
|
||||
else
|
||||
return std::countr_zero<u64>(arg >> 64) + 64;
|
||||
#endif
|
||||
}
|
||||
|
||||
constexpr u32 clz128(u128 arg)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
if (arg.hi)
|
||||
return std::countl_zero(arg.hi);
|
||||
else
|
||||
return std::countl_zero(arg.lo) + 64;
|
||||
#else
|
||||
if (u64 hi = static_cast<u64>(arg >> 64))
|
||||
return std::countl_zero<u64>(hi);
|
||||
else
|
||||
return std::countl_zero<u64>(arg) + 64;
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void pause()
|
||||
{
|
||||
#if defined(ARCH_ARM64)
|
||||
__asm__ volatile("yield");
|
||||
#elif defined(_M_X64)
|
||||
_mm_pause();
|
||||
#elif defined(ARCH_X64)
|
||||
__builtin_ia32_pause();
|
||||
#else
|
||||
#error "Missing utils::pause() implementation"
|
||||
#endif
|
||||
}
|
||||
|
||||
// Synchronization helper (cache-friendly busy waiting)
|
||||
inline void busy_wait(usz cycles = 3000)
|
||||
{
|
||||
const u64 stop = get_tsc() + cycles;
|
||||
do
|
||||
pause();
|
||||
while (get_tsc() < stop);
|
||||
}
|
||||
|
||||
// Align to power of 2
|
||||
template <typename T, typename U>
|
||||
requires std::is_unsigned_v<T>
|
||||
constexpr std::make_unsigned_t<std::common_type_t<T, U>> align(T value, U align)
|
||||
{
|
||||
return static_cast<std::make_unsigned_t<std::common_type_t<T, U>>>((value + (align - 1)) & (T{0} - align));
|
||||
}
|
||||
|
||||
// General purpose aligned division, the result is rounded up not truncated
|
||||
template <typename T>
|
||||
requires std::is_unsigned_v<T>
|
||||
constexpr T aligned_div(T value, std::type_identity_t<T> align)
|
||||
{
|
||||
return static_cast<T>(value / align + T{!!(value % align)});
|
||||
}
|
||||
|
||||
// General purpose aligned division, the result is rounded to nearest
|
||||
template <typename T>
|
||||
requires std::is_integral_v<T>
|
||||
constexpr T rounded_div(T value, std::type_identity_t<T> align)
|
||||
{
|
||||
if constexpr (std::is_unsigned_v<T>)
|
||||
{
|
||||
return static_cast<T>(value / align + T{(value % align) > (align / 2)});
|
||||
}
|
||||
|
||||
return static_cast<T>(value / align + (value > 0 ? T{(value % align) > (align / 2)} : 0 - T{(value % align) < (align / 2)}));
|
||||
}
|
||||
|
||||
// Multiplying by ratio, semi-resistant to overflows
|
||||
template <UnsignedInt T>
|
||||
constexpr T rational_mul(T value, std::type_identity_t<T> numerator, std::type_identity_t<T> denominator)
|
||||
{
|
||||
if constexpr (sizeof(T) <= sizeof(u64) / 2)
|
||||
{
|
||||
return static_cast<T>(value * u64{numerator} / u64{denominator});
|
||||
}
|
||||
|
||||
#if is_u128_emulated
|
||||
if constexpr (sizeof(T) <= sizeof(u128) / 2)
|
||||
{
|
||||
return static_cast<T>(u128_from_mul(value, numerator) / u64{denominator});
|
||||
}
|
||||
#endif
|
||||
|
||||
return static_cast<T>(value / denominator * numerator + (value % denominator) * numerator / denominator);
|
||||
}
|
||||
|
||||
template <UnsignedInt T>
|
||||
constexpr T add_saturate(T addend1, T addend2)
|
||||
{
|
||||
return static_cast<T>(~addend1) < addend2 ? T{umax} : static_cast<T>(addend1 + addend2);
|
||||
}
|
||||
|
||||
template <UnsignedInt T>
|
||||
constexpr T sub_saturate(T minuend, T subtrahend)
|
||||
{
|
||||
return minuend < subtrahend ? T{0} : static_cast<T>(minuend - subtrahend);
|
||||
}
|
||||
|
||||
template <UnsignedInt T>
|
||||
constexpr T mul_saturate(T factor1, T factor2)
|
||||
{
|
||||
return factor1 > 0 && T{umax} / factor1 < factor2 ? T{umax} : static_cast<T>(factor1 * factor2);
|
||||
}
|
||||
|
||||
inline void trigger_write_page_fault(void* ptr)
|
||||
{
|
||||
#if defined(ARCH_X64) && !defined(_MSC_VER)
|
||||
__asm__ volatile("lock orl $0, 0(%0)" ::"r"(ptr));
|
||||
#elif defined(ARCH_ARM64) && !defined(ANDROID)
|
||||
u32 value = 0;
|
||||
u32* u32_ptr = static_cast<u32*>(ptr);
|
||||
__asm__ volatile("ldset %w0, %w0, %1" : "+r"(value), "=Q"(*u32_ptr) : "r"(value));
|
||||
#else
|
||||
*static_cast<atomic_t<u32>*>(ptr) += 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void trap()
|
||||
{
|
||||
#ifdef _M_X64
|
||||
__debugbreak();
|
||||
#elif defined(ARCH_X64)
|
||||
__asm__ volatile("int3");
|
||||
#elif defined(ARCH_ARM64)
|
||||
__asm__ volatile("brk 0x42");
|
||||
#else
|
||||
#error "Missing utils::trap() implementation"
|
||||
#endif
|
||||
}
|
||||
} // namespace utils
|
||||
|
||||
using utils::busy_wait;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
using utils::operator/;
|
||||
#endif
|
||||
|
|
@ -50,9 +50,9 @@ static bool has_waitv()
|
|||
#include <array>
|
||||
#include <random>
|
||||
|
||||
#include "asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "endian.hpp"
|
||||
#include "tsc.hpp"
|
||||
#include "rx/tsc.hpp"
|
||||
|
||||
// Total number of entries.
|
||||
static constexpr usz s_hashtable_size = 1u << 17;
|
||||
|
|
@ -402,7 +402,7 @@ static u32 cond_alloc(uptr iptr, u32 tls_slot = -1)
|
|||
constexpr u128 max_mask = dup8(8192);
|
||||
|
||||
// Leave only bits indicating sub-semaphore is full, find free one
|
||||
const u32 pos = utils::ctz128(~val & max_mask);
|
||||
const u32 pos = rx::ctz128(~val & max_mask);
|
||||
|
||||
if (pos == 128) [[unlikely]]
|
||||
{
|
||||
|
|
@ -422,7 +422,7 @@ static u32 cond_alloc(uptr iptr, u32 tls_slot = -1)
|
|||
{
|
||||
constexpr u128 max_mask = dup8(1024);
|
||||
|
||||
const u32 pos = utils::ctz128(~val & max_mask);
|
||||
const u32 pos = rx::ctz128(~val & max_mask);
|
||||
|
||||
val += u128{1} << (pos / 11 * 11);
|
||||
|
||||
|
|
@ -433,7 +433,7 @@ static u32 cond_alloc(uptr iptr, u32 tls_slot = -1)
|
|||
{
|
||||
constexpr u128 max_mask = dup8(64) | (dup8(64) << 56);
|
||||
|
||||
const u32 pos = utils::ctz128(~val & max_mask);
|
||||
const u32 pos = rx::ctz128(~val & max_mask);
|
||||
|
||||
val += u128{1} << (pos / 7 * 7);
|
||||
|
||||
|
|
@ -495,15 +495,15 @@ static void cond_free(u32 cond_id, u32 tls_slot = -1)
|
|||
}
|
||||
|
||||
// Call the destructor if necessary
|
||||
utils::prefetch_write(s_cond_bits + cond_id / 64);
|
||||
rx::prefetch_write(s_cond_bits + cond_id / 64);
|
||||
|
||||
const u32 level3 = cond_id / 64 % 16;
|
||||
const u32 level2 = cond_id / 1024 % 8;
|
||||
const u32 level1 = cond_id / 8192 % 8;
|
||||
|
||||
utils::prefetch_write(s_cond_sem3 + level2);
|
||||
utils::prefetch_write(s_cond_sem2 + level1);
|
||||
utils::prefetch_write(&s_cond_sem1);
|
||||
rx::prefetch_write(s_cond_sem3 + level2);
|
||||
rx::prefetch_write(s_cond_sem2 + level1);
|
||||
rx::prefetch_write(&s_cond_sem1);
|
||||
|
||||
cond->destroy();
|
||||
|
||||
|
|
@ -676,7 +676,7 @@ namespace
|
|||
|
||||
u64 utils::get_unique_tsc()
|
||||
{
|
||||
const u64 stamp0 = utils::get_tsc();
|
||||
const u64 stamp0 = rx::get_tsc();
|
||||
|
||||
if (!s_min_tsc.fetch_op([=](u64& tsc)
|
||||
{
|
||||
|
|
@ -832,7 +832,7 @@ FORCE_INLINE auto root_info::slot_search(uptr iptr, F func) noexcept
|
|||
{
|
||||
if (u16 cond_id = _this->slots[std::countr_zero(bits)])
|
||||
{
|
||||
utils::prefetch_read(s_cond_list + cond_id);
|
||||
rx::prefetch_read(s_cond_list + cond_id);
|
||||
cond_ids[cond_count++] = cond_id;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,7 +9,8 @@
|
|||
#include "Emu/VFS.h"
|
||||
|
||||
#include "util/types.hpp"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
|
||||
#include <charconv>
|
||||
#include <regex>
|
||||
|
|
@ -972,10 +973,10 @@ static usz apply_modification(std::vector<u32>& applied, patch_engine::patch_inf
|
|||
// Do not allow null address or if resultant ptr is not a VM ptr
|
||||
if (const u32 alloc_at = (p.offset & -4096); alloc_at >> 16)
|
||||
{
|
||||
const u32 alloc_size = utils::align(static_cast<u32>(p.value.long_value) + alloc_at % 4096, 4096);
|
||||
const u32 alloc_size = rx::alignUp(static_cast<u32>(p.value.long_value) + alloc_at % 4096, 4096);
|
||||
|
||||
// Allocate map if needed, if allocated flags will indicate that bit 62 is set (unique identifier)
|
||||
auto alloc_map = vm::reserve_map(vm::any, alloc_at & -0x10000, utils::align(alloc_size, 0x10000), vm::page_size_64k | (1ull << 62));
|
||||
auto alloc_map = vm::reserve_map(vm::any, alloc_at & -0x10000, rx::alignUp(alloc_size, 0x10000), vm::page_size_64k | (1ull << 62));
|
||||
|
||||
u64 flags = vm::alloc_unwritable;
|
||||
|
||||
|
|
@ -1106,7 +1107,7 @@ static usz apply_modification(std::vector<u32>& applied, patch_engine::patch_inf
|
|||
}
|
||||
case patch_type::c_utf8:
|
||||
{
|
||||
memory_size = utils::add_saturate<u32>(::size32(p.original_value), 1);
|
||||
memory_size = rx::add_saturate<u32>(::size32(p.original_value), 1);
|
||||
break;
|
||||
}
|
||||
case patch_type::move_file:
|
||||
|
|
@ -1165,7 +1166,7 @@ static usz apply_modification(std::vector<u32>& applied, patch_engine::patch_inf
|
|||
continue;
|
||||
}
|
||||
|
||||
const u32 alloc_size = utils::align(static_cast<u32>(p.value.long_value + 1) * 4, 0x10000);
|
||||
const u32 alloc_size = rx::alignUp(static_cast<u32>(p.value.long_value + 1) * 4, 0x10000);
|
||||
|
||||
// Check if should maybe reuse previous code cave allocation (0 size)
|
||||
if (alloc_size - 4 != 0)
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
#include "util/types.hpp"
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
/*
|
||||
C-style format parser. Appends formatted string to `out`, returns number of characters written.
|
||||
|
|
@ -59,7 +59,7 @@ usz cfmt_append(Dst& out, const Char* fmt, Src&& src)
|
|||
{
|
||||
if constexpr (sizeof(value) == 16)
|
||||
{
|
||||
out.resize(out.size() + std::max<u64>(min_num, 129 / 3 - (utils::clz128(value | 1) + 1) / 3), '0');
|
||||
out.resize(out.size() + std::max<u64>(min_num, 129 / 3 - (rx::clz128(value | 1) + 1) / 3), '0');
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -77,7 +77,7 @@ usz cfmt_append(Dst& out, const Char* fmt, Src&& src)
|
|||
{
|
||||
if constexpr (sizeof(value) == 16)
|
||||
{
|
||||
out.resize(out.size() + std::max<u64>(min_num, 128 / 4 - utils::clz128(value | 1) / 4), '0');
|
||||
out.resize(out.size() + std::max<u64>(min_num, 128 / 4 - rx::clz128(value | 1) / 4), '0');
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
#include <algorithm>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "windows.h"
|
||||
#include "tlhelp32.h"
|
||||
#ifdef _MSC_VER
|
||||
|
|
@ -148,7 +148,7 @@ namespace utils
|
|||
status = PdhGetFormattedCounterArray(m_cpu_cores, PDH_FMT_DOUBLE, &dwBufferSize, &dwItemCount, nullptr);
|
||||
if (static_cast<PDH_STATUS>(PDH_MORE_DATA) == status)
|
||||
{
|
||||
std::vector<PDH_FMT_COUNTERVALUE_ITEM> items(utils::aligned_div(dwBufferSize, sizeof(PDH_FMT_COUNTERVALUE_ITEM)));
|
||||
std::vector<PDH_FMT_COUNTERVALUE_ITEM> items(rx::aligned_div(dwBufferSize, sizeof(PDH_FMT_COUNTERVALUE_ITEM)));
|
||||
if (items.size() >= dwItemCount)
|
||||
{
|
||||
status = PdhGetFormattedCounterArray(m_cpu_cores, PDH_FMT_DOUBLE, &dwBufferSize, &dwItemCount, items.data());
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#include "mutex.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
void shared_mutex::imp_lock_shared(u32 val)
|
||||
{
|
||||
|
|
@ -26,7 +26,7 @@ void shared_mutex::imp_lock_shared(u32 val)
|
|||
return;
|
||||
}
|
||||
|
||||
busy_wait();
|
||||
rx::busy_wait();
|
||||
}
|
||||
|
||||
// Acquire writer lock and downgrade
|
||||
|
|
@ -96,7 +96,7 @@ void shared_mutex::imp_lock(u32 val)
|
|||
|
||||
for (int i = 0; i < 10; i++)
|
||||
{
|
||||
busy_wait();
|
||||
rx::busy_wait();
|
||||
|
||||
const u32 old = m_value;
|
||||
|
||||
|
|
@ -138,7 +138,7 @@ void shared_mutex::imp_lock_upgrade()
|
|||
{
|
||||
for (int i = 0; i < 10; i++)
|
||||
{
|
||||
busy_wait();
|
||||
rx::busy_wait();
|
||||
|
||||
if (try_lock_upgrade())
|
||||
{
|
||||
|
|
@ -178,7 +178,7 @@ void shared_mutex::imp_lock_unlock()
|
|||
|
||||
_max = val / c_one;
|
||||
|
||||
busy_wait(1500);
|
||||
rx::busy_wait(1500);
|
||||
}
|
||||
|
||||
// Lock and unlock
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
#include "sema.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
void semaphore_base::imp_wait()
|
||||
{
|
||||
for (int i = 0; i < 10; i++)
|
||||
{
|
||||
busy_wait();
|
||||
rx::busy_wait();
|
||||
|
||||
const u32 value = m_value.load();
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#include "util/types.hpp"
|
||||
#include "util/logs.hpp"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "util/sysinfo.hpp"
|
||||
#include "util/endian.hpp"
|
||||
#include "util/lockless.h"
|
||||
|
|
@ -112,7 +112,7 @@ bool uncompressed_serialization_file_handler::handle_file_op(utils::serial& ar,
|
|||
ar.data_offset = pos;
|
||||
}
|
||||
|
||||
const usz read_pre_buffer = ar.data.empty() ? 0 : utils::sub_saturate<usz>(ar.data_offset, pos);
|
||||
const usz read_pre_buffer = ar.data.empty() ? 0 : rx::sub_saturate<usz>(ar.data_offset, pos);
|
||||
|
||||
if (read_pre_buffer)
|
||||
{
|
||||
|
|
@ -128,8 +128,8 @@ bool uncompressed_serialization_file_handler::handle_file_op(utils::serial& ar,
|
|||
|
||||
// Adjustment to prevent overflow
|
||||
const usz subtrahend = ar.data.empty() ? 0 : 1;
|
||||
const usz read_past_buffer = utils::sub_saturate<usz>(pos + (size - subtrahend), ar.data_offset + (ar.data.size() - subtrahend));
|
||||
const usz read_limit = utils::sub_saturate<usz>(ar.m_max_data, ar.data_offset);
|
||||
const usz read_past_buffer = rx::sub_saturate<usz>(pos + (size - subtrahend), ar.data_offset + (ar.data.size() - subtrahend));
|
||||
const usz read_limit = rx::sub_saturate<usz>(ar.m_max_data, ar.data_offset);
|
||||
|
||||
if (read_past_buffer)
|
||||
{
|
||||
|
|
@ -410,7 +410,7 @@ bool compressed_serialization_file_handler::handle_file_op(utils::serial& ar, us
|
|||
// ar.seek_pos(pos);
|
||||
// }
|
||||
|
||||
const usz read_pre_buffer = utils::sub_saturate<usz>(ar.data_offset, pos);
|
||||
const usz read_pre_buffer = rx::sub_saturate<usz>(ar.data_offset, pos);
|
||||
|
||||
if (read_pre_buffer)
|
||||
{
|
||||
|
|
@ -421,8 +421,8 @@ bool compressed_serialization_file_handler::handle_file_op(utils::serial& ar, us
|
|||
|
||||
// Adjustment to prevent overflow
|
||||
const usz subtrahend = ar.data.empty() ? 0 : 1;
|
||||
const usz read_past_buffer = utils::sub_saturate<usz>(pos + (size - subtrahend), ar.data_offset + (ar.data.size() - subtrahend));
|
||||
const usz read_limit = utils::sub_saturate<usz>(ar.m_max_data, ar.data_offset);
|
||||
const usz read_past_buffer = rx::sub_saturate<usz>(pos + (size - subtrahend), ar.data_offset + (ar.data.size() - subtrahend));
|
||||
const usz read_limit = rx::sub_saturate<usz>(ar.m_max_data, ar.data_offset);
|
||||
|
||||
if (read_past_buffer)
|
||||
{
|
||||
|
|
@ -506,7 +506,7 @@ usz compressed_serialization_file_handler::read_at(utils::serial& ar, usz read_p
|
|||
m_stream_data_index = m_zs.avail_in ? m_zs.next_in - m_stream_data.data() : m_stream_data.size();
|
||||
|
||||
// Adjust again in case the values simply did not fit into uInt
|
||||
m_zs.avail_out = adjust_for_uint(utils::sub_saturate<usz>(total_to_read, read_size));
|
||||
m_zs.avail_out = adjust_for_uint(rx::sub_saturate<usz>(total_to_read, read_size));
|
||||
m_zs.avail_in = adjust_for_uint(m_stream_data.size() - m_stream_data_index);
|
||||
|
||||
if (need_more_file_memory)
|
||||
|
|
@ -779,7 +779,7 @@ usz compressed_serialization_file_handler::get_size(const utils::serial& ar, usz
|
|||
return memory_available;
|
||||
}
|
||||
|
||||
return std::max<usz>(utils::mul_saturate<usz>(m_file->size(), 6), memory_available);
|
||||
return std::max<usz>(rx::mul_saturate<usz>(m_file->size(), 6), memory_available);
|
||||
}
|
||||
|
||||
struct compressed_zstd_stream_data
|
||||
|
|
@ -973,7 +973,7 @@ bool compressed_zstd_serialization_file_handler::handle_file_op(utils::serial& a
|
|||
// ar.seek_pos(pos);
|
||||
// }
|
||||
|
||||
const usz read_pre_buffer = utils::sub_saturate<usz>(ar.data_offset, pos);
|
||||
const usz read_pre_buffer = rx::sub_saturate<usz>(ar.data_offset, pos);
|
||||
|
||||
if (read_pre_buffer)
|
||||
{
|
||||
|
|
@ -984,8 +984,8 @@ bool compressed_zstd_serialization_file_handler::handle_file_op(utils::serial& a
|
|||
|
||||
// Adjustment to prevent overflow
|
||||
const usz subtrahend = ar.data.empty() ? 0 : 1;
|
||||
const usz read_past_buffer = utils::sub_saturate<usz>(pos + (size - subtrahend), ar.data_offset + (ar.data.size() - subtrahend));
|
||||
const usz read_limit = utils::sub_saturate<usz>(ar.m_max_data, ar.data_offset);
|
||||
const usz read_past_buffer = rx::sub_saturate<usz>(pos + (size - subtrahend), ar.data_offset + (ar.data.size() - subtrahend));
|
||||
const usz read_limit = rx::sub_saturate<usz>(ar.m_max_data, ar.data_offset);
|
||||
|
||||
if (read_past_buffer)
|
||||
{
|
||||
|
|
@ -1326,7 +1326,7 @@ usz compressed_zstd_serialization_file_handler::get_size(const utils::serial& ar
|
|||
}
|
||||
|
||||
return recommended;
|
||||
// return std::max<usz>(utils::mul_saturate<usz>(ZSTD_decompressBound(m_file->size()), 2), memory_available);
|
||||
// return std::max<usz>(rx::mul_saturate<usz>(ZSTD_decompressBound(m_file->size()), 2), memory_available);
|
||||
}
|
||||
|
||||
bool null_serialization_file_handler::handle_file_op(utils::serial&, usz, usz, const void*)
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
#include "util/types.hpp"
|
||||
#include "util/v128.hpp"
|
||||
#include "util/sysinfo.hpp"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "util/JIT.h"
|
||||
#include <rx/simd.hpp>
|
||||
|
||||
|
|
|
|||
|
|
@ -18,14 +18,14 @@
|
|||
#include <sys/resource.h>
|
||||
#ifndef __APPLE__
|
||||
#include <sys/utsname.h>
|
||||
#include <errno.h>
|
||||
#include <cerrno>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include <thread>
|
||||
#include <fstream>
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "util/fence.hpp"
|
||||
|
||||
#if defined(_M_X64) && defined(_MSC_VER)
|
||||
|
|
@ -790,7 +790,7 @@ static constexpr ullong round_tsc(ullong val, ullong known_error)
|
|||
known_error /= 10;
|
||||
}
|
||||
|
||||
return utils::rounded_div(val, by) * by;
|
||||
return rx::rounded_div(val, by) * by;
|
||||
}
|
||||
|
||||
namespace utils
|
||||
|
|
@ -898,7 +898,7 @@ static const bool s_tsc_freq_evaluated = []() -> bool
|
|||
{
|
||||
for (usz i = 0; i < retry_count; i++)
|
||||
{
|
||||
const u64 rdtsc_read = (utils::lfence(), utils::get_tsc());
|
||||
const u64 rdtsc_read = (utils::lfence(), rx::get_tsc());
|
||||
#ifdef _WIN32
|
||||
LARGE_INTEGER ctr;
|
||||
QueryPerformanceCounter(&ctr);
|
||||
|
|
@ -906,7 +906,7 @@ static const bool s_tsc_freq_evaluated = []() -> bool
|
|||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
#endif
|
||||
const u64 rdtsc_read2 = (utils::lfence(), utils::get_tsc());
|
||||
const u64 rdtsc_read2 = (utils::lfence(), rx::get_tsc());
|
||||
|
||||
#ifdef _WIN32
|
||||
const u64 timer_read = ctr.QuadPart - time_base;
|
||||
|
|
@ -961,10 +961,10 @@ static const bool s_tsc_freq_evaluated = []() -> bool
|
|||
|
||||
const u128 data = u128_from_mul(rdtsc_data[1] - rdtsc_data[0], timer_freq);
|
||||
|
||||
const u64 res = utils::udiv128(static_cast<u64>(data >> 64), static_cast<u64>(data), (timer_data[1] - timer_data[0]));
|
||||
const u64 res = rx::udiv128(static_cast<u64>(data >> 64), static_cast<u64>(data), (timer_data[1] - timer_data[0]));
|
||||
|
||||
// Rounding
|
||||
return round_tsc(res, utils::mul_saturate<u64>(utils::add_saturate<u64>(rdtsc_diff[0], rdtsc_diff[1]), utils::aligned_div(timer_freq, timer_data[1] - timer_data[0])));
|
||||
return round_tsc(res, rx::mul_saturate<u64>(rx::add_saturate<u64>(rdtsc_diff[0], rdtsc_diff[1]), rx::aligned_div(timer_freq, timer_data[1] - timer_data[0])));
|
||||
}();
|
||||
|
||||
atomic_storage<u64>::store(utils::s_tsc_freq, cal_tsc);
|
||||
|
|
|
|||
|
|
@ -1,29 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "util/types.hpp"
|
||||
|
||||
#ifdef _M_X64
|
||||
#ifdef _MSC_VER
|
||||
extern "C" u64 __rdtsc();
|
||||
#else
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace utils
|
||||
{
|
||||
inline u64 get_tsc()
|
||||
{
|
||||
#if defined(ARCH_ARM64)
|
||||
u64 r = 0;
|
||||
__asm__ volatile("mrs %0, cntvct_el0" : "=r"(r));
|
||||
return r;
|
||||
#elif defined(_M_X64)
|
||||
return __rdtsc();
|
||||
#elif defined(ARCH_X64)
|
||||
return __builtin_ia32_rdtsc();
|
||||
#else
|
||||
#error "Missing utils::get_tsc() implementation"
|
||||
#endif
|
||||
}
|
||||
} // namespace utils
|
||||
|
|
@ -1,6 +1,8 @@
|
|||
#include "stdafx.h"
|
||||
#include "util/vm.hpp"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "util/File.h"
|
||||
#include "util/dyn_lib.hpp"
|
||||
|
|
@ -492,7 +494,7 @@ namespace utils
|
|||
}
|
||||
|
||||
shm::shm(u64 size, u32 flags)
|
||||
: m_flags(flags), m_size(utils::align(size, 0x10000))
|
||||
: m_flags(flags), m_size(rx::alignUp(size, 0x10000))
|
||||
{
|
||||
#ifdef _WIN32
|
||||
const ULARGE_INTEGER max_size{.QuadPart = m_size};
|
||||
|
|
@ -535,7 +537,7 @@ namespace utils
|
|||
}
|
||||
|
||||
shm::shm(u64 size, const std::string& storage)
|
||||
: m_size(utils::align(size, 0x10000))
|
||||
: m_size(rx::alignUp(size, 0x10000))
|
||||
{
|
||||
#ifdef _WIN32
|
||||
fs::file f;
|
||||
|
|
@ -857,7 +859,7 @@ namespace utils
|
|||
{
|
||||
const u64 res64 = reinterpret_cast<u64>(::mmap(reinterpret_cast<void*>(ptr64), m_size + 0xf000, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0));
|
||||
|
||||
const u64 aligned = utils::align(res64, 0x10000);
|
||||
const u64 aligned = rx::alignUp(res64, 0x10000);
|
||||
const auto result = ::mmap(reinterpret_cast<void*>(aligned), m_size, +prot, (cow ? MAP_PRIVATE : MAP_SHARED) | MAP_FIXED, m_file, 0);
|
||||
|
||||
// Now cleanup remnants
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue