Compare commits

...

6 commits

Author SHA1 Message Date
DH 3986f77869 orbis/umtx: remove state from context 2025-10-06 01:58:24 +03:00
DH e66ce512d2 kernel: Add GlobalKernelObject utility 2025-10-06 01:57:23 +03:00
DH fd9bf42538 rx: shared_cv/shared_mtx disable copying/moving 2025-10-06 01:55:11 +03:00
DH be56f0745a rx/serializer: fixed compilation with gcc 2025-10-06 01:54:10 +03:00
DH 37f423aec3 add missed file changes 2025-10-05 20:07:19 +03:00
DH 640df36c48 moved tsc and asm utilities to rx 2025-10-05 19:28:03 +03:00
133 changed files with 1032 additions and 1381 deletions

View file

@ -272,14 +272,13 @@ target_compile_definitions(rx PRIVATE
if (WITH_PS4)
find_package(nlohmann_json CONFIG)
add_subdirectory(tools)
add_subdirectory(kernel/orbis)
endif()
add_subdirectory(rpcsx)
add_subdirectory(kernel)
if (WITH_PS3)
include(ConfigureCompiler)
add_subdirectory(kernel/cellos)
add_subdirectory(rpcs3)
add_subdirectory(ps3fw)
endif()

View file

@ -41,12 +41,13 @@
#include "rpcs3_version.h"
#include "rpcsx/fw/ps3/cellMsgDialog.h"
#include "rpcsx/fw/ps3/cellSysutil.h"
#include "rx/asm.hpp"
#include "rx/debug.hpp"
#include "util/File.h"
#include "util/JIT.h"
#include "util/StrFmt.h"
#include "util/StrUtil.h"
#include "util/Thread.h"
#include "util/asm.hpp"
#include "util/console.h"
#include "util/fixed_typemap.hpp"
#include "util/logs.hpp"
@ -241,7 +242,7 @@ void jit_announce(uptr, usz, std::string_view);
__android_log_write(ANDROID_LOG_FATAL, "RPCS3", buf.c_str());
jit_announce(0, 0, "");
utils::trap();
rx::breakpoint();
std::abort();
std::terminate();
}

kernel/CMakeLists.txt Normal file
View file

@ -0,0 +1,10 @@
add_library(kernel INTERFACE)
target_include_directories(kernel INTERFACE include)
if (WITH_PS3)
add_subdirectory(cellos)
endif()
if (WITH_PS4)
add_subdirectory(orbis)
endif()
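
Together with the root CMakeLists change above, this defines kernel as a header-only INTERFACE target: linking against it (as obj.orbis-kernel does later in this diff) propagates only the include directory, while the PS3 and PS4 kernel implementations stay gated behind their WITH_PS3/WITH_PS4 options.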

View file

@ -53,10 +53,10 @@
#include "sys_usbd.h"
#include "sys_vm.h"
#include "rx/tsc.hpp"
#include "util/atomic_bit_set.h"
#include "util/init_mutex.hpp"
#include "util/sysinfo.hpp"
#include "util/tsc.hpp"
#include <algorithm>
#include <deque>
#include <optional>
@ -2138,7 +2138,7 @@ void lv2_obj::schedule_all(u64 current_time) {
}
if (const u64 freq = s_yield_frequency) {
const u64 tsc = utils::get_tsc();
const u64 tsc = rx::get_tsc();
const u64 last_tsc = s_last_yield_tsc;
if (tsc >= last_tsc && tsc <= s_max_allowed_yield_tsc &&
@ -2297,7 +2297,7 @@ mwaitx_func static void __mwaitx(u32 cycles, u32 cstate) {
// First bit indicates cstate, 0x0 for C0.2 state (lower power) or 0x1 for
// C0.1 state (higher power)
waitpkg_func static void __tpause(u32 cycles, u32 cstate) {
const u64 tsc = utils::get_tsc() + cycles;
const u64 tsc = rx::get_tsc() + cycles;
_tpause(cstate, tsc);
}
#endif

View file

@ -9,7 +9,7 @@
#include "sys_cond.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
LOG_CHANNEL(sys_cond);
@ -454,7 +454,7 @@ error_code sys_cond_wait(ppu_thread &ppu, u32 cond_id, u64 timeout) {
}
for (usz i = 0; cpu_flag::signal - ppu.state && i < 50; i++) {
busy_wait(500);
rx::busy_wait(500);
}
if (ppu.state & cpu_flag::signal) {

View file

@ -8,7 +8,8 @@
#include "Emu/Memory/vm_locking.h"
#include "rpcsx/fw/ps3/sys_lv2dbg.h"
#include "util/asm.hpp"
#include "rx/align.hpp"
#include "rx/asm.hpp"
void ppu_register_function_at(u32 addr, u32 size,
ppu_intrp_func_t ptr = nullptr);
@ -92,7 +93,7 @@ error_code sys_dbg_write_process_memory(s32 pid, u32 address, u32 size,
for (u32 i = address, exec_update_size = 0; i < end;) {
const u32 op_size =
std::min<u32>(utils::align<u32>(i + 1, 0x10000), end) - i;
std::min<u32>(rx::alignUp<u32>(i + 1, 0x10000), end) - i;
const bool is_exec =
vm::check_addr(i, vm::page_executable | vm::page_readable);

View file

@ -11,7 +11,7 @@
#include "Emu/Cell/SPUThread.h"
#include "sys_process.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
LOG_CHANNEL(sys_event);
@ -468,7 +468,7 @@ error_code sys_event_queue_receive(ppu_thread &ppu, u32 equeue_id,
}
for (usz i = 0; cpu_flag::signal - ppu.state && i < 50; i++) {
busy_wait(500);
rx::busy_wait(500);
}
if (ppu.state & cpu_flag::signal) {

View file

@ -7,7 +7,7 @@
#include "Emu/Cell/ErrorCodes.h"
#include "Emu/Cell/PPUThread.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
LOG_CHANNEL(sys_event_flag);
@ -195,7 +195,7 @@ error_code sys_event_flag_wait(ppu_thread &ppu, u32 id, u64 bitptn, u32 mode,
}
for (usz i = 0; cpu_flag::signal - ppu.state && i < 50; i++) {
busy_wait(500);
rx::busy_wait(500);
}
if (ppu.state & cpu_flag::signal) {

View file

@ -1,9 +1,9 @@
#include "stdafx.h"
#include "rx/asm.hpp"
#include "sys_fs.h"
#include "sys_memory.h"
#include "sys_sync.h"
#include "util/asm.hpp"
#include "Crypto/unedat.h"
#include "Emu/Cell/PPUThread.h"
@ -618,7 +618,7 @@ struct lv2_file::file_view : fs::file_base {
fs::stat_t stat = m_file->file.get_stat();
// TODO: Check this on realhw
// stat.size = utils::sub_saturate<u64>(stat.size, m_off);
// stat.size = rx::sub_saturate<u64>(stat.size, m_off);
stat.is_writable = false;
return stat;
@ -655,7 +655,7 @@ struct lv2_file::file_view : fs::file_base {
}
u64 size() override {
return utils::sub_saturate<u64>(m_file->file.size(), m_off);
return rx::sub_saturate<u64>(m_file->file.size(), m_off);
}
fs::file_id get_id() override {

View file

@ -8,7 +8,7 @@
#include "Emu/Cell/PPUThread.h"
#include "sys_lwmutex.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
LOG_CHANNEL(sys_lwcond);
@ -490,7 +490,7 @@ error_code _sys_lwcond_queue_wait(ppu_thread &ppu, u32 lwcond_id,
}
for (usz i = 0; cpu_flag::signal - ppu.state && i < 50; i++) {
busy_wait(500);
rx::busy_wait(500);
}
if (ppu.state & cpu_flag::signal) {

View file

@ -7,7 +7,7 @@
#include "Emu/Cell/ErrorCodes.h"
#include "Emu/Cell/PPUThread.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
LOG_CHANNEL(sys_lwmutex);
@ -194,7 +194,7 @@ error_code _sys_lwmutex_lock(ppu_thread &ppu, u32 lwmutex_id, u64 timeout) {
}
for (usz i = 0; cpu_flag::signal - ppu.state && i < 50; i++) {
busy_wait(500);
rx::busy_wait(500);
}
if (ppu.state & cpu_flag::signal) {

View file

@ -8,7 +8,8 @@
#include "Emu/IdManager.h"
#include "Emu/Memory/vm_locking.h"
#include "util/asm.hpp"
#include "rx/align.hpp"
#include "rx/asm.hpp"
LOG_CHANNEL(sys_memory);
@ -75,11 +76,11 @@ struct sys_memory_address_table {
};
std::shared_ptr<vm::block_t> reserve_map(u32 alloc_size, u32 align) {
return vm::reserve_map(
align == 0x10000 ? vm::user64k : vm::user1m, 0,
align == 0x10000 ? 0x20000000 : utils::align(alloc_size, 0x10000000),
align == 0x10000 ? (vm::page_size_64k | vm::bf0_0x1)
: (vm::page_size_1m | vm::bf0_0x1));
return vm::reserve_map(align == 0x10000 ? vm::user64k : vm::user1m, 0,
align == 0x10000 ? 0x20000000
: rx::alignUp(alloc_size, 0x10000000),
align == 0x10000 ? (vm::page_size_64k | vm::bf0_0x1)
: (vm::page_size_1m | vm::bf0_0x1));
}
// Todo: fix order of error checks

View file

@ -5,7 +5,7 @@
#include "Emu/Cell/ErrorCodes.h"
#include "Emu/Cell/PPUThread.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "sys_mutex.h"
@ -147,7 +147,7 @@ error_code sys_mutex_lock(ppu_thread &ppu, u32 mutex_id, u64 timeout) {
// Try busy waiting a bit if advantageous
for (u32 i = 0, end = lv2_obj::has_ppus_in_running_state() ? 3 : 10;
id_manager::g_mutex.is_lockable() && i < end; i++) {
busy_wait(300);
rx::busy_wait(300);
result = mutex.try_lock(ppu);
if (!result ||
@ -212,7 +212,7 @@ error_code sys_mutex_lock(ppu_thread &ppu, u32 mutex_id, u64 timeout) {
}
for (usz i = 0; cpu_flag::signal - ppu.state && i < 40; i++) {
busy_wait(500);
rx::busy_wait(500);
}
if (ppu.state & cpu_flag::signal) {

View file

@ -15,7 +15,8 @@
#include "sys_mmapper.h"
#include "sys_process.h"
#include "util/asm.hpp"
#include "rx/align.hpp"
#include "rx/asm.hpp"
#include <thread>
@ -148,7 +149,7 @@ void _sys_ppu_thread_exit(ppu_thread &ppu, u64 errorcode) {
// Need to wait until the current writers finish
if (ppu.state & cpu_flag::memory) {
for (; writer_mask; writer_mask &= vm::g_range_lock_bits[1]) {
busy_wait(200);
rx::busy_wait(200);
}
}
}
@ -468,7 +469,7 @@ error_code _sys_ppu_thread_create(ppu_thread &ppu, vm::ptr<u64> thread_id,
const u32 tls = param->tls;
// Compute actual stack size and allocate
const u32 stack_size = utils::align<u32>(std::max<u32>(_stacksz, 4096), 4096);
const u32 stack_size = rx::alignUp<u32>(std::max<u32>(_stacksz, 4096), 4096);
auto &dct = g_fxo->get<lv2_memory_container>();

View file

@ -10,9 +10,9 @@
#include "Emu/RSX/Core/RSXReservationLock.hpp"
#include "Emu/RSX/RSXThread.h"
#include "Emu/System.h"
#include "rx/asm.hpp"
#include "sys_event.h"
#include "sys_vm.h"
#include "util/asm.hpp"
LOG_CHANNEL(sys_rsx);
@ -46,7 +46,7 @@ static void set_rsx_dmactl(rsx::thread *render, u64 get_put) {
}
}
utils::pause();
rx::pause();
}
// Schedule FIFO interrupt to deal with this immediately

View file

@ -7,7 +7,7 @@
#include "Emu/Cell/ErrorCodes.h"
#include "Emu/Cell/PPUThread.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
LOG_CHANNEL(sys_rwlock);
@ -151,7 +151,7 @@ error_code sys_rwlock_rlock(ppu_thread &ppu, u32 rw_lock_id, u64 timeout) {
}
for (usz i = 0; cpu_flag::signal - ppu.state && i < 50; i++) {
busy_wait(500);
rx::busy_wait(500);
}
if (ppu.state & cpu_flag::signal) {
@ -355,7 +355,7 @@ error_code sys_rwlock_wlock(ppu_thread &ppu, u32 rw_lock_id, u64 timeout) {
}
for (usz i = 0; cpu_flag::signal - ppu.state && i < 50; i++) {
busy_wait(500);
rx::busy_wait(500);
}
if (ppu.state & cpu_flag::signal) {

View file

@ -7,7 +7,7 @@
#include "Emu/Cell/ErrorCodes.h"
#include "Emu/Cell/PPUThread.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
LOG_CHANNEL(sys_semaphore);
@ -167,7 +167,7 @@ error_code sys_semaphore_wait(ppu_thread &ppu, u32 sem_id, u64 timeout) {
}
for (usz i = 0; cpu_flag::signal - ppu.state && i < 50; i++) {
busy_wait(500);
rx::busy_wait(500);
}
if (ppu.state & cpu_flag::signal) {

View file

@ -21,7 +21,8 @@
#include "sys_mmapper.h"
#include "sys_process.h"
#include "util/asm.hpp"
#include "rx/align.hpp"
#include "rx/asm.hpp"
LOG_CHANNEL(sys_spu);
@ -129,7 +130,7 @@ void sys_spu_image::load(const fs::file &stream) {
this->nsegs = 0;
this->segs = vm::null;
vm::page_protect(segs.addr(), utils::align(mem_size, 4096), 0, 0,
vm::page_protect(segs.addr(), rx::alignUp(mem_size, 4096), 0, 0,
vm::page_writable);
}
@ -196,8 +197,8 @@ void sys_spu_image::deploy(u8 *loc, std::span<const sys_spu_segment> segs,
}
auto mem_translate = [loc](u32 addr, u32 size) {
return utils::add_saturate<u32>(addr, size) <= SPU_LS_SIZE ? loc + addr
: nullptr;
return rx::add_saturate<u32>(addr, size) <= SPU_LS_SIZE ? loc + addr
: nullptr;
};
// Apply the patch
@ -1259,7 +1260,7 @@ error_code sys_spu_thread_group_terminate(ppu_thread &ppu, u32 id, s32 value) {
// termination
auto short_sleep = [](ppu_thread &ppu) {
lv2_obj::sleep(ppu);
busy_wait(3000);
rx::busy_wait(3000);
ppu.check_state();
ppu.state += cpu_flag::wait;
};
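
The saturating helpers used above (rx::add_saturate/rx::sub_saturate, replacing their utils:: equivalents throughout this diff) are assumed to clamp at the type's limits instead of wrapping; a minimal sketch for unsigned types:

#include <cstdint>
#include <limits>

// Clamp to the type's range instead of wrapping (unsigned T assumed).
template <typename T> constexpr T add_saturate(T a, T b) {
  return a > std::numeric_limits<T>::max() - b ? std::numeric_limits<T>::max()
                                               : static_cast<T>(a + b);
}
template <typename T> constexpr T sub_saturate(T a, T b) {
  return a < b ? T{0} : static_cast<T>(a - b);
}
static_assert(add_saturate<std::uint32_t>(0xFFFFFFF0u, 0x100u) == 0xFFFFFFFFu);
static_assert(sub_saturate<std::uint64_t>(5, 9) == 0);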

View file

@ -5,8 +5,8 @@
#include "Emu/Cell/ErrorCodes.h"
#include "Emu/Cell/timers.hpp"
#include "Emu/system_config.h"
#include "rx/tsc.hpp"
#include "sys_process.h"
#include "util/tsc.hpp"
#include "util/sysinfo.hpp"
@ -14,7 +14,7 @@ u64 g_timebase_offs{};
static u64 systemtime_offset;
#ifndef __linux__
#include "util/asm.hpp"
#include "rx/asm.hpp"
#endif
#ifdef _WIN32
@ -151,7 +151,7 @@ u64 convert_to_timebased_time(u64 time) {
u64 get_timebased_time() {
if (u64 freq = utils::get_tsc_freq()) {
const u64 tsc = utils::get_tsc();
const u64 tsc = rx::get_tsc();
#if _MSC_VER
const u64 result =
@ -218,7 +218,7 @@ void initialize_timebased_time(u64 timebased_init, bool reset) {
// Returns some relative time in microseconds, don't change this fact
u64 get_system_time() {
if (u64 freq = utils::get_tsc_freq()) {
const u64 tsc = utils::get_tsc();
const u64 tsc = rx::get_tsc();
#if _MSC_VER
const u64 result = static_cast<u64>(u128_from_mul(tsc, 1000000ull) / freq);
@ -358,7 +358,7 @@ error_code sys_time_get_current_time(vm::ptr<s64> sec, vm::ptr<s64> nsec) {
// Get time difference in nanoseconds (using 128 bit accumulator)
const u64 diff_sl = diff_base * 1000000000ull;
const u64 diff_sh = utils::umulh64(diff_base, 1000000000ull);
const u64 diff_sh = rx::umulh64(diff_base, 1000000000ull);
const u64 diff = utils::udiv128(diff_sh, diff_sl, s_time_aux_info.perf_freq);
// get time since Epoch in nanoseconds
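
The pattern above widens the product to 128 bits before dividing so the multiply cannot overflow; a sketch of the same arithmetic with the helpers this diff uses (rx::umulh64 from the new header, utils::udiv128 unchanged, u64 as in util/types.hpp):

// ticks -> nanoseconds: (ticks * 1'000'000'000) / freq, exact in 128 bits.
u64 ticks_to_ns(u64 ticks, u64 freq) {
  const u64 lo = ticks * 1000000000ull;             // low 64 bits of product
  const u64 hi = rx::umulh64(ticks, 1000000000ull); // high 64 bits of product
  return utils::udiv128(hi, lo, freq);              // 128-bit product / freq
}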

View file

@ -9,9 +9,9 @@
#include "Emu/System.h"
#include "Emu/system_config.h"
#include "rx/asm.hpp"
#include "sys_event.h"
#include "sys_process.h"
#include "util/asm.hpp"
#include <deque>
#include <thread>
@ -77,9 +77,9 @@ u64 lv2_timer::check_unlocked(u64 _now) noexcept {
if (period) {
// Set next expiration time and check again
const u64 expire0 = utils::add_saturate<u64>(next, period);
const u64 expire0 = rx::add_saturate<u64>(next, period);
expire.release(expire0);
return utils::sub_saturate<u64>(expire0, _now);
return rx::sub_saturate<u64>(expire0, _now);
}
// Stop after oneshot
@ -265,11 +265,11 @@ error_code _sys_timer_start(ppu_thread &ppu, u32 timer_id, u64 base_time,
const u64 expire =
period == 0 ? base_time : // oneshot
base_time == 0
? utils::add_saturate(start_time, period)
? rx::add_saturate(start_time, period)
:
// periodic timer with no base (using start time as base)
start_time < utils::add_saturate(base_time, period)
? utils::add_saturate(base_time, period)
start_time < rx::add_saturate(base_time, period)
? rx::add_saturate(base_time, period)
:
// periodic with base time over start time
[&]() -> u64 // periodic timer base before start time (align to
@ -282,10 +282,10 @@ error_code _sys_timer_start(ppu_thread &ppu, u32 timer_id, u64 base_time,
// }
// while (base_time < start_time);
const u64 start_time_with_base_time_reminder = utils::add_saturate(
const u64 start_time_with_base_time_reminder = rx::add_saturate(
start_time - start_time % period, base_time % period);
return utils::add_saturate(
return rx::add_saturate(
start_time_with_base_time_reminder,
start_time_with_base_time_reminder < start_time ? period : 0);
}();
@ -428,10 +428,10 @@ error_code sys_timer_usleep(ppu_thread &ppu, u64 sleep_time) {
// Over/underflow checks
if (add_time >= 0) {
sleep_time = utils::add_saturate<u64>(sleep_time, add_time);
sleep_time = rx::add_saturate<u64>(sleep_time, add_time);
} else {
sleep_time =
std::max<u64>(1, utils::sub_saturate<u64>(sleep_time, -add_time));
std::max<u64>(1, rx::sub_saturate<u64>(sleep_time, -add_time));
}
lv2_obj::sleep(ppu, g_cfg.core.sleep_timers_accuracy <

View file

@ -0,0 +1,139 @@
#pragma once
#include "rx/LinkedNode.hpp"
#include "rx/Serializer.hpp"
#include <cassert>
namespace kernel {
namespace detail {
struct GlobalObjectCtl {
void (*construct)();
void (*destruct)();
void (*serialize)(rx::Serializer &);
void (*deserialize)(rx::Deserializer &);
};
template <typename NamespaceT, typename T> struct GlobalKernelObjectInstance {
static inline T *instance = nullptr;
static inline rx::LinkedNode<GlobalObjectCtl> ctl = {
.object = {
.construct = +[] { instance->construct(); },
.destruct = +[] { instance->destruct(); },
.serialize = +[](rx::Serializer &s) { instance->serialize(s); },
.deserialize = +[](rx::Deserializer &s) { instance->deserialize(s); },
},
};
};
} // namespace detail
template <typename NamespaceT> struct GlobalKernelObjectStorage {
template <typename T> static void AddObject() {
auto node = &detail::GlobalKernelObjectInstance<NamespaceT, T>::ctl;
auto head = GetHead();
if (head) {
head->prev = node;
node->next = head;
}
*GetHeadPtr() = node;
}
static void ConstructAll() {
for (auto it = GetHead(); it != nullptr; it = it->next) {
it->object.construct();
}
}
static void DestructAll() {
for (auto it = GetHead(); it != nullptr; it = it->next) {
it->object.destruct();
}
}
static void SerializeAll(rx::Serializer &s) {
for (auto it = GetHead(); it != nullptr; it = it->next) {
it->object.serialize(s);
}
}
static void DeserializeAll(rx::Deserializer &s) {
for (auto it = GetHead(); it != nullptr; it = it->next) {
it->object.deserialize(s);
}
}
private:
static rx::LinkedNode<detail::GlobalObjectCtl> *GetHead() {
return *GetHeadPtr();
}
static rx::LinkedNode<detail::GlobalObjectCtl> **GetHeadPtr() {
static rx::LinkedNode<detail::GlobalObjectCtl> *registry;
return &registry;
}
};
template <rx::Serializable T, typename NamespaceT>
requires std::is_default_constructible_v<T>
class GlobalKernelObject {
union U {
T object;
U() {}
~U() {}
};
U mHolder;
public:
template <typename = void> GlobalKernelObject() {
auto &instance =
detail::GlobalKernelObjectInstance<NamespaceT,
GlobalKernelObject>::instance;
assert(instance == nullptr);
instance = this;
GlobalKernelObjectStorage<NamespaceT>::template AddObject<
GlobalKernelObject>();
}
T *operator->() { return &mHolder.object; }
const T *operator->() const { return &mHolder.object; }
T &operator*() { return mHolder.object; }
const T &operator*() const { return mHolder.object; }
operator T &() { return mHolder.object; }
operator const T &() const { return mHolder.object; }
void serialize(rx::Serializer &s)
requires rx::Serializable<T>
{
s.serialize(mHolder.object);
}
void deserialize(rx::Deserializer &s)
requires rx::Serializable<T>
{
std::construct_at(&mHolder.object);
s.deserialize(mHolder.object);
}
T &get() { return mHolder.object; }
const T &get() const { return mHolder.object; }
private:
template <typename... Args>
requires(std::is_constructible_v<T, Args && ...>)
void construct(Args &&...args) noexcept(
std::is_nothrow_constructible_v<T, Args &&...>) {
std::construct_at(&mHolder.object, std::forward<Args>(args)...);
}
template <typename... Args>
void destruct() noexcept(std::is_nothrow_destructible_v<T>) {
mHolder.object.~T();
}
friend detail::GlobalKernelObjectInstance<NamespaceT, GlobalKernelObject>;
};
} // namespace kernel
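
A minimal usage sketch for this utility (illustrative only, not part of the diff): a tag type names a registry, each GlobalKernelObject self-registers during static initialization, and ConstructAll/DestructAll drive the payloads' lifetimes. ExampleNamespace and Counters are hypothetical, and the rx::Serializer/rx::Deserializer member calls are assumptions based on how this header uses them.

#include <kernel/GlobalKernelObject.hpp>

struct ExampleNamespace; // registry tag (hypothetical)

struct Counters { // hypothetical payload, assumed to model rx::Serializable
  int value = 0;
  void serialize(rx::Serializer &s) { s.serialize(value); }       // API assumed
  void deserialize(rx::Deserializer &s) { s.deserialize(value); } // API assumed
};

// Self-registers with the ExampleNamespace registry; the payload stays
// unconstructed (it lives in a union) until ConstructAll() runs.
static kernel::GlobalKernelObject<Counters, ExampleNamespace> g_counters;

void kernelBootExample() {
  kernel::GlobalKernelObjectStorage<ExampleNamespace>::ConstructAll();
  g_counters->value += 1; // operator-> forwards to the payload
  kernel::GlobalKernelObjectStorage<ExampleNamespace>::DestructAll();
}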

View file

@ -67,7 +67,7 @@ add_library(obj.orbis-kernel OBJECT
src/utils/Logs.cpp
)
target_link_libraries(obj.orbis-kernel PUBLIC orbis::kernel::config rx)
target_link_libraries(obj.orbis-kernel PUBLIC orbis::kernel::config rx kernel)
target_include_directories(obj.orbis-kernel
PUBLIC

View file

@ -0,0 +1,26 @@
#pragma once
#include <kernel/GlobalKernelObject.hpp>
namespace orbis {
struct OrbisNamespace;
template <rx::Serializable T>
using GlobalKernelObject = kernel::GlobalKernelObject<T, OrbisNamespace>;
template <rx::Serializable T> GlobalKernelObject<T> createGlobalObject() {
return {};
}
inline void constructAllGlobals() {
kernel::GlobalKernelObjectStorage<OrbisNamespace>::ConstructAll();
}
inline void destructAllGlobals() {
kernel::GlobalKernelObjectStorage<OrbisNamespace>::DestructAll();
}
template <typename T> T &getGlobalObject() {
assert((kernel::detail::GlobalKernelObjectInstance<
        OrbisNamespace, GlobalKernelObject<T>>::instance));
return kernel::detail::GlobalKernelObjectInstance<
OrbisNamespace, GlobalKernelObject<T>>::instance->get();
}
} // namespace orbis

View file

@ -8,7 +8,6 @@
#include "osem.hpp"
#include "rx/IdMap.hpp"
#include "rx/LinkedNode.hpp"
#include "rx/SharedCV.hpp"
#include "rx/SharedMutex.hpp"
#include "thread/types.hpp"
@ -22,35 +21,6 @@ namespace orbis {
struct Process;
struct Thread;
struct UmtxKey {
// TODO: may contain a reference to a shared memory
std::uintptr_t addr;
orbis::pid_t pid;
auto operator<=>(const UmtxKey &) const = default;
};
struct UmtxCond {
Thread *thr;
rx::shared_cv cv;
UmtxCond(Thread *thr) : thr(thr) {}
};
struct UmtxChain {
rx::shared_mutex mtx;
using queue_type = utils::kmultimap<UmtxKey, UmtxCond>;
queue_type sleep_queue;
queue_type spare_queue;
std::pair<const UmtxKey, UmtxCond> *enqueue(UmtxKey &key, Thread *thr);
void erase(std::pair<const UmtxKey, UmtxCond> *obj);
queue_type::iterator erase(queue_type::iterator it);
uint notify_one(const UmtxKey &key);
uint notify_all(const UmtxKey &key);
uint notify_n(const UmtxKey &key, sint count);
};
enum class FwType : std::uint8_t {
Unknown,
Ps4,
@ -172,26 +142,6 @@ public:
kenvValue[len] = '0';
}
enum {
c_golden_ratio_prime = 2654404609u,
c_umtx_chains = 512,
c_umtx_shifts = 23,
};
// Use getUmtxChain0 or getUmtxChain1
std::tuple<UmtxChain &, UmtxKey, std::unique_lock<rx::shared_mutex>>
getUmtxChainIndexed(int i, Thread *t, uint32_t flags, void *ptr);
// Internal Umtx: Wait/Cv/Sem
auto getUmtxChain0(Thread *t, uint32_t flags, void *ptr) {
return getUmtxChainIndexed(0, t, flags, ptr);
}
// Internal Umtx: Mutex/Umtx/Rwlock
auto getUmtxChain1(Thread *t, uint32_t flags, void *ptr) {
return getUmtxChainIndexed(1, t, flags, ptr);
}
rx::Ref<EventEmitter> deviceEventEmitter;
rx::Ref<rx::RcBase> shmDevice;
rx::Ref<rx::RcBase> dmemDevice;
@ -235,8 +185,6 @@ private:
utils::kmultimap<std::size_t, void *> m_free_heap;
utils::kmultimap<std::size_t, void *> m_used_node;
UmtxChain m_umtx_chains[2][c_umtx_chains]{};
std::atomic<long> m_tsc_freq{0};
rx::shared_mutex m_thread_id_mtx;

View file

@ -283,24 +283,6 @@ void KernelContext::kfree(void *ptr, std::size_t size) {
}
}
std::tuple<UmtxChain &, UmtxKey, std::unique_lock<rx::shared_mutex>>
KernelContext::getUmtxChainIndexed(int i, Thread *t, uint32_t flags,
void *ptr) {
auto pid = t->tproc->pid;
auto p = reinterpret_cast<std::uintptr_t>(ptr);
if (flags & 1) {
pid = 0; // Process shared (TODO)
ORBIS_LOG_WARNING("Using process-shared umtx", t->tid, ptr, (p % 0x4000));
t->where();
}
auto n = p + pid;
if (flags & 1)
n %= 0x4000;
n = ((n * c_golden_ratio_prime) >> c_umtx_shifts) % c_umtx_chains;
std::unique_lock lock(m_umtx_chains[i][n].mtx);
return {m_umtx_chains[i][n], UmtxKey{p, pid}, std::move(lock)};
}
inline namespace utils {
void kfree(void *ptr, std::size_t size) { return g_context.kfree(ptr, size); }
void *kalloc(std::size_t size, std::size_t align) {

View file

@ -1,11 +1,85 @@
#include "umtx.hpp"
#include "orbis/umtx.hpp"
#include "GlobalKernelObject.hpp"
#include "error.hpp"
#include "orbis/KernelContext.hpp"
#include "orbis-config.hpp"
#include "orbis/thread.hpp"
#include "orbis/utils/Logs.hpp"
#include "rx/Serializer.hpp"
#include <limits>
namespace orbis {
struct UmtxKey {
// TODO: may contain a reference to a shared memory
std::uintptr_t addr;
orbis::pid_t pid;
auto operator<=>(const UmtxKey &) const = default;
};
struct UmtxCond {
Thread *thr;
rx::shared_cv cv;
UmtxCond(Thread *thr) : thr(thr) {}
};
struct UmtxChain {
rx::shared_mutex mtx;
using queue_type = utils::kmultimap<UmtxKey, UmtxCond>;
queue_type sleep_queue;
queue_type spare_queue;
std::pair<const UmtxKey, UmtxCond> *enqueue(UmtxKey &key, Thread *thr);
void erase(std::pair<const UmtxKey, UmtxCond> *obj);
queue_type::iterator erase(queue_type::iterator it);
uint notify_one(const UmtxKey &key);
uint notify_all(const UmtxKey &key);
uint notify_n(const UmtxKey &key, sint count);
};
struct UmtxStorage {
enum {
c_golden_ratio_prime = 2654404609u,
c_umtx_chains = 512,
c_umtx_shifts = 23,
};
UmtxChain m_umtx_chains[2][c_umtx_chains]{};
// Use getUmtxChain0 or getUmtxChain1
std::tuple<UmtxChain &, UmtxKey, std::unique_lock<rx::shared_mutex>>
getUmtxChainIndexed(int i, Thread *t, uint32_t flags, void *ptr) {
auto pid = t->tproc->pid;
auto p = reinterpret_cast<std::uintptr_t>(ptr);
if (flags & 1) {
pid = 0; // Process shared (TODO)
ORBIS_LOG_WARNING("Using process-shared umtx", t->tid, ptr, (p % 0x4000));
t->where();
}
auto n = p + pid;
if (flags & 1)
n %= 0x4000;
n = ((n * c_golden_ratio_prime) >> c_umtx_shifts) % c_umtx_chains;
std::unique_lock lock(m_umtx_chains[i][n].mtx);
return {m_umtx_chains[i][n], UmtxKey{p, pid}, std::move(lock)};
}
// Internal Umtx: Wait/Cv/Sem
auto getUmtxChain0(Thread *t, uint32_t flags, void *ptr) {
return getUmtxChainIndexed(0, t, flags, ptr);
}
// Internal Umtx: Mutex/Umtx/Rwlock
auto getUmtxChain1(Thread *t, uint32_t flags, void *ptr) {
return getUmtxChainIndexed(1, t, flags, ptr);
}
void serialize(rx::Serializer &) const {}
void deserialize(rx::Deserializer &) {}
};
static auto umtxStorage = createGlobalObject<UmtxStorage>();
std::pair<const UmtxKey, UmtxCond> *UmtxChain::enqueue(UmtxKey &key,
Thread *thr) {
if (!spare_queue.empty()) {
@ -80,7 +154,7 @@ orbis::ErrorCode orbis::umtx_unlock_umtx(Thread *thread, ptr<umtx> umtx,
orbis::ErrorCode orbis::umtx_wait(Thread *thread, ptr<void> addr, ulong id,
std::uint64_t ut, bool is32, bool ipc) {
ORBIS_LOG_NOTICE(__FUNCTION__, thread->tid, addr, id, ut, is32);
auto [chain, key, lock] = g_context.getUmtxChain0(thread, ipc, addr);
auto [chain, key, lock] = umtxStorage->getUmtxChain0(thread, ipc, addr);
auto node = chain.enqueue(key, thread);
ErrorCode result = {};
ulong val = 0;
@ -127,7 +201,7 @@ orbis::ErrorCode orbis::umtx_wait(Thread *thread, ptr<void> addr, ulong id,
orbis::ErrorCode orbis::umtx_wake(Thread *thread, ptr<void> addr, sint n_wake) {
ORBIS_LOG_NOTICE(__FUNCTION__, thread->tid, addr, n_wake);
auto [chain, key, lock] = g_context.getUmtxChain0(thread, true, addr);
auto [chain, key, lock] = umtxStorage->getUmtxChain0(thread, true, addr);
if (key.pid == 0) {
// IPC workaround (TODO)
chain.notify_all(key);
@ -162,7 +236,7 @@ static ErrorCode do_lock_normal(Thread *thread, ptr<umutex> m, uint flags,
std::uint64_t ut, umutex_lock_mode mode) {
ORBIS_LOG_TRACE(__FUNCTION__, thread->tid, m, flags, ut, mode);
auto [chain, key, lock] = g_context.getUmtxChain1(thread, flags, m);
auto [chain, key, lock] = umtxStorage->getUmtxChain1(thread, flags, m);
ErrorCode error = {};
while (true) {
int owner = m->owner.load(std::memory_order_acquire);
@ -219,7 +293,7 @@ static ErrorCode do_lock_pp(Thread *thread, ptr<umutex> m, uint flags,
static ErrorCode do_unlock_normal(Thread *thread, ptr<umutex> m, uint flags) {
ORBIS_LOG_TRACE(__FUNCTION__, thread->tid, m, flags);
auto [chain, key, lock] = g_context.getUmtxChain1(thread, flags, m);
auto [chain, key, lock] = umtxStorage->getUmtxChain1(thread, flags, m);
int owner = m->owner.load(std::memory_order_acquire);
if ((owner & ~kUmutexContested) != thread->tid)
@ -344,7 +418,7 @@ orbis::ErrorCode orbis::umtx_cv_wait(Thread *thread, ptr<ucond> cv,
}
}
auto [chain, key, lock] = g_context.getUmtxChain0(thread, cv->flags, cv);
auto [chain, key, lock] = umtxStorage->getUmtxChain0(thread, cv->flags, cv);
auto node = chain.enqueue(key, thread);
if (!cv->has_waiters.load(std::memory_order::relaxed)) {
@ -398,7 +472,7 @@ orbis::ErrorCode orbis::umtx_cv_wait(Thread *thread, ptr<ucond> cv,
orbis::ErrorCode orbis::umtx_cv_signal(Thread *thread, ptr<ucond> cv) {
ORBIS_LOG_TRACE(__FUNCTION__, thread->tid, cv);
auto [chain, key, lock] = g_context.getUmtxChain0(thread, cv->flags, cv);
auto [chain, key, lock] = umtxStorage->getUmtxChain0(thread, cv->flags, cv);
if (key.pid == 0) {
// IPC workaround (TODO)
chain.notify_all(key);
@ -413,7 +487,7 @@ orbis::ErrorCode orbis::umtx_cv_signal(Thread *thread, ptr<ucond> cv) {
orbis::ErrorCode orbis::umtx_cv_broadcast(Thread *thread, ptr<ucond> cv) {
ORBIS_LOG_TRACE(__FUNCTION__, thread->tid, cv);
auto [chain, key, lock] = g_context.getUmtxChain0(thread, cv->flags, cv);
auto [chain, key, lock] = umtxStorage->getUmtxChain0(thread, cv->flags, cv);
chain.notify_all(key);
cv->has_waiters.store(0, std::memory_order::relaxed);
return {};
@ -423,7 +497,8 @@ orbis::ErrorCode orbis::umtx_rw_rdlock(Thread *thread, ptr<urwlock> rwlock,
slong fflag, ulong ut) {
ORBIS_LOG_TRACE(__FUNCTION__, thread->tid, rwlock, fflag, ut);
auto flags = rwlock->flags;
auto [chain, key, lock] = g_context.getUmtxChain1(thread, flags & 1, rwlock);
auto [chain, key, lock] =
umtxStorage->getUmtxChain1(thread, flags & 1, rwlock);
auto wrflags = kUrwLockWriteOwner;
if (!(fflag & kUrwLockPreferReader) && !(flags & kUrwLockPreferReader)) {
@ -521,7 +596,8 @@ orbis::ErrorCode orbis::umtx_rw_wrlock(Thread *thread, ptr<urwlock> rwlock,
ORBIS_LOG_TRACE(__FUNCTION__, thread->tid, rwlock, ut);
auto flags = rwlock->flags;
auto [chain, key, lock] = g_context.getUmtxChain1(thread, flags & 1, rwlock);
auto [chain, key, lock] =
umtxStorage->getUmtxChain1(thread, flags & 1, rwlock);
uint32_t blocked_readers = 0;
ErrorCode error = {};
@ -626,7 +702,8 @@ orbis::ErrorCode orbis::umtx_rw_wrlock(Thread *thread, ptr<urwlock> rwlock,
orbis::ErrorCode orbis::umtx_rw_unlock(Thread *thread, ptr<urwlock> rwlock) {
auto flags = rwlock->flags;
auto [chain, key, lock] = g_context.getUmtxChain1(thread, flags & 1, rwlock);
auto [chain, key, lock] =
umtxStorage->getUmtxChain1(thread, flags & 1, rwlock);
auto state = rwlock->state.load(std::memory_order::relaxed);
if (state & kUrwLockWriteOwner) {
@ -681,7 +758,7 @@ orbis::ErrorCode orbis::umtx_rw_unlock(Thread *thread, ptr<urwlock> rwlock) {
orbis::ErrorCode orbis::umtx_wake_private(Thread *thread, ptr<void> addr,
sint n_wake) {
ORBIS_LOG_TRACE(__FUNCTION__, thread->tid, addr, n_wake);
auto [chain, key, lock] = g_context.getUmtxChain0(thread, false, addr);
auto [chain, key, lock] = umtxStorage->getUmtxChain0(thread, false, addr);
chain.notify_n(key, n_wake);
return {};
}
@ -711,7 +788,7 @@ orbis::ErrorCode orbis::umtx_wake_umutex(Thread *thread, ptr<umutex> m,
if (ErrorCode err = uread(flags, &m->flags); err != ErrorCode{})
return err;
auto [chain, key, lock] = g_context.getUmtxChain1(thread, flags, m);
auto [chain, key, lock] = umtxStorage->getUmtxChain1(thread, flags, m);
int owner = m->owner.load(std::memory_order::acquire);
if ((owner & ~kUmutexContested) != 0)
@ -736,7 +813,7 @@ orbis::ErrorCode orbis::umtx_wake_umutex(Thread *thread, ptr<umutex> m,
orbis::ErrorCode orbis::umtx_sem_wait(Thread *thread, ptr<usem> sem,
std::uint64_t ut) {
ORBIS_LOG_TRACE(__FUNCTION__, sem, ut);
auto [chain, key, lock] = g_context.getUmtxChain0(thread, sem->flags, sem);
auto [chain, key, lock] = umtxStorage->getUmtxChain0(thread, sem->flags, sem);
auto node = chain.enqueue(key, thread);
std::uint32_t has_waiters = sem->has_waiters;
@ -785,7 +862,7 @@ orbis::ErrorCode orbis::umtx_sem_wait(Thread *thread, ptr<usem> sem,
orbis::ErrorCode orbis::umtx_sem_wake(Thread *thread, ptr<usem> sem) {
ORBIS_LOG_TRACE(__FUNCTION__, sem);
auto [chain, key, lock] = g_context.getUmtxChain0(thread, sem->flags, sem);
auto [chain, key, lock] = umtxStorage->getUmtxChain0(thread, sem->flags, sem);
if (key.pid == 0) {
// IPC workaround (TODO)
chain.notify_all(key);
@ -819,7 +896,8 @@ orbis::ErrorCode orbis::umtx_wake2_umutex(Thread *thread, ptr<umutex> m,
if (ErrorCode err = uread(flags, &m->flags); err != ErrorCode{})
return err;
auto [chain, key, lock] = g_context.getUmtxChain1(thread, wakeFlags & 1, m);
auto [chain, key, lock] =
umtxStorage->getUmtxChain1(thread, wakeFlags & 1, m);
int owner = 0;
@ -860,7 +938,8 @@ orbis::ErrorCode orbis::umtx_wake3_umutex(Thread *thread, ptr<umutex> m,
if (ErrorCode err = uread(flags, &m->flags); err != ErrorCode{})
return err;
auto [chain, key, lock] = g_context.getUmtxChain1(thread, wakeFlags & 1, m);
auto [chain, key, lock] =
umtxStorage->getUmtxChain1(thread, wakeFlags & 1, m);
int owner = 0;
std::size_t count = chain.sleep_queue.count(key);
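
For intuition, the chain lookup that moved into UmtxStorage is a multiplicative (golden-ratio) hash over the (address, pid) key; a standalone sketch of the same arithmetic, with the constants copied from above:

#include <cstddef>
#include <cstdint>

// Pick one of the 512 chains for a umtx key (mirrors getUmtxChainIndexed).
std::size_t umtxChainIndex(std::uintptr_t addr, std::uint32_t pid, bool shared) {
  std::uintptr_t n = addr + (shared ? 0 : pid); // shared keys use pid == 0
  if (shared)
    n %= 0x4000; // shared keys hash only the low bits (matches % 0x4000 above)
  return ((n * 2654404609u) >> 23) % 512; // golden-ratio multiplicative hash
}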

View file

@ -1,4 +1,6 @@
#include "stdafx.h"
#include "rx/align.hpp"
#include "Emu/perf_meter.hpp"
#include "Emu/Cell/PPUModule.h"
#include "cellos/sys_sync.h"
@ -9,7 +11,7 @@
#include "cellAdec.h"
#include "util/simd.hpp"
#include "util/asm.hpp"
#include "rx/asm.hpp"
LOG_CHANNEL(cellAdec);
@ -415,7 +417,7 @@ void LpcmDecContext::exec(ppu_thread& ppu)
be_t<f32>* const _output = std::assume_aligned<0x80>(output.get_ptr());
s64 output_size = cmd.au_size;
s32 sample_num = static_cast<s32>(utils::align(+lpcm_param->audioPayloadSize, 0x10));
s32 sample_num = static_cast<s32>(rx::alignUp(+lpcm_param->audioPayloadSize, 0x10));
s32 channel_num = 0;
if (!dvd_packing)
@ -860,11 +862,11 @@ error_code _CellAdecCoreOpGetMemSize_lpcm(vm::ptr<CellAdecAttr> attr)
cellAdec.notice("_CellAdecCoreOpGetMemSize_lpcm(attr=*0x%x)", attr);
constexpr u32 mem_size =
utils::align(static_cast<u32>(sizeof(LpcmDecContext)), 0x80) + utils::align(static_cast<u32>(sizeof(CellAdecParamLpcm)), 0x80) + 0x100 // Command data for Spurs task
+ LPCM_DEC_OUTPUT_BUFFER_SIZE + 0x2900 // sizeof(CellSpurs) + sizeof(CellSpursTaskset)
+ 0x3b400 // Spurs context
+ 0x300 // (sizeof(CellSpursQueue) + 0x80 + queue buffer) * 2
+ 0x855; // Unused
rx::alignUp(static_cast<u32>(sizeof(LpcmDecContext)), 0x80) + rx::alignUp(static_cast<u32>(sizeof(CellAdecParamLpcm)), 0x80) + 0x100 // Command data for Spurs task
+ LPCM_DEC_OUTPUT_BUFFER_SIZE + 0x2900 // sizeof(CellSpurs) + sizeof(CellSpursTaskset)
+ 0x3b400 // Spurs context
+ 0x300 // (sizeof(CellSpursQueue) + 0x80 + queue buffer) * 2
+ 0x855; // Unused
static_assert(mem_size == 0x7ebd5);
@ -883,7 +885,7 @@ error_code _CellAdecCoreOpOpenExt_lpcm(ppu_thread& ppu, vm::ptr<LpcmDecContext>
ensure(handle.aligned(0x80)); // LLE doesn't check the alignment or aligns the address itself
ensure(!!notifyAuDone && !!notifyAuDoneArg && !!notifyPcmOut && !!notifyPcmOutArg && !!notifyError && !!notifyErrorArg && !!notifySeqDone && !!notifySeqDoneArg); // These should always be set
const u32 end_of_context_addr = handle.addr() + utils::align(static_cast<u32>(sizeof(LpcmDecContext)), 0x80);
const u32 end_of_context_addr = handle.addr() + rx::alignUp(static_cast<u32>(sizeof(LpcmDecContext)), 0x80);
handle->cmd_queue.front = 0;
handle->cmd_queue.back = 0;
@ -1587,10 +1589,10 @@ error_code adecOpen(ppu_thread& ppu, vm::ptr<CellAdecType> type, vm::cptr<CellAd
const s32 pcm_handle_num = core_ops->getPcmHandleNum(ppu);
const u32 bitstream_info_size = core_ops->getBsiInfoSize(ppu);
const auto _this = vm::ptr<AdecContext>::make(utils::align(+res->startAddr, 0x80));
const auto _this = vm::ptr<AdecContext>::make(rx::alignUp(+res->startAddr, 0x80));
const auto frames = vm::ptr<AdecFrame>::make(_this.addr() + sizeof(AdecContext));
const u32 bitstream_infos_addr = frames.addr() + pcm_handle_num * sizeof(AdecFrame);
const auto core_handle = vm::ptr<void>::make(utils::align(bitstream_infos_addr + bitstream_info_size * pcm_handle_num, 0x80));
const auto core_handle = vm::ptr<void>::make(rx::alignUp(bitstream_infos_addr + bitstream_info_size * pcm_handle_num, 0x80));
if (type->audioCodecType == CELL_ADEC_TYPE_LPCM_DVD)
{

View file

@ -1,11 +1,13 @@
#include "stdafx.h"
#include "rx/align.hpp"
#include "Emu/perf_meter.hpp"
#include "Emu/Cell/PPUModule.h"
#include "cellos/sys_sync.h"
#include "cellos/sys_ppu_thread.h"
#include "Emu/savestate_utils.hpp"
#include "sysPrxForUser.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "util/media_utils.h"
#include "cellAtracXdec.h"
@ -182,7 +184,7 @@ error_code AtracXdecDecoder::set_config_info(u32 sampling_freq, u32 ch_config_id
this->sampling_freq = sampling_freq;
this->ch_config_idx = ch_config_idx;
this->nbytes = nbytes;
this->nbytes_128_aligned = utils::align(nbytes, 0x80);
this->nbytes_128_aligned = rx::alignUp(nbytes, 0x80);
this->nch_in = ch_config_idx <= 4 ? ch_config_idx : ch_config_idx + 1;
if (ch_config_idx > 7u)
@ -741,7 +743,7 @@ error_code _CellAdecCoreOpGetMemSize_atracx(vm::ptr<CellAdecAttr> attr)
constexpr u32 mem_size =
sizeof(AtracXdecContext) + 0x7f + ATXDEC_SPURS_STRUCTS_SIZE + 0x1d8 + atracXdecGetSpursMemSize(nch_in) + ATXDEC_SAMPLES_PER_FRAME * sizeof(f32) * nch_in;
attr->workMemSize = utils::align(mem_size, 0x80);
attr->workMemSize = rx::alignUp(mem_size, 0x80);
return CELL_OK;
}
@ -765,7 +767,7 @@ error_code _CellAdecCoreOpOpenExt_atracx(ppu_thread& ppu, vm::ptr<AtracXdecConte
ensure(!!notifyAuDone && !!notifyAuDoneArg && !!notifyPcmOut && !!notifyPcmOutArg && !!notifyError && !!notifyErrorArg && !!notifySeqDone && !!notifySeqDoneArg); // These should always be set by cellAdec
write_to_ptr(handle.get_ptr(), AtracXdecContext(notifyAuDone, notifyAuDoneArg, notifyPcmOut, notifyPcmOutArg, notifyError, notifyErrorArg, notifySeqDone, notifySeqDoneArg,
vm::bptr<u8>::make(handle.addr() + utils::align(static_cast<u32>(sizeof(AtracXdecContext)), 0x80) + ATXDEC_SPURS_STRUCTS_SIZE)));
vm::bptr<u8>::make(handle.addr() + rx::alignUp(static_cast<u32>(sizeof(AtracXdecContext)), 0x80) + ATXDEC_SPURS_STRUCTS_SIZE)));
const vm::var<sys_mutex_attribute_t> mutex_attr{{SYS_SYNC_PRIORITY, SYS_SYNC_NOT_RECURSIVE, SYS_SYNC_NOT_PROCESS_SHARED, SYS_SYNC_NOT_ADAPTIVE, 0, 0, 0, {"_atd001"_u64}}};
const vm::var<sys_cond_attribute_t> cond_attr{{SYS_SYNC_NOT_PROCESS_SHARED, 0, 0, {"_atd002"_u64}}};

View file

@ -1,4 +1,6 @@
#include "stdafx.h"
#include "rx/align.hpp"
#include "Emu/System.h"
#include "Emu/IdManager.h"
#include "Emu/Cell/PPUModule.h"
@ -7,7 +9,7 @@
#include "cellPamf.h"
#include "cellDmux.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include <thread>
@ -765,7 +767,7 @@ PesHeader::PesHeader(DemuxerStream& stream)
}
ElementaryStream::ElementaryStream(Demuxer* dmux, u32 addr, u32 size, u32 fidMajor, u32 fidMinor, u32 sup1, u32 sup2, vm::ptr<CellDmuxCbEsMsg> cbFunc, u32 cbArg, u32 spec)
: put(utils::align(addr, 128)), dmux(dmux), memAddr(utils::align(addr, 128)), memSize(size - (addr - memAddr)), fidMajor(fidMajor), fidMinor(fidMinor), sup1(sup1), sup2(sup2), cbFunc(cbFunc), cbArg(cbArg), spec(spec)
: put(rx::alignUp(addr, 128)), dmux(dmux), memAddr(rx::alignUp(addr, 128)), memSize(size - (addr - memAddr)), fidMajor(fidMajor), fidMinor(fidMinor), sup1(sup1), sup2(sup2), cbFunc(cbFunc), cbArg(cbArg), spec(spec)
{
}
@ -849,7 +851,7 @@ void ElementaryStream::push_au(u32 size, u64 dts, u64 pts, u64 userdata, bool ra
addr = put;
put = utils::align(put + 128 + size, 128);
put = rx::alignUp(put + 128 + size, 128);
put_count++;
}

View file

@ -18,7 +18,7 @@
#include "Crypto/utils.h"
#include "Loader/PSF.h"
#include "util/StrUtil.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "util/init_mutex.hpp"
#include <span>
@ -691,7 +691,7 @@ error_code cellHddGameGetSizeKB(ppu_thread& ppu, vm::ptr<u32> size)
// This function is very slow by nature
// TODO: Check if after first use the result is being cached so the sleep can
// be reduced in this case
lv2_sleep(utils::sub_saturate<u64>(dirsz == umax ? 2000 : 200000,
lv2_sleep(rx::sub_saturate<u64>(dirsz == umax ? 2000 : 200000,
get_guest_system_time() - start_sleep),
&ppu);
@ -757,7 +757,7 @@ error_code cellGameDataGetSizeKB(ppu_thread& ppu, vm::ptr<u32> size)
// This function is very slow by nature
// TODO: Check if after first use the result is being cached so the sleep can
// be reduced in this case
lv2_sleep(utils::sub_saturate<u64>(dirsz == umax ? 2000 : 200000,
lv2_sleep(rx::sub_saturate<u64>(dirsz == umax ? 2000 : 200000,
get_guest_system_time() - start_sleep),
&ppu);
@ -1127,7 +1127,7 @@ cellGameContentPermit(ppu_thread& ppu,
}
// This function is very slow by nature
lv2_sleep(utils::sub_saturate<u64>(
lv2_sleep(rx::sub_saturate<u64>(
!perm.temp.empty() || perm.can_create ? 200000 : 2000,
get_guest_system_time() - start_sleep),
&ppu);
@ -1886,7 +1886,7 @@ error_code cellGameGetSizeKB(ppu_thread& ppu, vm::ptr<s32> size)
// This function is very slow by nature
// TODO: Check if after first use the result is being cached so the sleep can
// be reduced in this case
lv2_sleep(utils::sub_saturate<u64>(dirsz == umax ? 1000 : 200000,
lv2_sleep(rx::sub_saturate<u64>(dirsz == umax ? 1000 : 200000,
get_guest_system_time() - start_sleep),
&ppu);

View file

@ -10,7 +10,7 @@
#include "cellGcmSys.h"
#include "sysPrxForUser.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
LOG_CHANNEL(cellGcmSys);
@ -1491,7 +1491,7 @@ s32 cellGcmCallback(ppu_thread& ppu, vm::ptr<CellGcmContextData> context, u32 co
return 0;
}
busy_wait();
rx::busy_wait();
}
return CELL_OK;

View file

@ -1,3 +1,5 @@
#include "stdafx.h"
#include "cellSysutil.h"
#include "cellUserInfo.h"
#include "Emu/Cell/PPUModule.h"
@ -12,7 +14,6 @@
#include "Emu/localized_string.h"
#include "Emu/savestate_utils.hpp"
#include "Emu/system_config.h"
#include "stdafx.h"
#include "cellMsgDialog.h"
#include "cellSaveData.h"
@ -26,7 +27,9 @@
#include <mutex>
#include <span>
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "rx/align.hpp"
#include "rx/types.hpp"
LOG_CHANNEL(cellSaveData);
@ -65,11 +68,11 @@ std::string SaveDataEntry::date() const
std::string SaveDataEntry::data_size() const
{
std::string metric = "KB";
u64 sz = utils::aligned_div(size, 1000);
u64 sz = rx::aligned_div(size, 1000);
if (sz > 1000)
{
metric = "MB";
sz = utils::aligned_div(sz, 1000);
sz = rx::aligned_div(sz, 1000);
}
return fmt::format("%lu %s", sz, metric);
}
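
rx::aligned_div and rx::rounded_div (used here and for the trophy percentage later in this diff) are assumed to keep the semantics of the utils:: helpers they replace: round-up and round-to-nearest integer division. A minimal sketch:

#include <cstdint>

// Assumed semantics, for unsigned operands:
constexpr std::uint64_t aligned_div(std::uint64_t a, std::uint64_t b) {
  return (a + b - 1) / b; // round up
}
constexpr std::uint64_t rounded_div(std::uint64_t a, std::uint64_t b) {
  return (a + b / 2) / b; // round to nearest
}
static_assert(aligned_div(1500, 1000) == 2); // 1500 bytes -> "2 KB"
static_assert(rounded_div(149, 100) == 1 && rounded_div(150, 100) == 2);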
@ -1286,7 +1289,7 @@ savedata_op(ppu_thread& ppu, u32 operation, u32 version, vm::cptr<char> dirName,
{
if (!file.is_directory)
{
size_bytes += utils::align(file.size, 1024);
size_bytes += rx::alignUp(file.size, 1024);
}
}
@ -1728,7 +1731,7 @@ savedata_op(ppu_thread& ppu, u32 operation, u32 version, vm::cptr<char> dirName,
statGet->fileNum++;
size_bytes +=
utils::align(entry.size, 1024); // firmware rounds this value up
rx::alignUp(entry.size, 1024); // firmware rounds this value up
if (statGet->fileListNum >= setBuf->fileListMax)
continue;
@ -2345,7 +2348,7 @@ savedata_op(ppu_thread& ppu, u32 operation, u32 version, vm::cptr<char> dirName,
final_blist = fmt::merge(blist, "/");
psf::assign(
psf, "RPCS3_BLIST",
psf::string(utils::align(::size32(final_blist) + 1, 4), final_blist));
psf::string(rx::alignUp(::size32(final_blist) + 1, 4), final_blist));
// Write all files in temporary directory
auto& fsfo = all_files["PARAM.SFO"];

View file

@ -15,7 +15,7 @@
#include "sysPrxForUser.h"
#include "cellSpurs.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "util/v128.hpp"
#include "util/simd.hpp"
@ -4145,7 +4145,7 @@ s32 _spurs::create_task(vm::ptr<CellSpursTaskset> taskset, vm::ptr<u32> task_id,
{
v128 ls_pattern_128 = v128::from64r(ls_pattern->_u64[0], ls_pattern->_u64[1]);
const u32 ls_blocks = utils::popcnt128(ls_pattern_128._u);
const u32 ls_blocks = rx::popcnt128(ls_pattern_128._u);
if (ls_blocks > alloc_ls_blocks)
{

View file

@ -6,7 +6,7 @@
#include "Emu/Cell/SPURecompiler.h"
#include "cellSpurs.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "util/v128.hpp"
#include "util/simd.hpp"
@ -1446,7 +1446,7 @@ s32 spursTasksetProcessRequest(spu_thread& spu, s32 request, u32* taskId, u32* i
{
v128 newlyReadyTasks = gv_andn(ready, signalled | pready);
numNewlyReadyTasks = utils::popcnt128(newlyReadyTasks._u);
numNewlyReadyTasks = rx::popcnt128(newlyReadyTasks._u);
}
v128 readyButNotRunning;
@ -1701,7 +1701,7 @@ s32 spursTasketSaveTaskContext(spu_thread& spu)
u32 allocLsBlocks = static_cast<u32>(taskInfo->context_save_storage_and_alloc_ls_blocks & 0x7F);
v128 ls_pattern = v128::from64r(taskInfo->ls_pattern._u64[0], taskInfo->ls_pattern._u64[1]);
const u32 lsBlocks = utils::popcnt128(ls_pattern._u);
const u32 lsBlocks = rx::popcnt128(ls_pattern._u);
if (lsBlocks > allocLsBlocks)
{

View file

@ -1,7 +1,9 @@
#include "stdafx.h"
#include "Emu/Cell/PPUModule.h"
#include "Emu/IdManager.h"
#include "util/asm.hpp"
#include "rx/align.hpp"
#include "rx/asm.hpp"
#include "sceNp.h"
#include "sceNp2.h"
@ -946,7 +948,7 @@ error_code cellSysutilAvc2Load_shared(SceNpMatching2ContextId /*ctx_id*/, u32 /*
window_count++;
}
total_bitrate = utils::align<u32>(window_count * bitrate, 0x100000) + 0x100000;
total_bitrate = rx::alignUp<u32>(window_count * bitrate, 0x100000) + 0x100000;
}
settings.video_stream_sharing = init_param->video_param.video_stream_sharing;

View file

@ -1,3 +1,5 @@
#include "stdafx.h"
#include "Emu/Cell/PPUModule.h"
#include "cellos/sys_ppu_thread.h"
#include "cellos/sys_process.h"
@ -5,7 +7,7 @@
#include "Emu/IdManager.h"
#include "Emu/perf_meter.hpp"
#include "Emu/savestate_utils.hpp"
#include "stdafx.h"
#include "rx/align.hpp"
#include "sysPrxForUser.h"
#include "util/media_utils.h"
@ -32,7 +34,7 @@ extern "C"
#include "cellPamf.h"
#include "cellVdec.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "util/lockless.h"
#include <cmath>
#include <mutex>
@ -1660,7 +1662,7 @@ error_code cellVdecGetPicItem(ppu_thread& ppu, u32 handle,
const int buffer_size = av_image_get_buffer_size(
vdec->ctx->pix_fmt, vdec->ctx->width, vdec->ctx->height, 1);
ensure(buffer_size >= 0);
info->size = utils::align<u32>(buffer_size, 128);
info->size = rx::alignUp<u32>(buffer_size, 128);
info->auNum = 1;
info->auPts[0].lower = static_cast<u32>(pts);
info->auPts[0].upper = static_cast<u32>(pts >> 32);

View file

@ -20,7 +20,7 @@
#include "cellos/sys_event.h"
#include "cellos/sys_fs.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include <algorithm>
#include <functional>
#include <shared_mutex>
@ -1490,7 +1490,7 @@ error_code sceNpTrophyGetGameProgress(u32 context, u32 handle,
const u32 trp_count = ctxt->tropusr->GetTrophiesCount();
// Round result to nearest (TODO: Check 0 trophies)
*percentage = trp_count ? utils::rounded_div(unlocked * 100, trp_count) : 0;
*percentage = trp_count ? rx::rounded_div(unlocked * 100, trp_count) : 0;
if (trp_count == 0 || trp_count > 128)
{

View file

@ -7,7 +7,7 @@
#include "cellos/sys_mutex.h"
#include "sysPrxForUser.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
LOG_CHANNEL(sysPrxForUser);
@ -151,7 +151,7 @@ error_code sys_lwmutex_lock(ppu_thread& ppu, vm::ptr<sys_lwmutex_t> lwmutex, u64
for (u32 i = 0; i < 10; i++)
{
busy_wait();
rx::busy_wait();
if (lwmutex->vars.owner.load() == lwmutex_free)
{
@ -210,7 +210,7 @@ error_code sys_lwmutex_lock(ppu_thread& ppu, vm::ptr<sys_lwmutex_t> lwmutex, u64
{
for (u32 i = 0; i < 10; i++)
{
busy_wait();
rx::busy_wait();
if (lwmutex->vars.owner.load() == lwmutex_free)
{

View file

@ -1,4 +1,6 @@
#include "stdafx.h"
#include "rx/align.hpp"
#include "key_vault.h"
#include "unedat.h"
#include "sha1.h"
@ -8,7 +10,7 @@
#include "Emu/system_utils.hpp"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include <algorithm>
#include <span>
@ -233,7 +235,7 @@ s64 decrypt_block(const fs::file* in, u8* out, EDAT_HEADER* edat, NPD_HEADER* np
// Locate the real data.
const usz pad_length = length;
length = utils::align<usz>(pad_length, 0x10);
length = rx::alignUp<usz>(pad_length, 0x10);
// Setup buffers for decryption and read the data.
std::vector<u8> enc_data_buf(is_out_buffer_aligned || length == pad_length ? 0 : length);
@ -432,12 +434,12 @@ bool check_data(u8* key, EDAT_HEADER* edat, NPD_HEADER* npd, const fs::file* f,
return false;
}
const usz block_num = utils::aligned_div<u64>(edat->file_size, edat->block_size);
const usz block_num = rx::aligned_div<u64>(edat->file_size, edat->block_size);
constexpr usz metadata_offset = 0x100;
const usz metadata_size = utils::mul_saturate<u64>(metadata_section_size, block_num);
const usz metadata_size = rx::mul_saturate<u64>(metadata_section_size, block_num);
u64 metadata_section_offset = metadata_offset;
if (utils::add_saturate<u64>(utils::add_saturate<u64>(file_offset, metadata_section_offset), metadata_size) > f->size())
if (rx::add_saturate<u64>(rx::add_saturate<u64>(file_offset, metadata_section_offset), metadata_size) > f->size())
{
return false;
}
@ -860,7 +862,7 @@ bool EDATADecrypter::ReadHeader()
//}
file_size = edatHeader.file_size;
total_blocks = ::narrow<u32>(utils::aligned_div(edatHeader.file_size, edatHeader.block_size));
total_blocks = ::narrow<u32>(rx::aligned_div(edatHeader.file_size, edatHeader.block_size));
// Try decrypting the first block instead
u8 data_sample[1];
@ -886,7 +888,7 @@ u64 EDATADecrypter::ReadData(u64 pos, u8* data, u64 size)
// Now we need to offset things to account for the actual 'range' requested
const u64 startOffset = pos % edatHeader.block_size;
const u64 num_blocks = utils::aligned_div(startOffset + size, edatHeader.block_size);
const u64 num_blocks = rx::aligned_div(startOffset + size, edatHeader.block_size);
// Find and decrypt block range covering pos + size
const u32 starting_block = ::narrow<u32>(pos / edatHeader.block_size);

View file

@ -1,7 +1,7 @@
#include "stdafx.h"
#include "aes.h"
#include "unself.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "Emu/System.h"
#include "Emu/system_utils.hpp"
#include "Crypto/unzip.h"
@ -887,7 +887,7 @@ bool SELFDecrypter::LoadHeaders(bool isElf32, SelfAdditionalInfo* out_info)
m_seg_ext_hdr.back().Load(self_f);
}
if (m_ext_hdr.version_hdr_offset == 0 || utils::add_saturate<u64>(m_ext_hdr.version_hdr_offset, sizeof(version_header)) > self_f.size())
if (m_ext_hdr.version_hdr_offset == 0 || rx::add_saturate<u64>(m_ext_hdr.version_hdr_offset, sizeof(version_header)) > self_f.size())
{
return false;
}

View file

@ -1,4 +1,5 @@
#include "stdafx.h"
#include "CPUThread.h"
#include "CPUDisAsm.h"
@ -14,7 +15,7 @@
#include "Emu/RSX/RSXThread.h"
#include "Emu/perf_meter.hpp"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include <thread>
#include <unordered_map>
#include <map>
@ -64,7 +65,6 @@ void fmt_class_string<cpu_flag>::format(std::string& out, u64 arg)
case cpu_flag::dbg_global_pause: return "G-PAUSE";
case cpu_flag::dbg_pause: return "PAUSE";
case cpu_flag::dbg_step: return "STEP";
case cpu_flag::bitset_last: break;
}
return unknown;
@ -124,7 +124,7 @@ void fmt_class_string<cpu_threads_emulation_info_dump_t>::format(std::string& ou
for (u32 i = 0; !rlock.try_lock() && i < 100; i++)
{
busy_wait();
rx::busy_wait();
}
if (rlock)
@ -533,7 +533,7 @@ namespace cpu_counter
if (ok) [[likely]]
{
// Get actual slot number
id = utils::ctz128(~bits);
id = rx::ctz128(~bits);
// Register thread
if (s_cpu_list[id].compare_and_swap_test(nullptr, _this)) [[likely]]
@ -552,7 +552,7 @@ namespace cpu_counter
return;
}
busy_wait(300);
rx::busy_wait(300);
}
s_tls_thread_slot = id;
@ -599,7 +599,7 @@ namespace cpu_counter
{
for (u128 bits = copy; bits; bits &= bits - 1)
{
const u32 index = utils::ctz128(bits);
const u32 index = rx::ctz128(bits);
if (cpu_thread* cpu = s_cpu_list[index].load())
{
@ -1062,7 +1062,7 @@ bool cpu_thread::check_state() noexcept
{
if (i < 20 || ctr & 1)
{
busy_wait(300);
rx::busy_wait(300);
}
else
{
@ -1404,7 +1404,7 @@ bool cpu_thread::suspend_work::push(cpu_thread* _this) noexcept
{
if (cpu != _this)
{
utils::prefetch_write(&cpu->state);
rx::prefetch_write(&cpu->state);
return true;
}
@ -1446,7 +1446,7 @@ bool cpu_thread::suspend_work::push(cpu_thread* _this) noexcept
break;
}
utils::pause();
rx::pause();
}
// Second increment: all threads paused
@ -1480,13 +1480,13 @@ bool cpu_thread::suspend_work::push(cpu_thread* _this) noexcept
{
for (u32 i = 0; i < work->prf_size; i++)
{
utils::prefetch_write(work->prf_list[0]);
rx::prefetch_write(work->prf_list[0]);
}
}
cpu_counter::for_all_cpu(copy2, [&](cpu_thread* cpu)
{
utils::prefetch_write(&cpu->state);
rx::prefetch_write(&cpu->state);
return true;
});
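
The slot bookkeeping above treats a u128 as a 128-slot occupancy mask, so ctz128(~bits) means "index of the first free slot". rx::ctz128 is assumed to behave like this sketch (GCC/Clang unsigned __int128 assumed):

#include <cstdint>

// Index of the lowest set bit of a 128-bit value; 128 if none is set.
inline int ctz128(unsigned __int128 v) {
  const std::uint64_t lo = static_cast<std::uint64_t>(v);
  if (lo != 0)
    return __builtin_ctzll(lo);
  const std::uint64_t hi = static_cast<std::uint64_t>(v >> 64);
  return hi != 0 ? 64 + __builtin_ctzll(hi) : 128;
}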

View file

@ -8719,10 +8719,22 @@ FORCE_INLINE __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon)
uint8x16_t dest = {
// Undo ShiftRows step from AESE and extract X1 and X3
u8[0x4], u8[0x1], u8[0xE], u8[0xB], // SubBytes(X1)
u8[0x1], u8[0xE], u8[0xB], u8[0x4], // ROT(SubBytes(X1))
u8[0xC], u8[0x9], u8[0x6], u8[0x3], // SubBytes(X3)
u8[0x9], u8[0x6], u8[0x3], u8[0xC], // ROT(SubBytes(X3))
u8[0x4],
u8[0x1],
u8[0xE],
u8[0xB], // SubBytes(X1)
u8[0x1],
u8[0xE],
u8[0xB],
u8[0x4], // ROT(SubBytes(X1))
u8[0xC],
u8[0x9],
u8[0x6],
u8[0x3], // SubBytes(X3)
u8[0x9],
u8[0x6],
u8[0x3],
u8[0xC], // ROT(SubBytes(X3))
};
uint32x4_t r = {0, (unsigned)rcon, 0, (unsigned)rcon};
return vreinterpretq_m128i_u8(dest) ^ vreinterpretq_m128i_u32(r);

View file

@ -1,4 +1,5 @@
#include "stdafx.h"
#include "PPUAnalyser.h"
#include "cellos/sys_sync.h"
@ -8,7 +9,8 @@
#include <unordered_set>
#include "util/yaml.hpp"
#include "util/asm.hpp"
#include "rx/align.hpp"
#include "rx/asm.hpp"
LOG_CHANNEL(ppu_validator);
@ -25,7 +27,6 @@ void fmt_class_string<ppu_attr>::format(std::string& out, u64 arg)
case ppu_attr::no_return: return "no_return";
case ppu_attr::no_size: return "no_size";
case ppu_attr::has_mfvscr: return "has_mfvscr";
case ppu_attr::bitset_last: break;
}
return unknown;
@ -2243,7 +2244,7 @@ bool ppu_module<lv2_obj>::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con
}
}
jt_end = utils::align<u32>(static_cast<u32>(std::min<u64>(jt_end - 1, ctr(maxv) - 1) + 1), 4);
jt_end = rx::alignUp<u32>(static_cast<u32>(std::min<u64>(jt_end - 1, ctr(maxv) - 1) + 1), 4);
get_jumptable_end(jumpatble_off, jumpatble_ptr, false);
@ -2882,7 +2883,7 @@ bool ppu_module<lv2_obj>::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con
block.attr = ppu_attr::no_size;
}
per_instruction_bytes += utils::sub_saturate<u32>(lim, func.addr);
per_instruction_bytes += rx::sub_saturate<u32>(lim, func.addr);
addr_next = std::max<u32>(addr_next, lim);
continue;
}
@ -3291,7 +3292,7 @@ bool ppu_module<lv2_obj>::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con
if (per_instruction_bytes)
{
const bool error = per_instruction_bytes >= 200 && per_instruction_bytes / 4 >= utils::aligned_div<u32>(::size32(funcs), 128);
const bool error = per_instruction_bytes >= 200 && per_instruction_bytes / 4 >= rx::aligned_div<u32>(::size32(funcs), 128);
(error ? ppu_log.error : ppu_log.notice)("%d instructions will be compiled on per-instruction basis in total", per_instruction_bytes / 4);
}

View file

@ -1,11 +1,12 @@
#pragma once
#include <functional>
#include <string>
#include <map>
#include <deque>
#include <span>
#include "util/types.hpp"
#include "util/asm.hpp"
#include "rx/align.hpp"
#include "rx/types.hpp"
#include "util/to_endian.hpp"
#include "rx/EnumBitSet.hpp"
@ -218,7 +219,7 @@ struct ppu_module : public Type
const u32 seg_size = seg.size;
const u32 seg_addr = seg.addr;
if (seg_size >= std::max<usz>(size_bytes, 1) && addr <= utils::align<u32>(seg_addr + seg_size, 0x10000) - size_bytes)
if (seg_size >= std::max<usz>(size_bytes, 1) && addr <= rx::alignUp<u32>(seg_addr + seg_size, 0x10000) - size_bytes)
{
return reinterpret_cast<to_be_t<T>*>(static_cast<u8*>(seg.ptr) + (addr - seg_addr));
}

View file

@ -4,7 +4,7 @@
#include "PPUAnalyser.h"
#include "Emu/IdManager.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include <cmath>
@ -222,7 +222,7 @@ std::pair<PPUDisAsm::const_op, u64> PPUDisAsm::try_get_const_op_gpr_value(u32 re
GET_CONST_REG(reg_rs, op.rs);
return {form, utils::rol64(reg_rs, op.sh64) & (~0ull << (op.mbe64 ^ 63))};
return {form, rx::rol64(reg_rs, op.sh64) & (~0ull << (op.mbe64 ^ 63))};
}
case ppu_itype::OR:
{

View file

@ -15,7 +15,7 @@
#include <cmath>
#include <climits>
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "util/v128.hpp"
#include "util/simd.hpp"
#include "util/sysinfo.hpp"
@ -3509,7 +3509,7 @@ auto RLWIMI()
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
{
const u64 mask = ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & mask);
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (dup32(rx::rol32(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & mask);
if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
};
@ -3524,7 +3524,7 @@ auto RLWINM()
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
{
ppu.gpr[op.ra] = dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
ppu.gpr[op.ra] = dup32(rx::rol32(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
};
@ -3539,7 +3539,7 @@ auto RLWNM()
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
{
ppu.gpr[op.ra] = dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), ppu.gpr[op.rb] & 0x1f)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
ppu.gpr[op.ra] = dup32(rx::rol32(static_cast<u32>(ppu.gpr[op.rs]), ppu.gpr[op.rb] & 0x1f)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
};
@ -3634,7 +3634,7 @@ auto RLDICL()
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
{
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull >> op.mbe64);
ppu.gpr[op.ra] = rx::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull >> op.mbe64);
if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
};
@ -3649,7 +3649,7 @@ auto RLDICR()
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
{
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull << (op.mbe64 ^ 63));
ppu.gpr[op.ra] = rx::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull << (op.mbe64 ^ 63));
if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
};
@ -3664,7 +3664,7 @@ auto RLDIC()
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
{
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
ppu.gpr[op.ra] = rx::rol64(ppu.gpr[op.rs], op.sh64) & ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
};
@ -3680,7 +3680,7 @@ auto RLDIMI()
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
{
const u64 mask = ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (utils::rol64(ppu.gpr[op.rs], op.sh64) & mask);
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (rx::rol64(ppu.gpr[op.rs], op.sh64) & mask);
if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
};
@ -3695,7 +3695,7 @@ auto RLDCL()
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
{
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull >> op.mbe64);
ppu.gpr[op.ra] = rx::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull >> op.mbe64);
if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
};
@ -3710,7 +3710,7 @@ auto RLDCR()
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
{
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull << (op.mbe64 ^ 63));
ppu.gpr[op.ra] = rx::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull << (op.mbe64 ^ 63));
if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
};
@ -3842,7 +3842,7 @@ auto MULHDU()
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
{
ppu.gpr[op.rd] = utils::umulh64(ppu.gpr[op.ra], ppu.gpr[op.rb]);
ppu.gpr[op.rd] = rx::umulh64(ppu.gpr[op.ra], ppu.gpr[op.rb]);
if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.rd], 0);
};
@ -4243,7 +4243,7 @@ auto MULHD()
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
{
ppu.gpr[op.rd] = utils::mulh64(ppu.gpr[op.ra], ppu.gpr[op.rb]);
ppu.gpr[op.rd] = rx::mulh64(ppu.gpr[op.ra], ppu.gpr[op.rb]);
if constexpr (((Flags == has_rc) || ...))
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.rd], 0);
};
@ -4675,7 +4675,7 @@ auto MULLD()
ppu.gpr[op.rd] = RA * RB;
if (op.oe) [[unlikely]]
{
const s64 high = utils::mulh64(RA, RB);
const s64 high = rx::mulh64(RA, RB);
ppu_ov_set(ppu, high != s64(ppu.gpr[op.rd]) >> 63);
}
if constexpr (((Flags == has_rc) || ...))
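
Note: the interpreter hunks above lean on rotate and high-multiply helpers. Assuming the rx:: versions keep the old utils:: semantics, they reduce to roughly the following (the __int128 path is a GCC/Clang extension; MSVC would use its __umulh/__mulh intrinsics instead):

#include <bit>
#include <cstdint>

// Rotate left with the count masked to 0..63, matching PPC rotate semantics
inline std::uint64_t rol64(std::uint64_t x, std::uint64_t n)
{
	return std::rotl(x, static_cast<int>(n & 63));
}

// Upper 64 bits of the 128-bit product, as used by MULHDU / MULHD above
inline std::uint64_t umulh64(std::uint64_t a, std::uint64_t b)
{
	return static_cast<std::uint64_t>((static_cast<unsigned __int128>(a) * b) >> 64);
}

inline std::int64_t mulh64(std::int64_t a, std::int64_t b)
{
	return static_cast<std::int64_t>((static_cast<__int128>(a) * b) >> 64);
}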

View file

@ -28,7 +28,8 @@
#include <span>
#include <set>
#include <algorithm>
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "rx/align.hpp"
LOG_CHANNEL(ppu_loader);
@ -341,7 +342,7 @@ static void ppu_initialize_modules(ppu_linkage_info* link, utils::serial* ar = n
if (!hle_funcs_addr)
hle_funcs_addr = vm::alloc(::size32(hle_funcs) * 8, vm::main);
else
vm::page_protect(hle_funcs_addr, utils::align(::size32(hle_funcs) * 8, 0x1000), 0, vm::page_writable);
vm::page_protect(hle_funcs_addr, rx::alignUp(::size32(hle_funcs) * 8, 0x1000), 0, vm::page_writable);
// Initialize as PPU executable code
ppu_register_range(hle_funcs_addr, ::size32(hle_funcs) * 8);
@ -359,7 +360,7 @@ static void ppu_initialize_modules(ppu_linkage_info* link, utils::serial* ar = n
}
// Set memory protection to read-only
vm::page_protect(hle_funcs_addr, utils::align(::size32(hle_funcs) * 8, 0x1000), 0, 0, vm::page_writable);
vm::page_protect(hle_funcs_addr, rx::alignUp(::size32(hle_funcs) * 8, 0x1000), 0, 0, vm::page_writable);
// Initialize function names
const bool is_first = g_ppu_function_names.empty();
@ -489,7 +490,7 @@ static void ppu_initialize_modules(ppu_linkage_info* link, utils::serial* ar = n
}
else
{
const u32 next = utils::align(alloc_addr, variable.second.align);
const u32 next = rx::alignUp(alloc_addr, variable.second.align);
const u32 end = next + variable.second.size - 1;
if (!next || (end >> 16 != alloc_addr >> 16))
@ -1191,7 +1192,7 @@ static void ppu_check_patch_spu_images(const ppu_module<lv2_obj>& mod, const ppu
u32 prev_bound = 0;
for (u32 i = find_first_of_multiple(seg_view, prefixes, 0); i < seg.size; i = find_first_of_multiple(seg_view, prefixes, utils::align<u32>(i + 1, 4)))
for (u32 i = find_first_of_multiple(seg_view, prefixes, 0); i < seg.size; i = find_first_of_multiple(seg_view, prefixes, rx::alignUp<u32>(i + 1, 4)))
{
const auto elf_header = ensure(mod.get_ptr<u8>(seg.addr + i));
@ -1201,7 +1202,7 @@ static void ppu_check_patch_spu_images(const ppu_module<lv2_obj>& mod, const ppu
const u32 old_i = i;
u32 guid_start = umax, guid_end = umax;
for (u32 search = i & -128, tries = 10; tries && search >= prev_bound; tries--, search = utils::sub_saturate<u32>(search, 128))
for (u32 search = i & -128, tries = 10; tries && search >= prev_bound; tries--, search = rx::sub_saturate<u32>(search, 128))
{
if (seg_view[search] != 0x42 && seg_view[search] != 0x43)
{
@ -1271,7 +1272,7 @@ static void ppu_check_patch_spu_images(const ppu_module<lv2_obj>& mod, const ppu
if (addr_last >= 0x80 && valid_count >= 2)
{
const u32 begin = i & -128;
u32 end = std::min<u32>(seg.size, utils::align<u32>(i + addr_last + 256, 128));
u32 end = std::min<u32>(seg.size, rx::alignUp<u32>(i + addr_last + 256, 128));
u32 guessed_ls_addr = 0;
@ -1611,7 +1612,7 @@ shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object& elf, bool virtual_load, c
if (virtual_load)
{
addr = std::exchange(allocating_address, allocating_address + utils::align<u32>(mem_size, 0x10000));
addr = std::exchange(allocating_address, allocating_address + rx::alignUp<u32>(mem_size, 0x10000));
}
else
{
@ -1625,7 +1626,7 @@ shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object& elf, bool virtual_load, c
// Leave additional room for the analyser so it can safely access a bit beyond the limit
// Because with VM the address space is not really a limit (any u32 address is valid there), whereas here it is UB to create a pointer that goes beyond the boundaries
// TODO: Use make_shared_for_overwrite when all compilers support it
const usz alloc_size = utils::align<usz>(mem_size, 0x10000) + 4096;
const usz alloc_size = rx::alignUp<usz>(mem_size, 0x10000) + 4096;
prx->allocations.push_back(std::shared_ptr<u8[]>(new u8[alloc_size]));
_seg.ptr = prx->allocations.back().get();
std::memset(static_cast<u8*>(_seg.ptr) + prog.bin.size(), 0, alloc_size - 4096 - prog.bin.size());
@ -1725,7 +1726,7 @@ shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object& elf, bool virtual_load, c
{
const auto& rel = reinterpret_cast<const ppu_prx_relocation_info&>(prog.bin[i]);
if (rel.offset >= utils::align<u64>(::at32(prx->segs, rel.index_addr).size, 0x100))
if (rel.offset >= rx::alignUp<u64>(::at32(prx->segs, rel.index_addr).size, 0x100))
{
fmt::throw_exception("Relocation offset out of segment memory! (offset=0x%x, index_addr=%u, seg_size=0x%x)", rel.offset, rel.index_addr, prx->segs[rel.index_addr].size);
}
@ -2201,7 +2202,7 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
// Leave additional room for the analyser so it can safely access a bit beyond the limit
// Because with VM the address space is not really a limit (any u32 address is valid there), whereas here it is UB to create a pointer that goes beyond the boundaries
// TODO: Use make_shared_for_overwrite when all compilers support it
const usz alloc_size = utils::align<usz>(size, 0x10000) + 4096;
const usz alloc_size = rx::alignUp<usz>(size, 0x10000) + 4096;
_main.allocations.push_back(std::shared_ptr<u8[]>(new u8[alloc_size]));
_seg.ptr = _main.allocations.back().get();
std::memset(static_cast<u8*>(_seg.ptr) + prog.bin.size(), 0, alloc_size - 4096 - prog.bin.size());
@ -2247,7 +2248,7 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
else
{
// For backwards compatibility: already loaded memory will always be writable
const u32 size0 = utils::align(size + addr % 0x10000, 0x10000);
const u32 size0 = rx::alignUp(size + addr % 0x10000, 0x10000);
const u32 addr0 = addr & -0x10000;
vm::page_protect(addr0, size0, 0, vm::page_writable | vm::page_readable, vm::page_executable);
}
@ -2721,7 +2722,7 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
default:
{
// According to elad335, the min value seems to be 64KB instead of the expected 4KB (SYS_PROCESS_PARAM_STACK_SIZE_MIN)
primary_stacksize = utils::align<u32>(std::clamp<u32>(sz, 0x10000, SYS_PROCESS_PARAM_STACK_SIZE_MAX), 4096);
primary_stacksize = rx::alignUp<u32>(std::clamp<u32>(sz, 0x10000, SYS_PROCESS_PARAM_STACK_SIZE_MAX), 4096);
break;
}
}
@ -2738,29 +2739,29 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
if (!Emu.data.empty())
{
std::memcpy(vm::base(ppu->stack_addr + ppu->stack_size - ::size32(Emu.data)), Emu.data.data(), Emu.data.size());
ppu->gpr[1] -= utils::align<u32>(::size32(Emu.data), 0x10);
ppu->gpr[1] -= rx::alignUp<u32>(::size32(Emu.data), 0x10);
}
// Initialize process arguments
// Calculate storage requirements on the stack
const u32 pointers_storage_size = u32{sizeof(u64)} * utils::align<u32>(::size32(Emu.envp) + ::size32(Emu.argv) + 2, 2);
const u32 pointers_storage_size = u32{sizeof(u64)} * rx::alignUp<u32>(::size32(Emu.envp) + ::size32(Emu.argv) + 2, 2);
u32 stack_alloc_size = pointers_storage_size;
for (const auto& arg : Emu.argv)
{
stack_alloc_size += utils::align<u32>(::size32(arg) + 1, 0x10);
stack_alloc_size += rx::alignUp<u32>(::size32(arg) + 1, 0x10);
}
for (const auto& arg : Emu.envp)
{
stack_alloc_size += utils::align<u32>(::size32(arg) + 1, 0x10);
stack_alloc_size += rx::alignUp<u32>(::size32(arg) + 1, 0x10);
}
ensure(ppu->stack_size > stack_alloc_size);
vm::ptr<u64> args = vm::cast(static_cast<u32>(ppu->stack_addr + ppu->stack_size - stack_alloc_size - utils::align<u32>(::size32(Emu.data), 0x10)));
vm::ptr<u64> args = vm::cast(static_cast<u32>(ppu->stack_addr + ppu->stack_size - stack_alloc_size - rx::alignUp<u32>(::size32(Emu.data), 0x10)));
vm::ptr<u8> args_data = vm::cast(args.addr() + pointers_storage_size);
const vm::ptr<u64> argv = args;
@ -2772,7 +2773,7 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
std::memcpy(args_data.get_ptr(), arg.data(), arg_size);
*args++ = args_data.addr();
args_data = vm::cast(args_data.addr() + utils::align<u32>(arg_size, 0x10));
args_data = vm::cast(args_data.addr() + rx::alignUp<u32>(arg_size, 0x10));
}
*args++ = 0;
@ -2787,7 +2788,7 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
std::memcpy(args_data.get_ptr(), arg.data(), arg_size);
*args++ = args_data.addr();
args_data = vm::cast(args_data.addr() + utils::align<u32>(arg_size, 0x10));
args_data = vm::cast(args_data.addr() + rx::alignUp<u32>(arg_size, 0x10));
}
*args++ = 0;
@ -2855,7 +2856,7 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
if (prog.p_type == 0x1u /* LOAD */ && prog.p_memsz && (prog.p_flags & 0x022000002) == 0u /* W */)
{
// Set memory protection to read-only when necessary (only if PPU-W, SPU-W, RSX-W are all disabled)
ensure(vm::page_protect(addr, utils::align(size, 0x1000), 0, 0, vm::page_writable));
ensure(vm::page_protect(addr, rx::alignUp(size, 0x1000), 0, 0, vm::page_writable));
}
}
@ -2934,7 +2935,7 @@ std::pair<shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_exec_ob
// Leave additional room for the analyser so it can safely access a bit beyond the limit
// Because with VM the address space is not really a limit (any u32 address is valid there), whereas here it is UB to create a pointer that goes beyond the boundaries
// TODO: Use make_shared_for_overwrite when all compilers support it
const usz alloc_size = utils::align<usz>(size, 0x10000) + 4096;
const usz alloc_size = rx::alignUp<usz>(size, 0x10000) + 4096;
ovlm->allocations.push_back(std::shared_ptr<u8[]>(new u8[alloc_size]));
_seg.ptr = ovlm->allocations.back().get();
std::memset(static_cast<u8*>(_seg.ptr) + prog.bin.size(), 0, alloc_size - 4096 - prog.bin.size());
@ -3230,7 +3231,7 @@ bool ppu_load_rel_exec(const ppu_rel_object& elf)
{
if (s.sh_type != sec_type::sht_progbits)
{
memsize = utils::align<u32>(memsize + vm::cast(s.sh_size), 128);
memsize = rx::alignUp<u32>(memsize + vm::cast(s.sh_size), 128);
}
}
@ -3278,7 +3279,7 @@ bool ppu_load_rel_exec(const ppu_rel_object& elf)
relm.secs.emplace_back(_sec);
std::memcpy(vm::base(addr), s.get_bin().data(), size);
addr = utils::align<u32>(addr + size, 128);
addr = rx::alignUp<u32>(addr + size, 128);
}
}

View file

@ -62,7 +62,8 @@
#include <optional>
#include <charconv>
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "rx/align.hpp"
#include "util/vm.hpp"
#include "util/v128.hpp"
#include "util/simd.hpp"
@ -217,7 +218,7 @@ public:
user acquire(u64 amount)
{
amount = utils::aligned_div<u64>(amount, k_block_size);
amount = rx::aligned_div<u64>(amount, k_block_size);
u32 allocated = 0;
while (!m_free.fetch_op([&, this](u32& value)
@ -225,7 +226,7 @@ public:
if (value >= amount || value == m_total)
{
// Allow at least allocation, make 0 the "memory unavailable" sign value for atomic waiting efficiency
const u32 new_val = static_cast<u32>(utils::sub_saturate<u64>(value, amount));
const u32 new_val = static_cast<u32>(rx::sub_saturate<u64>(value, amount));
allocated = value - new_val;
value = new_val;
return true;
@ -869,7 +870,7 @@ extern void ppu_register_range(u32 addr, u32 size)
return;
}
size = utils::align(size + addr % 0x10000, 0x10000);
size = rx::alignUp(size + addr % 0x10000, 0x10000);
addr &= -0x10000;
// Register executable range at
@ -1816,7 +1817,7 @@ std::vector<std::pair<u32, u32>> ppu_thread::dump_callstack_list() const
if (pos_dist >= inst_pos.size())
{
const u32 inst_bound = utils::align<u32>(pos, 256);
const u32 inst_bound = rx::alignUp<u32>(pos, 256);
const usz old_size = inst_pos.size();
const usz new_size = pos_dist + (inst_bound - pos) / 4 + 1;
@ -1903,7 +1904,7 @@ std::vector<std::pair<u32, u32>> ppu_thread::dump_callstack_list() const
for (u32 back = 1; back < 20; back++)
{
be_t<u32>& opcode = get_inst(utils::sub_saturate<u32>(_cia, back * 4));
be_t<u32>& opcode = get_inst(rx::sub_saturate<u32>(_cia, back * 4));
if (!opcode)
{
@ -3588,11 +3589,11 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
return false;
}
utils::prefetch_read(ppu.rdata);
utils::prefetch_read(ppu.rdata + 64);
rx::prefetch_read(ppu.rdata);
rx::prefetch_read(ppu.rdata + 64);
ppu.last_faddr = addr;
ppu.last_ftime = res.load() & -128;
ppu.last_ftsc = utils::get_tsc();
ppu.last_ftsc = rx::get_tsc();
return false;
}
default:
@ -3699,7 +3700,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
ppu.last_faddr = addr;
ppu.last_ftime = old_rtime & -128;
ppu.last_ftsc = utils::get_tsc();
ppu.last_ftsc = rx::get_tsc();
std::memcpy(&ppu.rdata[addr & 0x78], &old_data, 8);
}
@ -3941,7 +3942,7 @@ namespace
fs::stat_t get_stat() override
{
fs::stat_t stat = m_file.get_stat();
stat.size = std::min<u64>(utils::sub_saturate<u64>(stat.size, m_off), m_max_size);
stat.size = std::min<u64>(rx::sub_saturate<u64>(stat.size, m_off), m_max_size);
stat.is_writable = false;
return stat;
}
@ -3960,7 +3961,7 @@ namespace
u64 read_at(u64 offset, void* buffer, u64 size) override
{
return m_file.read_at(offset + m_off, buffer, std::min<u64>(size, utils::sub_saturate<u64>(m_max_size, offset)));
return m_file.read_at(offset + m_off, buffer, std::min<u64>(size, rx::sub_saturate<u64>(m_max_size, offset)));
}
u64 write(const void*, u64) override
@ -3988,7 +3989,7 @@ namespace
u64 size() override
{
return std::min<u64>(utils::sub_saturate<u64>(m_file.size(), m_off), m_max_size);
return std::min<u64>(rx::sub_saturate<u64>(m_file.size(), m_off), m_max_size);
}
};
} // namespace
@ -5624,7 +5625,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
}
// Initialize compiler instance
while (jits.size() < utils::aligned_div<u64>(module_counter, c_moudles_per_jit) && is_being_used_in_emulation)
while (jits.size() < rx::aligned_div<u64>(module_counter, c_moudles_per_jit) && is_being_used_in_emulation)
{
jits.emplace_back(std::make_shared<jit_compiler>(s_link_table, g_cfg.core.llvm_cpu, 0, symbols_cement));
@ -5652,7 +5653,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
const bool divide_by_twenty = !workload.empty();
const usz increment_link_count_at = (divide_by_twenty ? 20 : 1);
g_progr_ptotal += static_cast<u32>(utils::aligned_div<u64>(link_workload.size(), increment_link_count_at));
g_progr_ptotal += static_cast<u32>(rx::aligned_div<u64>(link_workload.size(), increment_link_count_at));
usz mod_index = umax;
@ -5785,7 +5786,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_size)
{
concurent_memory_limit memory_limit(utils::aligned_div<u64>(utils::get_total_memory(), 2));
concurent_memory_limit memory_limit(rx::aligned_div<u64>(utils::get_total_memory(), 2));
return ppu_initialize(info, check_only, file_size, memory_limit);
}
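
Note: two more helpers recur throughout this file. Plausible one-liners matching the old utils:: behaviour for the unsigned uses shown here, assumed from the call sites rather than taken from the real headers:

// Ceiling division, e.g. aligned_div(module_counter, c_moudles_per_jit) above
template <typename T>
constexpr T aligned_div(T value, T divisor)
{
	return static_cast<T>((value + divisor - 1) / divisor);
}

// Subtraction clamped at zero instead of wrapping around
template <typename T>
constexpr T sub_saturate(T lhs, T rhs)
{
	return lhs >= rhs ? static_cast<T>(lhs - rhs) : T{0};
}

static_assert(aligned_div(10, 4) == 3);
static_assert(sub_saturate(2u, 5u) == 0u);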

View file

@ -847,7 +847,7 @@ Value* PPUTranslator::ReadMemory(Value* addr, Type* type, bool is_be, u32 align)
m_may_be_mmio = false;
if (auto ptr = m_info.get_ptr<instructions_to_test>(std::max<u32>(m_info.segs[0].addr, (m_reloc ? m_reloc->addr : 0) + utils::sub_saturate<u32>(::narrow<u32>(m_addr), sizeof(instructions_to_test) / 2))))
if (auto ptr = m_info.get_ptr<instructions_to_test>(std::max<u32>(m_info.segs[0].addr, (m_reloc ? m_reloc->addr : 0) + rx::sub_saturate<u32>(::narrow<u32>(m_addr), sizeof(instructions_to_test) / 2))))
{
if (ppu_test_address_may_be_mmio(std::span(ptr->insts)))
{
@ -920,7 +920,7 @@ void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align
be_t<u32> insts[128];
};
if (auto ptr = m_info.get_ptr<instructions_to_test>(std::max<u32>(m_info.segs[0].addr, (m_reloc ? m_reloc->addr : 0) + utils::sub_saturate<u32>(::narrow<u32>(m_addr), sizeof(instructions_to_test) / 2))))
if (auto ptr = m_info.get_ptr<instructions_to_test>(std::max<u32>(m_info.segs[0].addr, (m_reloc ? m_reloc->addr : 0) + rx::sub_saturate<u32>(::narrow<u32>(m_addr), sizeof(instructions_to_test) / 2))))
{
if (ppu_test_address_may_be_mmio(std::span(ptr->insts)))
{

View file

@ -1,7 +1,8 @@
#include "stdafx.h"
#include "Emu/IdManager.h"
#include "Loader/ELF.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "rx/align.hpp"
#include "SPUThread.h"
@ -450,7 +451,7 @@ void spu_load_rel_exec(const spu_rel_object& elf)
{
if (shdr.sh_type == sec_type::sht_progbits && shdr.sh_flags().all_of(sh_flag::shf_alloc))
{
total_memsize = utils::align<u32>(total_memsize + shdr.sh_size, 4);
total_memsize = rx::alignUp<u32>(total_memsize + shdr.sh_size, 4);
}
}
@ -462,7 +463,7 @@ void spu_load_rel_exec(const spu_rel_object& elf)
if (shdr.sh_type == sec_type::sht_progbits && shdr.sh_flags().all_of(sh_flag::shf_alloc))
{
std::memcpy(spu->_ptr<void>(offs), shdr.get_bin().data(), shdr.sh_size);
offs = utils::align<u32>(offs + shdr.sh_size, 4);
offs = rx::alignUp<u32>(offs + shdr.sh_size, 4);
}
}

View file

@ -9,7 +9,8 @@
#include "SPUInterpreter.h"
#include "Crypto/sha1.h"
#include "util/asm.hpp"
#include "rx/align.hpp"
#include "rx/asm.hpp"
#include "util/v128.hpp"
#include "util/sysinfo.hpp"
@ -282,7 +283,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
words_align = 64;
const u32 starta = start & -64;
const u32 enda = utils::align(end, 64);
const u32 enda = rx::alignUp(end, 64);
const u32 sizea = (enda - starta) / 64;
ensure(sizea);
@ -363,7 +364,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
words_align = 32;
const u32 starta = start & -32;
const u32 enda = utils::align(end, 32);
const u32 enda = rx::alignUp(end, 32);
const u32 sizea = (enda - starta) / 32;
ensure(sizea);
@ -486,7 +487,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
words_align = 32;
const u32 starta = start & -32;
const u32 enda = utils::align(end, 32);
const u32 enda = rx::alignUp(end, 32);
const u32 sizea = (enda - starta) / 32;
ensure(sizea);
@ -3211,7 +3212,7 @@ void spu_recompiler::ROTQBYI(spu_opcode_t op)
}
else if (s == 4 || s == 8 || s == 12)
{
c->pshufd(va, va, utils::rol8(0xE4, s / 2));
c->pshufd(va, va, rx::rol8(0xE4, s / 2));
}
else if (utils::has_ssse3())
{
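
Aside: the rx::rol8(0xE4, s / 2) in ROTQBYI is worth unpacking. 0xE4 (0b11'10'01'00) is the identity pshufd immediate (dwords 3,2,1,0), and rotating it left by s/2 bits yields the shuffle immediates for the 4-, 8- and 12-byte quadword rotates. A quick check of that reading:

#include <bit>
#include <cstdint>

// Rotating the identity shuffle immediate rotates the dword selector pattern
static_assert(std::rotl(std::uint8_t{0xE4}, 2) == 0x93); // s == 4:  dwords (2,1,0,3)
static_assert(std::rotl(std::uint8_t{0xE4}, 4) == 0x4E); // s == 8:  dwords (1,0,3,2)
static_assert(std::rotl(std::uint8_t{0xE4}, 6) == 0x39); // s == 12: dwords (0,3,2,1)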

View file

@ -25,6 +25,7 @@
#include <optional>
#include <unordered_set>
#include "rx/align.hpp"
#include "util/v128.hpp"
#include "util/simd.hpp"
#include "util/sysinfo.hpp"
@ -658,7 +659,7 @@ std::deque<spu_program> spu_cache::get()
const u32 size = block_info.size;
const u32 addr = block_info.addr;
if (utils::add_saturate<u32>(addr, size * 4) > SPU_LS_SIZE)
if (rx::add_saturate<u32>(addr, size * 4) > SPU_LS_SIZE)
{
break;
}
@ -1253,7 +1254,7 @@ void spu_cache::initialize(bool build_existing_cache)
fmt::append(dump, "\n\t%49s", "");
for (u32 i = 0; i < std::min<usz>(f->data.size(), std::max<usz>(64, utils::aligned_div<u32>(depth_m, 4))); i++)
for (u32 i = 0; i < std::min<usz>(f->data.size(), std::max<usz>(64, rx::aligned_div<u32>(depth_m, 4))); i++)
{
fmt::append(dump, "%-10s", g_spu_iname.decode(std::bit_cast<be_t<u32>>(f->data[i])));
}
@ -2308,12 +2309,12 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
// TODO: Does not detect jumptables or fixed-addr indirect calls
const v128 brasl_mask = is_known_addr ? v128::from32p(0x62u << 23) : v128::from32p(umax);
for (u32 i = utils::align<u32>(base_addr, 0x10); i < std::min<u32>(base_addr + ::size32(ls), 0x3FFF0); i += 0x10)
for (u32 i = rx::alignUp<u32>(base_addr, 0x10); i < std::min<u32>(base_addr + ::size32(ls), 0x3FFF0); i += 0x10)
{
// Search for BRSL LR and BRASL LR or BR
// TODO: BISL
const v128 inst = read_from_ptr<be_t<v128>>(ls.data(), i - base_addr);
const v128 cleared_i16 = gv_and32(inst, v128::from32p(utils::rol32(~0xffff, 7)));
const v128 cleared_i16 = gv_and32(inst, v128::from32p(rx::rol32(~0xffff, 7)));
const v128 eq_brsl = gv_eq32(cleared_i16, v128::from32p(0x66u << 23));
const v128 eq_brasl = gv_eq32(cleared_i16, brasl_mask);
const v128 eq_br = gv_eq32(cleared_i16, v128::from32p(0x64u << 23));
@ -5376,7 +5377,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
const usz block_tail = duplicate_positions[it_begin - it_tail];
// Check if the distance from the end is precisely two times that of the tail block
if (reg_state_it.size() - block_start != utils::rol64(reg_state_it.size() - block_tail, 1))
if (reg_state_it.size() - block_start != rx::rol64(reg_state_it.size() - block_tail, 1))
{
continue;
}
@ -7143,7 +7144,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
v_reg2 = 3,
};
for (auto it = infos.lower_bound(utils::sub_saturate<u32>(pattern.put_pc, 512)); it != infos.end() && it->first < pattern.put_pc + 512; it++)
for (auto it = infos.lower_bound(rx::sub_saturate<u32>(pattern.put_pc, 512)); it != infos.end() && it->first < pattern.put_pc + 512; it++)
{
for (auto& state : it->second->end_reg_state)
{
@ -7622,7 +7623,7 @@ struct spu_llvm
// Notify all before queue runs out if there is considerable excess
// Optimized so that if there are many workers, it acts soon
// If there are only a few workers, it postpones notifications until there is some more workload
if (notify_compile_count && std::min<u32>(7, utils::aligned_div<u32>(worker_count * 2, 3) + 2) <= compile_pending)
if (notify_compile_count && std::min<u32>(7, rx::aligned_div<u32>(worker_count * 2, 3) + 2) <= compile_pending)
{
for (usz i = 0; i < worker_count; i++)
{

View file

@ -6,7 +6,7 @@
#include "Emu/Cell/SPUAnalyser.h"
#include "Emu/system_config.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "util/v128.hpp"
#include "util/simd.hpp"
#include "util/sysinfo.hpp"
@ -289,7 +289,7 @@ bool ROT(spu_thread& spu, spu_opcode_t op)
for (u32 i = 0; i < 4; i++)
{
spu.gpr[op.rt]._u32[i] = utils::rol32(a._u32[i], b._u32[i]);
spu.gpr[op.rt]._u32[i] = rx::rol32(a._u32[i], b._u32[i]);
}
return true;
}
@ -344,7 +344,7 @@ bool ROTH(spu_thread& spu, spu_opcode_t op)
for (u32 i = 0; i < 8; i++)
{
spu.gpr[op.rt]._u16[i] = utils::rol16(a._u16[i], b._u16[i]);
spu.gpr[op.rt]._u16[i] = rx::rol16(a._u16[i], b._u16[i]);
}
return true;
}

View file

@ -1215,7 +1215,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
rsx::reservation_lock rsx_lock(raddr, 128);
// Touch memory
utils::trigger_write_page_fault(vm::base(dest ^ (4096 / 2)));
rx::trigger_write_page_fault(vm::base(dest ^ (4096 / 2)));
auto [old_res, ok] = res.fetch_op([&](u64& rval)
{

View file

@ -1,3 +1,4 @@
#include "rx/align.hpp"
#include "stdafx.h"
#include "util/JIT.h"
#include "util/date_time.h"
@ -31,7 +32,7 @@
#include <shared_mutex>
#include <span>
#include "util/vm.hpp"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "util/v128.hpp"
#include "util/simd.hpp"
#include "util/sysinfo.hpp"
@ -448,7 +449,7 @@ mwaitx_func static void __mwaitx(u32 cycles, u32 cstate, const void* cline, cons
// First bit indicates cstate, 0x0 for C.02 state (lower power) or 0x1 for C.01 state (higher power)
waitpkg_func static void __tpause(u32 cycles, u32 cstate)
{
const u64 tsc = utils::get_tsc() + cycles;
const u64 tsc = rx::get_tsc() + cycles;
_tpause(cstate, tsc);
}
#endif
@ -522,7 +523,7 @@ namespace spu
{
// Slight pause if function is overburdened
const auto count = atomic_instruction_table[pc_offset].observe() * 100ull;
busy_wait(count);
rx::busy_wait(count);
}
ensure(!spu.check_state());
@ -1774,7 +1775,7 @@ void spu_thread::cpu_return()
// Wait for all threads to have error codes if exited by sys_spu_thread_exit
for (u32 status; !thread->exit_status.try_read(status) || status != thread->last_exit_status;)
{
utils::pause();
rx::pause();
}
}
}
@ -2307,60 +2308,6 @@ void spu_thread::push_snr(u32 number, u32 value)
const u32 event_bit = SPU_EVENT_S1 >> (number & 1);
const bool bitor_bit = !!((snr_config >> number) & 1);
// Redundant, g_use_rtm is checked inside tx_start now.
if (g_use_rtm && false)
{
bool channel_notify = false;
bool thread_notify = false;
const bool ok = utils::tx_start([&]
{
channel_notify = (channel->data.raw() == spu_channel::bit_wait);
thread_notify = (channel->data.raw() & spu_channel::bit_count) == 0;
if (channel_notify)
{
ensure(channel->jostling_value.raw() == spu_channel::bit_wait);
channel->jostling_value.raw() = value;
channel->data.raw() = 0;
}
else if (bitor_bit)
{
channel->data.raw() &= ~spu_channel::bit_wait;
channel->data.raw() |= spu_channel::bit_count | value;
}
else
{
channel->data.raw() = spu_channel::bit_count | value;
}
if (thread_notify)
{
ch_events.raw().events |= event_bit;
if (ch_events.raw().mask & event_bit)
{
ch_events.raw().count = 1;
thread_notify = ch_events.raw().waiting != 0;
}
else
{
thread_notify = false;
}
}
});
if (ok)
{
if (channel_notify)
channel->data.notify_one();
if (thread_notify)
this->notify();
return;
}
}
// Lock event channel in case it needs event notification
ch_events.atomic_op([](ch_events_t& ev)
{
@ -2527,7 +2474,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
range_lock = _this->range_lock;
}
utils::prefetch_write(range_lock);
rx::prefetch_write(range_lock);
for (u32 size = args.size, size0; is_get; size -= size0, dst += size0, src += size0, eal += size0)
{
@ -2541,7 +2488,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
}
else if (++i < 25) [[likely]]
{
busy_wait(300);
rx::busy_wait(300);
}
else
{
@ -2706,7 +2653,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
if (true || ++i < 10)
{
busy_wait(500);
rx::busy_wait(500);
}
else
{
@ -2947,7 +2894,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
}
u32 range_addr = eal & -128;
u32 range_end = utils::align(eal + size, 128);
u32 range_end = rx::alignUp(eal + size, 128);
// Handle the case of crossing 64K page borders (TODO: maybe split in 4K fragments?)
if (range_addr >> 16 != (range_end - 1) >> 16)
@ -3131,7 +3078,7 @@ plain_access:
bool spu_thread::do_dma_check(const spu_mfc_cmd& args)
{
const u32 mask = utils::rol32(1, args.tag);
const u32 mask = rx::rol32(1, args.tag);
if (mfc_barrier & mask || (args.cmd & (MFC_BARRIER_MASK | MFC_FENCE_MASK) && mfc_fence & mask)) [[unlikely]]
{
@ -3147,13 +3094,13 @@ bool spu_thread::do_dma_check(const spu_mfc_cmd& args)
if ((mfc_queue[i].cmd & ~0xc) == MFC_BARRIER_CMD)
{
mfc_barrier |= -1;
mfc_fence |= utils::rol32(1, mfc_queue[i].tag);
mfc_fence |= rx::rol32(1, mfc_queue[i].tag);
continue;
}
if (true)
{
const u32 _mask = utils::rol32(1u, mfc_queue[i].tag);
const u32 _mask = rx::rol32(1u, mfc_queue[i].tag);
// A command with barrier hard blocks that tag until it's been dealt with
if (mfc_queue[i].cmd & MFC_BARRIER_MASK)
@ -3258,7 +3205,7 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
u8* dst = this->ls + arg_lsa;
// Assume success, prepare the next elements
arg_lsa += fetch_size * utils::align<u32>(s_size, 16);
arg_lsa += fetch_size * rx::alignUp<u32>(s_size, 16);
item_ptr += fetch_size;
arg_size -= fetch_size * 8;
@ -3266,11 +3213,11 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
constexpr usz _128 = 128;
// This whole function relies on many constraints to be met (crashes real MFC); we can have a minor optimization assuming EA alignment to be +16 with +16 byte transfers
#define MOV_T(type, index, _ea) \
{ \
const usz ea = _ea; \
*reinterpret_cast<type*>(dst + index * utils::align<u32>(sizeof(type), 16) + ea % (sizeof(type) < 16 ? 16 : 1)) = *reinterpret_cast<const type*>(src + ea); \
} \
#define MOV_T(type, index, _ea) \
{ \
const usz ea = _ea; \
*reinterpret_cast<type*>(dst + index * rx::alignUp<u32>(sizeof(type), 16) + ea % (sizeof(type) < 16 ? 16 : 1)) = *reinterpret_cast<const type*>(src + ea); \
} \
void()
#define MOV_128(index, ea) mov_rdata(*reinterpret_cast<decltype(rdata)*>(dst + index * _128), *reinterpret_cast<const decltype(rdata)*>(src + (ea)))
@ -3522,7 +3469,7 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
#undef MOV_T
#undef MOV_128
// Optimization miss, revert changes
arg_lsa -= fetch_size * utils::align<u32>(s_size, 16);
arg_lsa -= fetch_size * rx::alignUp<u32>(s_size, 16);
item_ptr -= fetch_size;
arg_size += fetch_size * 8;
}
@ -3604,7 +3551,7 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
}
}
arg_lsa += utils::align<u32>(size, 16);
arg_lsa += rx::alignUp<u32>(size, 16);
}
// Avoid inlining huge transfers because it intentionally drops range lock unlock
else if (optimization_compatible == MFC_PUT_CMD && ((addr >> 28 == rsx::constants::local_mem_base >> 28) || (addr < RAW_SPU_BASE_ADDR && size - 1 <= 0x400 - 1 && (addr % 0x10000 + (size - 1)) < 0x10000)))
@ -3615,7 +3562,7 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
if (!g_use_rtm)
{
vm::range_lock(range_lock, addr & -128, utils::align<u32>(addr + size, 128) - (addr & -128));
vm::range_lock(range_lock, addr & -128, rx::alignUp<u32>(addr + size, 128) - (addr & -128));
}
}
else
@ -3690,7 +3637,7 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
}
}
arg_lsa += utils::align<u32>(size, 16);
arg_lsa += rx::alignUp<u32>(size, 16);
}
else if (size)
{
@ -3703,7 +3650,7 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
transfer.lsa = arg_lsa | (addr & 0xf);
transfer.size = size;
arg_lsa += utils::align<u32>(size, 16);
arg_lsa += rx::alignUp<u32>(size, 16);
do_dma_transfer(this, transfer, ls);
}
@ -3721,14 +3668,14 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
{
range_lock->release(0);
ch_stall_mask |= utils::rol32(1, args.tag);
ch_stall_mask |= rx::rol32(1, args.tag);
if (!ch_stall_stat.get_count())
{
set_events(SPU_EVENT_SN);
}
ch_stall_stat.set_value(utils::rol32(1, args.tag) | ch_stall_stat.get_value());
ch_stall_stat.set_value(rx::rol32(1, args.tag) | ch_stall_stat.get_value());
args.tag |= 0x80; // Set stalled status
args.eal = ::narrow<u32>(reinterpret_cast<const u8*>(item_ptr) - this->ls);
@ -3853,7 +3800,7 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
return false;
});
const u64 count2 = utils::get_tsc() - perf2.get();
const u64 count2 = rx::get_tsc() - perf2.get();
if (count2 > 20000 && g_cfg.core.perf_report) [[unlikely]]
{
@ -3881,11 +3828,11 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
return false;
}
utils::prefetch_read(rdata);
utils::prefetch_read(rdata + 64);
rx::prefetch_read(rdata);
rx::prefetch_read(rdata + 64);
last_faddr = addr;
last_ftime = res.load() & -128;
last_ftsc = utils::get_tsc();
last_ftsc = rx::get_tsc();
return false;
}
default:
@ -3973,7 +3920,7 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
if (!vm::check_addr(addr, vm::page_writable))
{
utils::trigger_write_page_fault(vm::base(addr));
rx::trigger_write_page_fault(vm::base(addr));
}
raddr = 0;
@ -4036,7 +3983,7 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
}
else if (k < 15)
{
busy_wait(500);
rx::busy_wait(500);
}
else
{
@ -4053,7 +4000,7 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
}
else if (j < 15)
{
busy_wait(500);
rx::busy_wait(500);
}
else
{
@ -4075,7 +4022,7 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
else if (!g_use_rtm)
{
// Provoke page fault
utils::trigger_write_page_fault(vm::base(addr));
rx::trigger_write_page_fault(vm::base(addr));
// Hard lock
auto spu = cpu ? cpu->try_get<spu_thread>() : nullptr;
@ -4102,7 +4049,7 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
});
vm::reservation_acquire(addr) += 32;
result = utils::get_tsc() - perf0.get();
result = rx::get_tsc() - perf0.get();
}
if (result > 20000 && g_cfg.core.perf_report) [[unlikely]]
@ -4150,7 +4097,7 @@ bool spu_thread::do_mfc(bool can_escape, bool must_finish)
auto process_command = [&](spu_mfc_cmd& args)
{
// Select tag bit in the tag mask or the stall mask
const u32 mask = utils::rol32(1, args.tag);
const u32 mask = rx::rol32(1, args.tag);
if ((args.cmd & ~0xc) == MFC_BARRIER_CMD)
{
@ -4240,7 +4187,7 @@ bool spu_thread::do_mfc(bool can_escape, bool must_finish)
{
// Get commands' execution mask
// Mask bits are always set when mfc_transfers_shuffling is 0
return static_cast<u16>((0 - (1u << std::min<u32>(g_cfg.core.mfc_transfers_shuffling, size))) | utils::get_tsc());
return static_cast<u16>((0 - (1u << std::min<u32>(g_cfg.core.mfc_transfers_shuffling, size))) | rx::get_tsc());
};
// Process enqueued commands
@ -4733,7 +4680,7 @@ bool spu_thread::process_mfc_cmd()
else
#endif
{
busy_wait(300);
rx::busy_wait(300);
}
if (getllar_spin_count == 3)
@ -4875,7 +4822,7 @@ bool spu_thread::process_mfc_cmd()
if (i < 24) [[likely]]
{
i++;
busy_wait(300);
rx::busy_wait(300);
}
else
{
@ -5159,7 +5106,7 @@ bool spu_thread::process_mfc_cmd()
std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.lsa & 0x3ff80), 128);
}
const u32 mask = utils::rol32(1, ch_mfc_cmd.tag);
const u32 mask = rx::rol32(1, ch_mfc_cmd.tag);
if ((mfc_barrier | mfc_fence) & mask) [[unlikely]]
{
@ -5214,11 +5161,11 @@ bool spu_thread::process_mfc_cmd()
}
mfc_queue[mfc_size++] = ch_mfc_cmd;
mfc_fence |= utils::rol32(1, ch_mfc_cmd.tag);
mfc_fence |= rx::rol32(1, ch_mfc_cmd.tag);
if (ch_mfc_cmd.cmd & MFC_BARRIER_MASK)
{
mfc_barrier |= utils::rol32(1, ch_mfc_cmd.tag);
mfc_barrier |= rx::rol32(1, ch_mfc_cmd.tag);
}
return true;
@ -5267,11 +5214,11 @@ bool spu_thread::process_mfc_cmd()
}
mfc_size++;
mfc_fence |= utils::rol32(1, cmd.tag);
mfc_fence |= rx::rol32(1, cmd.tag);
if (cmd.cmd & MFC_BARRIER_MASK)
{
mfc_barrier |= utils::rol32(1, cmd.tag);
mfc_barrier |= rx::rol32(1, cmd.tag);
}
if (check_mfc_interrupts(pc + 4))
@ -5297,7 +5244,7 @@ bool spu_thread::process_mfc_cmd()
{
mfc_queue[mfc_size++] = ch_mfc_cmd;
mfc_barrier |= -1;
mfc_fence |= utils::rol32(1, ch_mfc_cmd.tag);
mfc_fence |= rx::rol32(1, ch_mfc_cmd.tag);
}
return true;
@ -5592,7 +5539,7 @@ retry:
if (reading && res.locks && mask_hint & (SPU_EVENT_S1 | SPU_EVENT_S2))
{
busy_wait(100);
rx::busy_wait(100);
goto retry;
}
@ -5899,7 +5846,7 @@ s64 spu_thread::get_ch_value(u32 ch)
}
}
const usz seed = (utils::get_tsc() >> 8) % 100;
const usz seed = (rx::get_tsc() >> 8) % 100;
#ifdef __linux__
const bool reservation_busy_waiting = false;
@ -5998,7 +5945,7 @@ s64 spu_thread::get_ch_value(u32 ch)
{
if (u32 work_count = g_spu_work_count)
{
const u32 true_free = utils::sub_saturate<u32>(utils::get_thread_count(), 10);
const u32 true_free = rx::sub_saturate<u32>(utils::get_thread_count(), 10);
if (work_count > true_free)
{
@ -6123,7 +6070,7 @@ s64 spu_thread::get_ch_value(u32 ch)
}
else
{
busy_wait();
rx::busy_wait();
}
continue;
@ -6490,7 +6437,7 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
value &= 0x1f;
// Reset stall status for specified tag
const u32 tag_mask = utils::rol32(1, value);
const u32 tag_mask = rx::rol32(1, value);
if (ch_stall_mask & tag_mask)
{
@ -7320,7 +7267,7 @@ bool spu_thread::try_load_debug_capture()
void spu_thread::wakeup_delay(u32 div) const
{
if (g_cfg.core.spu_wakeup_delay_mask & (1u << index))
thread_ctrl::wait_for_accurate(utils::aligned_div(+g_cfg.core.spu_wakeup_delay, div));
thread_ctrl::wait_for_accurate(rx::aligned_div(+g_cfg.core.spu_wakeup_delay, div));
}
spu_function_logger::spu_function_logger(spu_thread& spu, const char* func) noexcept
@ -7397,7 +7344,7 @@ s64 spu_channel::pop_wait(cpu_thread& spu, bool pop)
for (int i = 0; i < 10; i++)
{
busy_wait();
rx::busy_wait();
if (!(data & bit_wait))
{
@ -7473,7 +7420,7 @@ bool spu_channel::push_wait(cpu_thread& spu, u32 value, bool push)
return true;
}
busy_wait();
rx::busy_wait();
state = data;
}
@ -7528,7 +7475,7 @@ std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu, bool pop_value)
for (int i = 0; i < 10; i++)
{
busy_wait();
rx::busy_wait();
if (!atomic_storage<u8>::load(values.raw().waiting))
{
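
Aside: throughout the MFC hunks above, tag masks are built with rx::rol32(1, tag) rather than 1u << tag. Assuming the rotate masks its count the way std::rotl does, the expression stays well defined for any count, whereas a plain shift by 32 or more is undefined behaviour:

#include <bit>

static_assert(std::rotl(1u, 5) == 0x20u);      // equal to 1u << 5 for tags 0..31
static_assert(std::rotl(1u, 35) == (1u << 3)); // rotate wraps; the shift would be UB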

View file

@ -1,4 +1,6 @@
#include "stdafx.h"
#include "rx/align.hpp"
#include "vm_locking.h"
#include "vm_ptr.h"
#include "vm_ref.h"
@ -14,7 +16,8 @@
#include <span>
#include "util/vm.hpp"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "rx/align.hpp"
#include "util/simd.hpp"
#include "util/serialization.hpp"
@ -245,7 +248,7 @@ namespace vm
// Try triggering a page fault (write)
// TODO: Read memory if needed
utils::trigger_write_page_fault(vm::base(test / 4096 == begin / 4096 ? begin : test));
rx::trigger_write_page_fault(vm::base(test / 4096 == begin / 4096 ? begin : test));
continue;
}
}
@ -258,7 +261,7 @@ namespace vm
perf0.restart();
}
busy_wait(200);
rx::busy_wait(200);
if (i >= 2 && !_cpu)
{
@ -339,9 +342,9 @@ namespace vm
auto range_lock = &*std::prev(std::end(vm::g_range_lock_set));
*range_lock = addr | u64{size} << 32 | flags;
utils::prefetch_read(g_range_lock_set + 0);
utils::prefetch_read(g_range_lock_set + 2);
utils::prefetch_read(g_range_lock_set + 4);
rx::prefetch_read(g_range_lock_set + 0);
rx::prefetch_read(g_range_lock_set + 2);
rx::prefetch_read(g_range_lock_set + 4);
const auto range = utils::address_range::start_length(addr, size);
@ -364,7 +367,7 @@ namespace vm
break;
}
utils::pause();
rx::pause();
}
return range_lock;
@ -407,7 +410,7 @@ namespace vm
}
if (i < 100)
busy_wait(200);
rx::busy_wait(200);
else
std::this_thread::yield();
@ -516,12 +519,12 @@ namespace vm
if (to_prepare_memory)
{
// We have some spare time, prepare cache lines (todo: reservation tests here)
utils::prefetch_write(vm::get_super_ptr(addr));
utils::prefetch_write(vm::get_super_ptr(addr) + 64);
rx::prefetch_write(vm::get_super_ptr(addr));
rx::prefetch_write(vm::get_super_ptr(addr) + 64);
to_prepare_memory = false;
}
busy_wait(200);
rx::busy_wait(200);
}
else
{
@ -552,9 +555,9 @@ namespace vm
addr1 = static_cast<u16>(addr) | is_shared;
}
utils::prefetch_read(g_range_lock_set + 0);
utils::prefetch_read(g_range_lock_set + 2);
utils::prefetch_read(g_range_lock_set + 4);
rx::prefetch_read(g_range_lock_set + 0);
rx::prefetch_read(g_range_lock_set + 2);
rx::prefetch_read(g_range_lock_set + 4);
u64 to_clear = get_range_lock_bits(false);
@ -568,7 +571,7 @@ namespace vm
for (u64 hi = addr2 >> 16, max = (addr2 + size2 - 1) >> 16; hi <= max; hi++)
{
u64 addr3 = addr2;
u64 size3 = std::min<u64>(addr2 + size2, utils::align(addr2, 0x10000)) - addr2;
u64 size3 = std::min<u64>(addr2 + size2, rx::alignUp(addr2, 0x10000)) - addr2;
if (u64 is_shared = g_shmem[hi]) [[unlikely]]
{
@ -594,12 +597,12 @@ namespace vm
if (to_prepare_memory)
{
utils::prefetch_write(vm::get_super_ptr(addr));
utils::prefetch_write(vm::get_super_ptr(addr) + 64);
rx::prefetch_write(vm::get_super_ptr(addr));
rx::prefetch_write(vm::get_super_ptr(addr) + 64);
to_prepare_memory = false;
}
utils::pause();
rx::pause();
}
for (auto lock = g_locks.cbegin(), end = lock + g_cfg.core.ppu_threads; lock != end; lock++)
@ -610,12 +613,12 @@ namespace vm
{
if (to_prepare_memory)
{
utils::prefetch_write(vm::get_super_ptr(addr));
utils::prefetch_write(vm::get_super_ptr(addr) + 64);
rx::prefetch_write(vm::get_super_ptr(addr));
rx::prefetch_write(vm::get_super_ptr(addr) + 64);
to_prepare_memory = false;
}
utils::pause();
rx::pause();
}
}
}
@ -642,7 +645,7 @@ namespace vm
}
else if (i < 15)
{
busy_wait(500);
rx::busy_wait(500);
}
else
{
@ -683,7 +686,7 @@ namespace vm
}
else if (i < 15)
{
busy_wait(500);
rx::busy_wait(500);
}
else
{
@ -1078,13 +1081,13 @@ namespace vm
if (state & page_1m_size)
{
i = utils::align(i + 1, 0x100000 / 4096);
i = rx::alignUp(i + 1, 0x100000 / 4096);
continue;
}
if (state & page_64k_size)
{
i = utils::align(i + 1, 0x10000 / 4096);
i = rx::alignUp(i + 1, 0x10000 / 4096);
continue;
}
@ -1359,7 +1362,7 @@ namespace vm
const u32 min_page_size = flags & page_size_4k ? 0x1000 : 0x10000;
// Align to minimal page size
const u32 size = utils::align(orig_size, min_page_size) + (flags & stack_guarded ? 0x2000 : 0);
const u32 size = rx::alignUp(orig_size, min_page_size) + (flags & stack_guarded ? 0x2000 : 0);
// Check alignment (it's page allocation, so passing small values there is just silly)
if (align < min_page_size || align != (0x80000000u >> std::countl_zero(align)))
@ -1387,7 +1390,7 @@ namespace vm
const u32 max = (this->addr + this->size - size) & (0 - align);
u32 addr = utils::align(this->addr, align);
u32 addr = rx::alignUp(this->addr, align);
if (this->addr > max || addr > max)
{
@ -1434,7 +1437,7 @@ namespace vm
const u32 size0 = orig_size + addr % min_page_size;
// Align to minimal page size
const u32 size = utils::align(size0, min_page_size);
const u32 size = rx::alignUp(size0, min_page_size);
// Return if addr or size is invalid
// If shared memory is provided, addr/size must be aligned
@ -1870,7 +1873,7 @@ namespace vm
return nullptr;
}
for (u32 addr = utils::align<u32>(0x10000000, align);; addr += align)
for (u32 addr = rx::alignUp<u32>(0x10000000, align);; addr += align)
{
if (_test_map(addr, size))
{
@ -1950,7 +1953,7 @@ namespace vm
vm::writer_lock lock;
// Align to minimal page size
const u32 size = utils::align(orig_size, 0x10000);
const u32 size = rx::alignUp(orig_size, 0x10000);
// Check alignment
if (align < 0x10000 || align != (0x80000000u >> std::countl_zero(align)))
@ -2178,7 +2181,7 @@ namespace vm
// Wait a bit before accessing global lock
range_lock->release(0);
busy_wait(200);
rx::busy_wait(200);
}
const bool result = try_access_internal(begin, ptr, size, is_write);
@ -2399,7 +2402,7 @@ namespace vm
// Prevent overflow
const u32 size = 0 - max_size < addr ? (0 - addr) : max_size;
for (u32 i = addr, end = utils::align(addr + size, 4096) - 1; i <= end;)
for (u32 i = addr, end = rx::alignUp(addr + size, 4096) - 1; i <= end;)
{
if (check_pages && !vm::check_addr(i, vm::page_readable))
{
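
Note: the busy_wait/pause calls migrated above are spin-wait primitives. A rough sketch of their likely shape follows; the real rx:: versions are presumably TSC-calibrated rather than this fixed loop:

#include <cstddef>
#if defined(__x86_64__) || defined(_M_X64)
#include <immintrin.h>
#endif

// One architectural spin hint
inline void pause()
{
#if defined(__x86_64__) || defined(_M_X64)
	_mm_pause();
#elif defined(__aarch64__)
	__asm__ volatile("yield");
#endif
}

// Spin for roughly the requested number of cycles
inline void busy_wait(std::size_t cycles = 3000)
{
	for (std::size_t i = 0; i <= cycles / 300; i++)
		pause();
}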

View file

@ -3,7 +3,7 @@
#include "vm.h"
#include "vm_locking.h"
#include "util/atomic.hpp"
#include "util/tsc.hpp"
#include "rx/tsc.hpp"
#include <functional>
extern bool g_use_rtm;
@ -209,7 +209,7 @@ namespace vm
unsigned status = -1;
u64 _old = 0;
auto stamp0 = utils::get_tsc(), stamp1 = stamp0, stamp2 = stamp0;
auto stamp0 = rx::get_tsc(), stamp1 = stamp0, stamp2 = stamp0;
#ifndef _MSC_VER
__asm__ goto("xbegin %l[stage2];" ::: "memory" : stage2);
@ -271,16 +271,16 @@ namespace vm
#ifndef _MSC_VER
__asm__ volatile("mov %%eax, %0;" : "=r"(status)::"memory");
#endif
stamp1 = utils::get_tsc();
stamp1 = rx::get_tsc();
// Stage 2: try to lock reservation first
_old = res.fetch_add(1);
// Compute stamps excluding memory touch
stamp2 = utils::get_tsc() - (stamp1 - stamp0);
stamp2 = rx::get_tsc() - (stamp1 - stamp0);
// Start lightened transaction
for (; !(_old & vm::rsrv_unique_lock) && stamp2 - stamp0 <= g_rtm_tx_limit2; stamp2 = utils::get_tsc())
for (; !(_old & vm::rsrv_unique_lock) && stamp2 - stamp0 <= g_rtm_tx_limit2; stamp2 = rx::get_tsc())
{
if (cpu.has_pause_flag())
{
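
Note: the stamp0/stamp1/stamp2 bookkeeping above is in TSC units. rx::get_tsc presumably wraps the hardware counter, roughly as below; the arm64 Android builds in this PR would read CNTVCT_EL0 rather than RDTSC (an assumption, not quoted code):

#include <cstdint>
#if defined(_M_X64)
#include <intrin.h>
#elif defined(__x86_64__)
#include <x86intrin.h>
#endif

inline std::uint64_t get_tsc()
{
#if defined(__x86_64__) || defined(_M_X64)
	return __rdtsc();
#elif defined(__aarch64__)
	std::uint64_t cnt;
	__asm__ volatile("mrs %0, cntvct_el0" : "=r"(cnt)); // virtual counter
	return cnt;
#else
	return 0; // placeholder for other targets in this sketch
#endif
}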

View file

@ -4,7 +4,8 @@
#include "Emu/Memory/vm_ptr.h"
#include "util/mutex.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "rx/align.hpp"
#include "util/logs.hpp"
LOG_CHANNEL(np_mem_allocator);
@ -52,7 +53,7 @@ namespace np
}
// Align allocs
const u32 alloc_size = utils::align(size, 4);
const u32 alloc_size = rx::alignUp(size, 4);
if (alloc_size > m_avail)
{
np_mem_allocator.error("Not enough memory available in NP pool!");

View file

@ -1,7 +1,8 @@
#pragma once
#include "Emu/Memory/vm_ptr.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "rx/align.hpp"
namespace np
{
@ -9,7 +10,7 @@ namespace np
{
public:
event_data(u32 vm_addr, u32 initial_size, u32 max_size)
: m_max_size(max_size), m_cur_size(utils::align(initial_size, 4))
: m_max_size(max_size), m_cur_size(rx::alignUp(initial_size, 4))
{
m_data_ptr.set(vm_addr);
}
@ -50,7 +51,7 @@ namespace np
template <typename T>
T* allocate(u32 size, vm::bptr<T>& dest)
{
const u32 to_alloc = utils::align(size, 4);
const u32 to_alloc = rx::alignUp(size, 4);
ensure((m_cur_size + to_alloc) <= m_max_size, "event_data::allocate: size would overflow the allocated buffer!");
u8* dest_ptr = reinterpret_cast<u8*>(&dest);

View file

@ -1,5 +1,6 @@
#include "stdafx.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "rx/align.hpp"
#include "np_gui_cache.h"
LOG_CHANNEL(np_gui_cache);
@ -72,7 +73,7 @@ namespace np
const auto& room = ::at32(rooms, room_id);
const u32 room_size = ::narrow<u32>(utils::align(sizeof(SceNpMatchingRoomStatus), 8) + (utils::align(sizeof(SceNpMatchingRoomMember), 8) * room.members.size()));
const u32 room_size = ::narrow<u32>(rx::alignUp(sizeof(SceNpMatchingRoomStatus), 8) + (rx::alignUp(sizeof(SceNpMatchingRoomMember), 8) * room.members.size()));
if (!data)
return not_an_error(room_size);
@ -94,12 +95,12 @@ namespace np
{
if (!cur_member_ptr)
{
room_status->members = vm::cast(data.addr() + utils::align(sizeof(SceNpMatchingRoomStatus), 8));
room_status->members = vm::cast(data.addr() + rx::alignUp(sizeof(SceNpMatchingRoomStatus), 8));
cur_member_ptr = room_status->members;
}
else
{
cur_member_ptr->next = vm::cast(cur_member_ptr.addr() + utils::align(sizeof(SceNpMatchingRoomMember), 8));
cur_member_ptr->next = vm::cast(cur_member_ptr.addr() + rx::alignUp(sizeof(SceNpMatchingRoomMember), 8));
cur_member_ptr = cur_member_ptr->next;
}

View file

@ -7,7 +7,8 @@
#include "cellos/sys_memory.h"
#include "Emu/RSX/RSXThread.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "rx/align.hpp"
#include <thread>
@ -26,7 +27,7 @@ namespace rsx
}
// User memory + fifo size
buffer_size = utils::align<u32>(buffer_size, 0x100000) + 0x10000000;
buffer_size = rx::alignUp<u32>(buffer_size, 0x100000) + 0x10000000;
// We are not allowed to drain all memory so add a little
g_fxo->init<lv2_memory_container>(buffer_size + 0x1000000);

View file

@ -5,7 +5,8 @@
#include "../rsx_utils.h"
#include "3rdparty/bcdec/bcdec.hpp"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "rx/align.hpp"
namespace utils
{
@ -661,13 +662,13 @@ namespace
}
else if constexpr (block_edge_in_texel == 4)
{
current_subresource_layout.width_in_block = utils::aligned_div(miplevel_width_in_texel, block_edge_in_texel);
current_subresource_layout.height_in_block = utils::aligned_div(miplevel_height_in_texel, block_edge_in_texel);
current_subresource_layout.width_in_block = rx::aligned_div(miplevel_width_in_texel, block_edge_in_texel);
current_subresource_layout.height_in_block = rx::aligned_div(miplevel_height_in_texel, block_edge_in_texel);
}
else
{
// Only the width is compressed
current_subresource_layout.width_in_block = utils::aligned_div(miplevel_width_in_texel, block_edge_in_texel);
current_subresource_layout.width_in_block = rx::aligned_div(miplevel_width_in_texel, block_edge_in_texel);
current_subresource_layout.height_in_block = miplevel_height_in_texel;
}
@ -699,7 +700,7 @@ namespace
if (!padded_row) // Only swizzled textures obey this restriction
{
offset_in_src = utils::align(offset_in_src, 128);
offset_in_src = rx::alignUp(offset_in_src, 128);
}
}
@ -1429,8 +1430,8 @@ namespace rsx
usz result = 0;
for (u16 i = 0; i < mipmap; ++i)
{
usz rowPitch = utils::align(block_size_in_byte * width_in_blocks, row_pitch_alignment);
result += utils::align(rowPitch * height_in_blocks * depth, mipmap_alignment);
usz rowPitch = rx::alignUp(block_size_in_byte * width_in_blocks, row_pitch_alignment);
result += rx::alignUp(rowPitch * height_in_blocks * depth, mipmap_alignment);
height_in_blocks = std::max<usz>(height_in_blocks / 2, 1);
width_in_blocks = std::max<usz>(width_in_blocks / 2, 1);
}
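
A worked example of the linear-size loop above, with illustrative numbers not taken from the source: a 512x512 DXT5 texture (16-byte 4x4 blocks, so 128x128 blocks at mip 0), depth 1, rows aligned to 256 bytes and mip levels to 512:

// mip 0: rowPitch = alignUp(16 * 128, 256) = 2048; result += alignUp(2048 * 128, 512) = 262144
// mip 1: rowPitch = alignUp(16 *  64, 256) = 1024; result += alignUp(1024 *  64, 512) =  65536
// mip 2: rowPitch = alignUp(16 *  32, 256) =  512; result += alignUp( 512 *  32, 512) =  16384
// total for three mip levels: 262144 + 65536 + 16384 = 344064 bytes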

View file

@ -1,7 +1,8 @@
#pragma once
#include "util/StrFmt.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "rx/align.hpp"
/**
* Ring buffer memory helper:
@ -20,8 +21,8 @@ protected:
template <int Alignment>
bool can_alloc(usz size) const
{
usz alloc_size = utils::align(size, Alignment);
usz aligned_put_pos = utils::align(m_put_pos, Alignment);
usz alloc_size = rx::alignUp(size, Alignment);
usz aligned_put_pos = rx::alignUp(m_put_pos, Alignment);
if (aligned_put_pos + alloc_size < m_size)
{
// range before get
@ -85,8 +86,8 @@ public:
template <int Alignment>
usz alloc(usz size)
{
const usz alloc_size = utils::align(size, Alignment);
const usz aligned_put_pos = utils::align(m_put_pos, Alignment);
const usz alloc_size = rx::alignUp(size, Alignment);
const usz aligned_put_pos = rx::alignUp(m_put_pos, Alignment);
if (!can_alloc<Alignment>(size) && !grow(alloc_size))
{

View file

@ -1,7 +1,8 @@
#include "stdafx.h"
#include "surface_store.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "rx/align.hpp"
namespace rsx
{
@ -39,20 +40,20 @@ namespace rsx
{
switch (format)
{
case surface_color_format::b8: return utils::align(width, 256);
case surface_color_format::b8: return rx::alignUp(width, 256);
case surface_color_format::g8b8:
case surface_color_format::x1r5g5b5_o1r5g5b5:
case surface_color_format::x1r5g5b5_z1r5g5b5:
case surface_color_format::r5g6b5: return utils::align(width * 2, 256);
case surface_color_format::r5g6b5: return rx::alignUp(width * 2, 256);
case surface_color_format::a8b8g8r8:
case surface_color_format::x8b8g8r8_o8b8g8r8:
case surface_color_format::x8b8g8r8_z8b8g8r8:
case surface_color_format::x8r8g8b8_o8r8g8b8:
case surface_color_format::x8r8g8b8_z8r8g8b8:
case surface_color_format::x32:
case surface_color_format::a8r8g8b8: return utils::align(width * 4, 256);
case surface_color_format::w16z16y16x16: return utils::align(width * 8, 256);
case surface_color_format::w32z32y32x32: return utils::align(width * 16, 256);
case surface_color_format::a8r8g8b8: return rx::alignUp(width * 4, 256);
case surface_color_format::w16z16y16x16: return rx::alignUp(width * 8, 256);
case surface_color_format::w32z32y32x32: return rx::alignUp(width * 16, 256);
}
fmt::throw_exception("Unknown color surface format");
}
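
Two spot checks of the pitch table above, which rounds every row up to a 256-byte boundary (widths are illustrative):

#include <cstdint>

constexpr std::uint32_t pitch256(std::uint32_t width_bytes)
{
	return (width_bytes + 255u) & ~255u; // same shape as rx::alignUp(width_bytes, 256)
}

static_assert(pitch256(1280 * 4) == 5120); // a8r8g8b8, width 1280: already a multiple
static_assert(pitch256(1366) == 1536);     // b8, width 1366: rounded up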

View file

@ -8,7 +8,8 @@
#include "../rsx_utils.h"
#include <list>
#include "util/asm.hpp"
#include "rx/align.hpp"
#include "rx/asm.hpp"
namespace rsx
{
@ -806,7 +807,7 @@ namespace rsx
continue;
}
num_rows = utils::aligned_div(this_range.length(), rsx_pitch);
num_rows = rx::aligned_div(this_range.length(), rsx_pitch);
}
for (u32 row = 0, offset = (this_range.start - range.start), section_len = (this_range.end - range.start + 1);
@ -1186,7 +1187,7 @@ namespace rsx
{
// Width is calculated in the coordinate-space of the requester; normalize
info.src_area.x = (info.src_area.x * required_bpp) / surface_bpp;
info.src_area.width = utils::align(width * required_bpp, surface_bpp) / surface_bpp;
info.src_area.width = rx::alignUp(width * required_bpp, surface_bpp) / surface_bpp;
}
else
{

View file

@ -1,4 +1,4 @@
#pragma once
#include <util/asm.hpp>
#include <rx/asm.hpp>
#include <util/sysinfo.hpp>

View file

@ -1,6 +1,7 @@
#include "GLCompute.h"
#include "GLTexture.h"
#include "util/StrUtil.h"
#include "rx/align.hpp"
namespace gl
{
@ -196,7 +197,7 @@ namespace gl
m_data_length = data_length;
const auto num_bytes_per_invocation = optimal_group_size * kernel_size * 4;
const auto num_bytes_to_process = utils::align(data_length, num_bytes_per_invocation);
const auto num_bytes_to_process = rx::alignUp(data_length, num_bytes_per_invocation);
const auto num_invocations = num_bytes_to_process / num_bytes_per_invocation;
if ((num_bytes_to_process + data_offset) > data->size())
@ -364,7 +365,7 @@ namespace gl
dst->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(2), out_offset, row_pitch * 4 * region.height);
const int num_invocations = utils::aligned_div(region.width * region.height, optimal_kernel_size * optimal_group_size);
const int num_invocations = rx::aligned_div(region.width * region.height, optimal_kernel_size * optimal_group_size);
compute_task::run(cmd, num_invocations);
}
@ -411,7 +412,7 @@ namespace gl
dst->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(1), out_offset, row_pitch * 4 * region.height);
const int num_invocations = utils::aligned_div(region.width * region.height, optimal_kernel_size * optimal_group_size);
const int num_invocations = rx::aligned_div(region.width * region.height, optimal_kernel_size * optimal_group_size);
compute_task::run(cmd, num_invocations);
}
@ -437,7 +438,7 @@ namespace gl
void cs_ssbo_to_color_image::run(gl::command_context& cmd, const buffer* src, const texture_view* dst, const u32 src_offset, const coordu& dst_region, const pixel_buffer_layout& layout)
{
const u32 bpp = dst->image()->pitch() / dst->image()->width();
const u32 row_length = utils::align(dst_region.width * bpp, std::max<int>(layout.alignment, 1)) / bpp;
const u32 row_length = rx::alignUp(dst_region.width * bpp, std::max<int>(layout.alignment, 1)) / bpp;
m_program.uniforms["swap_bytes"] = layout.swap_bytes;
m_program.uniforms["src_pitch"] = row_length;
@ -448,7 +449,7 @@ namespace gl
src->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), src_offset, row_length * bpp * dst_region.height);
glBindImageTexture(GL_COMPUTE_IMAGE_SLOT(0), dst->id(), 0, GL_FALSE, 0, GL_WRITE_ONLY, dst->view_format());
const int num_invocations = utils::aligned_div(dst_region.width * dst_region.height, optimal_kernel_size * optimal_group_size);
const int num_invocations = rx::aligned_div(dst_region.width * dst_region.height, optimal_kernel_size * optimal_group_size);
compute_task::run(cmd, num_invocations);
}
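
The dispatch above issues one invocation per optimal_kernel_size * optimal_group_size elements. With illustrative numbers (a 1920x1080 region and 512 elements per invocation, e.g. kernel size 8, group size 64):

static_assert((1920u * 1080u + 511u) / 512u == 4050u); // aligned_div(2073600, 512)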

View file

@ -337,7 +337,7 @@ namespace gl
set_parameters(cmd);
const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size);
const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation);
const u32 linear_invocations = rx::aligned_div(data_length, num_bytes_per_invocation);
compute_task::run(cmd, linear_invocations);
}
};

View file

@ -12,6 +12,8 @@
#include "Emu/RSX/Host/RSXDMAWriter.h"
#include "Emu/RSX/NV47/HW/context_accessors.define.h"
#include "rx/align.hpp"
[[noreturn]] extern void report_fatal_error(std::string_view _text, bool is_html = false, bool include_help_text = true);
namespace
@ -895,7 +897,7 @@ void GLGSRender::load_program_env()
if (update_fragment_texture_env)
m_texture_parameters_buffer->reserve_storage_on_heap(256);
if (update_fragment_constants)
m_fragment_constants_buffer->reserve_storage_on_heap(utils::align(fragment_constants_size, 256));
m_fragment_constants_buffer->reserve_storage_on_heap(rx::alignUp(fragment_constants_size, 256));
if (update_transform_constants)
m_transform_constants_buffer->reserve_storage_on_heap(8192);
if (update_raster_env)

View file

@ -15,7 +15,7 @@
#include "util/geometry.h"
#include "util/File.h"
#include "util/logs.hpp"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "glutils/common.h"
// TODO: Include on use

View file

@ -4,6 +4,8 @@
#include "../Program/RSXOverlay.h"
#include "Emu/Cell/timers.hpp"
#include "rx/align.hpp"
namespace gl
{
// Lame
@ -544,7 +546,7 @@ namespace gl
const pixel_buffer_layout& layout)
{
const u32 bpp = dst->image()->pitch() / dst->image()->width();
const u32 row_length = utils::align(dst_region.width * bpp, std::max<int>(layout.alignment, 1)) / bpp;
const u32 row_length = rx::alignUp(dst_region.width * bpp, std::max<int>(layout.alignment, 1)) / bpp;
program_handle.uniforms["src_pitch"] = row_length;
program_handle.uniforms["swap_bytes"] = layout.swap_bytes;

View file

@ -2,6 +2,8 @@
#include "GLResolveHelper.h"
#include "GLTexture.h"
#include "rx/align.hpp"
#include <unordered_map>
#include <stack>
@ -225,8 +227,8 @@ namespace gl
multisampled = msaa_image;
resolve = resolve_image;
const u32 invocations_x = utils::align(resolve_image->width(), cs_wave_x) / cs_wave_x;
const u32 invocations_y = utils::align(resolve_image->height(), cs_wave_y) / cs_wave_y;
const u32 invocations_x = rx::alignUp(resolve_image->width(), cs_wave_x) / cs_wave_x;
const u32 invocations_y = rx::alignUp(resolve_image->height(), cs_wave_y) / cs_wave_y;
compute_task::run(cmd, invocations_x, invocations_y);
}

View file

@ -9,7 +9,8 @@
#include "../RSXThread.h"
#include "util/asm.hpp"
#include "rx/align.hpp"
#include "rx/asm.hpp"
namespace gl
{
@ -664,7 +665,7 @@ namespace gl
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
u64 image_linear_size = staging_buffer.size();
const auto min_required_buffer_size = std::max<u64>(utils::align(image_linear_size * 4, 0x100000), 16 * 0x100000);
const auto min_required_buffer_size = std::max<u64>(rx::alignUp(image_linear_size * 4, 0x100000), 16 * 0x100000);
if (driver_caps.ARB_compute_shader_supported)
{
@ -825,7 +826,7 @@ namespace gl
}
else
{
const auto aligned_pitch = utils::align<u32>(dst->pitch(), 4);
const auto aligned_pitch = rx::alignUp<u32>(dst->pitch(), 4);
const u32 texture_data_sz = dst->depth() * dst->height() * aligned_pitch;
data_upload_buf.resize(texture_data_sz);
}
@ -1002,7 +1003,7 @@ namespace gl
u32 scratch_offset = 0;
const u64 min_storage_requirement = src_mem.image_size_in_bytes + dst_mem.image_size_in_bytes;
const u64 min_required_buffer_size = utils::align(min_storage_requirement, 256);
const u64 min_required_buffer_size = rx::alignUp(min_storage_requirement, 256);
if (g_typeless_transfer_buffer.size() >= min_required_buffer_size) [[likely]]
{

View file

@ -3,7 +3,8 @@
#include "GLTextureCache.h"
#include "../Common/BufferUtils.h"
#include "util/asm.hpp"
#include "rx/align.hpp"
#include "rx/asm.hpp"
namespace gl
{
@ -82,7 +83,7 @@ namespace gl
}
else
{
const u32 num_rows = utils::align(valid_length, rsx_pitch) / rsx_pitch;
const u32 num_rows = rx::alignUp(valid_length, rsx_pitch) / rsx_pitch;
u32* data = static_cast<u32*>(dst);
for (u32 row = 0; row < num_rows; ++row)
{
@ -212,7 +213,7 @@ namespace gl
// Dimensions were given in 'dst' space. Work out the real source coordinates
const auto src_bpp = slice.src->pitch() / slice.src->width();
src_x = (src_x * dst_bpp) / src_bpp;
src_w = utils::aligned_div<u16>(src_w * dst_bpp, src_bpp);
src_w = rx::aligned_div<u16>(src_w * dst_bpp, src_bpp);
}
if (auto surface = dynamic_cast<gl::render_target*>(slice.src))

View file

@ -7,6 +7,8 @@
#include "../Common/texture_cache.h"
#include "rx/align.hpp"
#include <memory>
#include <vector>
@ -49,7 +51,7 @@ namespace gl
void init_buffer(const gl::texture* src)
{
const u32 vram_size = src->pitch() * src->height();
const u32 buffer_size = utils::align(vram_size, 4096);
const u32 buffer_size = rx::alignUp(vram_size, 4096);
if (pbo)
{

View file

@ -2,7 +2,7 @@
#include "../OpenGL.h"
#include <util/types.hpp>
#include <util/asm.hpp>
#include <rx/asm.hpp>
#include <util/logs.hpp>
namespace gl

View file

@ -3,6 +3,7 @@
#include "buffer_object.h"
#include "state_tracker.hpp"
#include "pixel_settings.hpp"
#include "rx/align.hpp"
namespace gl
{
@ -119,14 +120,14 @@ namespace gl
case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
{
m_compressed = true;
m_pitch = utils::align(width, 4) / 2;
m_pitch = rx::alignUp(width, 4) / 2;
break;
}
case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
{
m_compressed = true;
m_pitch = utils::align(width, 4);
m_pitch = rx::alignUp(width, 4);
break;
}
default:

View file

@ -1,6 +1,8 @@
#include "stdafx.h"
#include "ring_buffer.h"
#include "rx/align.hpp"
namespace gl
{
void ring_buffer::recreate(GLsizeiptr size, const void* data)
@ -37,7 +39,7 @@ namespace gl
{
u32 offset = m_data_loc;
if (m_data_loc)
offset = utils::align(offset, alignment);
offset = rx::alignUp(offset, alignment);
if ((offset + alloc_size) > m_size)
{
@ -56,7 +58,7 @@ namespace gl
}
// Align data loc to 256; allows some "guard" region so we dont trample our own data inadvertently
m_data_loc = utils::align(offset + alloc_size, 256);
m_data_loc = rx::alignUp(offset + alloc_size, 256);
return std::make_pair(static_cast<char*>(m_memory_mapping) + offset, offset);
}
@ -108,9 +110,9 @@ namespace gl
u32 offset = m_data_loc;
if (m_data_loc)
offset = utils::align(offset, 256);
offset = rx::alignUp(offset, 256);
const u32 block_size = utils::align(alloc_size + 16, 256); // Overallocate just in case we need to realign base
const u32 block_size = rx::alignUp(alloc_size + 16, 256); // Overallocate just in case we need to realign base
if ((offset + block_size) > m_size)
{
@ -144,10 +146,10 @@ namespace gl
{
u32 offset = m_data_loc;
if (m_data_loc)
offset = utils::align(offset, alignment);
offset = rx::alignUp(offset, alignment);
u32 padding = (offset - m_data_loc);
u32 real_size = utils::align(padding + alloc_size, alignment); // Ensures we leave the loc pointer aligned after we exit
u32 real_size = rx::alignUp(padding + alloc_size, alignment); // Ensures we leave the loc pointer aligned after we exit
if (real_size > m_mapped_bytes)
{
@ -158,10 +160,10 @@ namespace gl
offset = m_data_loc;
if (m_data_loc)
offset = utils::align(offset, alignment);
offset = rx::alignUp(offset, alignment);
padding = (offset - m_data_loc);
real_size = utils::align(padding + alloc_size, alignment);
real_size = rx::alignUp(padding + alloc_size, alignment);
}
m_data_loc = offset + real_size;
@ -270,7 +272,7 @@ namespace gl
u32 scratch_ring_buffer::alloc(u32 size, u32 alignment)
{
u64 start = utils::align(m_alloc_pointer, alignment);
u64 start = rx::alignUp(m_alloc_pointer, alignment);
m_alloc_pointer = (start + size);
if (static_cast<GLsizeiptr>(m_alloc_pointer) > m_storage.size())
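
The ring_buffer changes keep the allocation pattern intact: align the write cursor, reserve the block, then re-align the cursor past the allocation. A standalone sketch of that pattern (hypothetical helper, not the real gl::ring_buffer):

// Sketch of the sub-allocation pattern used by gl::ring_buffer above.
#include <cstdint>
#include <utility>

constexpr std::uint32_t align_up(std::uint32_t v, std::uint32_t a)
{
    return (v + (a - 1)) & ~(a - 1); // power-of-two alignment assumed
}

// Returns {offset of the allocation, new cursor position}.
std::pair<std::uint32_t, std::uint32_t> suballoc(std::uint32_t cursor, std::uint32_t size, std::uint32_t alignment)
{
    const std::uint32_t offset = cursor ? align_up(cursor, alignment) : 0;
    // Re-align the cursor to 256 afterwards, leaving a "guard" region so the
    // next allocation can't trample this one (mirrors the comment above).
    const std::uint32_t next = align_up(offset + size, 256);
    return {offset, next};
}

int main()
{
    auto [offset, next] = suballoc(300, 128, 256);
    // 300 aligns up to 512; 512 + 128 = 640 re-aligns to 768 for the next caller.
    return (offset == 512 && next == 768) ? 0 : 1;
}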

View file

@ -58,7 +58,7 @@ namespace gl
m_src = fmt::replace_all(m_src, replacement_table);
// Fill with 0 to avoid sending incomplete/unused variables to the GPU
m_constants_buf.resize(utils::rounded_div(push_constants_size, 4), 0);
m_constants_buf.resize(rx::rounded_div(push_constants_size, 4), 0);
create();
@ -106,8 +106,8 @@ namespace gl
glBindImageTexture(GL_COMPUTE_IMAGE_SLOT(0), dst->id(), 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8);
constexpr auto wg_size = 16;
const auto invocations_x = utils::aligned_div(output_size.width, wg_size);
const auto invocations_y = utils::aligned_div(output_size.height, wg_size);
const auto invocations_x = rx::aligned_div(output_size.width, wg_size);
const auto invocations_y = rx::aligned_div(output_size.height, wg_size);
ensure(invocations_x == (output_size.width + (wg_size - 1)) / wg_size);
ensure(invocations_y == (output_size.height + (wg_size - 1)) / wg_size);
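
Note the two different helpers in this file: rx::rounded_div (assumed round-to-nearest, matching the old utils::rounded_div) sizes the constants buffer, while rx::aligned_div rounds up, which the ensure() lines above re-derive explicitly. A small sketch of the difference:

// Sketch: assumed rx::rounded_div (round-to-nearest) vs. aligned_div (round-up).
#include <cassert>

constexpr unsigned rounded_div(unsigned value, unsigned divisor)
{
    return (value + divisor / 2) / divisor; // rounds to the nearest integer
}

int main()
{
    assert(rounded_div(10, 4) == 3); // 2.5 rounds up to 3
    assert(rounded_div(9, 4) == 2);  // 2.25 rounds down to 2; aligned_div would give 3
}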

View file

@ -2,7 +2,7 @@
#include "RSXDMAWriter.h"
#include "util//Thread.h"
#include <util/asm.hpp>
#include <rx/asm.hpp>
namespace rsx
{
@ -56,7 +56,7 @@ namespace rsx
// FIXME: This is a busy wait, consider yield to improve responsiveness on weak devices.
while (!m_host_context_ptr->in_flight_commands_completed())
{
utils::pause();
rx::pause();
if (thread_ctrl::state() == thread_state::aborting)
{
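
utils::pause becomes rx::pause in each of these spin loops. A sketch of what such a spin-wait hint typically compiles to (assumed implementation; the real rx/asm.hpp may differ):

// Sketch: a spin-wait hint in the style of rx::pause (assumed implementation).
#if defined(__x86_64__) || defined(_M_X64)
#include <immintrin.h>
#endif

inline void pause_hint()
{
#if defined(__x86_64__) || defined(_M_X64)
    _mm_pause(); // de-pipelines the spin loop and saves power on x86
#elif defined(__aarch64__)
    __asm__ volatile("yield"); // AArch64 equivalent hint
#endif
}

// Usage mirrors the loops above:
//   while (!m_host_context_ptr->in_flight_commands_completed()) pause_hint();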

View file

@ -3,7 +3,7 @@
#include "Emu/System.h"
#include "rpcsx/fw/ps3/cellMsgDialog.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
namespace rsx
{
@ -36,7 +36,7 @@ namespace rsx
while (ref_cnt.load() && !Emu.IsStopped())
{
utils::pause();
rx::pause();
}
}
@ -112,7 +112,7 @@ namespace rsx
{
while (ref_cnt.load() && !Emu.IsStopped())
{
utils::pause();
rx::pause();
}
}
} // namespace rsx

View file

@ -1,7 +1,7 @@
#include "stdafx.h"
#include "overlay_manager.h"
#include "Emu/System.h"
#include <util/asm.hpp>
#include <rx/asm.hpp>
namespace rsx
{
@ -37,7 +37,7 @@ namespace rsx
*m_input_thread = thread_state::aborting;
while (*m_input_thread <= thread_state::aborting)
{
utils::pause();
rx::pause();
}
}
}

View file

@ -9,7 +9,8 @@
#include "cellos/sys_rsx.h"
#include "NV47/HW/context.h"
#include "util/asm.hpp"
#include "rx/align.hpp"
#include "rx/asm.hpp"
#include <thread>
#include <bitset>
@ -139,7 +140,7 @@ namespace rsx
u32 bytes_read = 0;
// Find the next set bit after every iteration
for (int i = 0;; i = (std::countr_zero<u32>(utils::rol8(to_fetch, 0 - i - 1)) + i + 1) % 8)
for (int i = 0;; i = (std::countr_zero<u32>(rx::rol8(to_fetch, 0 - i - 1)) + i + 1) % 8)
{
// If a reservation is being updated, try to load another
const auto& res = vm::reservation_acquire(addr1 + i * 128);
@ -193,7 +194,7 @@ namespace rsx
}
else
{
busy_wait(200);
rx::busy_wait(200);
}
if (strict_fetch_ordering)
@ -247,7 +248,7 @@ namespace rsx
for (u32 remaining = size, addr = m_internal_get, ptr = from; remaining > 0;)
{
const u32 next_block = utils::align(addr + 1, _1M);
const u32 next_block = rx::alignUp(addr + 1, _1M);
const u32 available = (next_block - addr);
if (remaining <= available)
{
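
The fetch loop above uses rx::rol8, an 8-bit rotate-left (assumed to match the old utils::rol8). Rotating the mask right by i + 1 and counting trailing zeros finds the next set bit cyclically; a sketch:

// Sketch: assumed semantics of rx::rol8, as used by the "find the next set
// bit" loop above.
#include <bit>
#include <cstdint>

constexpr std::uint8_t rol8(std::uint8_t v, int s)
{
    return std::rotl(v, s); // std::rotl treats a negative shift as rotate-right
}

// Rotating the mask right by (i + 1) and counting trailing zeros gives the
// distance to the next set bit after position i, modulo 8.
constexpr int next_set_bit(std::uint8_t mask, int i)
{
    return (std::countr_zero<std::uint8_t>(rol8(mask, 0 - i - 1)) + i + 1) % 8;
}

static_assert(next_set_bit(0b0000'1010, 1) == 3); // bit 3 follows bit 1

int main() {}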

View file

@ -9,7 +9,7 @@
#include "util/lockless.h"
#include <thread>
#include "util/asm.hpp"
#include "rx/asm.hpp"
namespace rsx
{
@ -181,13 +181,13 @@ namespace rsx
while (_thr.m_enqueued_count.load() > _thr.m_processed_count.load())
{
rsxthr->on_semaphore_acquire_wait();
utils::pause();
rx::pause();
}
}
else
{
while (_thr.m_enqueued_count.load() > _thr.m_processed_count.load())
utils::pause();
rx::pause();
}
return true;

View file

@ -27,7 +27,8 @@
#include "util/date_time.h"
#include "util/asm.hpp"
#include "rx/align.hpp"
#include "rx/asm.hpp"
#include <span>
#include <thread>
@ -332,11 +333,11 @@ namespace rsx
{
// Division operator
_min_index = std::min(_min_index, first / attrib.frequency);
_max_index = std::max<u32>(_max_index, utils::aligned_div(max_index, attrib.frequency));
_max_index = std::max<u32>(_max_index, rx::aligned_div(max_index, attrib.frequency));
if (freq_count > 0 && freq_count != umax)
{
const u32 max = utils::aligned_div(max_index, attrib.frequency);
const u32 max = rx::aligned_div(max_index, attrib.frequency);
max_result_by_division = std::max<u32>(max_result_by_division, max);
// Discard lower frequencies because it has been proven that there are indices higher than them
@ -365,7 +366,7 @@ namespace rsx
// The alternative would be re-iterating again over all of them
if (get_location(real_offset_address) == CELL_GCM_LOCATION_LOCAL)
{
if (utils::add_saturate<u32>(real_offset_address - rsx::constants::local_mem_base, (_max_index + 1) * attribute_stride) <= render->local_mem_size)
if (rx::add_saturate<u32>(real_offset_address - rsx::constants::local_mem_base, (_max_index + 1) * attribute_stride) <= render->local_mem_size)
{
break;
}
@ -734,7 +735,7 @@ namespace rsx
{
// Be compatible with previous bitwise serialization
ar(std::span<u8>(reinterpret_cast<u8*>(this), OFFSET_OF(avconf, scan_mode)));
ar.pos += utils::align<usz>(OFFSET_OF(avconf, scan_mode), alignof(avconf)) - OFFSET_OF(avconf, scan_mode);
ar.pos += rx::alignUp<usz>(OFFSET_OF(avconf, scan_mode), alignof(avconf)) - OFFSET_OF(avconf, scan_mode);
return;
}
@ -1169,7 +1170,7 @@ namespace rsx
for (; t == now; now = get_time_ns())
{
utils::pause();
rx::pause();
}
timestamp_ctrl = now;
@ -2590,7 +2591,7 @@ namespace rsx
{
if (u32 advance = disasm.disasm(pcs_of_valid_cmds.back()))
{
pcs_of_valid_cmds.push_back(utils::add_saturate<u32>(pcs_of_valid_cmds.back(), advance));
pcs_of_valid_cmds.push_back(rx::add_saturate<u32>(pcs_of_valid_cmds.back(), advance));
}
else
{
@ -2722,7 +2723,7 @@ namespace rsx
}
// Some cases do not need full delay
remaining = utils::aligned_div(remaining, div);
remaining = rx::aligned_div(remaining, div);
const u64 until = get_system_time() + remaining;
while (true)
@ -2751,7 +2752,7 @@ namespace rsx
}
else
{
busy_wait(100);
rx::busy_wait(100);
}
const u64 current = get_system_time();
@ -2862,7 +2863,7 @@ namespace rsx
for (u32 ea = address >> 20, end = ea + (size >> 20); ea < end; ea++)
{
const u32 io = utils::rol32(iomap_table.io[ea], 32 - 20);
const u32 io = rx::rol32(iomap_table.io[ea], 32 - 20);
if (io + 1)
{
@ -2892,7 +2893,7 @@ namespace rsx
while (to_unmap)
{
bit = (std::countr_zero<u64>(utils::rol64(to_unmap, 0 - bit)) + bit);
bit = (std::countr_zero<u64>(rx::rol64(to_unmap, 0 - bit)) + bit);
to_unmap &= ~(1ull << bit);
constexpr u16 null_entry = 0xFFFF;
@ -2998,7 +2999,7 @@ namespace rsx
while (!external_interrupt_ack && !is_stopped())
{
utils::pause();
rx::pause();
}
}
@ -3022,7 +3023,7 @@ namespace rsx
while (external_interrupt_lock && (cpu_flag::ret - state))
{
// TODO: Investigate non busy-spinning method
utils::pause();
rx::pause();
}
external_interrupt_ack.store(false);
@ -3364,7 +3365,7 @@ namespace rsx
}
const u64 current_time = get_system_time();
const u64 current_tsc = utils::get_tsc();
const u64 current_tsc = rx::get_tsc();
u64 preempt_count = 0;
if (frame_times.size() >= 60)
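
rx::add_saturate<u32> (assumed to clamp at the type's maximum instead of wrapping, like the old utils::add_saturate) is what keeps the local-memory bounds check and the command-stream PC advance above safe near the top of the 32-bit address space. A sketch:

// Sketch: assumed semantics of rx::add_saturate (clamps instead of wrapping).
#include <cassert>
#include <cstdint>
#include <limits>

template <typename T>
constexpr T add_saturate(T a, T b)
{
    // If a + b would overflow, return the maximum representable value.
    return (a > std::numeric_limits<T>::max() - b) ? std::numeric_limits<T>::max()
                                                   : static_cast<T>(a + b);
}

int main()
{
    assert(add_saturate<std::uint32_t>(0xFFFF'FFF0u, 0x20u) == 0xFFFF'FFFFu);
}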

View file

@ -4,6 +4,8 @@
#include "vkutils/buffer_object.h"
#include "VKPipelineCompiler.h"
#include "rx/align.hpp"
#define VK_MAX_COMPUTE_TASKS 8192 // Max number of jobs per frame
namespace vk
@ -219,7 +221,7 @@ namespace vk
#include "../Program/GLSLSnippets/ShuffleBytes.glsl"
;
const auto parameters_size = utils::align(push_constants_size, 16) / 16;
const auto parameters_size = rx::alignUp(push_constants_size, 16) / 16;
const std::pair<std::string_view, std::string> syntax_replace[] =
{
{"%loc", "0"},
@ -387,7 +389,7 @@ namespace vk
word_count = num_words;
block_length = num_words * 4;
const u32 linear_invocations = utils::aligned_div(word_count, optimal_group_size);
const u32 linear_invocations = rx::aligned_div(word_count, optimal_group_size);
compute_task::run(cmd, linear_invocations);
}
} // namespace vk

View file

@ -6,7 +6,8 @@
#include "Emu/IdManager.h"
#include "util/StrUtil.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "rx/align.hpp"
#include <unordered_map>
@ -484,7 +485,7 @@ namespace vk
set_parameters(cmd);
const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size);
const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation);
const u32 linear_invocations = rx::aligned_div(data_length, num_bytes_per_invocation);
compute_task::run(cmd, linear_invocations);
}
};
@ -602,8 +603,8 @@ namespace vk
this->out_offset = config.dst_offset;
const auto tile_aligned_height = std::min(
utils::align<u32>(config.image_height, 64),
utils::aligned_div(config.tile_size - config.tile_base_offset, config.tile_pitch));
rx::alignUp<u32>(config.image_height, 64),
rx::aligned_div(config.tile_size - config.tile_base_offset, config.tile_pitch));
if constexpr (Op == RSX_detiler_op::decode)
{
@ -656,7 +657,7 @@ namespace vk
const u32 subtexels_per_invocation = (config.image_bpp < 4) ? (4 / config.image_bpp) : 1;
const u32 virtual_width = config.image_width / subtexels_per_invocation;
const u32 invocations_x = utils::aligned_div(virtual_width, optimal_group_size);
const u32 invocations_x = rx::aligned_div(virtual_width, optimal_group_size);
compute_task::run(cmd, invocations_x, config.image_height, 1);
}
};

View file

@ -7,7 +7,9 @@
#include "Emu/RSX/RSXThread.h"
#include "util/mutex.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "rx/align.hpp"
#include <unordered_map>
namespace vk
@ -413,7 +415,7 @@ namespace vk
std::lock_guard lock(g_dma_mutex);
const u32 start = (local_address & s_dma_block_mask);
const u32 end = utils::align(local_address + length, static_cast<u32>(s_dma_block_length));
const u32 end = rx::alignUp(local_address + length, static_cast<u32>(s_dma_block_length));
for (u32 block = start; block < end;)
{
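
The block loop above rounds the requested range out to whole DMA blocks: start rounds down via the block mask, end rounds up via rx::alignUp. A worked sketch with a hypothetical 64 KiB block size (the real s_dma_block_length may differ):

// Sketch: covering [address, address + length) with fixed power-of-two blocks,
// as in the DMA mapping loop above (block size hypothetical).
#include <cassert>
#include <cstdint>

constexpr std::uint32_t block_length = 0x10000;           // hypothetical 64 KiB blocks
constexpr std::uint32_t block_mask = ~(block_length - 1); // start-of-block mask

int main()
{
    const std::uint32_t address = 0x12345, length = 0x2000;
    const std::uint32_t start = address & block_mask;                             // round down
    const std::uint32_t end = (address + length + block_length - 1) & block_mask; // round up
    assert(start == 0x10000 && end == 0x20000); // one block covers the whole range
}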

View file

@ -22,7 +22,8 @@
#include "../Program/SPIRVCommon.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "rx/align.hpp"
namespace vk
{
@ -919,7 +920,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
// Wait for deadlock to clear
while (m_queue_status & flush_queue_state::deadlock)
{
utils::pause();
rx::pause();
}
g_fxo->get<rsx::dma_manager>().clear_mem_fault_flag();
@ -2081,13 +2082,13 @@ void VKGSRender::load_program_env()
rsx::io_buffer indirection_table_buf([&](usz size) -> std::pair<void*, usz>
{
indirection_table_offset = m_instancing_buffer_ring_info.alloc<1>(utils::align(size, alignment));
indirection_table_offset = m_instancing_buffer_ring_info.alloc<1>(rx::alignUp(size, alignment));
return std::make_pair(m_instancing_buffer_ring_info.map(indirection_table_offset, size), size);
});
rsx::io_buffer constants_array_buf([&](usz size) -> std::pair<void*, usz>
{
constants_data_table_offset = m_instancing_buffer_ring_info.alloc<1>(utils::align(size, alignment));
constants_data_table_offset = m_instancing_buffer_ring_info.alloc<1>(rx::alignUp(size, alignment));
return std::make_pair(m_instancing_buffer_ring_info.map(constants_data_table_offset, size), size);
});
@ -2105,7 +2106,7 @@ void VKGSRender::load_program_env()
auto alloc_storage = [&](usz size) -> std::pair<void*, usz>
{
const auto alignment = m_device->gpu().get_limits().minUniformBufferOffsetAlignment;
mem_offset = m_transform_constants_ring_info.alloc<1>(utils::align(size, alignment));
mem_offset = m_transform_constants_ring_info.alloc<1>(rx::alignUp(size, alignment));
return std::make_pair(m_transform_constants_ring_info.map(mem_offset, size), size);
};
@ -2921,7 +2922,7 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info*
}
rsx_log.warning("[Performance warning] Unexpected ZCULL read caused a hard sync");
busy_wait();
rx::busy_wait();
}
data.sync();
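
rx::busy_wait takes an approximate cycle budget and is assumed to default it when called bare, as in the hard-sync path above (the old utils::busy_wait defaulted to roughly 3000 cycles; the real constant may differ). A sketch:

// Sketch: assumed shape of rx::busy_wait (spin for ~'cycles' before re-checking).
#if defined(__x86_64__) || defined(_M_X64)
#include <immintrin.h>
#endif

inline void busy_wait(unsigned cycles = 3000)
{
    // Each pause covers a few dozen cycles; iterate until the budget is spent.
    // On non-x86 targets this degenerates to a plain counted loop.
    for (unsigned spent = 0; spent < cycles; spent += 32)
    {
#if defined(__x86_64__) || defined(_M_X64)
        _mm_pause();
#endif
    }
}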

View file

@ -8,7 +8,7 @@
#include "Emu/RSX/rsx_utils.h"
#include "Emu/RSX/rsx_cache.h"
#include "util/mutex.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include <optional>
#include <thread>
@ -289,7 +289,7 @@ namespace vk
{
while (num_waiters.load() != 0)
{
utils::pause();
rx::pause();
}
}

View file

@ -8,7 +8,8 @@
#include "upscalers/bilinear_pass.hpp"
#include "upscalers/fsr_pass.h"
#include "upscalers/nearest_pass.hpp"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "rx/align.hpp"
#include "util/video_provider.h"
extern atomic_t<bool> g_user_asked_for_screenshot;
@ -762,7 +763,7 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
{
const usz sshot_size = buffer_height * buffer_width * 4;
vk::buffer sshot_vkbuf(*m_device, utils::align(sshot_size, 0x100000), m_device->get_memory_mapping().host_visible_coherent,
vk::buffer sshot_vkbuf(*m_device, rx::alignUp(sshot_size, 0x100000), m_device->get_memory_mapping().host_visible_coherent,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0, VMM_ALLOCATION_POOL_UNDEFINED);
VkBufferImageCopy copy_info;

View file

@ -4,7 +4,7 @@
#include "VKQueryPool.h"
#include "VKRenderPass.h"
#include "VKResourceManager.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
#include "VKGSRender.h"
namespace vk
@ -172,7 +172,7 @@ namespace vk
while (!query_info.ready)
{
utils::pause();
rx::pause();
poke_query(query_info, index, result_flags);
}
}

View file

@ -4,6 +4,7 @@
#include "VKOverlays.h"
#include "vkutils/image.h"
#include "rx/align.hpp"
namespace vk
{
@ -65,8 +66,8 @@ namespace vk
multisampled = msaa_image;
resolve = resolve_image;
const u32 invocations_x = utils::align(resolve_image->width(), cs_wave_x) / cs_wave_x;
const u32 invocations_y = utils::align(resolve_image->height(), cs_wave_y) / cs_wave_y;
const u32 invocations_x = rx::alignUp(resolve_image->width(), cs_wave_x) / cs_wave_x;
const u32 invocations_y = rx::alignUp(resolve_image->height(), cs_wave_y) / cs_wave_y;
compute_task::run(cmd, invocations_x, invocations_y, 1);
}

View file

@ -13,7 +13,8 @@
#include "../GCM.h"
#include "../rsx_utils.h"
#include "util/asm.hpp"
#include "rx/align.hpp"
#include "rx/asm.hpp"
namespace vk
{
@ -94,7 +95,7 @@ namespace vk
ensure(dst->size() >= allocation_end);
const auto data_offset = u32(region.bufferOffset);
const auto z32_offset = utils::align<u32>(data_offset + packed16_length, 256);
const auto z32_offset = rx::alignUp<u32>(data_offset + packed16_length, 256);
// 1. Copy the depth to buffer
VkBufferImageCopy region2;
@ -148,8 +149,8 @@ namespace vk
ensure(dst->size() >= allocation_end);
const auto data_offset = u32(region.bufferOffset);
const auto z_offset = utils::align<u32>(data_offset + packed_length, 256);
const auto s_offset = utils::align<u32>(z_offset + in_depth_size, 256);
const auto z_offset = rx::alignUp<u32>(data_offset + packed_length, 256);
const auto s_offset = rx::alignUp<u32>(z_offset + in_depth_size, 256);
// 1. Copy the depth and stencil blocks to separate banks
VkBufferImageCopy sub_regions[2];
@ -246,7 +247,7 @@ namespace vk
ensure(src->size() >= allocation_end);
const auto data_offset = u32(region.bufferOffset);
const auto z32_offset = utils::align<u32>(data_offset + packed16_length, 256);
const auto z32_offset = rx::alignUp<u32>(data_offset + packed16_length, 256);
// 1. Pre-compute barrier
vk::insert_buffer_memory_barrier(cmd, src->value, z32_offset, packed32_length,
@ -281,11 +282,11 @@ namespace vk
ensure(src->size() >= allocation_end); // "Out of memory (compute heap). Lower your resolution scale setting."
const auto data_offset = u32(region.bufferOffset);
const auto z_offset = utils::align<u32>(data_offset + packed_length, 256);
const auto s_offset = utils::align<u32>(z_offset + in_depth_size, 256);
const auto z_offset = rx::alignUp<u32>(data_offset + packed_length, 256);
const auto s_offset = rx::alignUp<u32>(z_offset + in_depth_size, 256);
// Zero out the stencil block
VK_GET_SYMBOL(vkCmdFillBuffer)(cmd, src->value, s_offset, utils::align(in_stencil_size, 4), 0);
VK_GET_SYMBOL(vkCmdFillBuffer)(cmd, src->value, s_offset, rx::alignUp(in_stencil_size, 4), 0);
vk::insert_buffer_memory_barrier(cmd, src->value, s_offset, in_stencil_size,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
@ -848,7 +849,7 @@ namespace vk
const auto src_offset = section.bufferOffset;
// Align output to 128-byte boundary to keep some drivers happy
dst_offset = utils::align(dst_offset, 128);
dst_offset = rx::alignUp(dst_offset, 128);
u32 data_length = 0;
for (unsigned i = 0, j = packet.first; i < packet.second; ++i, ++j)
@ -1124,7 +1125,7 @@ namespace vk
if (layout.level == 0)
{
// Align mip0 on a 128-byte boundary
scratch_offset = utils::align(scratch_offset, 128);
scratch_offset = rx::alignUp(scratch_offset, 128);
}
// Copy from upload heap to scratch mem
@ -1254,7 +1255,7 @@ namespace vk
{
// Calculate the true length of the usable memory section
const auto available_tile_size = tiled_region.tile->size - (range.start - tiled_region.base_address);
const auto max_content_size = tiled_region.tile->pitch * utils::align<u32>(height, 64);
const auto max_content_size = tiled_region.tile->pitch * rx::alignUp<u32>(height, 64);
const auto section_length = std::min(max_content_size, available_tile_size);
// Sync the DMA layer

View file

@ -4,7 +4,7 @@
#include "VKCompute.h"
#include "VKAsyncScheduler.h"
#include "util/asm.hpp"
#include "rx/asm.hpp"
namespace vk
{
@ -450,7 +450,7 @@ namespace vk
// Dimensions were given in 'dst' space. Work out the real source coordinates
const auto src_bpp = vk::get_format_texel_width(section.src->format());
src_x = (src_x * dst_bpp) / src_bpp;
src_w = utils::aligned_div<u16>(src_w * dst_bpp, src_bpp);
src_w = rx::aligned_div<u16>(src_w * dst_bpp, src_bpp);
transform &= ~(rsx::surface_transform::coordinate_transform);
}

View file

@ -4,11 +4,14 @@
#include "VKRenderTargets.h"
#include "VKResourceManager.h"
#include "VKRenderPass.h"
#include "VKGSRenderTypes.hpp"
#include "vkutils/image_helpers.h"
#include "../Common/texture_cache.h"
#include "../Common/tiled_dma_copy.hpp"
#include "rx/align.hpp"
#include <memory>
#include <vector>
@ -289,7 +292,7 @@ namespace vk
if (tiled_region)
{
const auto available_tile_size = tiled_region.tile->size - (range.start - tiled_region.base_address);
const auto max_content_size = tiled_region.tile->pitch * utils::align(height, 64);
const auto max_content_size = tiled_region.tile->pitch * rx::alignUp(height, 64);
flush_length = std::min(max_content_size, available_tile_size);
}

View file

@ -117,8 +117,8 @@ namespace vk
configure(cmd);
constexpr auto wg_size = 16;
const auto invocations_x = utils::aligned_div(output_size.width, wg_size);
const auto invocations_y = utils::aligned_div(output_size.height, wg_size);
const auto invocations_x = rx::aligned_div(output_size.width, wg_size);
const auto invocations_y = rx::aligned_div(output_size.height, wg_size);
ensure(invocations_x == (output_size.width + (wg_size - 1)) / wg_size);
ensure(invocations_y == (output_size.height + (wg_size - 1)) / wg_size);

View file

@ -6,6 +6,7 @@
#include "../VKHelpers.h"
#include "../VKResourceManager.h"
#include "Emu/IdManager.h"
#include "rx/align.hpp"
#include <memory>
@ -60,7 +61,7 @@ namespace vk
// Create new heap. All sizes are aligned up by 64M, up to 1GiB
const usz size_limit = 1024 * 0x100000;
usz aligned_new_size = utils::align(m_size + size, 64 * 0x100000);
usz aligned_new_size = rx::alignUp(m_size + size, 64 * 0x100000);
if (aligned_new_size >= size_limit)
{

View file

@ -4,7 +4,8 @@
#include "../VKResourceManager.h"
#include <util/asm.hpp>
#include <rx/align.hpp>
#include <rx/asm.hpp>
namespace vk
{
@ -123,8 +124,8 @@ namespace vk
{
auto create_texture = [&]()
{
u32 new_width = utils::align(requested_width, 256u);
u32 new_height = utils::align(requested_height, 256u);
u32 new_width = rx::alignUp(requested_width, 256u);
u32 new_height = rx::alignUp(requested_height, 256u);
return new vk::image(*g_render_device, g_render_device->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VK_IMAGE_TYPE_2D, format, new_width, new_height, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
@ -165,7 +166,7 @@ namespace vk
if (!scratch_buffer)
{
// Choose optimal size
const u64 alloc_size = utils::align(min_required_size, 0x100000);
const u64 alloc_size = rx::alignUp(min_required_size, 0x100000);
scratch_buffer = std::make_unique<vk::buffer>(*g_render_device, alloc_size,
g_render_device->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
@ -184,7 +185,7 @@ namespace vk
if (init_mem || zero_memory)
{
// Zero-initialize the allocated VRAM
const u64 zero_length = init_mem ? buf->size() : utils::align(min_required_size, 4);
const u64 zero_length = init_mem ? buf->size() : rx::alignUp(min_required_size, 4);
VK_GET_SYMBOL(vkCmdFillBuffer)(cmd, buf->value, 0, zero_length, 0);
insert_buffer_memory_barrier(cmd, buf->value, 0, zero_length,

View file

@ -9,7 +9,7 @@
#include "Emu/Cell/timers.hpp"
#include "util/sysinfo.hpp"
#include "util/asm.hpp"
#include "rx/asm.hpp"
namespace vk
{
@ -170,7 +170,7 @@ namespace vk
{
while (!flushed)
{
utils::pause();
rx::pause();
}
}
@ -553,7 +553,7 @@ namespace vk
switch (status)
{
case VK_NOT_READY:
utils::pause();
rx::pause();
continue;
default:
die_with_error(status);
@ -592,7 +592,7 @@ namespace vk
if (timeout)
{
const auto now = freq ? utils::get_tsc() : get_system_time();
const auto now = freq ? rx::get_tsc() : get_system_time();
if (!start)
{
@ -608,7 +608,7 @@ namespace vk
}
}
utils::pause();
rx::pause();
}
}
} // namespace vk
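
rx::get_tsc is assumed to be a raw timestamp-counter read, as utils::get_tsc was, which is why the wait loop above only uses it when a TSC frequency is known and otherwise falls back to get_system_time(). A sketch:

// Sketch: assumed implementation of rx::get_tsc (raw timestamp-counter read).
#include <cstdint>

#if defined(__x86_64__)
#include <x86intrin.h>
inline std::uint64_t get_tsc() { return __rdtsc(); }
#elif defined(__aarch64__)
inline std::uint64_t get_tsc()
{
    std::uint64_t cnt;
    __asm__ volatile("mrs %0, cntvct_el0" : "=r"(cnt)); // generic-timer count as a TSC stand-in
    return cnt;
}
#endif

// Raw ticks are only comparable against a deadline when the tick frequency is
// known, hence the `freq ? rx::get_tsc() : get_system_time()` pattern above.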

Some files were not shown because too many files have changed in this diff.