mirror of
https://github.com/RPCSX/rpcsx.git
synced 2025-12-06 07:12:14 +01:00
moved tsc and asm utilities to rx
This commit is contained in:
parent
bd215fab92
commit
640df36c48
|
|
@ -41,12 +41,13 @@
|
|||
#include "rpcs3_version.h"
|
||||
#include "rpcsx/fw/ps3/cellMsgDialog.h"
|
||||
#include "rpcsx/fw/ps3/cellSysutil.h"
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/debug.hpp"
|
||||
#include "util/File.h"
|
||||
#include "util/JIT.h"
|
||||
#include "util/StrFmt.h"
|
||||
#include "util/StrUtil.h"
|
||||
#include "util/Thread.h"
|
||||
#include "util/asm.hpp"
|
||||
#include "util/console.h"
|
||||
#include "util/fixed_typemap.hpp"
|
||||
#include "util/logs.hpp"
|
||||
|
|
@ -241,7 +242,7 @@ void jit_announce(uptr, usz, std::string_view);
|
|||
__android_log_write(ANDROID_LOG_FATAL, "RPCS3", buf.c_str());
|
||||
|
||||
jit_announce(0, 0, "");
|
||||
utils::trap();
|
||||
rx::breakpoint();
|
||||
std::abort();
|
||||
std::terminate();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,10 +53,10 @@
|
|||
#include "sys_usbd.h"
|
||||
#include "sys_vm.h"
|
||||
|
||||
#include "rx/tsc.hpp"
|
||||
#include "util/atomic_bit_set.h"
|
||||
#include "util/init_mutex.hpp"
|
||||
#include "util/sysinfo.hpp"
|
||||
#include "util/tsc.hpp"
|
||||
#include <algorithm>
|
||||
#include <deque>
|
||||
#include <optional>
|
||||
|
|
@ -2138,7 +2138,7 @@ void lv2_obj::schedule_all(u64 current_time) {
|
|||
}
|
||||
|
||||
if (const u64 freq = s_yield_frequency) {
|
||||
const u64 tsc = utils::get_tsc();
|
||||
const u64 tsc = rx::get_tsc();
|
||||
const u64 last_tsc = s_last_yield_tsc;
|
||||
|
||||
if (tsc >= last_tsc && tsc <= s_max_allowed_yield_tsc &&
|
||||
|
|
@ -2297,7 +2297,7 @@ mwaitx_func static void __mwaitx(u32 cycles, u32 cstate) {
|
|||
// First bit indicates cstate, 0x0 for C.02 state (lower power) or 0x1 for C.01
|
||||
// state (higher power)
|
||||
waitpkg_func static void __tpause(u32 cycles, u32 cstate) {
|
||||
const u64 tsc = utils::get_tsc() + cycles;
|
||||
const u64 tsc = rx::get_tsc() + cycles;
|
||||
_tpause(cstate, tsc);
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
|
||||
#include "sys_cond.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
LOG_CHANNEL(sys_cond);
|
||||
|
||||
|
|
@ -454,7 +454,7 @@ error_code sys_cond_wait(ppu_thread &ppu, u32 cond_id, u64 timeout) {
|
|||
}
|
||||
|
||||
for (usz i = 0; cpu_flag::signal - ppu.state && i < 50; i++) {
|
||||
busy_wait(500);
|
||||
rx::busy_wait(500);
|
||||
}
|
||||
|
||||
if (ppu.state & cpu_flag::signal) {
|
||||
|
|
|
|||
|
|
@ -8,7 +8,8 @@
|
|||
#include "Emu/Memory/vm_locking.h"
|
||||
#include "rpcsx/fw/ps3/sys_lv2dbg.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
void ppu_register_function_at(u32 addr, u32 size,
|
||||
ppu_intrp_func_t ptr = nullptr);
|
||||
|
|
@ -92,7 +93,7 @@ error_code sys_dbg_write_process_memory(s32 pid, u32 address, u32 size,
|
|||
|
||||
for (u32 i = address, exec_update_size = 0; i < end;) {
|
||||
const u32 op_size =
|
||||
std::min<u32>(utils::align<u32>(i + 1, 0x10000), end) - i;
|
||||
std::min<u32>(rx::alignUp<u32>(i + 1, 0x10000), end) - i;
|
||||
|
||||
const bool is_exec =
|
||||
vm::check_addr(i, vm::page_executable | vm::page_readable);
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@
|
|||
#include "Emu/Cell/SPUThread.h"
|
||||
#include "sys_process.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
LOG_CHANNEL(sys_event);
|
||||
|
||||
|
|
@ -468,7 +468,7 @@ error_code sys_event_queue_receive(ppu_thread &ppu, u32 equeue_id,
|
|||
}
|
||||
|
||||
for (usz i = 0; cpu_flag::signal - ppu.state && i < 50; i++) {
|
||||
busy_wait(500);
|
||||
rx::busy_wait(500);
|
||||
}
|
||||
|
||||
if (ppu.state & cpu_flag::signal) {
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
#include "Emu/Cell/ErrorCodes.h"
|
||||
#include "Emu/Cell/PPUThread.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
LOG_CHANNEL(sys_event_flag);
|
||||
|
||||
|
|
@ -195,7 +195,7 @@ error_code sys_event_flag_wait(ppu_thread &ppu, u32 id, u64 bitptn, u32 mode,
|
|||
}
|
||||
|
||||
for (usz i = 0; cpu_flag::signal - ppu.state && i < 50; i++) {
|
||||
busy_wait(500);
|
||||
rx::busy_wait(500);
|
||||
}
|
||||
|
||||
if (ppu.state & cpu_flag::signal) {
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
#include "stdafx.h"
|
||||
|
||||
#include "rx/asm.hpp"
|
||||
#include "sys_fs.h"
|
||||
#include "sys_memory.h"
|
||||
#include "sys_sync.h"
|
||||
#include "util/asm.hpp"
|
||||
|
||||
#include "Crypto/unedat.h"
|
||||
#include "Emu/Cell/PPUThread.h"
|
||||
|
|
@ -618,7 +618,7 @@ struct lv2_file::file_view : fs::file_base {
|
|||
fs::stat_t stat = m_file->file.get_stat();
|
||||
|
||||
// TODO: Check this on realhw
|
||||
// stat.size = utils::sub_saturate<u64>(stat.size, m_off);
|
||||
// stat.size = rx::sub_saturate<u64>(stat.size, m_off);
|
||||
|
||||
stat.is_writable = false;
|
||||
return stat;
|
||||
|
|
@ -655,7 +655,7 @@ struct lv2_file::file_view : fs::file_base {
|
|||
}
|
||||
|
||||
u64 size() override {
|
||||
return utils::sub_saturate<u64>(m_file->file.size(), m_off);
|
||||
return rx::sub_saturate<u64>(m_file->file.size(), m_off);
|
||||
}
|
||||
|
||||
fs::file_id get_id() override {
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@
|
|||
#include "Emu/Cell/PPUThread.h"
|
||||
#include "sys_lwmutex.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
LOG_CHANNEL(sys_lwcond);
|
||||
|
||||
|
|
@ -490,7 +490,7 @@ error_code _sys_lwcond_queue_wait(ppu_thread &ppu, u32 lwcond_id,
|
|||
}
|
||||
|
||||
for (usz i = 0; cpu_flag::signal - ppu.state && i < 50; i++) {
|
||||
busy_wait(500);
|
||||
rx::busy_wait(500);
|
||||
}
|
||||
|
||||
if (ppu.state & cpu_flag::signal) {
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
#include "Emu/Cell/ErrorCodes.h"
|
||||
#include "Emu/Cell/PPUThread.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
LOG_CHANNEL(sys_lwmutex);
|
||||
|
||||
|
|
@ -194,7 +194,7 @@ error_code _sys_lwmutex_lock(ppu_thread &ppu, u32 lwmutex_id, u64 timeout) {
|
|||
}
|
||||
|
||||
for (usz i = 0; cpu_flag::signal - ppu.state && i < 50; i++) {
|
||||
busy_wait(500);
|
||||
rx::busy_wait(500);
|
||||
}
|
||||
|
||||
if (ppu.state & cpu_flag::signal) {
|
||||
|
|
|
|||
|
|
@ -8,7 +8,8 @@
|
|||
#include "Emu/IdManager.h"
|
||||
#include "Emu/Memory/vm_locking.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
LOG_CHANNEL(sys_memory);
|
||||
|
||||
|
|
@ -75,9 +76,9 @@ struct sys_memory_address_table {
|
|||
};
|
||||
|
||||
std::shared_ptr<vm::block_t> reserve_map(u32 alloc_size, u32 align) {
|
||||
return vm::reserve_map(
|
||||
align == 0x10000 ? vm::user64k : vm::user1m, 0,
|
||||
align == 0x10000 ? 0x20000000 : utils::align(alloc_size, 0x10000000),
|
||||
return vm::reserve_map(align == 0x10000 ? vm::user64k : vm::user1m, 0,
|
||||
align == 0x10000 ? 0x20000000
|
||||
: rx::alignUp(alloc_size, 0x10000000),
|
||||
align == 0x10000 ? (vm::page_size_64k | vm::bf0_0x1)
|
||||
: (vm::page_size_1m | vm::bf0_0x1));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
#include "Emu/Cell/ErrorCodes.h"
|
||||
#include "Emu/Cell/PPUThread.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
#include "sys_mutex.h"
|
||||
|
||||
|
|
@ -147,7 +147,7 @@ error_code sys_mutex_lock(ppu_thread &ppu, u32 mutex_id, u64 timeout) {
|
|||
// Try busy waiting a bit if advantageous
|
||||
for (u32 i = 0, end = lv2_obj::has_ppus_in_running_state() ? 3 : 10;
|
||||
id_manager::g_mutex.is_lockable() && i < end; i++) {
|
||||
busy_wait(300);
|
||||
rx::busy_wait(300);
|
||||
result = mutex.try_lock(ppu);
|
||||
|
||||
if (!result ||
|
||||
|
|
@ -212,7 +212,7 @@ error_code sys_mutex_lock(ppu_thread &ppu, u32 mutex_id, u64 timeout) {
|
|||
}
|
||||
|
||||
for (usz i = 0; cpu_flag::signal - ppu.state && i < 40; i++) {
|
||||
busy_wait(500);
|
||||
rx::busy_wait(500);
|
||||
}
|
||||
|
||||
if (ppu.state & cpu_flag::signal) {
|
||||
|
|
|
|||
|
|
@ -15,7 +15,8 @@
|
|||
#include "sys_mmapper.h"
|
||||
#include "sys_process.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
#include <thread>
|
||||
|
||||
|
|
@ -148,7 +149,7 @@ void _sys_ppu_thread_exit(ppu_thread &ppu, u64 errorcode) {
|
|||
// Need to wait until the current writers finish
|
||||
if (ppu.state & cpu_flag::memory) {
|
||||
for (; writer_mask; writer_mask &= vm::g_range_lock_bits[1]) {
|
||||
busy_wait(200);
|
||||
rx::busy_wait(200);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -468,7 +469,7 @@ error_code _sys_ppu_thread_create(ppu_thread &ppu, vm::ptr<u64> thread_id,
|
|||
const u32 tls = param->tls;
|
||||
|
||||
// Compute actual stack size and allocate
|
||||
const u32 stack_size = utils::align<u32>(std::max<u32>(_stacksz, 4096), 4096);
|
||||
const u32 stack_size = rx::alignUp<u32>(std::max<u32>(_stacksz, 4096), 4096);
|
||||
|
||||
auto &dct = g_fxo->get<lv2_memory_container>();
|
||||
|
||||
|
|
|
|||
|
|
@ -10,9 +10,9 @@
|
|||
#include "Emu/RSX/Core/RSXReservationLock.hpp"
|
||||
#include "Emu/RSX/RSXThread.h"
|
||||
#include "Emu/System.h"
|
||||
#include "rx/asm.hpp"
|
||||
#include "sys_event.h"
|
||||
#include "sys_vm.h"
|
||||
#include "util/asm.hpp"
|
||||
|
||||
LOG_CHANNEL(sys_rsx);
|
||||
|
||||
|
|
@ -46,7 +46,7 @@ static void set_rsx_dmactl(rsx::thread *render, u64 get_put) {
|
|||
}
|
||||
}
|
||||
|
||||
utils::pause();
|
||||
rx::pause();
|
||||
}
|
||||
|
||||
// Schedule FIFO interrupt to deal with this immediately
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
#include "Emu/Cell/ErrorCodes.h"
|
||||
#include "Emu/Cell/PPUThread.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
LOG_CHANNEL(sys_rwlock);
|
||||
|
||||
|
|
@ -151,7 +151,7 @@ error_code sys_rwlock_rlock(ppu_thread &ppu, u32 rw_lock_id, u64 timeout) {
|
|||
}
|
||||
|
||||
for (usz i = 0; cpu_flag::signal - ppu.state && i < 50; i++) {
|
||||
busy_wait(500);
|
||||
rx::busy_wait(500);
|
||||
}
|
||||
|
||||
if (ppu.state & cpu_flag::signal) {
|
||||
|
|
@ -355,7 +355,7 @@ error_code sys_rwlock_wlock(ppu_thread &ppu, u32 rw_lock_id, u64 timeout) {
|
|||
}
|
||||
|
||||
for (usz i = 0; cpu_flag::signal - ppu.state && i < 50; i++) {
|
||||
busy_wait(500);
|
||||
rx::busy_wait(500);
|
||||
}
|
||||
|
||||
if (ppu.state & cpu_flag::signal) {
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
#include "Emu/Cell/ErrorCodes.h"
|
||||
#include "Emu/Cell/PPUThread.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
LOG_CHANNEL(sys_semaphore);
|
||||
|
||||
|
|
@ -167,7 +167,7 @@ error_code sys_semaphore_wait(ppu_thread &ppu, u32 sem_id, u64 timeout) {
|
|||
}
|
||||
|
||||
for (usz i = 0; cpu_flag::signal - ppu.state && i < 50; i++) {
|
||||
busy_wait(500);
|
||||
rx::busy_wait(500);
|
||||
}
|
||||
|
||||
if (ppu.state & cpu_flag::signal) {
|
||||
|
|
|
|||
|
|
@ -21,7 +21,8 @@
|
|||
#include "sys_mmapper.h"
|
||||
#include "sys_process.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
LOG_CHANNEL(sys_spu);
|
||||
|
||||
|
|
@ -129,7 +130,7 @@ void sys_spu_image::load(const fs::file &stream) {
|
|||
this->nsegs = 0;
|
||||
this->segs = vm::null;
|
||||
|
||||
vm::page_protect(segs.addr(), utils::align(mem_size, 4096), 0, 0,
|
||||
vm::page_protect(segs.addr(), rx::alignUp(mem_size, 4096), 0, 0,
|
||||
vm::page_writable);
|
||||
}
|
||||
|
||||
|
|
@ -196,7 +197,7 @@ void sys_spu_image::deploy(u8 *loc, std::span<const sys_spu_segment> segs,
|
|||
}
|
||||
|
||||
auto mem_translate = [loc](u32 addr, u32 size) {
|
||||
return utils::add_saturate<u32>(addr, size) <= SPU_LS_SIZE ? loc + addr
|
||||
return rx::add_saturate<u32>(addr, size) <= SPU_LS_SIZE ? loc + addr
|
||||
: nullptr;
|
||||
};
|
||||
|
||||
|
|
@ -1259,7 +1260,7 @@ error_code sys_spu_thread_group_terminate(ppu_thread &ppu, u32 id, s32 value) {
|
|||
// termination
|
||||
auto short_sleep = [](ppu_thread &ppu) {
|
||||
lv2_obj::sleep(ppu);
|
||||
busy_wait(3000);
|
||||
rx::busy_wait(3000);
|
||||
ppu.check_state();
|
||||
ppu.state += cpu_flag::wait;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -5,8 +5,8 @@
|
|||
#include "Emu/Cell/ErrorCodes.h"
|
||||
#include "Emu/Cell/timers.hpp"
|
||||
#include "Emu/system_config.h"
|
||||
#include "rx/tsc.hpp"
|
||||
#include "sys_process.h"
|
||||
#include "util/tsc.hpp"
|
||||
|
||||
#include "util/sysinfo.hpp"
|
||||
|
||||
|
|
@ -14,7 +14,7 @@ u64 g_timebase_offs{};
|
|||
static u64 systemtime_offset;
|
||||
|
||||
#ifndef __linux__
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
|
|
@ -151,7 +151,7 @@ u64 convert_to_timebased_time(u64 time) {
|
|||
|
||||
u64 get_timebased_time() {
|
||||
if (u64 freq = utils::get_tsc_freq()) {
|
||||
const u64 tsc = utils::get_tsc();
|
||||
const u64 tsc = rx::get_tsc();
|
||||
|
||||
#if _MSC_VER
|
||||
const u64 result =
|
||||
|
|
@ -218,7 +218,7 @@ void initialize_timebased_time(u64 timebased_init, bool reset) {
|
|||
// Returns some relative time in microseconds, don't change this fact
|
||||
u64 get_system_time() {
|
||||
if (u64 freq = utils::get_tsc_freq()) {
|
||||
const u64 tsc = utils::get_tsc();
|
||||
const u64 tsc = rx::get_tsc();
|
||||
|
||||
#if _MSC_VER
|
||||
const u64 result = static_cast<u64>(u128_from_mul(tsc, 1000000ull) / freq);
|
||||
|
|
@ -358,7 +358,7 @@ error_code sys_time_get_current_time(vm::ptr<s64> sec, vm::ptr<s64> nsec) {
|
|||
|
||||
// Get time difference in nanoseconds (using 128 bit accumulator)
|
||||
const u64 diff_sl = diff_base * 1000000000ull;
|
||||
const u64 diff_sh = utils::umulh64(diff_base, 1000000000ull);
|
||||
const u64 diff_sh = rx::umulh64(diff_base, 1000000000ull);
|
||||
const u64 diff = utils::udiv128(diff_sh, diff_sl, s_time_aux_info.perf_freq);
|
||||
|
||||
// get time since Epoch in nanoseconds
|
||||
|
|
|
|||
|
|
@ -9,9 +9,9 @@
|
|||
|
||||
#include "Emu/System.h"
|
||||
#include "Emu/system_config.h"
|
||||
#include "rx/asm.hpp"
|
||||
#include "sys_event.h"
|
||||
#include "sys_process.h"
|
||||
#include "util/asm.hpp"
|
||||
|
||||
#include <deque>
|
||||
#include <thread>
|
||||
|
|
@ -77,9 +77,9 @@ u64 lv2_timer::check_unlocked(u64 _now) noexcept {
|
|||
|
||||
if (period) {
|
||||
// Set next expiration time and check again
|
||||
const u64 expire0 = utils::add_saturate<u64>(next, period);
|
||||
const u64 expire0 = rx::add_saturate<u64>(next, period);
|
||||
expire.release(expire0);
|
||||
return utils::sub_saturate<u64>(expire0, _now);
|
||||
return rx::sub_saturate<u64>(expire0, _now);
|
||||
}
|
||||
|
||||
// Stop after oneshot
|
||||
|
|
@ -265,11 +265,11 @@ error_code _sys_timer_start(ppu_thread &ppu, u32 timer_id, u64 base_time,
|
|||
const u64 expire =
|
||||
period == 0 ? base_time : // oneshot
|
||||
base_time == 0
|
||||
? utils::add_saturate(start_time, period)
|
||||
? rx::add_saturate(start_time, period)
|
||||
:
|
||||
// periodic timer with no base (using start time as base)
|
||||
start_time < utils::add_saturate(base_time, period)
|
||||
? utils::add_saturate(base_time, period)
|
||||
start_time < rx::add_saturate(base_time, period)
|
||||
? rx::add_saturate(base_time, period)
|
||||
:
|
||||
// periodic with base time over start time
|
||||
[&]() -> u64 // periodic timer base before start time (align to
|
||||
|
|
@ -282,10 +282,10 @@ error_code _sys_timer_start(ppu_thread &ppu, u32 timer_id, u64 base_time,
|
|||
// }
|
||||
// while (base_time < start_time);
|
||||
|
||||
const u64 start_time_with_base_time_reminder = utils::add_saturate(
|
||||
const u64 start_time_with_base_time_reminder = rx::add_saturate(
|
||||
start_time - start_time % period, base_time % period);
|
||||
|
||||
return utils::add_saturate(
|
||||
return rx::add_saturate(
|
||||
start_time_with_base_time_reminder,
|
||||
start_time_with_base_time_reminder < start_time ? period : 0);
|
||||
}();
|
||||
|
|
@ -428,10 +428,10 @@ error_code sys_timer_usleep(ppu_thread &ppu, u64 sleep_time) {
|
|||
|
||||
// Over/underflow checks
|
||||
if (add_time >= 0) {
|
||||
sleep_time = utils::add_saturate<u64>(sleep_time, add_time);
|
||||
sleep_time = rx::add_saturate<u64>(sleep_time, add_time);
|
||||
} else {
|
||||
sleep_time =
|
||||
std::max<u64>(1, utils::sub_saturate<u64>(sleep_time, -add_time));
|
||||
std::max<u64>(1, rx::sub_saturate<u64>(sleep_time, -add_time));
|
||||
}
|
||||
|
||||
lv2_obj::sleep(ppu, g_cfg.core.sleep_timers_accuracy <
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
#include "stdafx.h"
|
||||
|
||||
#include "rx/align.hpp"
|
||||
#include "Emu/perf_meter.hpp"
|
||||
#include "Emu/Cell/PPUModule.h"
|
||||
#include "cellos/sys_sync.h"
|
||||
|
|
@ -9,7 +11,7 @@
|
|||
#include "cellAdec.h"
|
||||
|
||||
#include "util/simd.hpp"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
LOG_CHANNEL(cellAdec);
|
||||
|
||||
|
|
@ -415,7 +417,7 @@ void LpcmDecContext::exec(ppu_thread& ppu)
|
|||
be_t<f32>* const _output = std::assume_aligned<0x80>(output.get_ptr());
|
||||
s64 output_size = cmd.au_size;
|
||||
|
||||
s32 sample_num = static_cast<s32>(utils::align(+lpcm_param->audioPayloadSize, 0x10));
|
||||
s32 sample_num = static_cast<s32>(rx::alignUp(+lpcm_param->audioPayloadSize, 0x10));
|
||||
s32 channel_num = 0;
|
||||
|
||||
if (!dvd_packing)
|
||||
|
|
@ -860,7 +862,7 @@ error_code _CellAdecCoreOpGetMemSize_lpcm(vm::ptr<CellAdecAttr> attr)
|
|||
cellAdec.notice("_CellAdecCoreOpGetMemSize_lpcm(attr=*0x%x)", attr);
|
||||
|
||||
constexpr u32 mem_size =
|
||||
utils::align(static_cast<u32>(sizeof(LpcmDecContext)), 0x80) + utils::align(static_cast<u32>(sizeof(CellAdecParamLpcm)), 0x80) + 0x100 // Command data for Spurs task
|
||||
rx::alignUp(static_cast<u32>(sizeof(LpcmDecContext)), 0x80) + rx::alignUp(static_cast<u32>(sizeof(CellAdecParamLpcm)), 0x80) + 0x100 // Command data for Spurs task
|
||||
+ LPCM_DEC_OUTPUT_BUFFER_SIZE + 0x2900 // sizeof(CellSpurs) + sizeof(CellSpursTaskset)
|
||||
+ 0x3b400 // Spurs context
|
||||
+ 0x300 // (sizeof(CellSpursQueue) + 0x80 + queue buffer) * 2
|
||||
|
|
@ -883,7 +885,7 @@ error_code _CellAdecCoreOpOpenExt_lpcm(ppu_thread& ppu, vm::ptr<LpcmDecContext>
|
|||
ensure(handle.aligned(0x80)); // LLE doesn't check the alignment or aligns the address itself
|
||||
ensure(!!notifyAuDone && !!notifyAuDoneArg && !!notifyPcmOut && !!notifyPcmOutArg && !!notifyError && !!notifyErrorArg && !!notifySeqDone && !!notifySeqDoneArg); // These should always be set
|
||||
|
||||
const u32 end_of_context_addr = handle.addr() + utils::align(static_cast<u32>(sizeof(LpcmDecContext)), 0x80);
|
||||
const u32 end_of_context_addr = handle.addr() + rx::alignUp(static_cast<u32>(sizeof(LpcmDecContext)), 0x80);
|
||||
|
||||
handle->cmd_queue.front = 0;
|
||||
handle->cmd_queue.back = 0;
|
||||
|
|
@ -1587,10 +1589,10 @@ error_code adecOpen(ppu_thread& ppu, vm::ptr<CellAdecType> type, vm::cptr<CellAd
|
|||
const s32 pcm_handle_num = core_ops->getPcmHandleNum(ppu);
|
||||
const u32 bitstream_info_size = core_ops->getBsiInfoSize(ppu);
|
||||
|
||||
const auto _this = vm::ptr<AdecContext>::make(utils::align(+res->startAddr, 0x80));
|
||||
const auto _this = vm::ptr<AdecContext>::make(rx::alignUp(+res->startAddr, 0x80));
|
||||
const auto frames = vm::ptr<AdecFrame>::make(_this.addr() + sizeof(AdecContext));
|
||||
const u32 bitstream_infos_addr = frames.addr() + pcm_handle_num * sizeof(AdecFrame);
|
||||
const auto core_handle = vm::ptr<void>::make(utils::align(bitstream_infos_addr + bitstream_info_size * pcm_handle_num, 0x80));
|
||||
const auto core_handle = vm::ptr<void>::make(rx::alignUp(bitstream_infos_addr + bitstream_info_size * pcm_handle_num, 0x80));
|
||||
|
||||
if (type->audioCodecType == CELL_ADEC_TYPE_LPCM_DVD)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1,11 +1,13 @@
|
|||
#include "stdafx.h"
|
||||
|
||||
#include "rx/align.hpp"
|
||||
#include "Emu/perf_meter.hpp"
|
||||
#include "Emu/Cell/PPUModule.h"
|
||||
#include "cellos/sys_sync.h"
|
||||
#include "cellos/sys_ppu_thread.h"
|
||||
#include "Emu/savestate_utils.hpp"
|
||||
#include "sysPrxForUser.h"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "util/media_utils.h"
|
||||
|
||||
#include "cellAtracXdec.h"
|
||||
|
|
@ -182,7 +184,7 @@ error_code AtracXdecDecoder::set_config_info(u32 sampling_freq, u32 ch_config_id
|
|||
this->sampling_freq = sampling_freq;
|
||||
this->ch_config_idx = ch_config_idx;
|
||||
this->nbytes = nbytes;
|
||||
this->nbytes_128_aligned = utils::align(nbytes, 0x80);
|
||||
this->nbytes_128_aligned = rx::alignUp(nbytes, 0x80);
|
||||
this->nch_in = ch_config_idx <= 4 ? ch_config_idx : ch_config_idx + 1;
|
||||
|
||||
if (ch_config_idx > 7u)
|
||||
|
|
@ -741,7 +743,7 @@ error_code _CellAdecCoreOpGetMemSize_atracx(vm::ptr<CellAdecAttr> attr)
|
|||
constexpr u32 mem_size =
|
||||
sizeof(AtracXdecContext) + 0x7f + ATXDEC_SPURS_STRUCTS_SIZE + 0x1d8 + atracXdecGetSpursMemSize(nch_in) + ATXDEC_SAMPLES_PER_FRAME * sizeof(f32) * nch_in;
|
||||
|
||||
attr->workMemSize = utils::align(mem_size, 0x80);
|
||||
attr->workMemSize = rx::alignUp(mem_size, 0x80);
|
||||
|
||||
return CELL_OK;
|
||||
}
|
||||
|
|
@ -765,7 +767,7 @@ error_code _CellAdecCoreOpOpenExt_atracx(ppu_thread& ppu, vm::ptr<AtracXdecConte
|
|||
ensure(!!notifyAuDone && !!notifyAuDoneArg && !!notifyPcmOut && !!notifyPcmOutArg && !!notifyError && !!notifyErrorArg && !!notifySeqDone && !!notifySeqDoneArg); // These should always be set by cellAdec
|
||||
|
||||
write_to_ptr(handle.get_ptr(), AtracXdecContext(notifyAuDone, notifyAuDoneArg, notifyPcmOut, notifyPcmOutArg, notifyError, notifyErrorArg, notifySeqDone, notifySeqDoneArg,
|
||||
vm::bptr<u8>::make(handle.addr() + utils::align(static_cast<u32>(sizeof(AtracXdecContext)), 0x80) + ATXDEC_SPURS_STRUCTS_SIZE)));
|
||||
vm::bptr<u8>::make(handle.addr() + rx::alignUp(static_cast<u32>(sizeof(AtracXdecContext)), 0x80) + ATXDEC_SPURS_STRUCTS_SIZE)));
|
||||
|
||||
const vm::var<sys_mutex_attribute_t> mutex_attr{{SYS_SYNC_PRIORITY, SYS_SYNC_NOT_RECURSIVE, SYS_SYNC_NOT_PROCESS_SHARED, SYS_SYNC_NOT_ADAPTIVE, 0, 0, 0, {"_atd001"_u64}}};
|
||||
const vm::var<sys_cond_attribute_t> cond_attr{{SYS_SYNC_NOT_PROCESS_SHARED, 0, 0, {"_atd002"_u64}}};
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
#include "stdafx.h"
|
||||
|
||||
#include "rx/align.hpp"
|
||||
#include "Emu/System.h"
|
||||
#include "Emu/IdManager.h"
|
||||
#include "Emu/Cell/PPUModule.h"
|
||||
|
|
@ -7,7 +9,7 @@
|
|||
#include "cellPamf.h"
|
||||
#include "cellDmux.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
#include <thread>
|
||||
|
||||
|
|
@ -765,7 +767,7 @@ PesHeader::PesHeader(DemuxerStream& stream)
|
|||
}
|
||||
|
||||
ElementaryStream::ElementaryStream(Demuxer* dmux, u32 addr, u32 size, u32 fidMajor, u32 fidMinor, u32 sup1, u32 sup2, vm::ptr<CellDmuxCbEsMsg> cbFunc, u32 cbArg, u32 spec)
|
||||
: put(utils::align(addr, 128)), dmux(dmux), memAddr(utils::align(addr, 128)), memSize(size - (addr - memAddr)), fidMajor(fidMajor), fidMinor(fidMinor), sup1(sup1), sup2(sup2), cbFunc(cbFunc), cbArg(cbArg), spec(spec)
|
||||
: put(rx::alignUp(addr, 128)), dmux(dmux), memAddr(rx::alignUp(addr, 128)), memSize(size - (addr - memAddr)), fidMajor(fidMajor), fidMinor(fidMinor), sup1(sup1), sup2(sup2), cbFunc(cbFunc), cbArg(cbArg), spec(spec)
|
||||
{
|
||||
}
|
||||
|
||||
|
|
@ -849,7 +851,7 @@ void ElementaryStream::push_au(u32 size, u64 dts, u64 pts, u64 userdata, bool ra
|
|||
|
||||
addr = put;
|
||||
|
||||
put = utils::align(put + 128 + size, 128);
|
||||
put = rx::alignUp(put + 128 + size, 128);
|
||||
|
||||
put_count++;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@
|
|||
#include "Crypto/utils.h"
|
||||
#include "Loader/PSF.h"
|
||||
#include "util/StrUtil.h"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "util/init_mutex.hpp"
|
||||
|
||||
#include <span>
|
||||
|
|
@ -691,7 +691,7 @@ error_code cellHddGameGetSizeKB(ppu_thread& ppu, vm::ptr<u32> size)
|
|||
// This function is very slow by nature
|
||||
// TODO: Check if after first use the result is being cached so the sleep can
|
||||
// be reduced in this case
|
||||
lv2_sleep(utils::sub_saturate<u64>(dirsz == umax ? 2000 : 200000,
|
||||
lv2_sleep(rx::sub_saturate<u64>(dirsz == umax ? 2000 : 200000,
|
||||
get_guest_system_time() - start_sleep),
|
||||
&ppu);
|
||||
|
||||
|
|
@ -757,7 +757,7 @@ error_code cellGameDataGetSizeKB(ppu_thread& ppu, vm::ptr<u32> size)
|
|||
// This function is very slow by nature
|
||||
// TODO: Check if after first use the result is being cached so the sleep can
|
||||
// be reduced in this case
|
||||
lv2_sleep(utils::sub_saturate<u64>(dirsz == umax ? 2000 : 200000,
|
||||
lv2_sleep(rx::sub_saturate<u64>(dirsz == umax ? 2000 : 200000,
|
||||
get_guest_system_time() - start_sleep),
|
||||
&ppu);
|
||||
|
||||
|
|
@ -1127,7 +1127,7 @@ cellGameContentPermit(ppu_thread& ppu,
|
|||
}
|
||||
|
||||
// This function is very slow by nature
|
||||
lv2_sleep(utils::sub_saturate<u64>(
|
||||
lv2_sleep(rx::sub_saturate<u64>(
|
||||
!perm.temp.empty() || perm.can_create ? 200000 : 2000,
|
||||
get_guest_system_time() - start_sleep),
|
||||
&ppu);
|
||||
|
|
@ -1886,7 +1886,7 @@ error_code cellGameGetSizeKB(ppu_thread& ppu, vm::ptr<s32> size)
|
|||
// This function is very slow by nature
|
||||
// TODO: Check if after first use the result is being cached so the sleep can
|
||||
// be reduced in this case
|
||||
lv2_sleep(utils::sub_saturate<u64>(dirsz == umax ? 1000 : 200000,
|
||||
lv2_sleep(rx::sub_saturate<u64>(dirsz == umax ? 1000 : 200000,
|
||||
get_guest_system_time() - start_sleep),
|
||||
&ppu);
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
#include "stdafx.h"
|
||||
|
||||
#include "cellSysutil.h"
|
||||
#include "cellUserInfo.h"
|
||||
#include "Emu/Cell/PPUModule.h"
|
||||
|
|
@ -12,7 +14,6 @@
|
|||
#include "Emu/localized_string.h"
|
||||
#include "Emu/savestate_utils.hpp"
|
||||
#include "Emu/system_config.h"
|
||||
#include "stdafx.h"
|
||||
|
||||
#include "cellMsgDialog.h"
|
||||
#include "cellSaveData.h"
|
||||
|
|
@ -26,7 +27,9 @@
|
|||
#include <mutex>
|
||||
#include <span>
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "rx/types.hpp"
|
||||
|
||||
LOG_CHANNEL(cellSaveData);
|
||||
|
||||
|
|
@ -65,11 +68,11 @@ std::string SaveDataEntry::date() const
|
|||
std::string SaveDataEntry::data_size() const
|
||||
{
|
||||
std::string metric = "KB";
|
||||
u64 sz = utils::aligned_div(size, 1000);
|
||||
u64 sz = rx::aligned_div(size, 1000);
|
||||
if (sz > 1000)
|
||||
{
|
||||
metric = "MB";
|
||||
sz = utils::aligned_div(sz, 1000);
|
||||
sz = rx::aligned_div(sz, 1000);
|
||||
}
|
||||
return fmt::format("%lu %s", sz, metric);
|
||||
}
|
||||
|
|
@ -1286,7 +1289,7 @@ savedata_op(ppu_thread& ppu, u32 operation, u32 version, vm::cptr<char> dirName,
|
|||
{
|
||||
if (!file.is_directory)
|
||||
{
|
||||
size_bytes += utils::align(file.size, 1024);
|
||||
size_bytes += rx::alignUp(file.size, 1024);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1728,7 +1731,7 @@ savedata_op(ppu_thread& ppu, u32 operation, u32 version, vm::cptr<char> dirName,
|
|||
statGet->fileNum++;
|
||||
|
||||
size_bytes +=
|
||||
utils::align(entry.size, 1024); // firmware rounds this value up
|
||||
rx::alignUp(entry.size, 1024); // firmware rounds this value up
|
||||
|
||||
if (statGet->fileListNum >= setBuf->fileListMax)
|
||||
continue;
|
||||
|
|
@ -2345,7 +2348,7 @@ savedata_op(ppu_thread& ppu, u32 operation, u32 version, vm::cptr<char> dirName,
|
|||
final_blist = fmt::merge(blist, "/");
|
||||
psf::assign(
|
||||
psf, "RPCS3_BLIST",
|
||||
psf::string(utils::align(::size32(final_blist) + 1, 4), final_blist));
|
||||
psf::string(rx::alignUp(::size32(final_blist) + 1, 4), final_blist));
|
||||
|
||||
// Write all files in temporary directory
|
||||
auto& fsfo = all_files["PARAM.SFO"];
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@
|
|||
#include "sysPrxForUser.h"
|
||||
#include "cellSpurs.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "util/v128.hpp"
|
||||
#include "util/simd.hpp"
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
#include "Emu/Cell/SPURecompiler.h"
|
||||
#include "cellSpurs.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "util/v128.hpp"
|
||||
#include "util/simd.hpp"
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
#include "stdafx.h"
|
||||
|
||||
#include "Emu/Cell/PPUModule.h"
|
||||
#include "Emu/IdManager.h"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
#include "sceNp.h"
|
||||
#include "sceNp2.h"
|
||||
|
|
@ -946,7 +948,7 @@ error_code cellSysutilAvc2Load_shared(SceNpMatching2ContextId /*ctx_id*/, u32 /*
|
|||
window_count++;
|
||||
}
|
||||
|
||||
total_bitrate = utils::align<u32>(window_count * bitrate, 0x100000) + 0x100000;
|
||||
total_bitrate = rx::alignUp<u32>(window_count * bitrate, 0x100000) + 0x100000;
|
||||
}
|
||||
|
||||
settings.video_stream_sharing = init_param->video_param.video_stream_sharing;
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
#include "stdafx.h"
|
||||
|
||||
#include "Emu/Cell/PPUModule.h"
|
||||
#include "cellos/sys_ppu_thread.h"
|
||||
#include "cellos/sys_process.h"
|
||||
|
|
@ -5,7 +7,7 @@
|
|||
#include "Emu/IdManager.h"
|
||||
#include "Emu/perf_meter.hpp"
|
||||
#include "Emu/savestate_utils.hpp"
|
||||
#include "stdafx.h"
|
||||
#include "rx/align.hpp"
|
||||
#include "sysPrxForUser.h"
|
||||
#include "util/media_utils.h"
|
||||
|
||||
|
|
@ -32,7 +34,7 @@ extern "C"
|
|||
#include "cellPamf.h"
|
||||
#include "cellVdec.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "util/lockless.h"
|
||||
#include <cmath>
|
||||
#include <mutex>
|
||||
|
|
@ -1660,7 +1662,7 @@ error_code cellVdecGetPicItem(ppu_thread& ppu, u32 handle,
|
|||
const int buffer_size = av_image_get_buffer_size(
|
||||
vdec->ctx->pix_fmt, vdec->ctx->width, vdec->ctx->height, 1);
|
||||
ensure(buffer_size >= 0);
|
||||
info->size = utils::align<u32>(buffer_size, 128);
|
||||
info->size = rx::alignUp<u32>(buffer_size, 128);
|
||||
info->auNum = 1;
|
||||
info->auPts[0].lower = static_cast<u32>(pts);
|
||||
info->auPts[0].upper = static_cast<u32>(pts >> 32);
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@
|
|||
#include "cellos/sys_event.h"
|
||||
#include "cellos/sys_fs.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <shared_mutex>
|
||||
|
|
@ -1490,7 +1490,7 @@ error_code sceNpTrophyGetGameProgress(u32 context, u32 handle,
|
|||
const u32 trp_count = ctxt->tropusr->GetTrophiesCount();
|
||||
|
||||
// Round result to nearest (TODO: Check 0 trophies)
|
||||
*percentage = trp_count ? utils::rounded_div(unlocked * 100, trp_count) : 0;
|
||||
*percentage = trp_count ? rx::rounded_div(unlocked * 100, trp_count) : 0;
|
||||
|
||||
if (trp_count == 0 || trp_count > 128)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
#include "cellos/sys_mutex.h"
|
||||
#include "sysPrxForUser.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
LOG_CHANNEL(sysPrxForUser);
|
||||
|
||||
|
|
@ -151,7 +151,7 @@ error_code sys_lwmutex_lock(ppu_thread& ppu, vm::ptr<sys_lwmutex_t> lwmutex, u64
|
|||
|
||||
for (u32 i = 0; i < 10; i++)
|
||||
{
|
||||
busy_wait();
|
||||
rx::busy_wait();
|
||||
|
||||
if (lwmutex->vars.owner.load() == lwmutex_free)
|
||||
{
|
||||
|
|
@ -210,7 +210,7 @@ error_code sys_lwmutex_lock(ppu_thread& ppu, vm::ptr<sys_lwmutex_t> lwmutex, u64
|
|||
{
|
||||
for (u32 i = 0; i < 10; i++)
|
||||
{
|
||||
busy_wait();
|
||||
rx::busy_wait();
|
||||
|
||||
if (lwmutex->vars.owner.load() == lwmutex_free)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
#include "stdafx.h"
|
||||
|
||||
#include "rx/align.hpp"
|
||||
#include "key_vault.h"
|
||||
#include "unedat.h"
|
||||
#include "sha1.h"
|
||||
|
|
@ -8,7 +10,7 @@
|
|||
|
||||
#include "Emu/system_utils.hpp"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include <algorithm>
|
||||
#include <span>
|
||||
|
||||
|
|
@ -233,7 +235,7 @@ s64 decrypt_block(const fs::file* in, u8* out, EDAT_HEADER* edat, NPD_HEADER* np
|
|||
|
||||
// Locate the real data.
|
||||
const usz pad_length = length;
|
||||
length = utils::align<usz>(pad_length, 0x10);
|
||||
length = rx::alignUp<usz>(pad_length, 0x10);
|
||||
|
||||
// Setup buffers for decryption and read the data.
|
||||
std::vector<u8> enc_data_buf(is_out_buffer_aligned || length == pad_length ? 0 : length);
|
||||
|
|
@ -432,12 +434,12 @@ bool check_data(u8* key, EDAT_HEADER* edat, NPD_HEADER* npd, const fs::file* f,
|
|||
return false;
|
||||
}
|
||||
|
||||
const usz block_num = utils::aligned_div<u64>(edat->file_size, edat->block_size);
|
||||
const usz block_num = rx::aligned_div<u64>(edat->file_size, edat->block_size);
|
||||
constexpr usz metadata_offset = 0x100;
|
||||
const usz metadata_size = utils::mul_saturate<u64>(metadata_section_size, block_num);
|
||||
const usz metadata_size = rx::mul_saturate<u64>(metadata_section_size, block_num);
|
||||
u64 metadata_section_offset = metadata_offset;
|
||||
|
||||
if (utils::add_saturate<u64>(utils::add_saturate<u64>(file_offset, metadata_section_offset), metadata_size) > f->size())
|
||||
if (rx::add_saturate<u64>(rx::add_saturate<u64>(file_offset, metadata_section_offset), metadata_size) > f->size())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
|
@ -860,7 +862,7 @@ bool EDATADecrypter::ReadHeader()
|
|||
//}
|
||||
|
||||
file_size = edatHeader.file_size;
|
||||
total_blocks = ::narrow<u32>(utils::aligned_div(edatHeader.file_size, edatHeader.block_size));
|
||||
total_blocks = ::narrow<u32>(rx::aligned_div(edatHeader.file_size, edatHeader.block_size));
|
||||
|
||||
// Try decrypting the first block instead
|
||||
u8 data_sample[1];
|
||||
|
|
@ -886,7 +888,7 @@ u64 EDATADecrypter::ReadData(u64 pos, u8* data, u64 size)
|
|||
// Now we need to offset things to account for the actual 'range' requested
|
||||
const u64 startOffset = pos % edatHeader.block_size;
|
||||
|
||||
const u64 num_blocks = utils::aligned_div(startOffset + size, edatHeader.block_size);
|
||||
const u64 num_blocks = rx::aligned_div(startOffset + size, edatHeader.block_size);
|
||||
|
||||
// Find and decrypt block range covering pos + size
|
||||
const u32 starting_block = ::narrow<u32>(pos / edatHeader.block_size);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#include "stdafx.h"
|
||||
#include "aes.h"
|
||||
#include "unself.h"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "Emu/System.h"
|
||||
#include "Emu/system_utils.hpp"
|
||||
#include "Crypto/unzip.h"
|
||||
|
|
@ -887,7 +887,7 @@ bool SELFDecrypter::LoadHeaders(bool isElf32, SelfAdditionalInfo* out_info)
|
|||
m_seg_ext_hdr.back().Load(self_f);
|
||||
}
|
||||
|
||||
if (m_ext_hdr.version_hdr_offset == 0 || utils::add_saturate<u64>(m_ext_hdr.version_hdr_offset, sizeof(version_header)) > self_f.size())
|
||||
if (m_ext_hdr.version_hdr_offset == 0 || rx::add_saturate<u64>(m_ext_hdr.version_hdr_offset, sizeof(version_header)) > self_f.size())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include "stdafx.h"
|
||||
|
||||
#include "CPUThread.h"
|
||||
#include "CPUDisAsm.h"
|
||||
|
||||
|
|
@ -14,7 +15,7 @@
|
|||
#include "Emu/RSX/RSXThread.h"
|
||||
#include "Emu/perf_meter.hpp"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include <thread>
|
||||
#include <unordered_map>
|
||||
#include <map>
|
||||
|
|
@ -64,7 +65,6 @@ void fmt_class_string<cpu_flag>::format(std::string& out, u64 arg)
|
|||
case cpu_flag::dbg_global_pause: return "G-PAUSE";
|
||||
case cpu_flag::dbg_pause: return "PAUSE";
|
||||
case cpu_flag::dbg_step: return "STEP";
|
||||
case cpu_flag::bitset_last: break;
|
||||
}
|
||||
|
||||
return unknown;
|
||||
|
|
@ -124,7 +124,7 @@ void fmt_class_string<cpu_threads_emulation_info_dump_t>::format(std::string& ou
|
|||
|
||||
for (u32 i = 0; !rlock.try_lock() && i < 100; i++)
|
||||
{
|
||||
busy_wait();
|
||||
rx::busy_wait();
|
||||
}
|
||||
|
||||
if (rlock)
|
||||
|
|
@ -533,7 +533,7 @@ namespace cpu_counter
|
|||
if (ok) [[likely]]
|
||||
{
|
||||
// Get actual slot number
|
||||
id = utils::ctz128(~bits);
|
||||
id = rx::ctz128(~bits);
|
||||
|
||||
// Register thread
|
||||
if (s_cpu_list[id].compare_and_swap_test(nullptr, _this)) [[likely]]
|
||||
|
|
@ -552,7 +552,7 @@ namespace cpu_counter
|
|||
return;
|
||||
}
|
||||
|
||||
busy_wait(300);
|
||||
rx::busy_wait(300);
|
||||
}
|
||||
|
||||
s_tls_thread_slot = id;
|
||||
|
|
@ -599,7 +599,7 @@ namespace cpu_counter
|
|||
{
|
||||
for (u128 bits = copy; bits; bits &= bits - 1)
|
||||
{
|
||||
const u32 index = utils::ctz128(bits);
|
||||
const u32 index = rx::ctz128(bits);
|
||||
|
||||
if (cpu_thread* cpu = s_cpu_list[index].load())
|
||||
{
|
||||
|
|
@ -1062,7 +1062,7 @@ bool cpu_thread::check_state() noexcept
|
|||
{
|
||||
if (i < 20 || ctr & 1)
|
||||
{
|
||||
busy_wait(300);
|
||||
rx::busy_wait(300);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -1404,7 +1404,7 @@ bool cpu_thread::suspend_work::push(cpu_thread* _this) noexcept
|
|||
{
|
||||
if (cpu != _this)
|
||||
{
|
||||
utils::prefetch_write(&cpu->state);
|
||||
rx::prefetch_write(&cpu->state);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -1446,7 +1446,7 @@ bool cpu_thread::suspend_work::push(cpu_thread* _this) noexcept
|
|||
break;
|
||||
}
|
||||
|
||||
utils::pause();
|
||||
rx::pause();
|
||||
}
|
||||
|
||||
// Second increment: all threads paused
|
||||
|
|
@ -1480,13 +1480,13 @@ bool cpu_thread::suspend_work::push(cpu_thread* _this) noexcept
|
|||
{
|
||||
for (u32 i = 0; i < work->prf_size; i++)
|
||||
{
|
||||
utils::prefetch_write(work->prf_list[0]);
|
||||
rx::prefetch_write(work->prf_list[0]);
|
||||
}
|
||||
}
|
||||
|
||||
cpu_counter::for_all_cpu(copy2, [&](cpu_thread* cpu)
|
||||
{
|
||||
utils::prefetch_write(&cpu->state);
|
||||
rx::prefetch_write(&cpu->state);
|
||||
return true;
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -8719,10 +8719,22 @@ FORCE_INLINE __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon)
|
|||
|
||||
uint8x16_t dest = {
|
||||
// Undo ShiftRows step from AESE and extract X1 and X3
|
||||
u8[0x4], u8[0x1], u8[0xE], u8[0xB], // SubBytes(X1)
|
||||
u8[0x1], u8[0xE], u8[0xB], u8[0x4], // ROT(SubBytes(X1))
|
||||
u8[0xC], u8[0x9], u8[0x6], u8[0x3], // SubBytes(X3)
|
||||
u8[0x9], u8[0x6], u8[0x3], u8[0xC], // ROT(SubBytes(X3))
|
||||
u8[0x4],
|
||||
u8[0x1],
|
||||
u8[0xE],
|
||||
u8[0xB], // SubBytes(X1)
|
||||
u8[0x1],
|
||||
u8[0xE],
|
||||
u8[0xB],
|
||||
u8[0x4], // ROT(SubBytes(X1))
|
||||
u8[0xC],
|
||||
u8[0x9],
|
||||
u8[0x6],
|
||||
u8[0x3], // SubBytes(X3)
|
||||
u8[0x9],
|
||||
u8[0x6],
|
||||
u8[0x3],
|
||||
u8[0xC], // ROT(SubBytes(X3))
|
||||
};
|
||||
uint32x4_t r = {0, (unsigned)rcon, 0, (unsigned)rcon};
|
||||
return vreinterpretq_m128i_u8(dest) ^ vreinterpretq_m128i_u32(r);
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include "stdafx.h"
|
||||
|
||||
#include "PPUAnalyser.h"
|
||||
|
||||
#include "cellos/sys_sync.h"
|
||||
|
|
@ -8,7 +9,8 @@
|
|||
|
||||
#include <unordered_set>
|
||||
#include "util/yaml.hpp"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
LOG_CHANNEL(ppu_validator);
|
||||
|
||||
|
|
@ -25,7 +27,6 @@ void fmt_class_string<ppu_attr>::format(std::string& out, u64 arg)
|
|||
case ppu_attr::no_return: return "no_return";
|
||||
case ppu_attr::no_size: return "no_size";
|
||||
case ppu_attr::has_mfvscr: return "has_mfvscr";
|
||||
case ppu_attr::bitset_last: break;
|
||||
}
|
||||
|
||||
return unknown;
|
||||
|
|
@ -2243,7 +2244,7 @@ bool ppu_module<lv2_obj>::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con
|
|||
}
|
||||
}
|
||||
|
||||
jt_end = utils::align<u32>(static_cast<u32>(std::min<u64>(jt_end - 1, ctr(maxv) - 1) + 1), 4);
|
||||
jt_end = rx::alignUp<u32>(static_cast<u32>(std::min<u64>(jt_end - 1, ctr(maxv) - 1) + 1), 4);
|
||||
|
||||
get_jumptable_end(jumpatble_off, jumpatble_ptr, false);
|
||||
|
||||
|
|
@ -2882,7 +2883,7 @@ bool ppu_module<lv2_obj>::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con
|
|||
block.attr = ppu_attr::no_size;
|
||||
}
|
||||
|
||||
per_instruction_bytes += utils::sub_saturate<u32>(lim, func.addr);
|
||||
per_instruction_bytes += rx::sub_saturate<u32>(lim, func.addr);
|
||||
addr_next = std::max<u32>(addr_next, lim);
|
||||
continue;
|
||||
}
|
||||
|
|
@ -3291,7 +3292,7 @@ bool ppu_module<lv2_obj>::analyse(u32 lib_toc, u32 entry, const u32 sec_end, con
|
|||
|
||||
if (per_instruction_bytes)
|
||||
{
|
||||
const bool error = per_instruction_bytes >= 200 && per_instruction_bytes / 4 >= utils::aligned_div<u32>(::size32(funcs), 128);
|
||||
const bool error = per_instruction_bytes >= 200 && per_instruction_bytes / 4 >= rx::aligned_div<u32>(::size32(funcs), 128);
|
||||
(error ? ppu_log.error : ppu_log.notice)("%d instructions will be compiled on per-instruction basis in total", per_instruction_bytes / 4);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,12 @@
|
|||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <deque>
|
||||
#include <span>
|
||||
#include "util/types.hpp"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "rx/types.hpp"
|
||||
#include "util/to_endian.hpp"
|
||||
|
||||
#include "rx/EnumBitSet.hpp"
|
||||
|
|
@ -218,7 +219,7 @@ struct ppu_module : public Type
|
|||
const u32 seg_size = seg.size;
|
||||
const u32 seg_addr = seg.addr;
|
||||
|
||||
if (seg_size >= std::max<usz>(size_bytes, 1) && addr <= utils::align<u32>(seg_addr + seg_size, 0x10000) - size_bytes)
|
||||
if (seg_size >= std::max<usz>(size_bytes, 1) && addr <= rx::alignUp<u32>(seg_addr + seg_size, 0x10000) - size_bytes)
|
||||
{
|
||||
return reinterpret_cast<to_be_t<T>*>(static_cast<u8*>(seg.ptr) + (addr - seg_addr));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
#include "PPUAnalyser.h"
|
||||
#include "Emu/IdManager.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
|
|
@ -222,7 +222,7 @@ std::pair<PPUDisAsm::const_op, u64> PPUDisAsm::try_get_const_op_gpr_value(u32 re
|
|||
|
||||
GET_CONST_REG(reg_rs, op.rs);
|
||||
|
||||
return {form, utils::rol64(reg_rs, op.sh64) & (~0ull << (op.mbe64 ^ 63))};
|
||||
return {form, rx::rol64(reg_rs, op.sh64) & (~0ull << (op.mbe64 ^ 63))};
|
||||
}
|
||||
case ppu_itype::OR:
|
||||
{
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@
|
|||
#include <cmath>
|
||||
#include <climits>
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "util/v128.hpp"
|
||||
#include "util/simd.hpp"
|
||||
#include "util/sysinfo.hpp"
|
||||
|
|
@ -3509,7 +3509,7 @@ auto RLWIMI()
|
|||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
const u64 mask = ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
|
||||
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & mask);
|
||||
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (dup32(rx::rol32(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & mask);
|
||||
if constexpr (((Flags == has_rc) || ...))
|
||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||
};
|
||||
|
|
@ -3524,7 +3524,7 @@ auto RLWINM()
|
|||
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
ppu.gpr[op.ra] = dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
|
||||
ppu.gpr[op.ra] = dup32(rx::rol32(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
|
||||
if constexpr (((Flags == has_rc) || ...))
|
||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||
};
|
||||
|
|
@ -3539,7 +3539,7 @@ auto RLWNM()
|
|||
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
ppu.gpr[op.ra] = dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), ppu.gpr[op.rb] & 0x1f)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
|
||||
ppu.gpr[op.ra] = dup32(rx::rol32(static_cast<u32>(ppu.gpr[op.rs]), ppu.gpr[op.rb] & 0x1f)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
|
||||
if constexpr (((Flags == has_rc) || ...))
|
||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||
};
|
||||
|
|
@ -3634,7 +3634,7 @@ auto RLDICL()
|
|||
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull >> op.mbe64);
|
||||
ppu.gpr[op.ra] = rx::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull >> op.mbe64);
|
||||
if constexpr (((Flags == has_rc) || ...))
|
||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||
};
|
||||
|
|
@ -3649,7 +3649,7 @@ auto RLDICR()
|
|||
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull << (op.mbe64 ^ 63));
|
||||
ppu.gpr[op.ra] = rx::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull << (op.mbe64 ^ 63));
|
||||
if constexpr (((Flags == has_rc) || ...))
|
||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||
};
|
||||
|
|
@ -3664,7 +3664,7 @@ auto RLDIC()
|
|||
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
|
||||
ppu.gpr[op.ra] = rx::rol64(ppu.gpr[op.rs], op.sh64) & ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
|
||||
if constexpr (((Flags == has_rc) || ...))
|
||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||
};
|
||||
|
|
@ -3680,7 +3680,7 @@ auto RLDIMI()
|
|||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
const u64 mask = ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
|
||||
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (utils::rol64(ppu.gpr[op.rs], op.sh64) & mask);
|
||||
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (rx::rol64(ppu.gpr[op.rs], op.sh64) & mask);
|
||||
if constexpr (((Flags == has_rc) || ...))
|
||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||
};
|
||||
|
|
@ -3695,7 +3695,7 @@ auto RLDCL()
|
|||
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull >> op.mbe64);
|
||||
ppu.gpr[op.ra] = rx::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull >> op.mbe64);
|
||||
if constexpr (((Flags == has_rc) || ...))
|
||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||
};
|
||||
|
|
@ -3710,7 +3710,7 @@ auto RLDCR()
|
|||
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull << (op.mbe64 ^ 63));
|
||||
ppu.gpr[op.ra] = rx::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull << (op.mbe64 ^ 63));
|
||||
if constexpr (((Flags == has_rc) || ...))
|
||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||
};
|
||||
|
|
@ -3842,7 +3842,7 @@ auto MULHDU()
|
|||
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
ppu.gpr[op.rd] = utils::umulh64(ppu.gpr[op.ra], ppu.gpr[op.rb]);
|
||||
ppu.gpr[op.rd] = rx::umulh64(ppu.gpr[op.ra], ppu.gpr[op.rb]);
|
||||
if constexpr (((Flags == has_rc) || ...))
|
||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.rd], 0);
|
||||
};
|
||||
|
|
@ -4243,7 +4243,7 @@ auto MULHD()
|
|||
|
||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op)
|
||||
{
|
||||
ppu.gpr[op.rd] = utils::mulh64(ppu.gpr[op.ra], ppu.gpr[op.rb]);
|
||||
ppu.gpr[op.rd] = rx::mulh64(ppu.gpr[op.ra], ppu.gpr[op.rb]);
|
||||
if constexpr (((Flags == has_rc) || ...))
|
||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.rd], 0);
|
||||
};
|
||||
|
|
@ -4675,7 +4675,7 @@ auto MULLD()
|
|||
ppu.gpr[op.rd] = RA * RB;
|
||||
if (op.oe) [[unlikely]]
|
||||
{
|
||||
const s64 high = utils::mulh64(RA, RB);
|
||||
const s64 high = rx::mulh64(RA, RB);
|
||||
ppu_ov_set(ppu, high != s64(ppu.gpr[op.rd]) >> 63);
|
||||
}
|
||||
if constexpr (((Flags == has_rc) || ...))
|
||||
|
|
|
|||
|
|
@ -28,7 +28,8 @@
|
|||
#include <span>
|
||||
#include <set>
|
||||
#include <algorithm>
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
|
||||
LOG_CHANNEL(ppu_loader);
|
||||
|
||||
|
|
@ -341,7 +342,7 @@ static void ppu_initialize_modules(ppu_linkage_info* link, utils::serial* ar = n
|
|||
if (!hle_funcs_addr)
|
||||
hle_funcs_addr = vm::alloc(::size32(hle_funcs) * 8, vm::main);
|
||||
else
|
||||
vm::page_protect(hle_funcs_addr, utils::align(::size32(hle_funcs) * 8, 0x1000), 0, vm::page_writable);
|
||||
vm::page_protect(hle_funcs_addr, rx::alignUp(::size32(hle_funcs) * 8, 0x1000), 0, vm::page_writable);
|
||||
|
||||
// Initialize as PPU executable code
|
||||
ppu_register_range(hle_funcs_addr, ::size32(hle_funcs) * 8);
|
||||
|
|
@ -359,7 +360,7 @@ static void ppu_initialize_modules(ppu_linkage_info* link, utils::serial* ar = n
|
|||
}
|
||||
|
||||
// Set memory protection to read-only
|
||||
vm::page_protect(hle_funcs_addr, utils::align(::size32(hle_funcs) * 8, 0x1000), 0, 0, vm::page_writable);
|
||||
vm::page_protect(hle_funcs_addr, rx::alignUp(::size32(hle_funcs) * 8, 0x1000), 0, 0, vm::page_writable);
|
||||
|
||||
// Initialize function names
|
||||
const bool is_first = g_ppu_function_names.empty();
|
||||
|
|
@ -489,7 +490,7 @@ static void ppu_initialize_modules(ppu_linkage_info* link, utils::serial* ar = n
|
|||
}
|
||||
else
|
||||
{
|
||||
const u32 next = utils::align(alloc_addr, variable.second.align);
|
||||
const u32 next = rx::alignUp(alloc_addr, variable.second.align);
|
||||
const u32 end = next + variable.second.size - 1;
|
||||
|
||||
if (!next || (end >> 16 != alloc_addr >> 16))
|
||||
|
|
@ -1191,7 +1192,7 @@ static void ppu_check_patch_spu_images(const ppu_module<lv2_obj>& mod, const ppu
|
|||
|
||||
u32 prev_bound = 0;
|
||||
|
||||
for (u32 i = find_first_of_multiple(seg_view, prefixes, 0); i < seg.size; i = find_first_of_multiple(seg_view, prefixes, utils::align<u32>(i + 1, 4)))
|
||||
for (u32 i = find_first_of_multiple(seg_view, prefixes, 0); i < seg.size; i = find_first_of_multiple(seg_view, prefixes, rx::alignUp<u32>(i + 1, 4)))
|
||||
{
|
||||
const auto elf_header = ensure(mod.get_ptr<u8>(seg.addr + i));
|
||||
|
||||
|
|
@ -1201,7 +1202,7 @@ static void ppu_check_patch_spu_images(const ppu_module<lv2_obj>& mod, const ppu
|
|||
const u32 old_i = i;
|
||||
u32 guid_start = umax, guid_end = umax;
|
||||
|
||||
for (u32 search = i & -128, tries = 10; tries && search >= prev_bound; tries--, search = utils::sub_saturate<u32>(search, 128))
|
||||
for (u32 search = i & -128, tries = 10; tries && search >= prev_bound; tries--, search = rx::sub_saturate<u32>(search, 128))
|
||||
{
|
||||
if (seg_view[search] != 0x42 && seg_view[search] != 0x43)
|
||||
{
|
||||
|
|
@ -1271,7 +1272,7 @@ static void ppu_check_patch_spu_images(const ppu_module<lv2_obj>& mod, const ppu
|
|||
if (addr_last >= 0x80 && valid_count >= 2)
|
||||
{
|
||||
const u32 begin = i & -128;
|
||||
u32 end = std::min<u32>(seg.size, utils::align<u32>(i + addr_last + 256, 128));
|
||||
u32 end = std::min<u32>(seg.size, rx::alignUp<u32>(i + addr_last + 256, 128));
|
||||
|
||||
u32 guessed_ls_addr = 0;
|
||||
|
||||
|
|
@ -1611,7 +1612,7 @@ shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object& elf, bool virtual_load, c
|
|||
|
||||
if (virtual_load)
|
||||
{
|
||||
addr = std::exchange(allocating_address, allocating_address + utils::align<u32>(mem_size, 0x10000));
|
||||
addr = std::exchange(allocating_address, allocating_address + rx::alignUp<u32>(mem_size, 0x10000));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -1625,7 +1626,7 @@ shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object& elf, bool virtual_load, c
|
|||
// Leave additional room for the analyser so it can safely access beyond limit a bit
|
||||
// With VM the address space is not really a limit, so any u32 address is valid there; here it is UB to create a pointer that goes beyond the boundaries
|
||||
// TODO: Use make_shared_for_overwrite when all compilers support it
|
||||
const usz alloc_size = utils::align<usz>(mem_size, 0x10000) + 4096;
|
||||
const usz alloc_size = rx::alignUp<usz>(mem_size, 0x10000) + 4096;
|
||||
prx->allocations.push_back(std::shared_ptr<u8[]>(new u8[alloc_size]));
|
||||
_seg.ptr = prx->allocations.back().get();
|
||||
std::memset(static_cast<u8*>(_seg.ptr) + prog.bin.size(), 0, alloc_size - 4096 - prog.bin.size());
|
||||
|
|
@ -1725,7 +1726,7 @@ shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object& elf, bool virtual_load, c
|
|||
{
|
||||
const auto& rel = reinterpret_cast<const ppu_prx_relocation_info&>(prog.bin[i]);
|
||||
|
||||
if (rel.offset >= utils::align<u64>(::at32(prx->segs, rel.index_addr).size, 0x100))
|
||||
if (rel.offset >= rx::alignUp<u64>(::at32(prx->segs, rel.index_addr).size, 0x100))
|
||||
{
|
||||
fmt::throw_exception("Relocation offset out of segment memory! (offset=0x%x, index_addr=%u, seg_size=0x%x)", rel.offset, rel.index_addr, prx->segs[rel.index_addr].size);
|
||||
}
|
||||
|
|
@ -2201,7 +2202,7 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
|
|||
// Leave additional room for the analyser so it can safely access beyond limit a bit
|
||||
// With VM the address space is not really a limit, so any u32 address is valid there; here it is UB to create a pointer that goes beyond the boundaries
|
||||
// TODO: Use make_shared_for_overwrite when all compilers support it
|
||||
const usz alloc_size = utils::align<usz>(size, 0x10000) + 4096;
|
||||
const usz alloc_size = rx::alignUp<usz>(size, 0x10000) + 4096;
|
||||
_main.allocations.push_back(std::shared_ptr<u8[]>(new u8[alloc_size]));
|
||||
_seg.ptr = _main.allocations.back().get();
|
||||
std::memset(static_cast<u8*>(_seg.ptr) + prog.bin.size(), 0, alloc_size - 4096 - prog.bin.size());
|
||||
|
|
@ -2247,7 +2248,7 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
|
|||
else
|
||||
{
|
||||
// For backwards compatibility: already loaded memory will always be writable
|
||||
const u32 size0 = utils::align(size + addr % 0x10000, 0x10000);
|
||||
const u32 size0 = rx::alignUp(size + addr % 0x10000, 0x10000);
|
||||
const u32 addr0 = addr & -0x10000;
|
||||
vm::page_protect(addr0, size0, 0, vm::page_writable | vm::page_readable, vm::page_executable);
|
||||
}
|
||||
|
|
@ -2721,7 +2722,7 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
|
|||
default:
|
||||
{
|
||||
// According to elad335, the min value seems to be 64KB instead of the expected 4KB (SYS_PROCESS_PARAM_STACK_SIZE_MIN)
|
||||
primary_stacksize = utils::align<u32>(std::clamp<u32>(sz, 0x10000, SYS_PROCESS_PARAM_STACK_SIZE_MAX), 4096);
|
||||
primary_stacksize = rx::alignUp<u32>(std::clamp<u32>(sz, 0x10000, SYS_PROCESS_PARAM_STACK_SIZE_MAX), 4096);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -2738,29 +2739,29 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
|
|||
if (!Emu.data.empty())
|
||||
{
|
||||
std::memcpy(vm::base(ppu->stack_addr + ppu->stack_size - ::size32(Emu.data)), Emu.data.data(), Emu.data.size());
|
||||
ppu->gpr[1] -= utils::align<u32>(::size32(Emu.data), 0x10);
|
||||
ppu->gpr[1] -= rx::alignUp<u32>(::size32(Emu.data), 0x10);
|
||||
}
|
||||
|
||||
// Initialize process arguments
|
||||
|
||||
// Calculate storage requirements on the stack
|
||||
const u32 pointers_storage_size = u32{sizeof(u64)} * utils::align<u32>(::size32(Emu.envp) + ::size32(Emu.argv) + 2, 2);
|
||||
const u32 pointers_storage_size = u32{sizeof(u64)} * rx::alignUp<u32>(::size32(Emu.envp) + ::size32(Emu.argv) + 2, 2);
|
||||
|
||||
u32 stack_alloc_size = pointers_storage_size;
|
||||
|
||||
for (const auto& arg : Emu.argv)
|
||||
{
|
||||
stack_alloc_size += utils::align<u32>(::size32(arg) + 1, 0x10);
|
||||
stack_alloc_size += rx::alignUp<u32>(::size32(arg) + 1, 0x10);
|
||||
}
|
||||
|
||||
for (const auto& arg : Emu.envp)
|
||||
{
|
||||
stack_alloc_size += utils::align<u32>(::size32(arg) + 1, 0x10);
|
||||
stack_alloc_size += rx::alignUp<u32>(::size32(arg) + 1, 0x10);
|
||||
}
|
||||
|
||||
ensure(ppu->stack_size > stack_alloc_size);
|
||||
|
||||
vm::ptr<u64> args = vm::cast(static_cast<u32>(ppu->stack_addr + ppu->stack_size - stack_alloc_size - utils::align<u32>(::size32(Emu.data), 0x10)));
|
||||
vm::ptr<u64> args = vm::cast(static_cast<u32>(ppu->stack_addr + ppu->stack_size - stack_alloc_size - rx::alignUp<u32>(::size32(Emu.data), 0x10)));
|
||||
vm::ptr<u8> args_data = vm::cast(args.addr() + pointers_storage_size);
|
||||
|
||||
const vm::ptr<u64> argv = args;
|
||||
|
|
@ -2772,7 +2773,7 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
|
|||
std::memcpy(args_data.get_ptr(), arg.data(), arg_size);
|
||||
|
||||
*args++ = args_data.addr();
|
||||
args_data = vm::cast(args_data.addr() + utils::align<u32>(arg_size, 0x10));
|
||||
args_data = vm::cast(args_data.addr() + rx::alignUp<u32>(arg_size, 0x10));
|
||||
}
|
||||
|
||||
*args++ = 0;
|
||||
|
|
@ -2787,7 +2788,7 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
|
|||
std::memcpy(args_data.get_ptr(), arg.data(), arg_size);
|
||||
|
||||
*args++ = args_data.addr();
|
||||
args_data = vm::cast(args_data.addr() + utils::align<u32>(arg_size, 0x10));
|
||||
args_data = vm::cast(args_data.addr() + rx::alignUp<u32>(arg_size, 0x10));
|
||||
}
|
||||
|
||||
*args++ = 0;
|
||||
|
|
@ -2855,7 +2856,7 @@ bool ppu_load_exec(const ppu_exec_object& elf, bool virtual_load, const std::str
|
|||
if (prog.p_type == 0x1u /* LOAD */ && prog.p_memsz && (prog.p_flags & 0x022000002) == 0u /* W */)
|
||||
{
|
||||
// Set memory protection to read-only when necessary (only if PPU-W, SPU-W, RSX-W are all disabled)
|
||||
ensure(vm::page_protect(addr, utils::align(size, 0x1000), 0, 0, vm::page_writable));
|
||||
ensure(vm::page_protect(addr, rx::alignUp(size, 0x1000), 0, 0, vm::page_writable));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2934,7 +2935,7 @@ std::pair<shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_exec_ob
|
|||
// Leave additional room for the analyser so it can safely access beyond limit a bit
|
||||
// With VM the address space is not really a limit, so any u32 address is valid there; here it is UB to create a pointer that goes beyond the boundaries
|
||||
// TODO: Use make_shared_for_overwrite when all compilers support it
|
||||
const usz alloc_size = utils::align<usz>(size, 0x10000) + 4096;
|
||||
const usz alloc_size = rx::alignUp<usz>(size, 0x10000) + 4096;
|
||||
ovlm->allocations.push_back(std::shared_ptr<u8[]>(new u8[alloc_size]));
|
||||
_seg.ptr = ovlm->allocations.back().get();
|
||||
std::memset(static_cast<u8*>(_seg.ptr) + prog.bin.size(), 0, alloc_size - 4096 - prog.bin.size());
|
||||
|
|
@ -3230,7 +3231,7 @@ bool ppu_load_rel_exec(const ppu_rel_object& elf)
|
|||
{
|
||||
if (s.sh_type != sec_type::sht_progbits)
|
||||
{
|
||||
memsize = utils::align<u32>(memsize + vm::cast(s.sh_size), 128);
|
||||
memsize = rx::alignUp<u32>(memsize + vm::cast(s.sh_size), 128);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -3278,7 +3279,7 @@ bool ppu_load_rel_exec(const ppu_rel_object& elf)
|
|||
relm.secs.emplace_back(_sec);
|
||||
|
||||
std::memcpy(vm::base(addr), s.get_bin().data(), size);
|
||||
addr = utils::align<u32>(addr + size, 128);
|
||||
addr = rx::alignUp<u32>(addr + size, 128);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -62,7 +62,8 @@
|
|||
#include <optional>
|
||||
#include <charconv>
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "util/vm.hpp"
|
||||
#include "util/v128.hpp"
|
||||
#include "util/simd.hpp"
|
||||
|
|
@ -217,7 +218,7 @@ public:
|
|||
|
||||
user acquire(u64 amount)
|
||||
{
|
||||
amount = utils::aligned_div<u64>(amount, k_block_size);
|
||||
amount = rx::aligned_div<u64>(amount, k_block_size);
|
||||
|
||||
u32 allocated = 0;
|
||||
while (!m_free.fetch_op([&, this](u32& value)
|
||||
|
|
@ -225,7 +226,7 @@ public:
|
|||
if (value >= amount || value == m_total)
|
||||
{
|
||||
// Allow at least one allocation; make 0 the "memory unavailable" sentinel value for atomic waiting efficiency
|
||||
const u32 new_val = static_cast<u32>(utils::sub_saturate<u64>(value, amount));
|
||||
const u32 new_val = static_cast<u32>(rx::sub_saturate<u64>(value, amount));
|
||||
allocated = value - new_val;
|
||||
value = new_val;
|
||||
return true;
|
||||
|
|
@ -869,7 +870,7 @@ extern void ppu_register_range(u32 addr, u32 size)
|
|||
return;
|
||||
}
|
||||
|
||||
size = utils::align(size + addr % 0x10000, 0x10000);
|
||||
size = rx::alignUp(size + addr % 0x10000, 0x10000);
|
||||
addr &= -0x10000;
|
||||
|
||||
// Register executable range at
|
||||
|
|
@ -1816,7 +1817,7 @@ std::vector<std::pair<u32, u32>> ppu_thread::dump_callstack_list() const
|
|||
|
||||
if (pos_dist >= inst_pos.size())
|
||||
{
|
||||
const u32 inst_bound = utils::align<u32>(pos, 256);
|
||||
const u32 inst_bound = rx::alignUp<u32>(pos, 256);
|
||||
|
||||
const usz old_size = inst_pos.size();
|
||||
const usz new_size = pos_dist + (inst_bound - pos) / 4 + 1;
|
||||
|
|
@ -1903,7 +1904,7 @@ std::vector<std::pair<u32, u32>> ppu_thread::dump_callstack_list() const
|
|||
|
||||
for (u32 back = 1; back < 20; back++)
|
||||
{
|
||||
be_t<u32>& opcode = get_inst(utils::sub_saturate<u32>(_cia, back * 4));
|
||||
be_t<u32>& opcode = get_inst(rx::sub_saturate<u32>(_cia, back * 4));
|
||||
|
||||
if (!opcode)
|
||||
{
|
||||
|
|
@ -3588,11 +3589,11 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
|
|||
return false;
|
||||
}
|
||||
|
||||
utils::prefetch_read(ppu.rdata);
|
||||
utils::prefetch_read(ppu.rdata + 64);
|
||||
rx::prefetch_read(ppu.rdata);
|
||||
rx::prefetch_read(ppu.rdata + 64);
|
||||
ppu.last_faddr = addr;
|
||||
ppu.last_ftime = res.load() & -128;
|
||||
ppu.last_ftsc = utils::get_tsc();
|
||||
ppu.last_ftsc = rx::get_tsc();
|
||||
return false;
|
||||
}
|
||||
default:
|
||||
|
|
@ -3699,7 +3700,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
|
|||
|
||||
ppu.last_faddr = addr;
|
||||
ppu.last_ftime = old_rtime & -128;
|
||||
ppu.last_ftsc = utils::get_tsc();
|
||||
ppu.last_ftsc = rx::get_tsc();
|
||||
std::memcpy(&ppu.rdata[addr & 0x78], &old_data, 8);
|
||||
}
|
||||
|
||||
|
|
@ -3941,7 +3942,7 @@ namespace
|
|||
fs::stat_t get_stat() override
|
||||
{
|
||||
fs::stat_t stat = m_file.get_stat();
|
||||
stat.size = std::min<u64>(utils::sub_saturate<u64>(stat.size, m_off), m_max_size);
|
||||
stat.size = std::min<u64>(rx::sub_saturate<u64>(stat.size, m_off), m_max_size);
|
||||
stat.is_writable = false;
|
||||
return stat;
|
||||
}
|
||||
|
|
@ -3960,7 +3961,7 @@ namespace
|
|||
|
||||
u64 read_at(u64 offset, void* buffer, u64 size) override
|
||||
{
|
||||
return m_file.read_at(offset + m_off, buffer, std::min<u64>(size, utils::sub_saturate<u64>(m_max_size, offset)));
|
||||
return m_file.read_at(offset + m_off, buffer, std::min<u64>(size, rx::sub_saturate<u64>(m_max_size, offset)));
|
||||
}
|
||||
|
||||
u64 write(const void*, u64) override
|
||||
|
|
@ -3988,7 +3989,7 @@ namespace
|
|||
|
||||
u64 size() override
|
||||
{
|
||||
return std::min<u64>(utils::sub_saturate<u64>(m_file.size(), m_off), m_max_size);
|
||||
return std::min<u64>(rx::sub_saturate<u64>(m_file.size(), m_off), m_max_size);
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
|
@ -5624,7 +5625,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
}
|
||||
|
||||
// Initialize compiler instance
|
||||
while (jits.size() < utils::aligned_div<u64>(module_counter, c_moudles_per_jit) && is_being_used_in_emulation)
|
||||
while (jits.size() < rx::aligned_div<u64>(module_counter, c_moudles_per_jit) && is_being_used_in_emulation)
|
||||
{
|
||||
jits.emplace_back(std::make_shared<jit_compiler>(s_link_table, g_cfg.core.llvm_cpu, 0, symbols_cement));
|
||||
|
||||
|
|
@ -5652,7 +5653,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
const bool divide_by_twenty = !workload.empty();
|
||||
const usz increment_link_count_at = (divide_by_twenty ? 20 : 1);
|
||||
|
||||
g_progr_ptotal += static_cast<u32>(utils::aligned_div<u64>(link_workload.size(), increment_link_count_at));
|
||||
g_progr_ptotal += static_cast<u32>(rx::aligned_div<u64>(link_workload.size(), increment_link_count_at));
|
||||
|
||||
usz mod_index = umax;
|
||||
|
||||
|
|
@ -5785,7 +5786,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
|
||||
bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_size)
|
||||
{
|
||||
concurent_memory_limit memory_limit(utils::aligned_div<u64>(utils::get_total_memory(), 2));
|
||||
concurent_memory_limit memory_limit(rx::aligned_div<u64>(utils::get_total_memory(), 2));
|
||||
return ppu_initialize(info, check_only, file_size, memory_limit);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -847,7 +847,7 @@ Value* PPUTranslator::ReadMemory(Value* addr, Type* type, bool is_be, u32 align)
|
|||
|
||||
m_may_be_mmio = false;
|
||||
|
||||
if (auto ptr = m_info.get_ptr<instructions_to_test>(std::max<u32>(m_info.segs[0].addr, (m_reloc ? m_reloc->addr : 0) + utils::sub_saturate<u32>(::narrow<u32>(m_addr), sizeof(instructions_to_test) / 2))))
|
||||
if (auto ptr = m_info.get_ptr<instructions_to_test>(std::max<u32>(m_info.segs[0].addr, (m_reloc ? m_reloc->addr : 0) + rx::sub_saturate<u32>(::narrow<u32>(m_addr), sizeof(instructions_to_test) / 2))))
|
||||
{
|
||||
if (ppu_test_address_may_be_mmio(std::span(ptr->insts)))
|
||||
{
|
||||
|
|
@ -920,7 +920,7 @@ void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align
|
|||
be_t<u32> insts[128];
|
||||
};
|
||||
|
||||
if (auto ptr = m_info.get_ptr<instructions_to_test>(std::max<u32>(m_info.segs[0].addr, (m_reloc ? m_reloc->addr : 0) + utils::sub_saturate<u32>(::narrow<u32>(m_addr), sizeof(instructions_to_test) / 2))))
|
||||
if (auto ptr = m_info.get_ptr<instructions_to_test>(std::max<u32>(m_info.segs[0].addr, (m_reloc ? m_reloc->addr : 0) + rx::sub_saturate<u32>(::narrow<u32>(m_addr), sizeof(instructions_to_test) / 2))))
|
||||
{
|
||||
if (ppu_test_address_may_be_mmio(std::span(ptr->insts)))
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
#include "stdafx.h"
|
||||
#include "Emu/IdManager.h"
|
||||
#include "Loader/ELF.h"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
|
||||
#include "SPUThread.h"
|
||||
|
||||
|
|
@ -450,7 +451,7 @@ void spu_load_rel_exec(const spu_rel_object& elf)
|
|||
{
|
||||
if (shdr.sh_type == sec_type::sht_progbits && shdr.sh_flags().all_of(sh_flag::shf_alloc))
|
||||
{
|
||||
total_memsize = utils::align<u32>(total_memsize + shdr.sh_size, 4);
|
||||
total_memsize = rx::alignUp<u32>(total_memsize + shdr.sh_size, 4);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -462,7 +463,7 @@ void spu_load_rel_exec(const spu_rel_object& elf)
|
|||
if (shdr.sh_type == sec_type::sht_progbits && shdr.sh_flags().all_of(sh_flag::shf_alloc))
|
||||
{
|
||||
std::memcpy(spu->_ptr<void>(offs), shdr.get_bin().data(), shdr.sh_size);
|
||||
offs = utils::align<u32>(offs + shdr.sh_size, 4);
|
||||
offs = rx::alignUp<u32>(offs + shdr.sh_size, 4);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,8 @@
|
|||
#include "SPUInterpreter.h"
|
||||
#include "Crypto/sha1.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "util/v128.hpp"
|
||||
#include "util/sysinfo.hpp"
|
||||
|
||||
|
|
@ -282,7 +283,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
|||
words_align = 64;
|
||||
|
||||
const u32 starta = start & -64;
|
||||
const u32 enda = utils::align(end, 64);
|
||||
const u32 enda = rx::alignUp(end, 64);
|
||||
const u32 sizea = (enda - starta) / 64;
|
||||
ensure(sizea);
|
||||
|
||||
|
|
@ -363,7 +364,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
|||
words_align = 32;
|
||||
|
||||
const u32 starta = start & -32;
|
||||
const u32 enda = utils::align(end, 32);
|
||||
const u32 enda = rx::alignUp(end, 32);
|
||||
const u32 sizea = (enda - starta) / 32;
|
||||
ensure(sizea);
|
||||
|
||||
|
|
@ -486,7 +487,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
|
|||
words_align = 32;
|
||||
|
||||
const u32 starta = start & -32;
|
||||
const u32 enda = utils::align(end, 32);
|
||||
const u32 enda = rx::alignUp(end, 32);
|
||||
const u32 sizea = (enda - starta) / 32;
|
||||
ensure(sizea);
|
||||
|
||||
|
|
@ -3211,7 +3212,7 @@ void spu_recompiler::ROTQBYI(spu_opcode_t op)
|
|||
}
|
||||
else if (s == 4 || s == 8 || s == 12)
|
||||
{
|
||||
c->pshufd(va, va, utils::rol8(0xE4, s / 2));
|
||||
c->pshufd(va, va, rx::rol8(0xE4, s / 2));
|
||||
}
|
||||
else if (utils::has_ssse3())
|
||||
{
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
#include <optional>
|
||||
#include <unordered_set>
|
||||
|
||||
#include "rx/align.hpp"
|
||||
#include "util/v128.hpp"
|
||||
#include "util/simd.hpp"
|
||||
#include "util/sysinfo.hpp"
|
||||
|
|
@ -658,7 +659,7 @@ std::deque<spu_program> spu_cache::get()
|
|||
const u32 size = block_info.size;
|
||||
const u32 addr = block_info.addr;
|
||||
|
||||
if (utils::add_saturate<u32>(addr, size * 4) > SPU_LS_SIZE)
|
||||
if (rx::add_saturate<u32>(addr, size * 4) > SPU_LS_SIZE)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
|
@ -1253,7 +1254,7 @@ void spu_cache::initialize(bool build_existing_cache)
|
|||
|
||||
fmt::append(dump, "\n\t%49s", "");
|
||||
|
||||
for (u32 i = 0; i < std::min<usz>(f->data.size(), std::max<usz>(64, utils::aligned_div<u32>(depth_m, 4))); i++)
|
||||
for (u32 i = 0; i < std::min<usz>(f->data.size(), std::max<usz>(64, rx::aligned_div<u32>(depth_m, 4))); i++)
|
||||
{
|
||||
fmt::append(dump, "%-10s", g_spu_iname.decode(std::bit_cast<be_t<u32>>(f->data[i])));
|
||||
}
|
||||
|
|
@ -2308,12 +2309,12 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
|
|||
// TODO: Does not detect jumptables or fixed-addr indirect calls
|
||||
const v128 brasl_mask = is_known_addr ? v128::from32p(0x62u << 23) : v128::from32p(umax);
|
||||
|
||||
for (u32 i = utils::align<u32>(base_addr, 0x10); i < std::min<u32>(base_addr + ::size32(ls), 0x3FFF0); i += 0x10)
|
||||
for (u32 i = rx::alignUp<u32>(base_addr, 0x10); i < std::min<u32>(base_addr + ::size32(ls), 0x3FFF0); i += 0x10)
|
||||
{
|
||||
// Search for BRSL LR and BRASL LR or BR
|
||||
// TODO: BISL
|
||||
const v128 inst = read_from_ptr<be_t<v128>>(ls.data(), i - base_addr);
|
||||
const v128 cleared_i16 = gv_and32(inst, v128::from32p(utils::rol32(~0xffff, 7)));
|
||||
const v128 cleared_i16 = gv_and32(inst, v128::from32p(rx::rol32(~0xffff, 7)));
|
||||
const v128 eq_brsl = gv_eq32(cleared_i16, v128::from32p(0x66u << 23));
|
||||
const v128 eq_brasl = gv_eq32(cleared_i16, brasl_mask);
|
||||
const v128 eq_br = gv_eq32(cleared_i16, v128::from32p(0x64u << 23));
|
||||
|
|
@ -5376,7 +5377,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
const usz block_tail = duplicate_positions[it_begin - it_tail];
|
||||
|
||||
// Check if the distance is precisely two times from the end
|
||||
if (reg_state_it.size() - block_start != utils::rol64(reg_state_it.size() - block_tail, 1))
|
||||
if (reg_state_it.size() - block_start != rx::rol64(reg_state_it.size() - block_tail, 1))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
|
@ -7143,7 +7144,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
|||
v_reg2 = 3,
|
||||
};
|
||||
|
||||
for (auto it = infos.lower_bound(utils::sub_saturate<u32>(pattern.put_pc, 512)); it != infos.end() && it->first < pattern.put_pc + 512; it++)
|
||||
for (auto it = infos.lower_bound(rx::sub_saturate<u32>(pattern.put_pc, 512)); it != infos.end() && it->first < pattern.put_pc + 512; it++)
|
||||
{
|
||||
for (auto& state : it->second->end_reg_state)
|
||||
{
|
||||
|
|
@ -7622,7 +7623,7 @@ struct spu_llvm
|
|||
// Notify all before queue runs out if there is considerable excess
|
||||
// Optimized that: if there are many workers, it acts soon
|
||||
// If there are only a few workers, it postpones notifications until there is some more workload
|
||||
if (notify_compile_count && std::min<u32>(7, utils::aligned_div<u32>(worker_count * 2, 3) + 2) <= compile_pending)
|
||||
if (notify_compile_count && std::min<u32>(7, rx::aligned_div<u32>(worker_count * 2, 3) + 2) <= compile_pending)
|
||||
{
|
||||
for (usz i = 0; i < worker_count; i++)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
#include "Emu/Cell/SPUAnalyser.h"
|
||||
#include "Emu/system_config.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "util/v128.hpp"
|
||||
#include "util/simd.hpp"
|
||||
#include "util/sysinfo.hpp"
|
||||
|
|
@ -289,7 +289,7 @@ bool ROT(spu_thread& spu, spu_opcode_t op)
|
|||
|
||||
for (u32 i = 0; i < 4; i++)
|
||||
{
|
||||
spu.gpr[op.rt]._u32[i] = utils::rol32(a._u32[i], b._u32[i]);
|
||||
spu.gpr[op.rt]._u32[i] = rx::rol32(a._u32[i], b._u32[i]);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
@ -344,7 +344,7 @@ bool ROTH(spu_thread& spu, spu_opcode_t op)
|
|||
|
||||
for (u32 i = 0; i < 8; i++)
|
||||
{
|
||||
spu.gpr[op.rt]._u16[i] = utils::rol16(a._u16[i], b._u16[i]);
|
||||
spu.gpr[op.rt]._u16[i] = rx::rol16(a._u16[i], b._u16[i]);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1215,7 +1215,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
|||
rsx::reservation_lock rsx_lock(raddr, 128);
|
||||
|
||||
// Touch memory
|
||||
utils::trigger_write_page_fault(vm::base(dest ^ (4096 / 2)));
|
||||
rx::trigger_write_page_fault(vm::base(dest ^ (4096 / 2)));
|
||||
|
||||
auto [old_res, ok] = res.fetch_op([&](u64& rval)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
#include "rx/align.hpp"
|
||||
#include "stdafx.h"
|
||||
#include "util/JIT.h"
|
||||
#include "util/date_time.h"
|
||||
|
|
@ -31,7 +32,7 @@
|
|||
#include <shared_mutex>
|
||||
#include <span>
|
||||
#include "util/vm.hpp"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "util/v128.hpp"
|
||||
#include "util/simd.hpp"
|
||||
#include "util/sysinfo.hpp"
|
||||
|
|
@ -448,7 +449,7 @@ mwaitx_func static void __mwaitx(u32 cycles, u32 cstate, const void* cline, cons
|
|||
// First bit indicates cstate, 0x0 for C.02 state (lower power) or 0x1 for C.01 state (higher power)
|
||||
waitpkg_func static void __tpause(u32 cycles, u32 cstate)
|
||||
{
|
||||
const u64 tsc = utils::get_tsc() + cycles;
|
||||
const u64 tsc = rx::get_tsc() + cycles;
|
||||
_tpause(cstate, tsc);
|
||||
}
|
||||
#endif
|
||||
|
|
@ -522,7 +523,7 @@ namespace spu
|
|||
{
|
||||
// Slight pause if function is overburdened
|
||||
const auto count = atomic_instruction_table[pc_offset].observe() * 100ull;
|
||||
busy_wait(count);
|
||||
rx::busy_wait(count);
|
||||
}
|
||||
|
||||
ensure(!spu.check_state());
|
||||
|
|
@ -1774,7 +1775,7 @@ void spu_thread::cpu_return()
|
|||
// Wait for all threads to have error codes if exited by sys_spu_thread_exit
|
||||
for (u32 status; !thread->exit_status.try_read(status) || status != thread->last_exit_status;)
|
||||
{
|
||||
utils::pause();
|
||||
rx::pause();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -2307,60 +2308,6 @@ void spu_thread::push_snr(u32 number, u32 value)
|
|||
const u32 event_bit = SPU_EVENT_S1 >> (number & 1);
|
||||
const bool bitor_bit = !!((snr_config >> number) & 1);
|
||||
|
||||
// Redundant, g_use_rtm is checked inside tx_start now.
|
||||
if (g_use_rtm && false)
|
||||
{
|
||||
bool channel_notify = false;
|
||||
bool thread_notify = false;
|
||||
|
||||
const bool ok = utils::tx_start([&]
|
||||
{
|
||||
channel_notify = (channel->data.raw() == spu_channel::bit_wait);
|
||||
thread_notify = (channel->data.raw() & spu_channel::bit_count) == 0;
|
||||
|
||||
if (channel_notify)
|
||||
{
|
||||
ensure(channel->jostling_value.raw() == spu_channel::bit_wait);
|
||||
channel->jostling_value.raw() = value;
|
||||
channel->data.raw() = 0;
|
||||
}
|
||||
else if (bitor_bit)
|
||||
{
|
||||
channel->data.raw() &= ~spu_channel::bit_wait;
|
||||
channel->data.raw() |= spu_channel::bit_count | value;
|
||||
}
|
||||
else
|
||||
{
|
||||
channel->data.raw() = spu_channel::bit_count | value;
|
||||
}
|
||||
|
||||
if (thread_notify)
|
||||
{
|
||||
ch_events.raw().events |= event_bit;
|
||||
|
||||
if (ch_events.raw().mask & event_bit)
|
||||
{
|
||||
ch_events.raw().count = 1;
|
||||
thread_notify = ch_events.raw().waiting != 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
thread_notify = false;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
if (ok)
|
||||
{
|
||||
if (channel_notify)
|
||||
channel->data.notify_one();
|
||||
if (thread_notify)
|
||||
this->notify();
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Lock event channel in case it needs event notification
|
||||
ch_events.atomic_op([](ch_events_t& ev)
|
||||
{
|
||||
|
|
@ -2527,7 +2474,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
|
|||
range_lock = _this->range_lock;
|
||||
}
|
||||
|
||||
utils::prefetch_write(range_lock);
|
||||
rx::prefetch_write(range_lock);
|
||||
|
||||
for (u32 size = args.size, size0; is_get; size -= size0, dst += size0, src += size0, eal += size0)
|
||||
{
|
||||
|
|
@ -2541,7 +2488,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
|
|||
}
|
||||
else if (++i < 25) [[likely]]
|
||||
{
|
||||
busy_wait(300);
|
||||
rx::busy_wait(300);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -2706,7 +2653,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
|
|||
|
||||
if (true || ++i < 10)
|
||||
{
|
||||
busy_wait(500);
|
||||
rx::busy_wait(500);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -2947,7 +2894,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
|
|||
}
|
||||
|
||||
u32 range_addr = eal & -128;
|
||||
u32 range_end = utils::align(eal + size, 128);
|
||||
u32 range_end = rx::alignUp(eal + size, 128);
|
||||
|
||||
// Handle the case of crossing 64K page borders (TODO: maybe split in 4K fragments?)
|
||||
if (range_addr >> 16 != (range_end - 1) >> 16)
|
||||
|
|
@ -3131,7 +3078,7 @@ plain_access:
|
|||
|
||||
bool spu_thread::do_dma_check(const spu_mfc_cmd& args)
|
||||
{
|
||||
const u32 mask = utils::rol32(1, args.tag);
|
||||
const u32 mask = rx::rol32(1, args.tag);
|
||||
|
||||
if (mfc_barrier & mask || (args.cmd & (MFC_BARRIER_MASK | MFC_FENCE_MASK) && mfc_fence & mask)) [[unlikely]]
|
||||
{
|
||||
|
|
@ -3147,13 +3094,13 @@ bool spu_thread::do_dma_check(const spu_mfc_cmd& args)
|
|||
if ((mfc_queue[i].cmd & ~0xc) == MFC_BARRIER_CMD)
|
||||
{
|
||||
mfc_barrier |= -1;
|
||||
mfc_fence |= utils::rol32(1, mfc_queue[i].tag);
|
||||
mfc_fence |= rx::rol32(1, mfc_queue[i].tag);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (true)
|
||||
{
|
||||
const u32 _mask = utils::rol32(1u, mfc_queue[i].tag);
|
||||
const u32 _mask = rx::rol32(1u, mfc_queue[i].tag);
|
||||
|
||||
// A command with barrier hard blocks that tag until it's been dealt with
|
||||
if (mfc_queue[i].cmd & MFC_BARRIER_MASK)
|
||||
|
|
@ -3258,7 +3205,7 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
|
|||
u8* dst = this->ls + arg_lsa;
|
||||
|
||||
// Assume success, prepare the next elements
|
||||
arg_lsa += fetch_size * utils::align<u32>(s_size, 16);
|
||||
arg_lsa += fetch_size * rx::alignUp<u32>(s_size, 16);
|
||||
item_ptr += fetch_size;
|
||||
arg_size -= fetch_size * 8;
|
||||
|
||||
|
|
@ -3269,7 +3216,7 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
|
|||
#define MOV_T(type, index, _ea) \
|
||||
{ \
|
||||
const usz ea = _ea; \
|
||||
*reinterpret_cast<type*>(dst + index * utils::align<u32>(sizeof(type), 16) + ea % (sizeof(type) < 16 ? 16 : 1)) = *reinterpret_cast<const type*>(src + ea); \
|
||||
*reinterpret_cast<type*>(dst + index * rx::alignUp<u32>(sizeof(type), 16) + ea % (sizeof(type) < 16 ? 16 : 1)) = *reinterpret_cast<const type*>(src + ea); \
|
||||
} \
|
||||
void()
|
||||
#define MOV_128(index, ea) mov_rdata(*reinterpret_cast<decltype(rdata)*>(dst + index * _128), *reinterpret_cast<const decltype(rdata)*>(src + (ea)))
|
||||
|
|
@ -3522,7 +3469,7 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
|
|||
#undef MOV_T
|
||||
#undef MOV_128
|
||||
// Optimization miss, revert changes
|
||||
arg_lsa -= fetch_size * utils::align<u32>(s_size, 16);
|
||||
arg_lsa -= fetch_size * rx::alignUp<u32>(s_size, 16);
|
||||
item_ptr -= fetch_size;
|
||||
arg_size += fetch_size * 8;
|
||||
}
|
||||
|
|
@ -3604,7 +3551,7 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
|
|||
}
|
||||
}
|
||||
|
||||
arg_lsa += utils::align<u32>(size, 16);
|
||||
arg_lsa += rx::alignUp<u32>(size, 16);
|
||||
}
|
||||
// Avoid inlining huge transfers because it intentionally drops range lock unlock
|
||||
else if (optimization_compatible == MFC_PUT_CMD && ((addr >> 28 == rsx::constants::local_mem_base >> 28) || (addr < RAW_SPU_BASE_ADDR && size - 1 <= 0x400 - 1 && (addr % 0x10000 + (size - 1)) < 0x10000)))
|
||||
|
|
@ -3615,7 +3562,7 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
|
|||
|
||||
if (!g_use_rtm)
|
||||
{
|
||||
vm::range_lock(range_lock, addr & -128, utils::align<u32>(addr + size, 128) - (addr & -128));
|
||||
vm::range_lock(range_lock, addr & -128, rx::alignUp<u32>(addr + size, 128) - (addr & -128));
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
@ -3690,7 +3637,7 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
|
|||
}
|
||||
}
|
||||
|
||||
arg_lsa += utils::align<u32>(size, 16);
|
||||
arg_lsa += rx::alignUp<u32>(size, 16);
|
||||
}
|
||||
else if (size)
|
||||
{
|
||||
|
|
@ -3703,7 +3650,7 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
|
|||
transfer.lsa = arg_lsa | (addr & 0xf);
|
||||
transfer.size = size;
|
||||
|
||||
arg_lsa += utils::align<u32>(size, 16);
|
||||
arg_lsa += rx::alignUp<u32>(size, 16);
|
||||
do_dma_transfer(this, transfer, ls);
|
||||
}
|
||||
|
||||
|
|
@ -3721,14 +3668,14 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
|
|||
{
|
||||
range_lock->release(0);
|
||||
|
||||
ch_stall_mask |= utils::rol32(1, args.tag);
|
||||
ch_stall_mask |= rx::rol32(1, args.tag);
|
||||
|
||||
if (!ch_stall_stat.get_count())
|
||||
{
|
||||
set_events(SPU_EVENT_SN);
|
||||
}
|
||||
|
||||
ch_stall_stat.set_value(utils::rol32(1, args.tag) | ch_stall_stat.get_value());
|
||||
ch_stall_stat.set_value(rx::rol32(1, args.tag) | ch_stall_stat.get_value());
|
||||
|
||||
args.tag |= 0x80; // Set stalled status
|
||||
args.eal = ::narrow<u32>(reinterpret_cast<const u8*>(item_ptr) - this->ls);
|
||||
|
|
@ -3853,7 +3800,7 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
|
|||
return false;
|
||||
});
|
||||
|
||||
const u64 count2 = utils::get_tsc() - perf2.get();
|
||||
const u64 count2 = rx::get_tsc() - perf2.get();
|
||||
|
||||
if (count2 > 20000 && g_cfg.core.perf_report) [[unlikely]]
|
||||
{
|
||||
|
|
@ -3881,11 +3828,11 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
|
|||
return false;
|
||||
}
|
||||
|
||||
utils::prefetch_read(rdata);
|
||||
utils::prefetch_read(rdata + 64);
|
||||
rx::prefetch_read(rdata);
|
||||
rx::prefetch_read(rdata + 64);
|
||||
last_faddr = addr;
|
||||
last_ftime = res.load() & -128;
|
||||
last_ftsc = utils::get_tsc();
|
||||
last_ftsc = rx::get_tsc();
|
||||
return false;
|
||||
}
|
||||
default:
|
||||
|
|
@ -3973,7 +3920,7 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
|
|||
|
||||
if (!vm::check_addr(addr, vm::page_writable))
|
||||
{
|
||||
utils::trigger_write_page_fault(vm::base(addr));
|
||||
rx::trigger_write_page_fault(vm::base(addr));
|
||||
}
|
||||
|
||||
raddr = 0;
|
||||
|
|
@ -4036,7 +3983,7 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
|
|||
}
|
||||
else if (k < 15)
|
||||
{
|
||||
busy_wait(500);
|
||||
rx::busy_wait(500);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -4053,7 +4000,7 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
|
|||
}
|
||||
else if (j < 15)
|
||||
{
|
||||
busy_wait(500);
|
||||
rx::busy_wait(500);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -4075,7 +4022,7 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
|
|||
else if (!g_use_rtm)
|
||||
{
|
||||
// Provoke page fault
|
||||
utils::trigger_write_page_fault(vm::base(addr));
|
||||
rx::trigger_write_page_fault(vm::base(addr));
|
||||
|
||||
// Hard lock
|
||||
auto spu = cpu ? cpu->try_get<spu_thread>() : nullptr;
|
||||
|
|
@ -4102,7 +4049,7 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
|
|||
});
|
||||
|
||||
vm::reservation_acquire(addr) += 32;
|
||||
result = utils::get_tsc() - perf0.get();
|
||||
result = rx::get_tsc() - perf0.get();
|
||||
}
|
||||
|
||||
if (result > 20000 && g_cfg.core.perf_report) [[unlikely]]
|
||||
|
|
@ -4150,7 +4097,7 @@ bool spu_thread::do_mfc(bool can_escape, bool must_finish)
|
|||
auto process_command = [&](spu_mfc_cmd& args)
|
||||
{
|
||||
// Select tag bit in the tag mask or the stall mask
|
||||
const u32 mask = utils::rol32(1, args.tag);
|
||||
const u32 mask = rx::rol32(1, args.tag);
|
||||
|
||||
if ((args.cmd & ~0xc) == MFC_BARRIER_CMD)
|
||||
{
|
||||
|
|
@ -4240,7 +4187,7 @@ bool spu_thread::do_mfc(bool can_escape, bool must_finish)
|
|||
{
|
||||
// Get commands' execution mask
|
||||
// Mask bits are always set when mfc_transfers_shuffling is 0
|
||||
return static_cast<u16>((0 - (1u << std::min<u32>(g_cfg.core.mfc_transfers_shuffling, size))) | utils::get_tsc());
|
||||
return static_cast<u16>((0 - (1u << std::min<u32>(g_cfg.core.mfc_transfers_shuffling, size))) | rx::get_tsc());
|
||||
};
|
||||
|
||||
// Process enqueued commands
|
||||
|
|
@ -4733,7 +4680,7 @@ bool spu_thread::process_mfc_cmd()
|
|||
else
|
||||
#endif
|
||||
{
|
||||
busy_wait(300);
|
||||
rx::busy_wait(300);
|
||||
}
|
||||
|
||||
if (getllar_spin_count == 3)
|
||||
|
|
@ -4875,7 +4822,7 @@ bool spu_thread::process_mfc_cmd()
|
|||
if (i < 24) [[likely]]
|
||||
{
|
||||
i++;
|
||||
busy_wait(300);
|
||||
rx::busy_wait(300);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -5159,7 +5106,7 @@ bool spu_thread::process_mfc_cmd()
|
|||
std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.lsa & 0x3ff80), 128);
|
||||
}
|
||||
|
||||
const u32 mask = utils::rol32(1, ch_mfc_cmd.tag);
|
||||
const u32 mask = rx::rol32(1, ch_mfc_cmd.tag);
|
||||
|
||||
if ((mfc_barrier | mfc_fence) & mask) [[unlikely]]
|
||||
{
|
||||
|
|
@ -5214,11 +5161,11 @@ bool spu_thread::process_mfc_cmd()
|
|||
}
|
||||
|
||||
mfc_queue[mfc_size++] = ch_mfc_cmd;
|
||||
mfc_fence |= utils::rol32(1, ch_mfc_cmd.tag);
|
||||
mfc_fence |= rx::rol32(1, ch_mfc_cmd.tag);
|
||||
|
||||
if (ch_mfc_cmd.cmd & MFC_BARRIER_MASK)
|
||||
{
|
||||
mfc_barrier |= utils::rol32(1, ch_mfc_cmd.tag);
|
||||
mfc_barrier |= rx::rol32(1, ch_mfc_cmd.tag);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
@ -5267,11 +5214,11 @@ bool spu_thread::process_mfc_cmd()
|
|||
}
|
||||
|
||||
mfc_size++;
|
||||
mfc_fence |= utils::rol32(1, cmd.tag);
|
||||
mfc_fence |= rx::rol32(1, cmd.tag);
|
||||
|
||||
if (cmd.cmd & MFC_BARRIER_MASK)
|
||||
{
|
||||
mfc_barrier |= utils::rol32(1, cmd.tag);
|
||||
mfc_barrier |= rx::rol32(1, cmd.tag);
|
||||
}
|
||||
|
||||
if (check_mfc_interrupts(pc + 4))
|
||||
|
|
@ -5297,7 +5244,7 @@ bool spu_thread::process_mfc_cmd()
|
|||
{
|
||||
mfc_queue[mfc_size++] = ch_mfc_cmd;
|
||||
mfc_barrier |= -1;
|
||||
mfc_fence |= utils::rol32(1, ch_mfc_cmd.tag);
|
||||
mfc_fence |= rx::rol32(1, ch_mfc_cmd.tag);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
@ -5592,7 +5539,7 @@ retry:
|
|||
|
||||
if (reading && res.locks && mask_hint & (SPU_EVENT_S1 | SPU_EVENT_S2))
|
||||
{
|
||||
busy_wait(100);
|
||||
rx::busy_wait(100);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
|
|
@ -5899,7 +5846,7 @@ s64 spu_thread::get_ch_value(u32 ch)
|
|||
}
|
||||
}
|
||||
|
||||
const usz seed = (utils::get_tsc() >> 8) % 100;
|
||||
const usz seed = (rx::get_tsc() >> 8) % 100;
|
||||
|
||||
#ifdef __linux__
|
||||
const bool reservation_busy_waiting = false;
|
||||
|
|
@ -5998,7 +5945,7 @@ s64 spu_thread::get_ch_value(u32 ch)
|
|||
{
|
||||
if (u32 work_count = g_spu_work_count)
|
||||
{
|
||||
const u32 true_free = utils::sub_saturate<u32>(utils::get_thread_count(), 10);
|
||||
const u32 true_free = rx::sub_saturate<u32>(utils::get_thread_count(), 10);
|
||||
|
||||
if (work_count > true_free)
|
||||
{
|
||||
|
|
@ -6123,7 +6070,7 @@ s64 spu_thread::get_ch_value(u32 ch)
|
|||
}
|
||||
else
|
||||
{
|
||||
busy_wait();
|
||||
rx::busy_wait();
|
||||
}
|
||||
|
||||
continue;
|
||||
|
|
@ -6490,7 +6437,7 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
|
|||
value &= 0x1f;
|
||||
|
||||
// Reset stall status for specified tag
|
||||
const u32 tag_mask = utils::rol32(1, value);
|
||||
const u32 tag_mask = rx::rol32(1, value);
|
||||
|
||||
if (ch_stall_mask & tag_mask)
|
||||
{
|
||||
|
|
@ -7320,7 +7267,7 @@ bool spu_thread::try_load_debug_capture()
|
|||
void spu_thread::wakeup_delay(u32 div) const
|
||||
{
|
||||
if (g_cfg.core.spu_wakeup_delay_mask & (1u << index))
|
||||
thread_ctrl::wait_for_accurate(utils::aligned_div(+g_cfg.core.spu_wakeup_delay, div));
|
||||
thread_ctrl::wait_for_accurate(rx::aligned_div(+g_cfg.core.spu_wakeup_delay, div));
|
||||
}
|
||||
|
||||
spu_function_logger::spu_function_logger(spu_thread& spu, const char* func) noexcept
|
||||
|
|
@ -7397,7 +7344,7 @@ s64 spu_channel::pop_wait(cpu_thread& spu, bool pop)
|
|||
|
||||
for (int i = 0; i < 10; i++)
|
||||
{
|
||||
busy_wait();
|
||||
rx::busy_wait();
|
||||
|
||||
if (!(data & bit_wait))
|
||||
{
|
||||
|
|
@ -7473,7 +7420,7 @@ bool spu_channel::push_wait(cpu_thread& spu, u32 value, bool push)
|
|||
return true;
|
||||
}
|
||||
|
||||
busy_wait();
|
||||
rx::busy_wait();
|
||||
state = data;
|
||||
}
|
||||
|
||||
|
|
@ -7528,7 +7475,7 @@ std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu, bool pop_value)
|
|||
|
||||
for (int i = 0; i < 10; i++)
|
||||
{
|
||||
busy_wait();
|
||||
rx::busy_wait();
|
||||
|
||||
if (!atomic_storage<u8>::load(values.raw().waiting))
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
#include "stdafx.h"
|
||||
|
||||
#include "rx/align.hpp"
|
||||
#include "vm_locking.h"
|
||||
#include "vm_ptr.h"
|
||||
#include "vm_ref.h"
|
||||
|
|
@ -14,7 +16,8 @@
|
|||
#include <span>
|
||||
|
||||
#include "util/vm.hpp"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "util/simd.hpp"
|
||||
#include "util/serialization.hpp"
|
||||
|
||||
|
|
@ -245,7 +248,7 @@ namespace vm
|
|||
|
||||
// Try triggering a page fault (write)
|
||||
// TODO: Read memory if needed
|
||||
utils::trigger_write_page_fault(vm::base(test / 4096 == begin / 4096 ? begin : test));
|
||||
rx::trigger_write_page_fault(vm::base(test / 4096 == begin / 4096 ? begin : test));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
|
@ -258,7 +261,7 @@ namespace vm
|
|||
perf0.restart();
|
||||
}
|
||||
|
||||
busy_wait(200);
|
||||
rx::busy_wait(200);
|
||||
|
||||
if (i >= 2 && !_cpu)
|
||||
{
|
||||
|
|
@ -339,9 +342,9 @@ namespace vm
|
|||
auto range_lock = &*std::prev(std::end(vm::g_range_lock_set));
|
||||
*range_lock = addr | u64{size} << 32 | flags;
|
||||
|
||||
utils::prefetch_read(g_range_lock_set + 0);
|
||||
utils::prefetch_read(g_range_lock_set + 2);
|
||||
utils::prefetch_read(g_range_lock_set + 4);
|
||||
rx::prefetch_read(g_range_lock_set + 0);
|
||||
rx::prefetch_read(g_range_lock_set + 2);
|
||||
rx::prefetch_read(g_range_lock_set + 4);
|
||||
|
||||
const auto range = utils::address_range::start_length(addr, size);
|
||||
|
||||
|
|
@ -364,7 +367,7 @@ namespace vm
|
|||
break;
|
||||
}
|
||||
|
||||
utils::pause();
|
||||
rx::pause();
|
||||
}
|
||||
|
||||
return range_lock;
|
||||
|
|
@ -407,7 +410,7 @@ namespace vm
|
|||
}
|
||||
|
||||
if (i < 100)
|
||||
busy_wait(200);
|
||||
rx::busy_wait(200);
|
||||
else
|
||||
std::this_thread::yield();
|
||||
|
||||
|
|
@ -516,12 +519,12 @@ namespace vm
|
|||
if (to_prepare_memory)
|
||||
{
|
||||
// We have some spare time, prepare cache lines (todo: reservation tests here)
|
||||
utils::prefetch_write(vm::get_super_ptr(addr));
|
||||
utils::prefetch_write(vm::get_super_ptr(addr) + 64);
|
||||
rx::prefetch_write(vm::get_super_ptr(addr));
|
||||
rx::prefetch_write(vm::get_super_ptr(addr) + 64);
|
||||
to_prepare_memory = false;
|
||||
}
|
||||
|
||||
busy_wait(200);
|
||||
rx::busy_wait(200);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -552,9 +555,9 @@ namespace vm
|
|||
addr1 = static_cast<u16>(addr) | is_shared;
|
||||
}
|
||||
|
||||
utils::prefetch_read(g_range_lock_set + 0);
|
||||
utils::prefetch_read(g_range_lock_set + 2);
|
||||
utils::prefetch_read(g_range_lock_set + 4);
|
||||
rx::prefetch_read(g_range_lock_set + 0);
|
||||
rx::prefetch_read(g_range_lock_set + 2);
|
||||
rx::prefetch_read(g_range_lock_set + 4);
|
||||
|
||||
u64 to_clear = get_range_lock_bits(false);
|
||||
|
||||
|
|
@ -568,7 +571,7 @@ namespace vm
|
|||
for (u64 hi = addr2 >> 16, max = (addr2 + size2 - 1) >> 16; hi <= max; hi++)
|
||||
{
|
||||
u64 addr3 = addr2;
|
||||
u64 size3 = std::min<u64>(addr2 + size2, utils::align(addr2, 0x10000)) - addr2;
|
||||
u64 size3 = std::min<u64>(addr2 + size2, rx::alignUp(addr2, 0x10000)) - addr2;
|
||||
|
||||
if (u64 is_shared = g_shmem[hi]) [[unlikely]]
|
||||
{
|
||||
|
|
@ -594,12 +597,12 @@ namespace vm
|
|||
|
||||
if (to_prepare_memory)
|
||||
{
|
||||
utils::prefetch_write(vm::get_super_ptr(addr));
|
||||
utils::prefetch_write(vm::get_super_ptr(addr) + 64);
|
||||
rx::prefetch_write(vm::get_super_ptr(addr));
|
||||
rx::prefetch_write(vm::get_super_ptr(addr) + 64);
|
||||
to_prepare_memory = false;
|
||||
}
|
||||
|
||||
utils::pause();
|
||||
rx::pause();
|
||||
}
|
||||
|
||||
for (auto lock = g_locks.cbegin(), end = lock + g_cfg.core.ppu_threads; lock != end; lock++)
|
||||
|
|
@ -610,12 +613,12 @@ namespace vm
|
|||
{
|
||||
if (to_prepare_memory)
|
||||
{
|
||||
utils::prefetch_write(vm::get_super_ptr(addr));
|
||||
utils::prefetch_write(vm::get_super_ptr(addr) + 64);
|
||||
rx::prefetch_write(vm::get_super_ptr(addr));
|
||||
rx::prefetch_write(vm::get_super_ptr(addr) + 64);
|
||||
to_prepare_memory = false;
|
||||
}
|
||||
|
||||
utils::pause();
|
||||
rx::pause();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -642,7 +645,7 @@ namespace vm
|
|||
}
|
||||
else if (i < 15)
|
||||
{
|
||||
busy_wait(500);
|
||||
rx::busy_wait(500);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -683,7 +686,7 @@ namespace vm
|
|||
}
|
||||
else if (i < 15)
|
||||
{
|
||||
busy_wait(500);
|
||||
rx::busy_wait(500);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -1078,13 +1081,13 @@ namespace vm
|
|||
|
||||
if (state & page_1m_size)
|
||||
{
|
||||
i = utils::align(i + 1, 0x100000 / 4096);
|
||||
i = rx::alignUp(i + 1, 0x100000 / 4096);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (state & page_64k_size)
|
||||
{
|
||||
i = utils::align(i + 1, 0x10000 / 4096);
|
||||
i = rx::alignUp(i + 1, 0x10000 / 4096);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -1359,7 +1362,7 @@ namespace vm
|
|||
const u32 min_page_size = flags & page_size_4k ? 0x1000 : 0x10000;
|
||||
|
||||
// Align to minimal page size
|
||||
const u32 size = utils::align(orig_size, min_page_size) + (flags & stack_guarded ? 0x2000 : 0);
|
||||
const u32 size = rx::alignUp(orig_size, min_page_size) + (flags & stack_guarded ? 0x2000 : 0);
|
||||
|
||||
// Check alignment (it's page allocation, so passing small values there is just silly)
|
||||
if (align < min_page_size || align != (0x80000000u >> std::countl_zero(align)))
|
||||
|
|
@ -1387,7 +1390,7 @@ namespace vm
|
|||
|
||||
const u32 max = (this->addr + this->size - size) & (0 - align);
|
||||
|
||||
u32 addr = utils::align(this->addr, align);
|
||||
u32 addr = rx::alignUp(this->addr, align);
|
||||
|
||||
if (this->addr > max || addr > max)
|
||||
{
|
||||
|
|
@ -1434,7 +1437,7 @@ namespace vm
|
|||
const u32 size0 = orig_size + addr % min_page_size;
|
||||
|
||||
// Align to minimal page size
|
||||
const u32 size = utils::align(size0, min_page_size);
|
||||
const u32 size = rx::alignUp(size0, min_page_size);
|
||||
|
||||
// Return if addr or size is invalid
|
||||
// If shared memory is provided, addr/size must be aligned
|
||||
|
|
@ -1870,7 +1873,7 @@ namespace vm
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
for (u32 addr = utils::align<u32>(0x10000000, align);; addr += align)
|
||||
for (u32 addr = rx::alignUp<u32>(0x10000000, align);; addr += align)
|
||||
{
|
||||
if (_test_map(addr, size))
|
||||
{
|
||||
|
|
@ -1950,7 +1953,7 @@ namespace vm
|
|||
vm::writer_lock lock;
|
||||
|
||||
// Align to minimal page size
|
||||
const u32 size = utils::align(orig_size, 0x10000);
|
||||
const u32 size = rx::alignUp(orig_size, 0x10000);
|
||||
|
||||
// Check alignment
|
||||
if (align < 0x10000 || align != (0x80000000u >> std::countl_zero(align)))
|
||||
|
|
@ -2178,7 +2181,7 @@ namespace vm
|
|||
// Wait a bit before accessing global lock
|
||||
range_lock->release(0);
|
||||
|
||||
busy_wait(200);
|
||||
rx::busy_wait(200);
|
||||
}
|
||||
|
||||
const bool result = try_access_internal(begin, ptr, size, is_write);
|
||||
|
|
@ -2399,7 +2402,7 @@ namespace vm
|
|||
// Prevent overflow
|
||||
const u32 size = 0 - max_size < addr ? (0 - addr) : max_size;
|
||||
|
||||
for (u32 i = addr, end = utils::align(addr + size, 4096) - 1; i <= end;)
|
||||
for (u32 i = addr, end = rx::alignUp(addr + size, 4096) - 1; i <= end;)
|
||||
{
|
||||
if (check_pages && !vm::check_addr(i, vm::page_readable))
|
||||
{
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
#include "vm.h"
|
||||
#include "vm_locking.h"
|
||||
#include "util/atomic.hpp"
|
||||
#include "util/tsc.hpp"
|
||||
#include "rx/tsc.hpp"
|
||||
#include <functional>
|
||||
|
||||
extern bool g_use_rtm;
|
||||
|
|
@ -209,7 +209,7 @@ namespace vm
|
|||
unsigned status = -1;
|
||||
u64 _old = 0;
|
||||
|
||||
auto stamp0 = utils::get_tsc(), stamp1 = stamp0, stamp2 = stamp0;
|
||||
auto stamp0 = rx::get_tsc(), stamp1 = stamp0, stamp2 = stamp0;
|
||||
|
||||
#ifndef _MSC_VER
|
||||
__asm__ goto("xbegin %l[stage2];" ::: "memory" : stage2);
|
||||
|
|
@ -271,16 +271,16 @@ namespace vm
|
|||
#ifndef _MSC_VER
|
||||
__asm__ volatile("mov %%eax, %0;" : "=r"(status)::"memory");
|
||||
#endif
|
||||
stamp1 = utils::get_tsc();
|
||||
stamp1 = rx::get_tsc();
|
||||
|
||||
// Stage 2: try to lock reservation first
|
||||
_old = res.fetch_add(1);
|
||||
|
||||
// Compute stamps excluding memory touch
|
||||
stamp2 = utils::get_tsc() - (stamp1 - stamp0);
|
||||
stamp2 = rx::get_tsc() - (stamp1 - stamp0);
|
||||
|
||||
// Start lightened transaction
|
||||
for (; !(_old & vm::rsrv_unique_lock) && stamp2 - stamp0 <= g_rtm_tx_limit2; stamp2 = utils::get_tsc())
|
||||
for (; !(_old & vm::rsrv_unique_lock) && stamp2 - stamp0 <= g_rtm_tx_limit2; stamp2 = rx::get_tsc())
|
||||
{
|
||||
if (cpu.has_pause_flag())
|
||||
{
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
|
||||
#include "Emu/Memory/vm_ptr.h"
|
||||
#include "util/mutex.h"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "util/logs.hpp"
|
||||
|
||||
LOG_CHANNEL(np_mem_allocator);
|
||||
|
|
@ -52,7 +53,7 @@ namespace np
|
|||
}
|
||||
|
||||
// Align allocs
|
||||
const u32 alloc_size = utils::align(size, 4);
|
||||
const u32 alloc_size = rx::alignUp(size, 4);
|
||||
if (alloc_size > m_avail)
|
||||
{
|
||||
np_mem_allocator.error("Not enough memory available in NP pool!");
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
#pragma once
|
||||
|
||||
#include "Emu/Memory/vm_ptr.h"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
|
||||
namespace np
|
||||
{
|
||||
|
|
@ -9,7 +10,7 @@ namespace np
|
|||
{
|
||||
public:
|
||||
event_data(u32 vm_addr, u32 initial_size, u32 max_size)
|
||||
: m_max_size(max_size), m_cur_size(utils::align(initial_size, 4))
|
||||
: m_max_size(max_size), m_cur_size(rx::alignUp(initial_size, 4))
|
||||
{
|
||||
m_data_ptr.set(vm_addr);
|
||||
}
|
||||
|
|
@ -50,7 +51,7 @@ namespace np
|
|||
template <typename T>
|
||||
T* allocate(u32 size, vm::bptr<T>& dest)
|
||||
{
|
||||
const u32 to_alloc = utils::align(size, 4);
|
||||
const u32 to_alloc = rx::alignUp(size, 4);
|
||||
ensure((m_cur_size + to_alloc) <= m_max_size, "event_data::allocate: size would overflow the allocated buffer!");
|
||||
|
||||
u8* dest_ptr = reinterpret_cast<u8*>(&dest);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#include "stdafx.h"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "np_gui_cache.h"
|
||||
|
||||
LOG_CHANNEL(np_gui_cache);
|
||||
|
|
@ -72,7 +73,7 @@ namespace np
|
|||
|
||||
const auto& room = ::at32(rooms, room_id);
|
||||
|
||||
const u32 room_size = ::narrow<u32>(utils::align(sizeof(SceNpMatchingRoomStatus), 8) + (utils::align(sizeof(SceNpMatchingRoomMember), 8) * room.members.size()));
|
||||
const u32 room_size = ::narrow<u32>(rx::alignUp(sizeof(SceNpMatchingRoomStatus), 8) + (rx::alignUp(sizeof(SceNpMatchingRoomMember), 8) * room.members.size()));
|
||||
|
||||
if (!data)
|
||||
return not_an_error(room_size);
|
||||
|
|
@ -94,12 +95,12 @@ namespace np
|
|||
{
|
||||
if (!cur_member_ptr)
|
||||
{
|
||||
room_status->members = vm::cast(data.addr() + utils::align(sizeof(SceNpMatchingRoomStatus), 8));
|
||||
room_status->members = vm::cast(data.addr() + rx::alignUp(sizeof(SceNpMatchingRoomStatus), 8));
|
||||
cur_member_ptr = room_status->members;
|
||||
}
|
||||
else
|
||||
{
|
||||
cur_member_ptr->next = vm::cast(cur_member_ptr.addr() + utils::align(sizeof(SceNpMatchingRoomMember), 8));
|
||||
cur_member_ptr->next = vm::cast(cur_member_ptr.addr() + rx::alignUp(sizeof(SceNpMatchingRoomMember), 8));
|
||||
cur_member_ptr = cur_member_ptr->next;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,8 @@
|
|||
#include "cellos/sys_memory.h"
|
||||
#include "Emu/RSX/RSXThread.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
|
||||
#include <thread>
|
||||
|
||||
|
|
@ -26,7 +27,7 @@ namespace rsx
|
|||
}
|
||||
|
||||
// User memory + fifo size
|
||||
buffer_size = utils::align<u32>(buffer_size, 0x100000) + 0x10000000;
|
||||
buffer_size = rx::alignUp<u32>(buffer_size, 0x100000) + 0x10000000;
|
||||
// We are not allowed to drain all memory so add a little
|
||||
g_fxo->init<lv2_memory_container>(buffer_size + 0x1000000);
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,8 @@
|
|||
#include "../rsx_utils.h"
|
||||
#include "3rdparty/bcdec/bcdec.hpp"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
|
||||
namespace utils
|
||||
{
|
||||
|
|
@ -661,13 +662,13 @@ namespace
|
|||
}
|
||||
else if constexpr (block_edge_in_texel == 4)
|
||||
{
|
||||
current_subresource_layout.width_in_block = utils::aligned_div(miplevel_width_in_texel, block_edge_in_texel);
|
||||
current_subresource_layout.height_in_block = utils::aligned_div(miplevel_height_in_texel, block_edge_in_texel);
|
||||
current_subresource_layout.width_in_block = rx::aligned_div(miplevel_width_in_texel, block_edge_in_texel);
|
||||
current_subresource_layout.height_in_block = rx::aligned_div(miplevel_height_in_texel, block_edge_in_texel);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Only the width is compressed
|
||||
current_subresource_layout.width_in_block = utils::aligned_div(miplevel_width_in_texel, block_edge_in_texel);
|
||||
current_subresource_layout.width_in_block = rx::aligned_div(miplevel_width_in_texel, block_edge_in_texel);
|
||||
current_subresource_layout.height_in_block = miplevel_height_in_texel;
|
||||
}
|
||||
|
||||
|
|
@ -699,7 +700,7 @@ namespace
|
|||
|
||||
if (!padded_row) // Only swizzled textures obey this restriction
|
||||
{
|
||||
offset_in_src = utils::align(offset_in_src, 128);
|
||||
offset_in_src = rx::alignUp(offset_in_src, 128);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1429,8 +1430,8 @@ namespace rsx
|
|||
usz result = 0;
|
||||
for (u16 i = 0; i < mipmap; ++i)
|
||||
{
|
||||
usz rowPitch = utils::align(block_size_in_byte * width_in_blocks, row_pitch_alignment);
|
||||
result += utils::align(rowPitch * height_in_blocks * depth, mipmap_alignment);
|
||||
usz rowPitch = rx::alignUp(block_size_in_byte * width_in_blocks, row_pitch_alignment);
|
||||
result += rx::alignUp(rowPitch * height_in_blocks * depth, mipmap_alignment);
|
||||
height_in_blocks = std::max<usz>(height_in_blocks / 2, 1);
|
||||
width_in_blocks = std::max<usz>(width_in_blocks / 2, 1);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
#pragma once
|
||||
|
||||
#include "util/StrFmt.h"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
|
||||
/**
|
||||
* Ring buffer memory helper :
|
||||
|
|
@ -20,8 +21,8 @@ protected:
|
|||
template <int Alignment>
|
||||
bool can_alloc(usz size) const
|
||||
{
|
||||
usz alloc_size = utils::align(size, Alignment);
|
||||
usz aligned_put_pos = utils::align(m_put_pos, Alignment);
|
||||
usz alloc_size = rx::alignUp(size, Alignment);
|
||||
usz aligned_put_pos = rx::alignUp(m_put_pos, Alignment);
|
||||
if (aligned_put_pos + alloc_size < m_size)
|
||||
{
|
||||
// range before get
|
||||
|
|
@ -85,8 +86,8 @@ public:
|
|||
template <int Alignment>
|
||||
usz alloc(usz size)
|
||||
{
|
||||
const usz alloc_size = utils::align(size, Alignment);
|
||||
const usz aligned_put_pos = utils::align(m_put_pos, Alignment);
|
||||
const usz alloc_size = rx::alignUp(size, Alignment);
|
||||
const usz aligned_put_pos = rx::alignUp(m_put_pos, Alignment);
|
||||
|
||||
if (!can_alloc<Alignment>(size) && !grow(alloc_size))
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
#include "stdafx.h"
|
||||
#include "surface_store.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
|
|
@ -39,20 +40,20 @@ namespace rsx
|
|||
{
|
||||
switch (format)
|
||||
{
|
||||
case surface_color_format::b8: return utils::align(width, 256);
|
||||
case surface_color_format::b8: return rx::alignUp(width, 256);
|
||||
case surface_color_format::g8b8:
|
||||
case surface_color_format::x1r5g5b5_o1r5g5b5:
|
||||
case surface_color_format::x1r5g5b5_z1r5g5b5:
|
||||
case surface_color_format::r5g6b5: return utils::align(width * 2, 256);
|
||||
case surface_color_format::r5g6b5: return rx::alignUp(width * 2, 256);
|
||||
case surface_color_format::a8b8g8r8:
|
||||
case surface_color_format::x8b8g8r8_o8b8g8r8:
|
||||
case surface_color_format::x8b8g8r8_z8b8g8r8:
|
||||
case surface_color_format::x8r8g8b8_o8r8g8b8:
|
||||
case surface_color_format::x8r8g8b8_z8r8g8b8:
|
||||
case surface_color_format::x32:
|
||||
case surface_color_format::a8r8g8b8: return utils::align(width * 4, 256);
|
||||
case surface_color_format::w16z16y16x16: return utils::align(width * 8, 256);
|
||||
case surface_color_format::w32z32y32x32: return utils::align(width * 16, 256);
|
||||
case surface_color_format::a8r8g8b8: return rx::alignUp(width * 4, 256);
|
||||
case surface_color_format::w16z16y16x16: return rx::alignUp(width * 8, 256);
|
||||
case surface_color_format::w32z32y32x32: return rx::alignUp(width * 16, 256);
|
||||
}
|
||||
fmt::throw_exception("Unknown color surface format");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,7 +8,8 @@
|
|||
#include "../rsx_utils.h"
|
||||
#include <list>
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
|
|
@ -806,7 +807,7 @@ namespace rsx
|
|||
continue;
|
||||
}
|
||||
|
||||
num_rows = utils::aligned_div(this_range.length(), rsx_pitch);
|
||||
num_rows = rx::aligned_div(this_range.length(), rsx_pitch);
|
||||
}
|
||||
|
||||
for (u32 row = 0, offset = (this_range.start - range.start), section_len = (this_range.end - range.start + 1);
|
||||
|
|
@ -1186,7 +1187,7 @@ namespace rsx
|
|||
{
|
||||
// Width is calculated in the coordinate-space of the requester; normalize
|
||||
info.src_area.x = (info.src_area.x * required_bpp) / surface_bpp;
|
||||
info.src_area.width = utils::align(width * required_bpp, surface_bpp) / surface_bpp;
|
||||
info.src_area.width = rx::alignUp(width * required_bpp, surface_bpp) / surface_bpp;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
#pragma once
|
||||
|
||||
#include <util/asm.hpp>
|
||||
#include <rx/asm.hpp>
|
||||
#include <util/sysinfo.hpp>
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include "GLCompute.h"
|
||||
#include "GLTexture.h"
|
||||
#include "util/StrUtil.h"
|
||||
#include "rx/align.hpp"
|
||||
|
||||
namespace gl
|
||||
{
|
||||
|
|
@ -196,7 +197,7 @@ namespace gl
|
|||
m_data_length = data_length;
|
||||
|
||||
const auto num_bytes_per_invocation = optimal_group_size * kernel_size * 4;
|
||||
const auto num_bytes_to_process = utils::align(data_length, num_bytes_per_invocation);
|
||||
const auto num_bytes_to_process = rx::alignUp(data_length, num_bytes_per_invocation);
|
||||
const auto num_invocations = num_bytes_to_process / num_bytes_per_invocation;
|
||||
|
||||
if ((num_bytes_to_process + data_offset) > data->size())
|
||||
|
|
@ -364,7 +365,7 @@ namespace gl
|
|||
|
||||
dst->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(2), out_offset, row_pitch * 4 * region.height);
|
||||
|
||||
const int num_invocations = utils::aligned_div(region.width * region.height, optimal_kernel_size * optimal_group_size);
|
||||
const int num_invocations = rx::aligned_div(region.width * region.height, optimal_kernel_size * optimal_group_size);
|
||||
compute_task::run(cmd, num_invocations);
|
||||
}
|
||||
|
||||
|
|
@ -411,7 +412,7 @@ namespace gl
|
|||
|
||||
dst->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(1), out_offset, row_pitch * 4 * region.height);
|
||||
|
||||
const int num_invocations = utils::aligned_div(region.width * region.height, optimal_kernel_size * optimal_group_size);
|
||||
const int num_invocations = rx::aligned_div(region.width * region.height, optimal_kernel_size * optimal_group_size);
|
||||
compute_task::run(cmd, num_invocations);
|
||||
}
|
||||
|
||||
|
|
@ -437,7 +438,7 @@ namespace gl
|
|||
void cs_ssbo_to_color_image::run(gl::command_context& cmd, const buffer* src, const texture_view* dst, const u32 src_offset, const coordu& dst_region, const pixel_buffer_layout& layout)
|
||||
{
|
||||
const u32 bpp = dst->image()->pitch() / dst->image()->width();
|
||||
const u32 row_length = utils::align(dst_region.width * bpp, std::max<int>(layout.alignment, 1)) / bpp;
|
||||
const u32 row_length = rx::alignUp(dst_region.width * bpp, std::max<int>(layout.alignment, 1)) / bpp;
|
||||
|
||||
m_program.uniforms["swap_bytes"] = layout.swap_bytes;
|
||||
m_program.uniforms["src_pitch"] = row_length;
|
||||
|
|
@ -448,7 +449,7 @@ namespace gl
|
|||
src->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), src_offset, row_length * bpp * dst_region.height);
|
||||
glBindImageTexture(GL_COMPUTE_IMAGE_SLOT(0), dst->id(), 0, GL_FALSE, 0, GL_WRITE_ONLY, dst->view_format());
|
||||
|
||||
const int num_invocations = utils::aligned_div(dst_region.width * dst_region.height, optimal_kernel_size * optimal_group_size);
|
||||
const int num_invocations = rx::aligned_div(dst_region.width * dst_region.height, optimal_kernel_size * optimal_group_size);
|
||||
compute_task::run(cmd, num_invocations);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -337,7 +337,7 @@ namespace gl
|
|||
set_parameters(cmd);
|
||||
|
||||
const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size);
|
||||
const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation);
|
||||
const u32 linear_invocations = rx::aligned_div(data_length, num_bytes_per_invocation);
|
||||
compute_task::run(cmd, linear_invocations);
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -12,6 +12,8 @@
|
|||
#include "Emu/RSX/Host/RSXDMAWriter.h"
|
||||
#include "Emu/RSX/NV47/HW/context_accessors.define.h"
|
||||
|
||||
#include "rx/align.hpp"
|
||||
|
||||
[[noreturn]] extern void report_fatal_error(std::string_view _text, bool is_html = false, bool include_help_text = true);
|
||||
|
||||
namespace
|
||||
|
|
@ -895,7 +897,7 @@ void GLGSRender::load_program_env()
|
|||
if (update_fragment_texture_env)
|
||||
m_texture_parameters_buffer->reserve_storage_on_heap(256);
|
||||
if (update_fragment_constants)
|
||||
m_fragment_constants_buffer->reserve_storage_on_heap(utils::align(fragment_constants_size, 256));
|
||||
m_fragment_constants_buffer->reserve_storage_on_heap(rx::alignUp(fragment_constants_size, 256));
|
||||
if (update_transform_constants)
|
||||
m_transform_constants_buffer->reserve_storage_on_heap(8192);
|
||||
if (update_raster_env)
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@
|
|||
#include "util/geometry.h"
|
||||
#include "util/File.h"
|
||||
#include "util/logs.hpp"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
#include "glutils/common.h"
|
||||
// TODO: Include on use
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@
|
|||
#include "../Program/RSXOverlay.h"
|
||||
#include "Emu/Cell/timers.hpp"
|
||||
|
||||
#include "rx/align.hpp"
|
||||
|
||||
namespace gl
|
||||
{
|
||||
// Lame
|
||||
|
|
@ -544,7 +546,7 @@ namespace gl
|
|||
const pixel_buffer_layout& layout)
|
||||
{
|
||||
const u32 bpp = dst->image()->pitch() / dst->image()->width();
|
||||
const u32 row_length = utils::align(dst_region.width * bpp, std::max<int>(layout.alignment, 1)) / bpp;
|
||||
const u32 row_length = rx::alignUp(dst_region.width * bpp, std::max<int>(layout.alignment, 1)) / bpp;
|
||||
|
||||
program_handle.uniforms["src_pitch"] = row_length;
|
||||
program_handle.uniforms["swap_bytes"] = layout.swap_bytes;
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@
|
|||
#include "GLResolveHelper.h"
|
||||
#include "GLTexture.h"
|
||||
|
||||
#include "rx/align.hpp"
|
||||
|
||||
#include <unordered_map>
|
||||
#include <stack>
|
||||
|
||||
|
|
@ -225,8 +227,8 @@ namespace gl
|
|||
multisampled = msaa_image;
|
||||
resolve = resolve_image;
|
||||
|
||||
const u32 invocations_x = utils::align(resolve_image->width(), cs_wave_x) / cs_wave_x;
|
||||
const u32 invocations_y = utils::align(resolve_image->height(), cs_wave_y) / cs_wave_y;
|
||||
const u32 invocations_x = rx::alignUp(resolve_image->width(), cs_wave_x) / cs_wave_x;
|
||||
const u32 invocations_y = rx::alignUp(resolve_image->height(), cs_wave_y) / cs_wave_y;
|
||||
|
||||
compute_task::run(cmd, invocations_x, invocations_y);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,7 +9,8 @@
|
|||
|
||||
#include "../RSXThread.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
namespace gl
|
||||
{
|
||||
|
|
@ -664,7 +665,7 @@ namespace gl
|
|||
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
|
||||
u64 image_linear_size = staging_buffer.size();
|
||||
|
||||
const auto min_required_buffer_size = std::max<u64>(utils::align(image_linear_size * 4, 0x100000), 16 * 0x100000);
|
||||
const auto min_required_buffer_size = std::max<u64>(rx::alignUp(image_linear_size * 4, 0x100000), 16 * 0x100000);
|
||||
|
||||
if (driver_caps.ARB_compute_shader_supported)
|
||||
{
|
||||
|
|
@ -825,7 +826,7 @@ namespace gl
|
|||
}
|
||||
else
|
||||
{
|
||||
const auto aligned_pitch = utils::align<u32>(dst->pitch(), 4);
|
||||
const auto aligned_pitch = rx::alignUp<u32>(dst->pitch(), 4);
|
||||
const u32 texture_data_sz = dst->depth() * dst->height() * aligned_pitch;
|
||||
data_upload_buf.resize(texture_data_sz);
|
||||
}
|
||||
|
|
@ -1002,7 +1003,7 @@ namespace gl
|
|||
|
||||
u32 scratch_offset = 0;
|
||||
const u64 min_storage_requirement = src_mem.image_size_in_bytes + dst_mem.image_size_in_bytes;
|
||||
const u64 min_required_buffer_size = utils::align(min_storage_requirement, 256);
|
||||
const u64 min_required_buffer_size = rx::alignUp(min_storage_requirement, 256);
|
||||
|
||||
if (g_typeless_transfer_buffer.size() >= min_required_buffer_size) [[likely]]
|
||||
{
|
||||
|
|
|
|||
|
|
@ -3,7 +3,8 @@
|
|||
#include "GLTextureCache.h"
|
||||
#include "../Common/BufferUtils.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
namespace gl
|
||||
{
|
||||
|
|
@ -82,7 +83,7 @@ namespace gl
|
|||
}
|
||||
else
|
||||
{
|
||||
const u32 num_rows = utils::align(valid_length, rsx_pitch) / rsx_pitch;
|
||||
const u32 num_rows = rx::alignUp(valid_length, rsx_pitch) / rsx_pitch;
|
||||
u32* data = static_cast<u32*>(dst);
|
||||
for (u32 row = 0; row < num_rows; ++row)
|
||||
{
|
||||
|
|
@ -212,7 +213,7 @@ namespace gl
|
|||
// Dimensions were given in 'dst' space. Work out the real source coordinates
|
||||
const auto src_bpp = slice.src->pitch() / slice.src->width();
|
||||
src_x = (src_x * dst_bpp) / src_bpp;
|
||||
src_w = utils::aligned_div<u16>(src_w * dst_bpp, src_bpp);
|
||||
src_w = rx::aligned_div<u16>(src_w * dst_bpp, src_bpp);
|
||||
}
|
||||
|
||||
if (auto surface = dynamic_cast<gl::render_target*>(slice.src))
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@
|
|||
|
||||
#include "../Common/texture_cache.h"
|
||||
|
||||
#include "rx/align.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
|
|
@ -49,7 +51,7 @@ namespace gl
|
|||
void init_buffer(const gl::texture* src)
|
||||
{
|
||||
const u32 vram_size = src->pitch() * src->height();
|
||||
const u32 buffer_size = utils::align(vram_size, 4096);
|
||||
const u32 buffer_size = rx::alignUp(vram_size, 4096);
|
||||
|
||||
if (pbo)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
#include "../OpenGL.h"
|
||||
#include <util/types.hpp>
|
||||
#include <util/asm.hpp>
|
||||
#include <rx/asm.hpp>
|
||||
#include <util/logs.hpp>
|
||||
|
||||
namespace gl
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
#include "buffer_object.h"
|
||||
#include "state_tracker.hpp"
|
||||
#include "pixel_settings.hpp"
|
||||
#include "rx/align.hpp"
|
||||
|
||||
namespace gl
|
||||
{
|
||||
|
|
@ -119,14 +120,14 @@ namespace gl
|
|||
case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
|
||||
{
|
||||
m_compressed = true;
|
||||
m_pitch = utils::align(width, 4) / 2;
|
||||
m_pitch = rx::alignUp(width, 4) / 2;
|
||||
break;
|
||||
}
|
||||
case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
|
||||
case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
|
||||
{
|
||||
m_compressed = true;
|
||||
m_pitch = utils::align(width, 4);
|
||||
m_pitch = rx::alignUp(width, 4);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
#include "stdafx.h"
|
||||
#include "ring_buffer.h"
|
||||
|
||||
#include "rx/align.hpp"
|
||||
|
||||
namespace gl
|
||||
{
|
||||
void ring_buffer::recreate(GLsizeiptr size, const void* data)
|
||||
|
|
@ -37,7 +39,7 @@ namespace gl
|
|||
{
|
||||
u32 offset = m_data_loc;
|
||||
if (m_data_loc)
|
||||
offset = utils::align(offset, alignment);
|
||||
offset = rx::alignUp(offset, alignment);
|
||||
|
||||
if ((offset + alloc_size) > m_size)
|
||||
{
|
||||
|
|
@ -56,7 +58,7 @@ namespace gl
|
|||
}
|
||||
|
||||
// Align data loc to 256; allows some "guard" region so we dont trample our own data inadvertently
|
||||
m_data_loc = utils::align(offset + alloc_size, 256);
|
||||
m_data_loc = rx::alignUp(offset + alloc_size, 256);
|
||||
return std::make_pair(static_cast<char*>(m_memory_mapping) + offset, offset);
|
||||
}
|
||||
|
||||
|
|
@ -108,9 +110,9 @@ namespace gl
|
|||
|
||||
u32 offset = m_data_loc;
|
||||
if (m_data_loc)
|
||||
offset = utils::align(offset, 256);
|
||||
offset = rx::alignUp(offset, 256);
|
||||
|
||||
const u32 block_size = utils::align(alloc_size + 16, 256); // Overallocate just in case we need to realign base
|
||||
const u32 block_size = rx::alignUp(alloc_size + 16, 256); // Overallocate just in case we need to realign base
|
||||
|
||||
if ((offset + block_size) > m_size)
|
||||
{
|
||||
|
|
@ -144,10 +146,10 @@ namespace gl
|
|||
{
|
||||
u32 offset = m_data_loc;
|
||||
if (m_data_loc)
|
||||
offset = utils::align(offset, alignment);
|
||||
offset = rx::alignUp(offset, alignment);
|
||||
|
||||
u32 padding = (offset - m_data_loc);
|
||||
u32 real_size = utils::align(padding + alloc_size, alignment); // Ensures we leave the loc pointer aligned after we exit
|
||||
u32 real_size = rx::alignUp(padding + alloc_size, alignment); // Ensures we leave the loc pointer aligned after we exit
|
||||
|
||||
if (real_size > m_mapped_bytes)
|
||||
{
|
||||
|
|
@ -158,10 +160,10 @@ namespace gl
|
|||
|
||||
offset = m_data_loc;
|
||||
if (m_data_loc)
|
||||
offset = utils::align(offset, alignment);
|
||||
offset = rx::alignUp(offset, alignment);
|
||||
|
||||
padding = (offset - m_data_loc);
|
||||
real_size = utils::align(padding + alloc_size, alignment);
|
||||
real_size = rx::alignUp(padding + alloc_size, alignment);
|
||||
}
|
||||
|
||||
m_data_loc = offset + real_size;
|
||||
|
|
@ -270,7 +272,7 @@ namespace gl
|
|||
|
||||
u32 scratch_ring_buffer::alloc(u32 size, u32 alignment)
|
||||
{
|
||||
u64 start = utils::align(m_alloc_pointer, alignment);
|
||||
u64 start = rx::alignUp(m_alloc_pointer, alignment);
|
||||
m_alloc_pointer = (start + size);
|
||||
|
||||
if (static_cast<GLsizeiptr>(m_alloc_pointer) > m_storage.size())
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ namespace gl
|
|||
m_src = fmt::replace_all(m_src, replacement_table);
|
||||
|
||||
// Fill with 0 to avoid sending incomplete/unused variables to the GPU
|
||||
m_constants_buf.resize(utils::rounded_div(push_constants_size, 4), 0);
|
||||
m_constants_buf.resize(rx::rounded_div(push_constants_size, 4), 0);
|
||||
|
||||
create();
|
||||
|
||||
|
|
@ -106,8 +106,8 @@ namespace gl
|
|||
glBindImageTexture(GL_COMPUTE_IMAGE_SLOT(0), dst->id(), 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8);
|
||||
|
||||
constexpr auto wg_size = 16;
|
||||
const auto invocations_x = utils::aligned_div(output_size.width, wg_size);
|
||||
const auto invocations_y = utils::aligned_div(output_size.height, wg_size);
|
||||
const auto invocations_x = rx::aligned_div(output_size.width, wg_size);
|
||||
const auto invocations_y = rx::aligned_div(output_size.height, wg_size);
|
||||
|
||||
ensure(invocations_x == (output_size.width + (wg_size - 1)) / wg_size);
|
||||
ensure(invocations_y == (output_size.height + (wg_size - 1)) / wg_size);
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
#include "RSXDMAWriter.h"
|
||||
|
||||
#include "util//Thread.h"
|
||||
#include <util/asm.hpp>
|
||||
#include <rx/asm.hpp>
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
|
|
@ -56,7 +56,7 @@ namespace rsx
|
|||
// FIXME: This is a busy wait, consider yield to improve responsiveness on weak devices.
|
||||
while (!m_host_context_ptr->in_flight_commands_completed())
|
||||
{
|
||||
utils::pause();
|
||||
rx::pause();
|
||||
|
||||
if (thread_ctrl::state() == thread_state::aborting)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
#include "Emu/System.h"
|
||||
#include "rpcsx/fw/ps3/cellMsgDialog.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
|
|
@ -36,7 +36,7 @@ namespace rsx
|
|||
|
||||
while (ref_cnt.load() && !Emu.IsStopped())
|
||||
{
|
||||
utils::pause();
|
||||
rx::pause();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -112,7 +112,7 @@ namespace rsx
|
|||
{
|
||||
while (ref_cnt.load() && !Emu.IsStopped())
|
||||
{
|
||||
utils::pause();
|
||||
rx::pause();
|
||||
}
|
||||
}
|
||||
} // namespace rsx
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#include "stdafx.h"
|
||||
#include "overlay_manager.h"
|
||||
#include "Emu/System.h"
|
||||
#include <util/asm.hpp>
|
||||
#include <rx/asm.hpp>
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
|
|
@ -37,7 +37,7 @@ namespace rsx
|
|||
*m_input_thread = thread_state::aborting;
|
||||
while (*m_input_thread <= thread_state::aborting)
|
||||
{
|
||||
utils::pause();
|
||||
rx::pause();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,7 +9,8 @@
|
|||
#include "cellos/sys_rsx.h"
|
||||
#include "NV47/HW/context.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
#include <thread>
|
||||
#include <bitset>
|
||||
|
|
@ -139,7 +140,7 @@ namespace rsx
|
|||
u32 bytes_read = 0;
|
||||
|
||||
// Find the next set bit after every iteration
|
||||
for (int i = 0;; i = (std::countr_zero<u32>(utils::rol8(to_fetch, 0 - i - 1)) + i + 1) % 8)
|
||||
for (int i = 0;; i = (std::countr_zero<u32>(rx::rol8(to_fetch, 0 - i - 1)) + i + 1) % 8)
|
||||
{
|
||||
// If a reservation is being updated, try to load another
|
||||
const auto& res = vm::reservation_acquire(addr1 + i * 128);
|
||||
|
|
@ -193,7 +194,7 @@ namespace rsx
|
|||
}
|
||||
else
|
||||
{
|
||||
busy_wait(200);
|
||||
rx::busy_wait(200);
|
||||
}
|
||||
|
||||
if (strict_fetch_ordering)
|
||||
|
|
@ -247,7 +248,7 @@ namespace rsx
|
|||
|
||||
for (u32 remaining = size, addr = m_internal_get, ptr = from; remaining > 0;)
|
||||
{
|
||||
const u32 next_block = utils::align(addr + 1, _1M);
|
||||
const u32 next_block = rx::alignUp(addr + 1, _1M);
|
||||
const u32 available = (next_block - addr);
|
||||
if (remaining <= available)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
#include "util/lockless.h"
|
||||
|
||||
#include <thread>
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
|
|
@ -181,13 +181,13 @@ namespace rsx
|
|||
while (_thr.m_enqueued_count.load() > _thr.m_processed_count.load())
|
||||
{
|
||||
rsxthr->on_semaphore_acquire_wait();
|
||||
utils::pause();
|
||||
rx::pause();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (_thr.m_enqueued_count.load() > _thr.m_processed_count.load())
|
||||
utils::pause();
|
||||
rx::pause();
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -27,7 +27,8 @@
|
|||
|
||||
#include "util/date_time.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
#include <span>
|
||||
#include <thread>
|
||||
|
|
@ -332,11 +333,11 @@ namespace rsx
|
|||
{
|
||||
// Division operator
|
||||
_min_index = std::min(_min_index, first / attrib.frequency);
|
||||
_max_index = std::max<u32>(_max_index, utils::aligned_div(max_index, attrib.frequency));
|
||||
_max_index = std::max<u32>(_max_index, rx::aligned_div(max_index, attrib.frequency));
|
||||
|
||||
if (freq_count > 0 && freq_count != umax)
|
||||
{
|
||||
const u32 max = utils::aligned_div(max_index, attrib.frequency);
|
||||
const u32 max = rx::aligned_div(max_index, attrib.frequency);
|
||||
max_result_by_division = std::max<u32>(max_result_by_division, max);
|
||||
|
||||
// Discard lower frequencies because it has been proven that there are indices higher than them
|
||||
|
|
@ -365,7 +366,7 @@ namespace rsx
|
|||
// The alternative would be re-iterating again over all of them
|
||||
if (get_location(real_offset_address) == CELL_GCM_LOCATION_LOCAL)
|
||||
{
|
||||
if (utils::add_saturate<u32>(real_offset_address - rsx::constants::local_mem_base, (_max_index + 1) * attribute_stride) <= render->local_mem_size)
|
||||
if (rx::add_saturate<u32>(real_offset_address - rsx::constants::local_mem_base, (_max_index + 1) * attribute_stride) <= render->local_mem_size)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
|
@ -734,7 +735,7 @@ namespace rsx
|
|||
{
|
||||
// Be compatible with previous bitwise serialization
|
||||
ar(std::span<u8>(reinterpret_cast<u8*>(this), OFFSET_OF(avconf, scan_mode)));
|
||||
ar.pos += utils::align<usz>(OFFSET_OF(avconf, scan_mode), alignof(avconf)) - OFFSET_OF(avconf, scan_mode);
|
||||
ar.pos += rx::alignUp<usz>(OFFSET_OF(avconf, scan_mode), alignof(avconf)) - OFFSET_OF(avconf, scan_mode);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -1169,7 +1170,7 @@ namespace rsx
|
|||
|
||||
for (; t == now; now = get_time_ns())
|
||||
{
|
||||
utils::pause();
|
||||
rx::pause();
|
||||
}
|
||||
|
||||
timestamp_ctrl = now;
|
||||
|
|
@ -2590,7 +2591,7 @@ namespace rsx
|
|||
{
|
||||
if (u32 advance = disasm.disasm(pcs_of_valid_cmds.back()))
|
||||
{
|
||||
pcs_of_valid_cmds.push_back(utils::add_saturate<u32>(pcs_of_valid_cmds.back(), advance));
|
||||
pcs_of_valid_cmds.push_back(rx::add_saturate<u32>(pcs_of_valid_cmds.back(), advance));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -2722,7 +2723,7 @@ namespace rsx
|
|||
}
|
||||
|
||||
// Some cases do not need full delay
|
||||
remaining = utils::aligned_div(remaining, div);
|
||||
remaining = rx::aligned_div(remaining, div);
|
||||
const u64 until = get_system_time() + remaining;
|
||||
|
||||
while (true)
|
||||
|
|
@ -2751,7 +2752,7 @@ namespace rsx
|
|||
}
|
||||
else
|
||||
{
|
||||
busy_wait(100);
|
||||
rx::busy_wait(100);
|
||||
}
|
||||
|
||||
const u64 current = get_system_time();
|
||||
|
|
@ -2862,7 +2863,7 @@ namespace rsx
|
|||
|
||||
for (u32 ea = address >> 20, end = ea + (size >> 20); ea < end; ea++)
|
||||
{
|
||||
const u32 io = utils::rol32(iomap_table.io[ea], 32 - 20);
|
||||
const u32 io = rx::rol32(iomap_table.io[ea], 32 - 20);
|
||||
|
||||
if (io + 1)
|
||||
{
|
||||
|
|
@ -2892,7 +2893,7 @@ namespace rsx
|
|||
|
||||
while (to_unmap)
|
||||
{
|
||||
bit = (std::countr_zero<u64>(utils::rol64(to_unmap, 0 - bit)) + bit);
|
||||
bit = (std::countr_zero<u64>(rx::rol64(to_unmap, 0 - bit)) + bit);
|
||||
to_unmap &= ~(1ull << bit);
|
||||
|
||||
constexpr u16 null_entry = 0xFFFF;
|
||||
|
|
@ -2998,7 +2999,7 @@ namespace rsx
|
|||
|
||||
while (!external_interrupt_ack && !is_stopped())
|
||||
{
|
||||
utils::pause();
|
||||
rx::pause();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -3022,7 +3023,7 @@ namespace rsx
|
|||
while (external_interrupt_lock && (cpu_flag::ret - state))
|
||||
{
|
||||
// TODO: Investigate non busy-spinning method
|
||||
utils::pause();
|
||||
rx::pause();
|
||||
}
|
||||
|
||||
external_interrupt_ack.store(false);
|
||||
|
|
@ -3364,7 +3365,7 @@ namespace rsx
|
|||
}
|
||||
|
||||
const u64 current_time = get_system_time();
|
||||
const u64 current_tsc = utils::get_tsc();
|
||||
const u64 current_tsc = rx::get_tsc();
|
||||
u64 preempt_count = 0;
|
||||
|
||||
if (frame_times.size() >= 60)
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@
|
|||
#include "vkutils/buffer_object.h"
|
||||
#include "VKPipelineCompiler.h"
|
||||
|
||||
#include "rx/align.hpp"
|
||||
|
||||
#define VK_MAX_COMPUTE_TASKS 8192 // Max number of jobs per frame
|
||||
|
||||
namespace vk
|
||||
|
|
@ -219,7 +221,7 @@ namespace vk
|
|||
#include "../Program/GLSLSnippets/ShuffleBytes.glsl"
|
||||
;
|
||||
|
||||
const auto parameters_size = utils::align(push_constants_size, 16) / 16;
|
||||
const auto parameters_size = rx::alignUp(push_constants_size, 16) / 16;
|
||||
const std::pair<std::string_view, std::string> syntax_replace[] =
|
||||
{
|
||||
{"%loc", "0"},
|
||||
|
|
@ -387,7 +389,7 @@ namespace vk
|
|||
word_count = num_words;
|
||||
block_length = num_words * 4;
|
||||
|
||||
const u32 linear_invocations = utils::aligned_div(word_count, optimal_group_size);
|
||||
const u32 linear_invocations = rx::aligned_div(word_count, optimal_group_size);
|
||||
compute_task::run(cmd, linear_invocations);
|
||||
}
|
||||
} // namespace vk
|
||||
|
|
|
|||
|
|
@ -6,7 +6,8 @@
|
|||
#include "Emu/IdManager.h"
|
||||
|
||||
#include "util/StrUtil.h"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
|
|
@ -484,7 +485,7 @@ namespace vk
|
|||
set_parameters(cmd);
|
||||
|
||||
const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size);
|
||||
const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation);
|
||||
const u32 linear_invocations = rx::aligned_div(data_length, num_bytes_per_invocation);
|
||||
compute_task::run(cmd, linear_invocations);
|
||||
}
|
||||
};
|
||||
|
|
@ -602,8 +603,8 @@ namespace vk
|
|||
this->out_offset = config.dst_offset;
|
||||
|
||||
const auto tile_aligned_height = std::min(
|
||||
utils::align<u32>(config.image_height, 64),
|
||||
utils::aligned_div(config.tile_size - config.tile_base_offset, config.tile_pitch));
|
||||
rx::alignUp<u32>(config.image_height, 64),
|
||||
rx::aligned_div(config.tile_size - config.tile_base_offset, config.tile_pitch));
|
||||
|
||||
if constexpr (Op == RSX_detiler_op::decode)
|
||||
{
|
||||
|
|
@ -656,7 +657,7 @@ namespace vk
|
|||
|
||||
const u32 subtexels_per_invocation = (config.image_bpp < 4) ? (4 / config.image_bpp) : 1;
|
||||
const u32 virtual_width = config.image_width / subtexels_per_invocation;
|
||||
const u32 invocations_x = utils::aligned_div(virtual_width, optimal_group_size);
|
||||
const u32 invocations_x = rx::aligned_div(virtual_width, optimal_group_size);
|
||||
compute_task::run(cmd, invocations_x, config.image_height, 1);
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -7,7 +7,9 @@
|
|||
#include "Emu/RSX/RSXThread.h"
|
||||
#include "util/mutex.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
namespace vk
|
||||
|
|
@ -413,7 +415,7 @@ namespace vk
|
|||
std::lock_guard lock(g_dma_mutex);
|
||||
|
||||
const u32 start = (local_address & s_dma_block_mask);
|
||||
const u32 end = utils::align(local_address + length, static_cast<u32>(s_dma_block_length));
|
||||
const u32 end = rx::alignUp(local_address + length, static_cast<u32>(s_dma_block_length));
|
||||
|
||||
for (u32 block = start; block < end;)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -22,7 +22,8 @@
|
|||
|
||||
#include "../Program/SPIRVCommon.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
|
||||
namespace vk
|
||||
{
|
||||
|
|
@ -919,7 +920,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
|
|||
// Wait for deadlock to clear
|
||||
while (m_queue_status & flush_queue_state::deadlock)
|
||||
{
|
||||
utils::pause();
|
||||
rx::pause();
|
||||
}
|
||||
|
||||
g_fxo->get<rsx::dma_manager>().clear_mem_fault_flag();
|
||||
|
|
@ -2081,13 +2082,13 @@ void VKGSRender::load_program_env()
|
|||
|
||||
rsx::io_buffer indirection_table_buf([&](usz size) -> std::pair<void*, usz>
|
||||
{
|
||||
indirection_table_offset = m_instancing_buffer_ring_info.alloc<1>(utils::align(size, alignment));
|
||||
indirection_table_offset = m_instancing_buffer_ring_info.alloc<1>(rx::alignUp(size, alignment));
|
||||
return std::make_pair(m_instancing_buffer_ring_info.map(indirection_table_offset, size), size);
|
||||
});
|
||||
|
||||
rsx::io_buffer constants_array_buf([&](usz size) -> std::pair<void*, usz>
|
||||
{
|
||||
constants_data_table_offset = m_instancing_buffer_ring_info.alloc<1>(utils::align(size, alignment));
|
||||
constants_data_table_offset = m_instancing_buffer_ring_info.alloc<1>(rx::alignUp(size, alignment));
|
||||
return std::make_pair(m_instancing_buffer_ring_info.map(constants_data_table_offset, size), size);
|
||||
});
|
||||
|
||||
|
|
@ -2105,7 +2106,7 @@ void VKGSRender::load_program_env()
|
|||
auto alloc_storage = [&](usz size) -> std::pair<void*, usz>
|
||||
{
|
||||
const auto alignment = m_device->gpu().get_limits().minUniformBufferOffsetAlignment;
|
||||
mem_offset = m_transform_constants_ring_info.alloc<1>(utils::align(size, alignment));
|
||||
mem_offset = m_transform_constants_ring_info.alloc<1>(rx::alignUp(size, alignment));
|
||||
return std::make_pair(m_transform_constants_ring_info.map(mem_offset, size), size);
|
||||
};
|
||||
|
||||
|
|
@ -2921,7 +2922,7 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info*
|
|||
}
|
||||
|
||||
rsx_log.warning("[Performance warning] Unexpected ZCULL read caused a hard sync");
|
||||
busy_wait();
|
||||
rx::busy_wait();
|
||||
}
|
||||
|
||||
data.sync();
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@
|
|||
#include "Emu/RSX/rsx_utils.h"
|
||||
#include "Emu/RSX/rsx_cache.h"
|
||||
#include "util/mutex.h"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
#include <optional>
|
||||
#include <thread>
|
||||
|
|
@ -289,7 +289,7 @@ namespace vk
|
|||
{
|
||||
while (num_waiters.load() != 0)
|
||||
{
|
||||
utils::pause();
|
||||
rx::pause();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -8,7 +8,8 @@
|
|||
#include "upscalers/bilinear_pass.hpp"
|
||||
#include "upscalers/fsr_pass.h"
|
||||
#include "upscalers/nearest_pass.hpp"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "util/video_provider.h"
|
||||
|
||||
extern atomic_t<bool> g_user_asked_for_screenshot;
|
||||
|
|
@ -762,7 +763,7 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
|
|||
{
|
||||
const usz sshot_size = buffer_height * buffer_width * 4;
|
||||
|
||||
vk::buffer sshot_vkbuf(*m_device, utils::align(sshot_size, 0x100000), m_device->get_memory_mapping().host_visible_coherent,
|
||||
vk::buffer sshot_vkbuf(*m_device, rx::alignUp(sshot_size, 0x100000), m_device->get_memory_mapping().host_visible_coherent,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0, VMM_ALLOCATION_POOL_UNDEFINED);
|
||||
|
||||
VkBufferImageCopy copy_info;
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
#include "VKQueryPool.h"
|
||||
#include "VKRenderPass.h"
|
||||
#include "VKResourceManager.h"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "VKGSRender.h"
|
||||
|
||||
namespace vk
|
||||
|
|
@ -172,7 +172,7 @@ namespace vk
|
|||
|
||||
while (!query_info.ready)
|
||||
{
|
||||
utils::pause();
|
||||
rx::pause();
|
||||
poke_query(query_info, index, result_flags);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
#include "VKOverlays.h"
|
||||
|
||||
#include "vkutils/image.h"
|
||||
#include "rx/align.hpp"
|
||||
|
||||
namespace vk
|
||||
{
|
||||
|
|
@ -65,8 +66,8 @@ namespace vk
|
|||
multisampled = msaa_image;
|
||||
resolve = resolve_image;
|
||||
|
||||
const u32 invocations_x = utils::align(resolve_image->width(), cs_wave_x) / cs_wave_x;
|
||||
const u32 invocations_y = utils::align(resolve_image->height(), cs_wave_y) / cs_wave_y;
|
||||
const u32 invocations_x = rx::alignUp(resolve_image->width(), cs_wave_x) / cs_wave_x;
|
||||
const u32 invocations_y = rx::alignUp(resolve_image->height(), cs_wave_y) / cs_wave_y;
|
||||
|
||||
compute_task::run(cmd, invocations_x, invocations_y, 1);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,7 +13,8 @@
|
|||
#include "../GCM.h"
|
||||
#include "../rsx_utils.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
namespace vk
|
||||
{
|
||||
|
|
@ -94,7 +95,7 @@ namespace vk
|
|||
ensure(dst->size() >= allocation_end);
|
||||
|
||||
const auto data_offset = u32(region.bufferOffset);
|
||||
const auto z32_offset = utils::align<u32>(data_offset + packed16_length, 256);
|
||||
const auto z32_offset = rx::alignUp<u32>(data_offset + packed16_length, 256);
|
||||
|
||||
// 1. Copy the depth to buffer
|
||||
VkBufferImageCopy region2;
|
||||
|
|
@ -148,8 +149,8 @@ namespace vk
|
|||
ensure(dst->size() >= allocation_end);
|
||||
|
||||
const auto data_offset = u32(region.bufferOffset);
|
||||
const auto z_offset = utils::align<u32>(data_offset + packed_length, 256);
|
||||
const auto s_offset = utils::align<u32>(z_offset + in_depth_size, 256);
|
||||
const auto z_offset = rx::alignUp<u32>(data_offset + packed_length, 256);
|
||||
const auto s_offset = rx::alignUp<u32>(z_offset + in_depth_size, 256);
|
||||
|
||||
// 1. Copy the depth and stencil blocks to separate banks
|
||||
VkBufferImageCopy sub_regions[2];
|
||||
|
|
@ -246,7 +247,7 @@ namespace vk
|
|||
ensure(src->size() >= allocation_end);
|
||||
|
||||
const auto data_offset = u32(region.bufferOffset);
|
||||
const auto z32_offset = utils::align<u32>(data_offset + packed16_length, 256);
|
||||
const auto z32_offset = rx::alignUp<u32>(data_offset + packed16_length, 256);
|
||||
|
||||
// 1. Pre-compute barrier
|
||||
vk::insert_buffer_memory_barrier(cmd, src->value, z32_offset, packed32_length,
|
||||
|
|
@ -281,11 +282,11 @@ namespace vk
|
|||
ensure(src->size() >= allocation_end); // "Out of memory (compute heap). Lower your resolution scale setting."
|
||||
|
||||
const auto data_offset = u32(region.bufferOffset);
|
||||
const auto z_offset = utils::align<u32>(data_offset + packed_length, 256);
|
||||
const auto s_offset = utils::align<u32>(z_offset + in_depth_size, 256);
|
||||
const auto z_offset = rx::alignUp<u32>(data_offset + packed_length, 256);
|
||||
const auto s_offset = rx::alignUp<u32>(z_offset + in_depth_size, 256);
|
||||
|
||||
// Zero out the stencil block
|
||||
VK_GET_SYMBOL(vkCmdFillBuffer)(cmd, src->value, s_offset, utils::align(in_stencil_size, 4), 0);
|
||||
VK_GET_SYMBOL(vkCmdFillBuffer)(cmd, src->value, s_offset, rx::alignUp(in_stencil_size, 4), 0);
|
||||
|
||||
vk::insert_buffer_memory_barrier(cmd, src->value, s_offset, in_stencil_size,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
|
|
@ -848,7 +849,7 @@ namespace vk
|
|||
const auto src_offset = section.bufferOffset;
|
||||
|
||||
// Align output to 128-byte boundary to keep some drivers happy
|
||||
dst_offset = utils::align(dst_offset, 128);
|
||||
dst_offset = rx::alignUp(dst_offset, 128);
|
||||
|
||||
u32 data_length = 0;
|
||||
for (unsigned i = 0, j = packet.first; i < packet.second; ++i, ++j)
|
||||
|
|
@ -1124,7 +1125,7 @@ namespace vk
|
|||
if (layout.level == 0)
|
||||
{
|
||||
// Align mip0 on a 128-byte boundary
|
||||
scratch_offset = utils::align(scratch_offset, 128);
|
||||
scratch_offset = rx::alignUp(scratch_offset, 128);
|
||||
}
|
||||
|
||||
// Copy from upload heap to scratch mem
|
||||
|
|
@ -1254,7 +1255,7 @@ namespace vk
|
|||
{
|
||||
// Calculate the true length of the usable memory section
|
||||
const auto available_tile_size = tiled_region.tile->size - (range.start - tiled_region.base_address);
|
||||
const auto max_content_size = tiled_region.tile->pitch * utils::align<u32>(height, 64);
|
||||
const auto max_content_size = tiled_region.tile->pitch * rx::alignUp<u32>(height, 64);
|
||||
const auto section_length = std::min(max_content_size, available_tile_size);
|
||||
|
||||
// Sync the DMA layer
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
#include "VKCompute.h"
|
||||
#include "VKAsyncScheduler.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
namespace vk
|
||||
{
|
||||
|
|
@ -450,7 +450,7 @@ namespace vk
|
|||
// Dimensions were given in 'dst' space. Work out the real source coordinates
|
||||
const auto src_bpp = vk::get_format_texel_width(section.src->format());
|
||||
src_x = (src_x * dst_bpp) / src_bpp;
|
||||
src_w = utils::aligned_div<u16>(src_w * dst_bpp, src_bpp);
|
||||
src_w = rx::aligned_div<u16>(src_w * dst_bpp, src_bpp);
|
||||
|
||||
transform &= ~(rsx::surface_transform::coordinate_transform);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,11 +4,14 @@
|
|||
#include "VKRenderTargets.h"
|
||||
#include "VKResourceManager.h"
|
||||
#include "VKRenderPass.h"
|
||||
#include "VKGSRenderTypes.hpp"
|
||||
#include "vkutils/image_helpers.h"
|
||||
|
||||
#include "../Common/texture_cache.h"
|
||||
#include "../Common/tiled_dma_copy.hpp"
|
||||
|
||||
#include "rx/align.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
|
|
@ -289,7 +292,7 @@ namespace vk
|
|||
if (tiled_region)
|
||||
{
|
||||
const auto available_tile_size = tiled_region.tile->size - (range.start - tiled_region.base_address);
|
||||
const auto max_content_size = tiled_region.tile->pitch * utils::align(height, 64);
|
||||
const auto max_content_size = tiled_region.tile->pitch * rx::alignUp(height, 64);
|
||||
flush_length = std::min(max_content_size, available_tile_size);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -117,8 +117,8 @@ namespace vk
|
|||
configure(cmd);
|
||||
|
||||
constexpr auto wg_size = 16;
|
||||
const auto invocations_x = utils::aligned_div(output_size.width, wg_size);
|
||||
const auto invocations_y = utils::aligned_div(output_size.height, wg_size);
|
||||
const auto invocations_x = rx::aligned_div(output_size.width, wg_size);
|
||||
const auto invocations_y = rx::aligned_div(output_size.height, wg_size);
|
||||
|
||||
ensure(invocations_x == (output_size.width + (wg_size - 1)) / wg_size);
|
||||
ensure(invocations_y == (output_size.height + (wg_size - 1)) / wg_size);
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
#include "../VKHelpers.h"
|
||||
#include "../VKResourceManager.h"
|
||||
#include "Emu/IdManager.h"
|
||||
#include "rx/align.hpp"
|
||||
|
||||
#include <memory>
|
||||
|
||||
|
|
@ -60,7 +61,7 @@ namespace vk
|
|||
|
||||
// Create new heap. All sizes are aligned up by 64M, upto 1GiB
|
||||
const usz size_limit = 1024 * 0x100000;
|
||||
usz aligned_new_size = utils::align(m_size + size, 64 * 0x100000);
|
||||
usz aligned_new_size = rx::alignUp(m_size + size, 64 * 0x100000);
|
||||
|
||||
if (aligned_new_size >= size_limit)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
|
||||
#include "../VKResourceManager.h"
|
||||
|
||||
#include <util/asm.hpp>
|
||||
#include <rx/align.hpp>
|
||||
#include <rx/asm.hpp>
|
||||
|
||||
namespace vk
|
||||
{
|
||||
|
|
@ -123,8 +124,8 @@ namespace vk
|
|||
{
|
||||
auto create_texture = [&]()
|
||||
{
|
||||
u32 new_width = utils::align(requested_width, 256u);
|
||||
u32 new_height = utils::align(requested_height, 256u);
|
||||
u32 new_width = rx::alignUp(requested_width, 256u);
|
||||
u32 new_height = rx::alignUp(requested_height, 256u);
|
||||
|
||||
return new vk::image(*g_render_device, g_render_device->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||
VK_IMAGE_TYPE_2D, format, new_width, new_height, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
|
|
@ -165,7 +166,7 @@ namespace vk
|
|||
if (!scratch_buffer)
|
||||
{
|
||||
// Choose optimal size
|
||||
const u64 alloc_size = utils::align(min_required_size, 0x100000);
|
||||
const u64 alloc_size = rx::alignUp(min_required_size, 0x100000);
|
||||
|
||||
scratch_buffer = std::make_unique<vk::buffer>(*g_render_device, alloc_size,
|
||||
g_render_device->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||
|
|
@ -184,7 +185,7 @@ namespace vk
|
|||
if (init_mem || zero_memory)
|
||||
{
|
||||
// Zero-initialize the allocated VRAM
|
||||
const u64 zero_length = init_mem ? buf->size() : utils::align(min_required_size, 4);
|
||||
const u64 zero_length = init_mem ? buf->size() : rx::alignUp(min_required_size, 4);
|
||||
VK_GET_SYMBOL(vkCmdFillBuffer)(cmd, buf->value, 0, zero_length, 0);
|
||||
|
||||
insert_buffer_memory_barrier(cmd, buf->value, 0, zero_length,
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
#include "Emu/Cell/timers.hpp"
|
||||
|
||||
#include "util/sysinfo.hpp"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
namespace vk
|
||||
{
|
||||
|
|
@ -170,7 +170,7 @@ namespace vk
|
|||
{
|
||||
while (!flushed)
|
||||
{
|
||||
utils::pause();
|
||||
rx::pause();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -553,7 +553,7 @@ namespace vk
|
|||
switch (status)
|
||||
{
|
||||
case VK_NOT_READY:
|
||||
utils::pause();
|
||||
rx::pause();
|
||||
continue;
|
||||
default:
|
||||
die_with_error(status);
|
||||
|
|
@ -592,7 +592,7 @@ namespace vk
|
|||
|
||||
if (timeout)
|
||||
{
|
||||
const auto now = freq ? utils::get_tsc() : get_system_time();
|
||||
const auto now = freq ? rx::get_tsc() : get_system_time();
|
||||
|
||||
if (!start)
|
||||
{
|
||||
|
|
@ -608,7 +608,7 @@ namespace vk
|
|||
}
|
||||
}
|
||||
|
||||
utils::pause();
|
||||
rx::pause();
|
||||
}
|
||||
}
|
||||
} // namespace vk
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@
|
|||
#include "util/logs.hpp"
|
||||
#include "util/init_mutex.hpp"
|
||||
#include "util/sysinfo.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <regex>
|
||||
|
|
@ -1266,7 +1267,7 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch,
|
|||
auto load_tar = [&](const std::string& path, const std::string& special_file)
|
||||
{
|
||||
const usz size = m_ar->pop<usz>();
|
||||
const usz max_data_size = m_ar->get_size(utils::add_saturate<usz>(size, m_ar->pos));
|
||||
const usz max_data_size = m_ar->get_size(rx::add_saturate<usz>(size, m_ar->pos));
|
||||
|
||||
if (size % 512 || max_data_size < size || max_data_size - size < m_ar->pos)
|
||||
{
|
||||
|
|
@ -3687,7 +3688,7 @@ void Emulator::Kill(bool allow_autoexit, bool savestate, savestate_stage* save_s
|
|||
{
|
||||
// Write merged TTY output after emulation has been safely stopped
|
||||
|
||||
if (usz attempted_read_size = utils::sub_saturate<usz>(g_tty.pos(), m_tty_file_init_pos))
|
||||
if (usz attempted_read_size = rx::sub_saturate<usz>(g_tty.pos(), m_tty_file_init_pos))
|
||||
{
|
||||
if (fs::file tty_read_fd{fs::get_log_dir() + "TTY.log"})
|
||||
{
|
||||
|
|
@ -3731,7 +3732,7 @@ void Emulator::Kill(bool allow_autoexit, bool savestate, savestate_stage* save_s
|
|||
{
|
||||
std::string_view to_log = not_logged;
|
||||
to_log = to_log.substr(0, 0x8000);
|
||||
to_log = to_log.substr(0, utils::add_saturate<usz>(to_log.rfind("\n========== SPU BLOCK"sv), 1));
|
||||
to_log = to_log.substr(0, rx::add_saturate<usz>(to_log.rfind("\n========== SPU BLOCK"sv), 1));
|
||||
to_remove = to_log.size();
|
||||
|
||||
std::string new_log(to_log);
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#include "util/sysinfo.hpp"
|
||||
#include "util/fence.hpp"
|
||||
#include "util/tsc.hpp"
|
||||
#include "rx/tsc.hpp"
|
||||
#include "util/Thread.h"
|
||||
#include "util/mutex.h"
|
||||
|
||||
|
|
@ -75,7 +75,7 @@ SAFE_BUFFERS(void)
|
|||
perf_stat_base::push(u64 data[66], u64 start_time, const char* name) noexcept
|
||||
{
|
||||
// Event end
|
||||
const u64 end_time = (utils::lfence(), utils::get_tsc());
|
||||
const u64 end_time = (utils::lfence(), rx::get_tsc());
|
||||
|
||||
// Compute difference in seconds
|
||||
const f64 diff = (end_time - start_time) * 1. / utils::get_tsc_freq();
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
#include "util/types.hpp"
|
||||
#include "util/logs.hpp"
|
||||
#include "util/tsc.hpp"
|
||||
#include "rx/tsc.hpp"
|
||||
#include "system_config.h"
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
|
|
@ -146,7 +146,7 @@ public:
|
|||
if constexpr (std::array<bool, sizeof...(SubEvents)>{(SubEvents == Event)...}[Index])
|
||||
{
|
||||
// Push actual timestamp into an array
|
||||
m_timestamps[Index + 1] = utils::get_tsc();
|
||||
m_timestamps[Index + 1] = rx::get_tsc();
|
||||
}
|
||||
else if constexpr (Index < sizeof...(SubEvents))
|
||||
{
|
||||
|
|
@ -170,7 +170,7 @@ public:
|
|||
// Re-initialize first timestamp
|
||||
FORCE_INLINE SAFE_BUFFERS(void) restart() noexcept
|
||||
{
|
||||
m_timestamps[0] = utils::get_tsc();
|
||||
m_timestamps[0] = rx::get_tsc();
|
||||
std::memset(m_timestamps + 1, 0, sizeof(m_timestamps) - sizeof(u64));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@
|
|||
#include "Emu/RSX/Overlays/overlay_compile_notification.h"
|
||||
#include "Emu/System.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
|
||||
LOG_CHANNEL(sys_log, "SYS");
|
||||
|
||||
|
|
@ -226,7 +226,7 @@ void progress_dialog_server::operator()()
|
|||
if (pdone < ptotal && g_cfg.misc.show_ppu_compilation_hint)
|
||||
{
|
||||
const u64 passed_usec = (get_system_time() - start_time);
|
||||
const u64 remaining_usec = pdone ? utils::rational_mul<u64>(passed_usec, static_cast<u64>(ptotal) - pdone, pdone) : (passed_usec * ptotal);
|
||||
const u64 remaining_usec = pdone ? rx::rational_mul<u64>(passed_usec, static_cast<u64>(ptotal) - pdone, pdone) : (passed_usec * ptotal);
|
||||
|
||||
// Only show compile notification if we estimate at least 100ms
|
||||
if (remaining_usec >= 100'000ULL)
|
||||
|
|
@ -260,7 +260,7 @@ void progress_dialog_server::operator()()
|
|||
// Assume not all programs were found if files were not compiled (as it may contain more)
|
||||
const bool use_bits = fknown_bits && ftotal_bits;
|
||||
const u64 known_files = use_bits ? fknown_bits : ftotal;
|
||||
const u64 total = utils::rational_mul<u64>(std::max<u64>(ptotal, 1), std::max<u64>(use_bits ? ftotal_bits : ftotal, 1), std::max<u64>(known_files, 1));
|
||||
const u64 total = rx::rational_mul<u64>(std::max<u64>(ptotal, 1), std::max<u64>(use_bits ? ftotal_bits : ftotal, 1), std::max<u64>(known_files, 1));
|
||||
const u64 done = pdone;
|
||||
const u32 value = static_cast<u32>(done >= total ? 100 : done * 100 / total);
|
||||
|
||||
|
|
@ -280,7 +280,7 @@ void progress_dialog_server::operator()()
|
|||
if (of_1000 >= 2)
|
||||
{
|
||||
const u64 passed = (get_system_time() - start_time);
|
||||
const u64 total = utils::rational_mul<u64>(passed, 1000, of_1000);
|
||||
const u64 total = rx::rational_mul<u64>(passed, 1000, of_1000);
|
||||
const u64 remaining = total - passed;
|
||||
|
||||
// Stabilize the result by using the maximum one from the recent history
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
#include "stdafx.h"
|
||||
#include "PSF.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include <span>
|
||||
|
||||
LOG_CHANNEL(psf_log, "PSF");
|
||||
|
|
@ -307,7 +308,7 @@ namespace psf
|
|||
}
|
||||
|
||||
// Align next section (data) offset
|
||||
key_offset = utils::align(key_offset, 4);
|
||||
key_offset = rx::alignUp(key_offset, 4);
|
||||
|
||||
// Generate header
|
||||
header_t header{};
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@
|
|||
|
||||
#include "TAR.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "util/serialization_ext.hpp"
|
||||
|
||||
#include <charconv>
|
||||
|
|
@ -164,7 +164,7 @@ std::unique_ptr<utils::serial> tar_object::get_file(const std::string& path, std
|
|||
const u64 size = emplace_single_entry(largest_offset, m_ar->get_size(umax) - m_ar_tar_start).first;
|
||||
|
||||
// Advance offset to next block
|
||||
largest_offset += utils::align(size, 512);
|
||||
largest_offset += rx::alignUp(size, 512);
|
||||
}
|
||||
// Continue scanning from last file entered
|
||||
else if (m_file)
|
||||
|
|
@ -181,7 +181,7 @@ std::unique_ptr<utils::serial> tar_object::get_file(const std::string& path, std
|
|||
}
|
||||
|
||||
// Advance offset to next block
|
||||
largest_offset += utils::align(size, 512);
|
||||
largest_offset += rx::alignUp(size, 512);
|
||||
|
||||
if (!path.empty() && path == filename)
|
||||
{
|
||||
|
|
@ -408,7 +408,7 @@ void tar_object::save_directory(const std::string& target_path, utils::serial& a
|
|||
return;
|
||||
}
|
||||
|
||||
ptr += utils::aligned_div(static_cast<u32>(std::bit_width(i)), 3) - 1;
|
||||
ptr += rx::aligned_div(static_cast<u32>(std::bit_width(i)), 3) - 1;
|
||||
|
||||
for (; i; ptr--, i /= 8)
|
||||
{
|
||||
|
|
@ -425,7 +425,7 @@ void tar_object::save_directory(const std::string& target_path, utils::serial& a
|
|||
|
||||
if (is_null && !func)
|
||||
{
|
||||
ar.pos += utils::align(file_stat.size, 512);
|
||||
ar.pos += rx::alignUp(file_stat.size, 512);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -458,7 +458,7 @@ void tar_object::save_directory(const std::string& target_path, utils::serial& a
|
|||
if (is_null)
|
||||
{
|
||||
// Align
|
||||
ar.pos += utils::align(ar.pos - old_pos, 512);
|
||||
ar.pos += rx::alignUp(ar.pos - old_pos, 512);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
@ -485,7 +485,7 @@ void tar_object::save_directory(const std::string& target_path, utils::serial& a
|
|||
|
||||
// Align
|
||||
const usz diff = ar.pos - old_pos;
|
||||
ar.data.resize(ar.data.size() + utils::align(diff, 512) - diff);
|
||||
ar.data.resize(ar.data.size() + rx::alignUp(diff, 512) - diff);
|
||||
ar.seek_end();
|
||||
|
||||
fd.close();
|
||||
|
|
|
|||
|
|
@ -9,7 +9,8 @@
|
|||
#include <map>
|
||||
#include <iostream>
|
||||
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "util/coro.hpp"
|
||||
|
||||
using namespace std::literals::string_literals;
|
||||
|
|
@ -2386,7 +2387,7 @@ u64 fs::get_dir_size(const std::string& path, u64 rounding_alignment, atomic_t<b
|
|||
|
||||
if (!entry.is_directory)
|
||||
{
|
||||
result += utils::align(entry.size, rounding_alignment);
|
||||
result += rx::alignUp(entry.size, rounding_alignment);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
|||
|
|
@ -5,9 +5,10 @@
|
|||
#include "File.h"
|
||||
#include "util/logs.hpp"
|
||||
#include "util/vm.hpp"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "util/v128.hpp"
|
||||
#include "util/simd.hpp"
|
||||
#include "rx/align.hpp"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
|
|
@ -158,8 +159,8 @@ static u8* add_jit_memory(usz size, usz align)
|
|||
// Simple allocation by incrementing pointer to the next free data
|
||||
const u64 pos = Ctr.atomic_op([&](u64& ctr) -> u64
|
||||
{
|
||||
const u64 _pos = utils::align(ctr & 0xffff'ffff, align);
|
||||
const u64 _new = utils::align(_pos + size, align);
|
||||
const u64 _pos = rx::alignUp(ctr & 0xffff'ffff, align);
|
||||
const u64 _new = rx::alignUp(_pos + size, align);
|
||||
|
||||
if (_new > 0x40000000) [[unlikely]]
|
||||
{
|
||||
|
|
@ -175,7 +176,7 @@ static u8* add_jit_memory(usz size, usz align)
|
|||
// Check the necessity to commit more memory
|
||||
if (_new > olda) [[unlikely]]
|
||||
{
|
||||
newa = utils::align(_new, 0x200000);
|
||||
newa = rx::alignUp(_new, 0x200000);
|
||||
}
|
||||
|
||||
ctr += _new - (ctr & 0xffff'ffff);
|
||||
|
|
@ -237,9 +238,9 @@ void* jit_runtime_base::_add(asmjit::CodeHolder* code, usz align) noexcept
|
|||
|
||||
for (asmjit::Section* section : code->_sections)
|
||||
{
|
||||
if (section->offset() + section->bufferSize() > utils::align<usz>(codeSize, align))
|
||||
if (section->offset() + section->bufferSize() > rx::alignUp<usz>(codeSize, align))
|
||||
{
|
||||
fmt::throw_exception("CodeHolder section exceeds range: Section->offset: 0x%x, Section->bufferSize: 0x%x, alloted-memory=0x%x", section->offset(), section->bufferSize(), utils::align<usz>(codeSize, align));
|
||||
fmt::throw_exception("CodeHolder section exceeds range: Section->offset: 0x%x, Section->bufferSize: 0x%x, alloted-memory=0x%x", section->offset(), section->bufferSize(), rx::alignUp<usz>(codeSize, align));
|
||||
}
|
||||
|
||||
std::memcpy(p + section->offset(), section->data(), section->bufferSize());
|
||||
|
|
@ -365,7 +366,7 @@ jit_runtime_base& asmjit::get_global_runtime()
|
|||
{
|
||||
return m_pos.atomic_op([&](uchar*& pos) -> uchar*
|
||||
{
|
||||
const auto r = reinterpret_cast<uchar*>(utils::align(uptr(pos), align));
|
||||
const auto r = reinterpret_cast<uchar*>(rx::alignUp(uptr(pos), align));
|
||||
|
||||
if (r >= pos && r + size > pos && r + size <= m_max)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -7,7 +7,8 @@
|
|||
#include "util/logs.hpp"
|
||||
#include "mutex.h"
|
||||
#include "util/vm.hpp"
|
||||
#include "util/asm.hpp"
|
||||
#include "rx/asm.hpp"
|
||||
#include "rx/align.hpp"
|
||||
#include "Crypto/unzip.h"
|
||||
|
||||
#include <charconv>
|
||||
|
|
@ -216,7 +217,7 @@ struct MemoryManager1 : llvm::RTDyldMemoryManager
|
|||
~MemoryManager1() override
|
||||
{
|
||||
// Hack: don't release to prevent reuse of address space, see jit_announce
|
||||
// constexpr auto how_much = [](u64 pos) { return utils::align(pos, pos < c_page_size ? c_page_size / 4 : c_page_size); };
|
||||
// constexpr auto how_much = [](u64 pos) { return rx::alignUp(pos, pos < c_page_size ? c_page_size / 4 : c_page_size); };
|
||||
// utils::memory_decommit(m_code_mems, how_much(code_ptr));
|
||||
// utils::memory_decommit(m_data_ro_mems, how_much(data_ro_ptr));
|
||||
// utils::memory_decommit(m_data_rw_mems, how_much(data_rw_ptr));
|
||||
|
|
@ -249,7 +250,7 @@ struct MemoryManager1 : llvm::RTDyldMemoryManager
|
|||
{
|
||||
align = align ? align : 16;
|
||||
|
||||
const u64 sizea = utils::align(size, align);
|
||||
const u64 sizea = rx::alignUp(size, align);
|
||||
|
||||
if (!size || align > c_page_size || sizea > c_max_size || sizea < size)
|
||||
{
|
||||
|
|
@ -259,7 +260,7 @@ struct MemoryManager1 : llvm::RTDyldMemoryManager
|
|||
|
||||
u64 oldp = alloc_pos;
|
||||
|
||||
u64 olda = utils::align(oldp, align);
|
||||
u64 olda = rx::alignUp(oldp, align);
|
||||
|
||||
ensure(olda >= oldp);
|
||||
ensure(olda < ~sizea);
|
||||
|
|
@ -285,8 +286,8 @@ struct MemoryManager1 : llvm::RTDyldMemoryManager
|
|||
// Optimization: split the first allocation to 512 KiB for single-module compilers
|
||||
if (oldp < c_page_size && align < page_quarter && (std::min(newp, c_page_size) - 1) / page_quarter != (oldp - 1) / page_quarter)
|
||||
{
|
||||
const u64 pagea = utils::align(oldp, page_quarter);
|
||||
const u64 psize = utils::align(std::min(newp, c_page_size) - pagea, page_quarter);
|
||||
const u64 pagea = rx::alignUp(oldp, page_quarter);
|
||||
const u64 psize = rx::alignUp(std::min(newp, c_page_size) - pagea, page_quarter);
|
||||
utils::memory_commit(reinterpret_cast<u8*>(block) + (pagea % c_max_size), psize, prot);
|
||||
|
||||
// Advance
|
||||
|
|
@ -296,8 +297,8 @@ struct MemoryManager1 : llvm::RTDyldMemoryManager
|
|||
if ((newp - 1) / c_page_size != (oldp - 1) / c_page_size)
|
||||
{
|
||||
// Allocate pages on demand
|
||||
const u64 pagea = utils::align(oldp, c_page_size);
|
||||
const u64 psize = utils::align(newp - pagea, c_page_size);
|
||||
const u64 pagea = rx::alignUp(oldp, c_page_size);
|
||||
const u64 psize = rx::alignUp(newp - pagea, c_page_size);
|
||||
utils::memory_commit(reinterpret_cast<u8*>(block) + (pagea % c_max_size), psize, prot);
|
||||
}
|
||||
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue