mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-12-06 07:12:28 +01:00
Merge branch 'master' into windows-clang
This commit is contained in:
commit
d77d8c6f1a
|
|
@ -12,7 +12,7 @@ pkg info # debug
|
||||||
pkg install "llvm$LLVM_COMPILER_VER"
|
pkg install "llvm$LLVM_COMPILER_VER"
|
||||||
|
|
||||||
# Mandatory dependencies (qtX-base is pulled via qtX-multimedia)
|
# Mandatory dependencies (qtX-base is pulled via qtX-multimedia)
|
||||||
pkg install git ccache cmake ninja "qt$QT_VER_MAIN-multimedia" "qt$QT_VER_MAIN-svg" glew openal-soft ffmpeg
|
pkg install git ccache cmake ninja "qt$QT_VER_MAIN-multimedia" "qt$QT_VER_MAIN-svg" glew openal-soft ffmpeg pcre2
|
||||||
|
|
||||||
# Optional dependencies (libevdev is pulled by qtX-base)
|
# Optional dependencies (libevdev is pulled by qtX-base)
|
||||||
pkg install pkgconf alsa-lib pulseaudio sdl3 evdev-proto vulkan-headers vulkan-loader opencv
|
pkg install pkgconf alsa-lib pulseaudio sdl3 evdev-proto vulkan-headers vulkan-loader opencv
|
||||||
|
|
|
||||||
2
3rdparty/CMakeLists.txt
vendored
2
3rdparty/CMakeLists.txt
vendored
|
|
@ -278,7 +278,7 @@ if(USE_FAUDIO)
|
||||||
target_compile_definitions(FAudio-static INTERFACE -DHAVE_FAUDIO)
|
target_compile_definitions(FAudio-static INTERFACE -DHAVE_FAUDIO)
|
||||||
set(FAUDIO_TARGET FAudio-static)
|
set(FAUDIO_TARGET FAudio-static)
|
||||||
else()
|
else()
|
||||||
message(FATAL_ERROR
|
message(WARNING
|
||||||
"-- RPCS3: 3rdparty FAudio requires SDL 3.2.0 or newer. Since a valid SDL3"
|
"-- RPCS3: 3rdparty FAudio requires SDL 3.2.0 or newer. Since a valid SDL3"
|
||||||
">=3.2.0 version cannot be found, building with FAudio will be skipped.")
|
">=3.2.0 version cannot be found, building with FAudio will be skipped.")
|
||||||
set(USE_FAUDIO OFF CACHE BOOL "Disabled FAudio with SDL < 3.2.0" FORCE)
|
set(USE_FAUDIO OFF CACHE BOOL "Disabled FAudio with SDL < 3.2.0" FORCE)
|
||||||
|
|
|
||||||
|
|
@ -135,7 +135,7 @@ if(MSVC)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
|
if(NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||||
message( FATAL_ERROR "RPCS3 can only be compiled on 64-bit platforms." )
|
message(FATAL_ERROR "RPCS3 can only be compiled on 64-bit platforms.")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(APPLE AND CMAKE_OSX_ARCHITECTURES STREQUAL "arm64")
|
if(APPLE AND CMAKE_OSX_ARCHITECTURES STREQUAL "arm64")
|
||||||
|
|
|
||||||
|
|
@ -190,7 +190,7 @@ struct cpu_prof
|
||||||
reservation_samples = 0;
|
reservation_samples = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::string format(const std::multimap<u64, u64, std::greater<u64>>& chart, u64 samples, u64 idle, bool extended_print = false)
|
static std::string format(const std::multimap<u64, u64, std::greater<u64>>& chart, u64 samples, u64 idle, u32 type_id, bool extended_print = false)
|
||||||
{
|
{
|
||||||
// Print results
|
// Print results
|
||||||
std::string results;
|
std::string results;
|
||||||
|
|
@ -204,11 +204,18 @@ struct cpu_prof
|
||||||
const f64 _frac = count / busy / samples;
|
const f64 _frac = count / busy / samples;
|
||||||
|
|
||||||
// Print only 7 hash characters out of 11 (which covers roughly 48 bits)
|
// Print only 7 hash characters out of 11 (which covers roughly 48 bits)
|
||||||
fmt::append(results, "\n\t[%s", fmt::base57(be_t<u64>{name}));
|
if (type_id == 2)
|
||||||
results.resize(results.size() - 4);
|
{
|
||||||
|
fmt::append(results, "\n\t[%s", fmt::base57(be_t<u64>{name}));
|
||||||
|
results.resize(results.size() - 4);
|
||||||
|
|
||||||
// Print chunk address from lowest 16 bits
|
// Print chunk address from lowest 16 bits
|
||||||
fmt::append(results, "...chunk-0x%05x]: %.4f%% (%u)", (name & 0xffff) * 4, _frac * 100., count);
|
fmt::append(results, "...chunk-0x%05x]: %.4f%% (%u)", (name & 0xffff) * 4, _frac * 100., count);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fmt::append(results, "\n\t[0x%07x]: %.4f%% (%u)", name, _frac * 100., count);
|
||||||
|
}
|
||||||
|
|
||||||
if (results.size() >= (extended_print ? 10000 : 5000))
|
if (results.size() >= (extended_print ? 10000 : 5000))
|
||||||
{
|
{
|
||||||
|
|
@ -257,27 +264,37 @@ struct cpu_prof
|
||||||
}
|
}
|
||||||
|
|
||||||
// Print results
|
// Print results
|
||||||
const std::string results = format(chart, samples, idle);
|
const std::string results = format(chart, samples, idle, ptr->id_type());
|
||||||
profiler.notice("Thread \"%s\" [0x%08x]: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%):\n%s", ptr->get_name(), ptr->id, samples, get_percent(idle, samples), new_samples, reservation_samples, get_percent(reservation_samples, samples - idle), results);
|
profiler.notice("Thread \"%s\" [0x%08x]: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%):\n%s", ptr->get_name(), ptr->id, samples, get_percent(idle, samples), new_samples, reservation_samples, get_percent(reservation_samples, samples - idle), results);
|
||||||
|
|
||||||
new_samples = 0;
|
new_samples = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void print_all(std::unordered_map<shared_ptr<cpu_thread>, sample_info>& threads, sample_info& all_info)
|
static void print_all(std::unordered_map<shared_ptr<cpu_thread>, sample_info>& threads, sample_info& all_info, u32 type_id)
|
||||||
{
|
{
|
||||||
u64 new_samples = 0;
|
u64 new_samples = 0;
|
||||||
|
|
||||||
// Print all results and cleanup
|
// Print all results and cleanup
|
||||||
for (auto& [ptr, info] : threads)
|
for (auto& [ptr, info] : threads)
|
||||||
{
|
{
|
||||||
|
if (ptr->id_type() != type_id)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
new_samples += info.new_samples;
|
new_samples += info.new_samples;
|
||||||
info.print(ptr);
|
info.print(ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::multimap<u64, u64, std::greater<u64>> chart;
|
std::multimap<u64, u64, std::greater<u64>> chart;
|
||||||
|
|
||||||
for (auto& [_, info] : threads)
|
for (auto& [ptr, info] : threads)
|
||||||
{
|
{
|
||||||
|
if (ptr->id_type() != type_id)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// This function collects thread information regardless of 'new_samples' member state
|
// This function collects thread information regardless of 'new_samples' member state
|
||||||
for (auto& [name, count] : info.freq)
|
for (auto& [name, count] : info.freq)
|
||||||
{
|
{
|
||||||
|
|
@ -301,7 +318,7 @@ struct cpu_prof
|
||||||
|
|
||||||
if (new_samples < min_print_all_samples && thread_ctrl::state() != thread_state::aborting)
|
if (new_samples < min_print_all_samples && thread_ctrl::state() != thread_state::aborting)
|
||||||
{
|
{
|
||||||
profiler.notice("All Threads: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%): Not enough new samples have been collected since the last print.", samples, get_percent(idle, samples), new_samples, reservation, get_percent(reservation, samples - idle));
|
profiler.notice("All %s Threads: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%): Not enough new samples have been collected since the last print.", type_id == 1 ? "PPU" : "SPU", samples, get_percent(idle, samples), new_samples, reservation, get_percent(reservation, samples - idle));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -310,12 +327,13 @@ struct cpu_prof
|
||||||
chart.emplace(count, name);
|
chart.emplace(count, name);
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::string results = format(chart, samples, idle, true);
|
const std::string results = format(chart, samples, idle, type_id, true);
|
||||||
profiler.notice("All Threads: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%):%s", samples, get_percent(idle, samples), new_samples, reservation, get_percent(reservation, samples - idle), results);
|
profiler.notice("All %s Threads: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%):%s", type_id == 1 ? "PPU" : "SPU", samples, get_percent(idle, samples), new_samples, reservation, get_percent(reservation, samples - idle), results);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
sample_info all_threads_info{};
|
sample_info all_spu_threads_info{};
|
||||||
|
sample_info all_ppu_threads_info{};
|
||||||
|
|
||||||
void operator()()
|
void operator()()
|
||||||
{
|
{
|
||||||
|
|
@ -376,8 +394,11 @@ struct cpu_prof
|
||||||
{
|
{
|
||||||
if (auto state = +ptr->state; cpu_flag::exit - state)
|
if (auto state = +ptr->state; cpu_flag::exit - state)
|
||||||
{
|
{
|
||||||
|
const auto spu = ptr->try_get<spu_thread>();
|
||||||
|
const auto ppu = ptr->try_get<ppu_thread>();
|
||||||
|
|
||||||
// Get short function hash
|
// Get short function hash
|
||||||
const u64 name = atomic_storage<u64>::load(ptr->block_hash);
|
const u64 name = ppu ? atomic_storage<u32>::load(ppu->cia) : atomic_storage<u64>::load(ptr->block_hash);
|
||||||
|
|
||||||
// Append occurrence
|
// Append occurrence
|
||||||
info.samples++;
|
info.samples++;
|
||||||
|
|
@ -387,17 +408,17 @@ struct cpu_prof
|
||||||
info.freq[name]++;
|
info.freq[name]++;
|
||||||
info.new_samples++;
|
info.new_samples++;
|
||||||
|
|
||||||
if (auto spu = ptr->try_get<spu_thread>())
|
if (spu)
|
||||||
{
|
{
|
||||||
if (spu->raddr)
|
if (spu->raddr)
|
||||||
{
|
{
|
||||||
info.reservation_samples++;
|
info.reservation_samples++;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Append verification time to fixed common name 0000000...chunk-0x3fffc
|
// Append verification time to fixed common name 0000000...chunk-0x3fffc
|
||||||
if (name >> 16 && (name & 0xffff) == 0)
|
if (name >> 16 && (name & 0xffff) == 0)
|
||||||
info.freq[0xffff]++;
|
info.freq[0xffff]++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
@ -420,8 +441,10 @@ struct cpu_prof
|
||||||
{
|
{
|
||||||
profiler.success("Flushing profiling results...");
|
profiler.success("Flushing profiling results...");
|
||||||
|
|
||||||
all_threads_info = {};
|
all_ppu_threads_info = {};
|
||||||
sample_info::print_all(threads, all_threads_info);
|
all_spu_threads_info = {};
|
||||||
|
sample_info::print_all(threads, all_ppu_threads_info, 1);
|
||||||
|
sample_info::print_all(threads, all_spu_threads_info, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Emu.IsPaused())
|
if (Emu.IsPaused())
|
||||||
|
|
@ -442,7 +465,8 @@ struct cpu_prof
|
||||||
}
|
}
|
||||||
|
|
||||||
// Print all remaining results
|
// Print all remaining results
|
||||||
sample_info::print_all(threads, all_threads_info);
|
sample_info::print_all(threads, all_ppu_threads_info, 1);
|
||||||
|
sample_info::print_all(threads, all_spu_threads_info, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
static constexpr auto thread_name = "CPU Profiler"sv;
|
static constexpr auto thread_name = "CPU Profiler"sv;
|
||||||
|
|
@ -459,7 +483,7 @@ extern f64 get_cpu_program_usage_percent(u64 hash)
|
||||||
{
|
{
|
||||||
u64 total = 0;
|
u64 total = 0;
|
||||||
|
|
||||||
for (auto [name, count] : prof->all_threads_info.freq)
|
for (auto [name, count] : prof->all_spu_threads_info.freq)
|
||||||
{
|
{
|
||||||
if ((name & -65536) == hash)
|
if ((name & -65536) == hash)
|
||||||
{
|
{
|
||||||
|
|
@ -472,7 +496,7 @@ extern f64 get_cpu_program_usage_percent(u64 hash)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return std::max<f64>(0.0001, static_cast<f64>(total) * 100 / (prof->all_threads_info.samples - prof->all_threads_info.idle));
|
return std::max<f64>(0.0001, static_cast<f64>(total) * 100 / (prof->all_spu_threads_info.samples - prof->all_spu_threads_info.idle));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -639,22 +663,17 @@ void cpu_thread::operator()()
|
||||||
thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(get_class()));
|
thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(get_class()));
|
||||||
}
|
}
|
||||||
|
|
||||||
while (!g_fxo->is_init<cpu_profiler>())
|
ensure(g_fxo->is_init<cpu_profiler>());
|
||||||
{
|
|
||||||
if (Emu.IsStopped())
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Can we have a little race, right? First thread is started concurrently with g_fxo->init()
|
|
||||||
thread_ctrl::wait_for(1000);
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (get_class())
|
switch (get_class())
|
||||||
{
|
{
|
||||||
case thread_class::ppu:
|
case thread_class::ppu:
|
||||||
{
|
{
|
||||||
//g_fxo->get<cpu_profiler>().registered.push(id);
|
if (g_cfg.core.ppu_prof)
|
||||||
|
{
|
||||||
|
g_fxo->get<cpu_profiler>().registered.push(id);
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case thread_class::spu:
|
case thread_class::spu:
|
||||||
|
|
@ -1546,7 +1565,7 @@ void cpu_thread::flush_profilers() noexcept
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (g_cfg.core.spu_prof || g_cfg.core.spu_debug)
|
if (g_cfg.core.spu_prof || g_cfg.core.spu_debug || g_cfg.core.ppu_prof)
|
||||||
{
|
{
|
||||||
g_fxo->get<cpu_profiler>().registered.push(0);
|
g_fxo->get<cpu_profiler>().registered.push(0);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4007,7 +4007,7 @@ llvm::CallInst* llvm_asm(
|
||||||
const std::string& constraints,
|
const std::string& constraints,
|
||||||
llvm::LLVMContext& context)
|
llvm::LLVMContext& context)
|
||||||
{
|
{
|
||||||
llvm::ArrayRef<llvm::Type*> types_ref = std::nullopt;
|
llvm::ArrayRef<llvm::Type*> types_ref {};
|
||||||
std::vector<llvm::Type*> types;
|
std::vector<llvm::Type*> types;
|
||||||
types.reserve(args.size());
|
types.reserve(args.size());
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1139,8 +1139,10 @@ error_code cellCameraGetBufferInfo(s32 dev_num, vm::ptr<CellCameraInfo> info)
|
||||||
return CELL_OK;
|
return CELL_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
error_code cellCameraGetBufferInfoEx(s32 dev_num, vm::ptr<CellCameraInfoEx> info)
|
error_code cellCameraGetBufferInfoEx(ppu_thread& ppu, s32 dev_num, vm::ptr<CellCameraInfoEx> info)
|
||||||
{
|
{
|
||||||
|
ppu.state += cpu_flag::wait;
|
||||||
|
|
||||||
cellCamera.notice("cellCameraGetBufferInfoEx(dev_num=%d, info=0x%x)", dev_num, info);
|
cellCamera.notice("cellCameraGetBufferInfoEx(dev_num=%d, info=0x%x)", dev_num, info);
|
||||||
|
|
||||||
// calls cellCameraGetBufferInfo
|
// calls cellCameraGetBufferInfo
|
||||||
|
|
@ -1151,10 +1153,16 @@ error_code cellCameraGetBufferInfoEx(s32 dev_num, vm::ptr<CellCameraInfoEx> info
|
||||||
}
|
}
|
||||||
|
|
||||||
auto& g_camera = g_fxo->get<camera_thread>();
|
auto& g_camera = g_fxo->get<camera_thread>();
|
||||||
std::lock_guard lock(g_camera.mutex);
|
|
||||||
|
|
||||||
*info = g_camera.info;
|
CellCameraInfoEx info_out;
|
||||||
|
|
||||||
|
{
|
||||||
|
std::lock_guard lock(g_camera.mutex);
|
||||||
|
|
||||||
|
info_out = g_camera.info;
|
||||||
|
}
|
||||||
|
|
||||||
|
*info = info_out;
|
||||||
return CELL_OK;
|
return CELL_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2308,8 +2308,10 @@ error_code cellGemConvertVideoFinish(ppu_thread& ppu)
|
||||||
return CELL_OK;
|
return CELL_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
error_code cellGemConvertVideoStart(vm::cptr<void> video_frame)
|
error_code cellGemConvertVideoStart(ppu_thread& ppu, vm::cptr<void> video_frame)
|
||||||
{
|
{
|
||||||
|
ppu.state += cpu_flag::wait;
|
||||||
|
|
||||||
cellGem.warning("cellGemConvertVideoStart(video_frame=*0x%x)", video_frame);
|
cellGem.warning("cellGemConvertVideoStart(video_frame=*0x%x)", video_frame);
|
||||||
|
|
||||||
auto& gem = g_fxo->get<gem_config>();
|
auto& gem = g_fxo->get<gem_config>();
|
||||||
|
|
@ -2461,6 +2463,8 @@ error_code cellGemEnableMagnetometer2(u32 gem_num, u32 enable)
|
||||||
|
|
||||||
error_code cellGemEnd(ppu_thread& ppu)
|
error_code cellGemEnd(ppu_thread& ppu)
|
||||||
{
|
{
|
||||||
|
ppu.state += cpu_flag::wait;
|
||||||
|
|
||||||
cellGem.warning("cellGemEnd()");
|
cellGem.warning("cellGemEnd()");
|
||||||
|
|
||||||
auto& gem = g_fxo->get<gem_config>();
|
auto& gem = g_fxo->get<gem_config>();
|
||||||
|
|
@ -3265,7 +3269,7 @@ error_code cellGemPrepareCamera(s32 max_exposure, f32 image_quality)
|
||||||
|
|
||||||
extern error_code cellCameraGetAttribute(s32 dev_num, s32 attrib, vm::ptr<u32> arg1, vm::ptr<u32> arg2);
|
extern error_code cellCameraGetAttribute(s32 dev_num, s32 attrib, vm::ptr<u32> arg1, vm::ptr<u32> arg2);
|
||||||
extern error_code cellCameraSetAttribute(s32 dev_num, s32 attrib, u32 arg1, u32 arg2);
|
extern error_code cellCameraSetAttribute(s32 dev_num, s32 attrib, u32 arg1, u32 arg2);
|
||||||
extern error_code cellCameraGetBufferInfoEx(s32 dev_num, vm::ptr<CellCameraInfoEx> info);
|
extern error_code cellCameraGetBufferInfoEx(ppu_thread&, s32 dev_num, vm::ptr<CellCameraInfoEx> info);
|
||||||
|
|
||||||
vm::var<CellCameraInfoEx> info = vm::make_var<CellCameraInfoEx>({});
|
vm::var<CellCameraInfoEx> info = vm::make_var<CellCameraInfoEx>({});
|
||||||
vm::var<u32> arg1 = vm::make_var<u32>({});
|
vm::var<u32> arg1 = vm::make_var<u32>({});
|
||||||
|
|
@ -3273,7 +3277,7 @@ error_code cellGemPrepareCamera(s32 max_exposure, f32 image_quality)
|
||||||
|
|
||||||
cellCameraGetAttribute(0, 0x3e6, arg1, arg2);
|
cellCameraGetAttribute(0, 0x3e6, arg1, arg2);
|
||||||
cellCameraSetAttribute(0, 0x3e6, 0x3e, *arg2 | 0x80);
|
cellCameraSetAttribute(0, 0x3e6, 0x3e, *arg2 | 0x80);
|
||||||
cellCameraGetBufferInfoEx(0, info);
|
cellCameraGetBufferInfoEx(*cpu_thread::get_current<ppu_thread>(), 0, info);
|
||||||
|
|
||||||
if (info->width == 640)
|
if (info->width == 640)
|
||||||
{
|
{
|
||||||
|
|
@ -3605,6 +3609,8 @@ error_code cellGemTrackHues(vm::cptr<u32> req_hues, vm::ptr<u32> res_hues)
|
||||||
|
|
||||||
error_code cellGemUpdateFinish(ppu_thread& ppu)
|
error_code cellGemUpdateFinish(ppu_thread& ppu)
|
||||||
{
|
{
|
||||||
|
ppu.state += cpu_flag::wait;
|
||||||
|
|
||||||
cellGem.warning("cellGemUpdateFinish()");
|
cellGem.warning("cellGemUpdateFinish()");
|
||||||
|
|
||||||
auto& gem = g_fxo->get<gem_config>();
|
auto& gem = g_fxo->get<gem_config>();
|
||||||
|
|
|
||||||
|
|
@ -221,7 +221,7 @@ std::pair<PPUDisAsm::const_op, u64> PPUDisAsm::try_get_const_op_gpr_value(u32 re
|
||||||
|
|
||||||
GET_CONST_REG(reg_rs, op.rs);
|
GET_CONST_REG(reg_rs, op.rs);
|
||||||
|
|
||||||
return { form, utils::rol64(reg_rs, op.sh64) & (~0ull << (op.mbe64 ^ 63)) };
|
return {form, std::rotl<u64>(reg_rs, op.sh64) & (~0ull << (op.mbe64 ^ 63))};
|
||||||
}
|
}
|
||||||
case ppu_itype::OR:
|
case ppu_itype::OR:
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -3483,7 +3483,7 @@ auto RLWIMI()
|
||||||
|
|
||||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
||||||
const u64 mask = ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
|
const u64 mask = ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
|
||||||
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & mask);
|
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (dup32(std::rotl<u32>(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & mask);
|
||||||
if constexpr (((Flags == has_rc) || ...))
|
if constexpr (((Flags == has_rc) || ...))
|
||||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||||
};
|
};
|
||||||
|
|
@ -3497,7 +3497,7 @@ auto RLWINM()
|
||||||
return ppu_exec_select<Flags...>::template select<>();
|
return ppu_exec_select<Flags...>::template select<>();
|
||||||
|
|
||||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
||||||
ppu.gpr[op.ra] = dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
|
ppu.gpr[op.ra] = dup32(std::rotl<u32>(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
|
||||||
if constexpr (((Flags == has_rc) || ...))
|
if constexpr (((Flags == has_rc) || ...))
|
||||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||||
};
|
};
|
||||||
|
|
@ -3511,7 +3511,7 @@ auto RLWNM()
|
||||||
return ppu_exec_select<Flags...>::template select<>();
|
return ppu_exec_select<Flags...>::template select<>();
|
||||||
|
|
||||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
||||||
ppu.gpr[op.ra] = dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), ppu.gpr[op.rb] & 0x1f)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
|
ppu.gpr[op.ra] = dup32(std::rotl<u32>(static_cast<u32>(ppu.gpr[op.rs]), ppu.gpr[op.rb] & 0x1f)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
|
||||||
if constexpr (((Flags == has_rc) || ...))
|
if constexpr (((Flags == has_rc) || ...))
|
||||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||||
};
|
};
|
||||||
|
|
@ -3599,7 +3599,7 @@ auto RLDICL()
|
||||||
return ppu_exec_select<Flags...>::template select<>();
|
return ppu_exec_select<Flags...>::template select<>();
|
||||||
|
|
||||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
||||||
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull >> op.mbe64);
|
ppu.gpr[op.ra] = std::rotl<u64>(ppu.gpr[op.rs], op.sh64) & (~0ull >> op.mbe64);
|
||||||
if constexpr (((Flags == has_rc) || ...))
|
if constexpr (((Flags == has_rc) || ...))
|
||||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||||
};
|
};
|
||||||
|
|
@ -3613,7 +3613,7 @@ auto RLDICR()
|
||||||
return ppu_exec_select<Flags...>::template select<>();
|
return ppu_exec_select<Flags...>::template select<>();
|
||||||
|
|
||||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
||||||
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull << (op.mbe64 ^ 63));
|
ppu.gpr[op.ra] = std::rotl<u64>(ppu.gpr[op.rs], op.sh64) & (~0ull << (op.mbe64 ^ 63));
|
||||||
if constexpr (((Flags == has_rc) || ...))
|
if constexpr (((Flags == has_rc) || ...))
|
||||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||||
};
|
};
|
||||||
|
|
@ -3627,7 +3627,7 @@ auto RLDIC()
|
||||||
return ppu_exec_select<Flags...>::template select<>();
|
return ppu_exec_select<Flags...>::template select<>();
|
||||||
|
|
||||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
||||||
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
|
ppu.gpr[op.ra] = std::rotl<u64>(ppu.gpr[op.rs], op.sh64) & ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
|
||||||
if constexpr (((Flags == has_rc) || ...))
|
if constexpr (((Flags == has_rc) || ...))
|
||||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||||
};
|
};
|
||||||
|
|
@ -3642,7 +3642,7 @@ auto RLDIMI()
|
||||||
|
|
||||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
||||||
const u64 mask = ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
|
const u64 mask = ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
|
||||||
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (utils::rol64(ppu.gpr[op.rs], op.sh64) & mask);
|
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (std::rotl<u64>(ppu.gpr[op.rs], op.sh64) & mask);
|
||||||
if constexpr (((Flags == has_rc) || ...))
|
if constexpr (((Flags == has_rc) || ...))
|
||||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||||
};
|
};
|
||||||
|
|
@ -3656,7 +3656,7 @@ auto RLDCL()
|
||||||
return ppu_exec_select<Flags...>::template select<>();
|
return ppu_exec_select<Flags...>::template select<>();
|
||||||
|
|
||||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
||||||
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull >> op.mbe64);
|
ppu.gpr[op.ra] = std::rotl<u64>(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull >> op.mbe64);
|
||||||
if constexpr (((Flags == has_rc) || ...))
|
if constexpr (((Flags == has_rc) || ...))
|
||||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||||
};
|
};
|
||||||
|
|
@ -3670,7 +3670,7 @@ auto RLDCR()
|
||||||
return ppu_exec_select<Flags...>::template select<>();
|
return ppu_exec_select<Flags...>::template select<>();
|
||||||
|
|
||||||
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
||||||
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull << (op.mbe64 ^ 63));
|
ppu.gpr[op.ra] = std::rotl<u64>(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull << (op.mbe64 ^ 63));
|
||||||
if constexpr (((Flags == has_rc) || ...))
|
if constexpr (((Flags == has_rc) || ...))
|
||||||
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -827,6 +827,9 @@ extern void ppu_register_function_at(u32 addr, u32 size, ppu_intrp_func_t ptr =
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size = utils::align<u32>(size + addr % 4, 4);
|
||||||
|
addr &= -4;
|
||||||
|
|
||||||
if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm)
|
if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
|
|
@ -2308,7 +2311,7 @@ void ppu_thread::cpu_sleep()
|
||||||
raddr = 0;
|
raddr = 0;
|
||||||
|
|
||||||
// Setup wait flag and memory flags to relock itself
|
// Setup wait flag and memory flags to relock itself
|
||||||
state += g_use_rtm ? cpu_flag::wait : cpu_flag::wait + cpu_flag::memory;
|
state += cpu_flag::wait + cpu_flag::memory;
|
||||||
|
|
||||||
if (auto ptr = vm::g_tls_locked)
|
if (auto ptr = vm::g_tls_locked)
|
||||||
{
|
{
|
||||||
|
|
@ -2454,10 +2457,8 @@ ppu_thread::ppu_thread(const ppu_thread_params& param, std::string_view name, u3
|
||||||
// Trigger the scheduler
|
// Trigger the scheduler
|
||||||
state += cpu_flag::suspend;
|
state += cpu_flag::suspend;
|
||||||
|
|
||||||
if (!g_use_rtm)
|
// Acquire memory passive lock
|
||||||
{
|
state += cpu_flag::memory;
|
||||||
state += cpu_flag::memory;
|
|
||||||
}
|
|
||||||
|
|
||||||
call_history.data.resize(g_cfg.core.ppu_call_history ? call_history_max_size : 1);
|
call_history.data.resize(g_cfg.core.ppu_call_history ? call_history_max_size : 1);
|
||||||
syscall_history.data.resize(g_cfg.core.ppu_call_history ? syscall_history_max_size : 1);
|
syscall_history.data.resize(g_cfg.core.ppu_call_history ? syscall_history_max_size : 1);
|
||||||
|
|
@ -2703,11 +2704,7 @@ ppu_thread::ppu_thread(utils::serial& ar)
|
||||||
|
|
||||||
// Trigger the scheduler
|
// Trigger the scheduler
|
||||||
state += cpu_flag::suspend;
|
state += cpu_flag::suspend;
|
||||||
|
state += cpu_flag::memory;
|
||||||
if (!g_use_rtm)
|
|
||||||
{
|
|
||||||
state += cpu_flag::memory;
|
|
||||||
}
|
|
||||||
|
|
||||||
ppu_tname = make_single<std::string>(ar.pop<std::string>());
|
ppu_tname = make_single<std::string>(ar.pop<std::string>());
|
||||||
|
|
||||||
|
|
@ -3191,221 +3188,6 @@ extern u64 ppu_ldarx(ppu_thread& ppu, u32 addr)
|
||||||
return ppu_load_acquire_reservation<u64>(ppu, addr);
|
return ppu_load_acquire_reservation<u64>(ppu, addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto ppu_stcx_accurate_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime, const void* _old, u64 _new)>("ppu_stcx_accurate_tx", [](native_asm& c, auto& args)
|
|
||||||
{
|
|
||||||
using namespace asmjit;
|
|
||||||
|
|
||||||
#if defined(ARCH_X64)
|
|
||||||
Label fall = c.newLabel();
|
|
||||||
Label fail = c.newLabel();
|
|
||||||
Label _ret = c.newLabel();
|
|
||||||
Label load = c.newLabel();
|
|
||||||
|
|
||||||
//if (utils::has_avx() && !s_tsx_avx)
|
|
||||||
//{
|
|
||||||
// c.vzeroupper();
|
|
||||||
//}
|
|
||||||
|
|
||||||
// Create stack frame if necessary (Windows ABI has only 6 volatile vector registers)
|
|
||||||
c.push(x86::rbp);
|
|
||||||
c.push(x86::r13);
|
|
||||||
c.push(x86::r14);
|
|
||||||
c.sub(x86::rsp, 48);
|
|
||||||
#ifdef _WIN32
|
|
||||||
if (!s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.movups(x86::oword_ptr(x86::rsp, 0), x86::xmm6);
|
|
||||||
c.movups(x86::oword_ptr(x86::rsp, 16), x86::xmm7);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Prepare registers
|
|
||||||
build_swap_rdx_with(c, args, x86::r10);
|
|
||||||
c.movabs(x86::rbp, reinterpret_cast<u64>(&vm::g_sudo_addr));
|
|
||||||
c.mov(x86::rbp, x86::qword_ptr(x86::rbp));
|
|
||||||
c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
|
|
||||||
c.and_(x86::rbp, -128);
|
|
||||||
c.prefetchw(x86::byte_ptr(x86::rbp, 0));
|
|
||||||
c.prefetchw(x86::byte_ptr(x86::rbp, 64));
|
|
||||||
c.movzx(args[0].r32(), args[0].r16());
|
|
||||||
c.shr(args[0].r32(), 1);
|
|
||||||
c.movabs(x86::r11, reinterpret_cast<u64>(+vm::g_reservations));
|
|
||||||
c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
|
|
||||||
c.and_(x86::r11, -128 / 2);
|
|
||||||
c.and_(args[0].r32(), 63);
|
|
||||||
|
|
||||||
// Prepare data
|
|
||||||
if (s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.vmovups(x86::ymm0, x86::ymmword_ptr(args[2], 0));
|
|
||||||
c.vmovups(x86::ymm1, x86::ymmword_ptr(args[2], 32));
|
|
||||||
c.vmovups(x86::ymm2, x86::ymmword_ptr(args[2], 64));
|
|
||||||
c.vmovups(x86::ymm3, x86::ymmword_ptr(args[2], 96));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
c.movaps(x86::xmm0, x86::oword_ptr(args[2], 0));
|
|
||||||
c.movaps(x86::xmm1, x86::oword_ptr(args[2], 16));
|
|
||||||
c.movaps(x86::xmm2, x86::oword_ptr(args[2], 32));
|
|
||||||
c.movaps(x86::xmm3, x86::oword_ptr(args[2], 48));
|
|
||||||
c.movaps(x86::xmm4, x86::oword_ptr(args[2], 64));
|
|
||||||
c.movaps(x86::xmm5, x86::oword_ptr(args[2], 80));
|
|
||||||
c.movaps(x86::xmm6, x86::oword_ptr(args[2], 96));
|
|
||||||
c.movaps(x86::xmm7, x86::oword_ptr(args[2], 112));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Alloc r14 to stamp0
|
|
||||||
const auto stamp0 = x86::r14;
|
|
||||||
build_get_tsc(c, stamp0);
|
|
||||||
|
|
||||||
Label fail2 = c.newLabel();
|
|
||||||
|
|
||||||
Label tx1 = build_transaction_enter(c, fall, [&]()
|
|
||||||
{
|
|
||||||
build_get_tsc(c);
|
|
||||||
c.sub(x86::rax, stamp0);
|
|
||||||
c.movabs(x86::r13, reinterpret_cast<u64>(&g_rtm_tx_limit2));
|
|
||||||
c.cmp(x86::rax, x86::qword_ptr(x86::r13));
|
|
||||||
c.jae(fall);
|
|
||||||
});
|
|
||||||
|
|
||||||
// Check pause flag
|
|
||||||
c.bt(x86::dword_ptr(args[2], ::offset32(&ppu_thread::state) - ::offset32(&ppu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
|
|
||||||
c.jc(fall);
|
|
||||||
c.xbegin(tx1);
|
|
||||||
|
|
||||||
if (s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.vxorps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(x86::rbp, 0));
|
|
||||||
c.vxorps(x86::ymm1, x86::ymm1, x86::ymmword_ptr(x86::rbp, 32));
|
|
||||||
c.vxorps(x86::ymm2, x86::ymm2, x86::ymmword_ptr(x86::rbp, 64));
|
|
||||||
c.vxorps(x86::ymm3, x86::ymm3, x86::ymmword_ptr(x86::rbp, 96));
|
|
||||||
c.vorps(x86::ymm0, x86::ymm0, x86::ymm1);
|
|
||||||
c.vorps(x86::ymm1, x86::ymm2, x86::ymm3);
|
|
||||||
c.vorps(x86::ymm0, x86::ymm1, x86::ymm0);
|
|
||||||
c.vptest(x86::ymm0, x86::ymm0);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
c.xorps(x86::xmm0, x86::oword_ptr(x86::rbp, 0));
|
|
||||||
c.xorps(x86::xmm1, x86::oword_ptr(x86::rbp, 16));
|
|
||||||
c.xorps(x86::xmm2, x86::oword_ptr(x86::rbp, 32));
|
|
||||||
c.xorps(x86::xmm3, x86::oword_ptr(x86::rbp, 48));
|
|
||||||
c.xorps(x86::xmm4, x86::oword_ptr(x86::rbp, 64));
|
|
||||||
c.xorps(x86::xmm5, x86::oword_ptr(x86::rbp, 80));
|
|
||||||
c.xorps(x86::xmm6, x86::oword_ptr(x86::rbp, 96));
|
|
||||||
c.xorps(x86::xmm7, x86::oword_ptr(x86::rbp, 112));
|
|
||||||
c.orps(x86::xmm0, x86::xmm1);
|
|
||||||
c.orps(x86::xmm2, x86::xmm3);
|
|
||||||
c.orps(x86::xmm4, x86::xmm5);
|
|
||||||
c.orps(x86::xmm6, x86::xmm7);
|
|
||||||
c.orps(x86::xmm0, x86::xmm2);
|
|
||||||
c.orps(x86::xmm4, x86::xmm6);
|
|
||||||
c.orps(x86::xmm0, x86::xmm4);
|
|
||||||
c.ptest(x86::xmm0, x86::xmm0);
|
|
||||||
}
|
|
||||||
|
|
||||||
c.jnz(fail);
|
|
||||||
|
|
||||||
// Store 8 bytes
|
|
||||||
c.mov(x86::qword_ptr(x86::rbp, args[0], 1, 0), args[3]);
|
|
||||||
|
|
||||||
c.xend();
|
|
||||||
c.lock().add(x86::qword_ptr(x86::r11), 64);
|
|
||||||
build_get_tsc(c);
|
|
||||||
c.sub(x86::rax, stamp0);
|
|
||||||
c.jmp(_ret);
|
|
||||||
|
|
||||||
// XABORT is expensive so try to finish with xend instead
|
|
||||||
c.bind(fail);
|
|
||||||
|
|
||||||
// Load old data to store back in rdata
|
|
||||||
if (s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.vmovaps(x86::ymm0, x86::ymmword_ptr(x86::rbp, 0));
|
|
||||||
c.vmovaps(x86::ymm1, x86::ymmword_ptr(x86::rbp, 32));
|
|
||||||
c.vmovaps(x86::ymm2, x86::ymmword_ptr(x86::rbp, 64));
|
|
||||||
c.vmovaps(x86::ymm3, x86::ymmword_ptr(x86::rbp, 96));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
c.movaps(x86::xmm0, x86::oword_ptr(x86::rbp, 0));
|
|
||||||
c.movaps(x86::xmm1, x86::oword_ptr(x86::rbp, 16));
|
|
||||||
c.movaps(x86::xmm2, x86::oword_ptr(x86::rbp, 32));
|
|
||||||
c.movaps(x86::xmm3, x86::oword_ptr(x86::rbp, 48));
|
|
||||||
c.movaps(x86::xmm4, x86::oword_ptr(x86::rbp, 64));
|
|
||||||
c.movaps(x86::xmm5, x86::oword_ptr(x86::rbp, 80));
|
|
||||||
c.movaps(x86::xmm6, x86::oword_ptr(x86::rbp, 96));
|
|
||||||
c.movaps(x86::xmm7, x86::oword_ptr(x86::rbp, 112));
|
|
||||||
}
|
|
||||||
|
|
||||||
c.xend();
|
|
||||||
c.jmp(fail2);
|
|
||||||
|
|
||||||
c.bind(fall);
|
|
||||||
c.mov(x86::rax, -1);
|
|
||||||
c.jmp(_ret);
|
|
||||||
|
|
||||||
c.bind(fail2);
|
|
||||||
c.lock().sub(x86::qword_ptr(x86::r11), 64);
|
|
||||||
c.bind(load);
|
|
||||||
|
|
||||||
// Store previous data back to rdata
|
|
||||||
if (s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.vmovaps(x86::ymmword_ptr(args[2], 0), x86::ymm0);
|
|
||||||
c.vmovaps(x86::ymmword_ptr(args[2], 32), x86::ymm1);
|
|
||||||
c.vmovaps(x86::ymmword_ptr(args[2], 64), x86::ymm2);
|
|
||||||
c.vmovaps(x86::ymmword_ptr(args[2], 96), x86::ymm3);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
c.movaps(x86::oword_ptr(args[2], 0), x86::xmm0);
|
|
||||||
c.movaps(x86::oword_ptr(args[2], 16), x86::xmm1);
|
|
||||||
c.movaps(x86::oword_ptr(args[2], 32), x86::xmm2);
|
|
||||||
c.movaps(x86::oword_ptr(args[2], 48), x86::xmm3);
|
|
||||||
c.movaps(x86::oword_ptr(args[2], 64), x86::xmm4);
|
|
||||||
c.movaps(x86::oword_ptr(args[2], 80), x86::xmm5);
|
|
||||||
c.movaps(x86::oword_ptr(args[2], 96), x86::xmm6);
|
|
||||||
c.movaps(x86::oword_ptr(args[2], 112), x86::xmm7);
|
|
||||||
}
|
|
||||||
|
|
||||||
c.mov(x86::rax, -1);
|
|
||||||
c.mov(x86::qword_ptr(args[2], ::offset32(&ppu_thread::last_ftime) - ::offset32(&ppu_thread::rdata)), x86::rax);
|
|
||||||
c.xor_(x86::eax, x86::eax);
|
|
||||||
//c.jmp(_ret);
|
|
||||||
|
|
||||||
c.bind(_ret);
|
|
||||||
|
|
||||||
#ifdef _WIN32
|
|
||||||
if (!s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.vmovups(x86::xmm6, x86::oword_ptr(x86::rsp, 0));
|
|
||||||
c.vmovups(x86::xmm7, x86::oword_ptr(x86::rsp, 16));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.vzeroupper();
|
|
||||||
}
|
|
||||||
|
|
||||||
c.add(x86::rsp, 48);
|
|
||||||
c.pop(x86::r14);
|
|
||||||
c.pop(x86::r13);
|
|
||||||
c.pop(x86::rbp);
|
|
||||||
|
|
||||||
maybe_flush_lbr(c);
|
|
||||||
c.ret();
|
|
||||||
#else
|
|
||||||
UNUSED(args);
|
|
||||||
|
|
||||||
// Unimplemented should fail.
|
|
||||||
c.brk(Imm(0x42));
|
|
||||||
c.ret(a64::x30);
|
|
||||||
#endif
|
|
||||||
});
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
|
static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
|
||||||
{
|
{
|
||||||
|
|
@ -3486,77 +3268,6 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (g_use_rtm) [[likely]]
|
|
||||||
{
|
|
||||||
switch (u64 count = ppu_stcx_accurate_tx(addr & -8, rtime, ppu.rdata, std::bit_cast<u64>(new_data)))
|
|
||||||
{
|
|
||||||
case umax:
|
|
||||||
{
|
|
||||||
auto& all_data = *vm::get_super_ptr<spu_rdata_t>(addr & -128);
|
|
||||||
auto& sdata = *vm::get_super_ptr<atomic_be_t<u64>>(addr & -8);
|
|
||||||
|
|
||||||
const bool ok = cpu_thread::suspend_all<+3>(&ppu, {all_data, all_data + 64, &res}, [&]
|
|
||||||
{
|
|
||||||
if ((res & -128) == rtime && cmp_rdata(ppu.rdata, all_data))
|
|
||||||
{
|
|
||||||
sdata.release(new_data);
|
|
||||||
res += 64;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
mov_rdata_nt(ppu.rdata, all_data);
|
|
||||||
res -= 64;
|
|
||||||
return false;
|
|
||||||
});
|
|
||||||
|
|
||||||
if (ok)
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
ppu.last_ftime = -1;
|
|
||||||
[[fallthrough]];
|
|
||||||
}
|
|
||||||
case 0:
|
|
||||||
{
|
|
||||||
if (ppu.last_faddr == addr)
|
|
||||||
{
|
|
||||||
ppu.last_fail++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ppu.last_ftime != umax)
|
|
||||||
{
|
|
||||||
ppu.last_faddr = 0;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
utils::prefetch_read(ppu.rdata);
|
|
||||||
utils::prefetch_read(ppu.rdata + 64);
|
|
||||||
ppu.last_faddr = addr;
|
|
||||||
ppu.last_ftime = res.load() & -128;
|
|
||||||
ppu.last_ftsc = utils::get_tsc();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
if (count > 20000 && g_cfg.core.perf_report) [[unlikely]]
|
|
||||||
{
|
|
||||||
perf_log.warning("STCX: took too long: %.3fus (%u c)", count / (utils::get_tsc_freq() / 1000'000.), count);
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ppu.last_faddr == addr)
|
|
||||||
{
|
|
||||||
ppu.last_succ++;
|
|
||||||
}
|
|
||||||
|
|
||||||
ppu.last_faddr = 0;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Align address: we do not need the lower 7 bits anymore
|
// Align address: we do not need the lower 7 bits anymore
|
||||||
addr &= -128;
|
addr &= -128;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -593,6 +593,11 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
|
||||||
{
|
{
|
||||||
callee = m_module->getOrInsertFunction(fmt::format("__0x%x", target_last - base), type);
|
callee = m_module->getOrInsertFunction(fmt::format("__0x%x", target_last - base), type);
|
||||||
cast<Function>(callee.getCallee())->setCallingConv(CallingConv::GHC);
|
cast<Function>(callee.getCallee())->setCallingConv(CallingConv::GHC);
|
||||||
|
|
||||||
|
if (g_cfg.core.ppu_prof)
|
||||||
|
{
|
||||||
|
m_ir->CreateStore(m_ir->getInt32(target_last), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_cia - m_locals)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
|
||||||
|
|
@ -3212,7 +3212,7 @@ void spu_recompiler::ROTQBYI(spu_opcode_t op)
|
||||||
}
|
}
|
||||||
else if (s == 4 || s == 8 || s == 12)
|
else if (s == 4 || s == 8 || s == 12)
|
||||||
{
|
{
|
||||||
c->pshufd(va, va, utils::rol8(0xE4, s / 2));
|
c->pshufd(va, va, std::rotl<u8>(0xE4, s / 2));
|
||||||
}
|
}
|
||||||
else if (utils::has_ssse3())
|
else if (utils::has_ssse3())
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -38,26 +38,9 @@ constexpr u32 s_reg_max = spu_recompiler_base::s_reg_max;
|
||||||
template<typename T>
|
template<typename T>
|
||||||
struct span_less
|
struct span_less
|
||||||
{
|
{
|
||||||
static int compare(const std::span<T>& lhs, const std::span<T>& rhs) noexcept
|
static auto compare(const std::span<T>& lhs, const std::span<T>& rhs) noexcept
|
||||||
{
|
{
|
||||||
// TODO: Replace with std::lexicographical_compare_three_way when it becomes available to all compilers
|
return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
|
||||||
for (usz i = 0, last = std::min(lhs.size(), rhs.size()); i != last; i++)
|
|
||||||
{
|
|
||||||
const T vl = lhs[i];
|
|
||||||
const T vr = rhs[i];
|
|
||||||
|
|
||||||
if (vl != vr)
|
|
||||||
{
|
|
||||||
return vl < vr ? -1 : 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (lhs.size() != rhs.size())
|
|
||||||
{
|
|
||||||
return lhs.size() < rhs.size() ? -1 : 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator()(const std::span<T>& lhs, const std::span<T>& rhs) const noexcept
|
bool operator()(const std::span<T>& lhs, const std::span<T>& rhs) const noexcept
|
||||||
|
|
@ -1309,7 +1292,7 @@ bool spu_program::operator<(const spu_program& rhs) const noexcept
|
||||||
std::span<const u32> lhs_data(data.data() + lhs_offs, data.size() - lhs_offs);
|
std::span<const u32> lhs_data(data.data() + lhs_offs, data.size() - lhs_offs);
|
||||||
std::span<const u32> rhs_data(rhs.data.data() + rhs_offs, rhs.data.size() - rhs_offs);
|
std::span<const u32> rhs_data(rhs.data.data() + rhs_offs, rhs.data.size() - rhs_offs);
|
||||||
|
|
||||||
const int cmp0 = span_less<const u32>::compare(lhs_data, rhs_data);
|
const auto cmp0 = span_less<const u32>::compare(lhs_data, rhs_data);
|
||||||
|
|
||||||
if (cmp0 < 0)
|
if (cmp0 < 0)
|
||||||
return true;
|
return true;
|
||||||
|
|
@ -1320,7 +1303,7 @@ bool spu_program::operator<(const spu_program& rhs) const noexcept
|
||||||
lhs_data = {data.data(), lhs_offs};
|
lhs_data = {data.data(), lhs_offs};
|
||||||
rhs_data = {rhs.data.data(), rhs_offs};
|
rhs_data = {rhs.data.data(), rhs_offs};
|
||||||
|
|
||||||
const int cmp1 = span_less<const u32>::compare(lhs_data, rhs_data);
|
const auto cmp1 = span_less<const u32>::compare(lhs_data, rhs_data);
|
||||||
|
|
||||||
if (cmp1 < 0)
|
if (cmp1 < 0)
|
||||||
return true;
|
return true;
|
||||||
|
|
@ -2333,7 +2316,7 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
|
||||||
// Search for BRSL LR and BRASL LR or BR
|
// Search for BRSL LR and BRASL LR or BR
|
||||||
// TODO: BISL
|
// TODO: BISL
|
||||||
const v128 inst = read_from_ptr<be_t<v128>>(ls.data(), i - base_addr);
|
const v128 inst = read_from_ptr<be_t<v128>>(ls.data(), i - base_addr);
|
||||||
const v128 cleared_i16 = gv_and32(inst, v128::from32p(utils::rol32(~0xffff, 7)));
|
const v128 cleared_i16 = gv_and32(inst, v128::from32p(std::rotl<u32>(~0xffff, 7)));
|
||||||
const v128 eq_brsl = gv_eq32(cleared_i16, v128::from32p(0x66u << 23));
|
const v128 eq_brsl = gv_eq32(cleared_i16, v128::from32p(0x66u << 23));
|
||||||
const v128 eq_brasl = gv_eq32(cleared_i16, brasl_mask);
|
const v128 eq_brasl = gv_eq32(cleared_i16, brasl_mask);
|
||||||
const v128 eq_br = gv_eq32(cleared_i16, v128::from32p(0x64u << 23));
|
const v128 eq_br = gv_eq32(cleared_i16, v128::from32p(0x64u << 23));
|
||||||
|
|
@ -5396,7 +5379,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||||
const usz block_tail = duplicate_positions[it_begin - it_tail];
|
const usz block_tail = duplicate_positions[it_begin - it_tail];
|
||||||
|
|
||||||
// Check if the distance is precisely two times from the end
|
// Check if the distance is precisely two times from the end
|
||||||
if (reg_state_it.size() - block_start != utils::rol64(reg_state_it.size() - block_tail, 1))
|
if (reg_state_it.size() - block_start != std::rotl<u64>(reg_state_it.size() - block_tail, 1))
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -291,7 +291,7 @@ bool ROT(spu_thread& spu, spu_opcode_t op)
|
||||||
|
|
||||||
for (u32 i = 0; i < 4; i++)
|
for (u32 i = 0; i < 4; i++)
|
||||||
{
|
{
|
||||||
spu.gpr[op.rt]._u32[i] = utils::rol32(a._u32[i], b._u32[i]);
|
spu.gpr[op.rt]._u32[i] = std::rotl<u32>(a._u32[i], b._u32[i]);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
@ -346,7 +346,7 @@ bool ROTH(spu_thread& spu, spu_opcode_t op)
|
||||||
|
|
||||||
for (u32 i = 0; i < 8; i++)
|
for (u32 i = 0; i < 8; i++)
|
||||||
{
|
{
|
||||||
spu.gpr[op.rt]._u16[i] = utils::rol16(a._u16[i], b._u16[i]);
|
spu.gpr[op.rt]._u16[i] = std::rotl<u16>(a._u16[i], b._u16[i]);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3991,7 +3991,7 @@ public:
|
||||||
|
|
||||||
bool must_use_cpp_functions = !!g_cfg.core.spu_accurate_dma;
|
bool must_use_cpp_functions = !!g_cfg.core.spu_accurate_dma;
|
||||||
|
|
||||||
if (u64 cmdh = ci->getZExtValue() & ~(MFC_BARRIER_MASK | MFC_FENCE_MASK | MFC_RESULT_MASK); g_cfg.core.rsx_fifo_accuracy || g_cfg.video.strict_rendering_mode || !g_use_rtm)
|
if (u64 cmdh = ci->getZExtValue() & ~(MFC_BARRIER_MASK | MFC_FENCE_MASK | MFC_RESULT_MASK); g_cfg.core.rsx_fifo_accuracy || g_cfg.video.strict_rendering_mode || /*!g_use_rtm*/ true)
|
||||||
{
|
{
|
||||||
// TODO: don't require TSX (current implementation is TSX-only)
|
// TODO: don't require TSX (current implementation is TSX-only)
|
||||||
if (cmdh == MFC_PUT_CMD || cmdh == MFC_SNDSIG_CMD)
|
if (cmdh == MFC_PUT_CMD || cmdh == MFC_SNDSIG_CMD)
|
||||||
|
|
|
||||||
|
|
@ -639,549 +639,6 @@ std::array<u32, 2> op_branch_targets(u32 pc, spu_opcode_t op)
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto spu_putllc_tx = build_function_asm<u64(*)(u32 raddr, u64 rtime, void* _old, const void* _new)>("spu_putllc_tx", [](native_asm& c, auto& args)
|
|
||||||
{
|
|
||||||
using namespace asmjit;
|
|
||||||
|
|
||||||
#if defined(ARCH_X64)
|
|
||||||
Label fall = c.newLabel();
|
|
||||||
Label fail = c.newLabel();
|
|
||||||
Label _ret = c.newLabel();
|
|
||||||
Label load = c.newLabel();
|
|
||||||
|
|
||||||
//if (utils::has_avx() && !s_tsx_avx)
|
|
||||||
//{
|
|
||||||
// c.vzeroupper();
|
|
||||||
//}
|
|
||||||
|
|
||||||
// Create stack frame if necessary (Windows ABI has only 6 volatile vector registers)
|
|
||||||
c.push(x86::rbp);
|
|
||||||
c.push(x86::rbx);
|
|
||||||
#ifdef _WIN32
|
|
||||||
c.sub(x86::rsp, 168);
|
|
||||||
if (s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.vmovups(x86::oword_ptr(x86::rsp, 0), x86::xmm6);
|
|
||||||
c.vmovups(x86::oword_ptr(x86::rsp, 16), x86::xmm7);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
c.movups(x86::oword_ptr(x86::rsp, 0), x86::xmm6);
|
|
||||||
c.movups(x86::oword_ptr(x86::rsp, 16), x86::xmm7);
|
|
||||||
c.movups(x86::oword_ptr(x86::rsp, 32), x86::xmm8);
|
|
||||||
c.movups(x86::oword_ptr(x86::rsp, 48), x86::xmm9);
|
|
||||||
c.movups(x86::oword_ptr(x86::rsp, 64), x86::xmm10);
|
|
||||||
c.movups(x86::oword_ptr(x86::rsp, 80), x86::xmm11);
|
|
||||||
c.movups(x86::oword_ptr(x86::rsp, 96), x86::xmm12);
|
|
||||||
c.movups(x86::oword_ptr(x86::rsp, 112), x86::xmm13);
|
|
||||||
c.movups(x86::oword_ptr(x86::rsp, 128), x86::xmm14);
|
|
||||||
c.movups(x86::oword_ptr(x86::rsp, 144), x86::xmm15);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
c.sub(x86::rsp, 40);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Prepare registers
|
|
||||||
build_swap_rdx_with(c, args, x86::r10);
|
|
||||||
c.movabs(args[1], reinterpret_cast<u64>(&vm::g_sudo_addr));
|
|
||||||
c.mov(args[1], x86::qword_ptr(args[1]));
|
|
||||||
c.lea(args[1], x86::qword_ptr(args[1], args[0]));
|
|
||||||
c.prefetchw(x86::byte_ptr(args[1], 0));
|
|
||||||
c.prefetchw(x86::byte_ptr(args[1], 64));
|
|
||||||
c.and_(args[0].r32(), 0xff80);
|
|
||||||
c.shr(args[0].r32(), 1);
|
|
||||||
c.movabs(x86::r11, reinterpret_cast<u64>(+vm::g_reservations));
|
|
||||||
c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
|
|
||||||
|
|
||||||
// Prepare data
|
|
||||||
if (s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.vmovups(x86::ymm0, x86::ymmword_ptr(args[2], 0));
|
|
||||||
c.vmovups(x86::ymm1, x86::ymmword_ptr(args[2], 32));
|
|
||||||
c.vmovups(x86::ymm2, x86::ymmword_ptr(args[2], 64));
|
|
||||||
c.vmovups(x86::ymm3, x86::ymmword_ptr(args[2], 96));
|
|
||||||
c.vmovups(x86::ymm4, x86::ymmword_ptr(args[3], 0));
|
|
||||||
c.vmovups(x86::ymm5, x86::ymmword_ptr(args[3], 32));
|
|
||||||
c.vmovups(x86::ymm6, x86::ymmword_ptr(args[3], 64));
|
|
||||||
c.vmovups(x86::ymm7, x86::ymmword_ptr(args[3], 96));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
c.movaps(x86::xmm0, x86::oword_ptr(args[2], 0));
|
|
||||||
c.movaps(x86::xmm1, x86::oword_ptr(args[2], 16));
|
|
||||||
c.movaps(x86::xmm2, x86::oword_ptr(args[2], 32));
|
|
||||||
c.movaps(x86::xmm3, x86::oword_ptr(args[2], 48));
|
|
||||||
c.movaps(x86::xmm4, x86::oword_ptr(args[2], 64));
|
|
||||||
c.movaps(x86::xmm5, x86::oword_ptr(args[2], 80));
|
|
||||||
c.movaps(x86::xmm6, x86::oword_ptr(args[2], 96));
|
|
||||||
c.movaps(x86::xmm7, x86::oword_ptr(args[2], 112));
|
|
||||||
c.movaps(x86::xmm8, x86::oword_ptr(args[3], 0));
|
|
||||||
c.movaps(x86::xmm9, x86::oword_ptr(args[3], 16));
|
|
||||||
c.movaps(x86::xmm10, x86::oword_ptr(args[3], 32));
|
|
||||||
c.movaps(x86::xmm11, x86::oword_ptr(args[3], 48));
|
|
||||||
c.movaps(x86::xmm12, x86::oword_ptr(args[3], 64));
|
|
||||||
c.movaps(x86::xmm13, x86::oword_ptr(args[3], 80));
|
|
||||||
c.movaps(x86::xmm14, x86::oword_ptr(args[3], 96));
|
|
||||||
c.movaps(x86::xmm15, x86::oword_ptr(args[3], 112));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Alloc args[0] to stamp0
|
|
||||||
const auto stamp0 = args[0];
|
|
||||||
build_get_tsc(c, stamp0);
|
|
||||||
|
|
||||||
Label fail2 = c.newLabel();
|
|
||||||
|
|
||||||
Label tx1 = build_transaction_enter(c, fall, [&]()
|
|
||||||
{
|
|
||||||
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx) - ::offset32(&spu_thread::rdata)), 1);
|
|
||||||
build_get_tsc(c);
|
|
||||||
c.sub(x86::rax, stamp0);
|
|
||||||
c.movabs(x86::rbx, reinterpret_cast<u64>(&g_rtm_tx_limit2));
|
|
||||||
c.cmp(x86::rax, x86::qword_ptr(x86::rbx));
|
|
||||||
c.jae(fall);
|
|
||||||
});
|
|
||||||
|
|
||||||
// Check pause flag
|
|
||||||
c.bt(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&spu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
|
|
||||||
c.jc(fall);
|
|
||||||
c.xbegin(tx1);
|
|
||||||
|
|
||||||
if (s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.vxorps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(args[1], 0));
|
|
||||||
c.vxorps(x86::ymm1, x86::ymm1, x86::ymmword_ptr(args[1], 32));
|
|
||||||
c.vxorps(x86::ymm2, x86::ymm2, x86::ymmword_ptr(args[1], 64));
|
|
||||||
c.vxorps(x86::ymm3, x86::ymm3, x86::ymmword_ptr(args[1], 96));
|
|
||||||
c.vorps(x86::ymm0, x86::ymm0, x86::ymm1);
|
|
||||||
c.vorps(x86::ymm1, x86::ymm2, x86::ymm3);
|
|
||||||
c.vorps(x86::ymm0, x86::ymm1, x86::ymm0);
|
|
||||||
c.vptest(x86::ymm0, x86::ymm0);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
c.xorps(x86::xmm0, x86::oword_ptr(args[1], 0));
|
|
||||||
c.xorps(x86::xmm1, x86::oword_ptr(args[1], 16));
|
|
||||||
c.xorps(x86::xmm2, x86::oword_ptr(args[1], 32));
|
|
||||||
c.xorps(x86::xmm3, x86::oword_ptr(args[1], 48));
|
|
||||||
c.xorps(x86::xmm4, x86::oword_ptr(args[1], 64));
|
|
||||||
c.xorps(x86::xmm5, x86::oword_ptr(args[1], 80));
|
|
||||||
c.xorps(x86::xmm6, x86::oword_ptr(args[1], 96));
|
|
||||||
c.xorps(x86::xmm7, x86::oword_ptr(args[1], 112));
|
|
||||||
c.orps(x86::xmm0, x86::xmm1);
|
|
||||||
c.orps(x86::xmm2, x86::xmm3);
|
|
||||||
c.orps(x86::xmm4, x86::xmm5);
|
|
||||||
c.orps(x86::xmm6, x86::xmm7);
|
|
||||||
c.orps(x86::xmm0, x86::xmm2);
|
|
||||||
c.orps(x86::xmm4, x86::xmm6);
|
|
||||||
c.orps(x86::xmm0, x86::xmm4);
|
|
||||||
c.ptest(x86::xmm0, x86::xmm0);
|
|
||||||
}
|
|
||||||
|
|
||||||
c.jnz(fail);
|
|
||||||
|
|
||||||
if (s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.vmovaps(x86::ymmword_ptr(args[1], 0), x86::ymm4);
|
|
||||||
c.vmovaps(x86::ymmword_ptr(args[1], 32), x86::ymm5);
|
|
||||||
c.vmovaps(x86::ymmword_ptr(args[1], 64), x86::ymm6);
|
|
||||||
c.vmovaps(x86::ymmword_ptr(args[1], 96), x86::ymm7);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
c.movaps(x86::oword_ptr(args[1], 0), x86::xmm8);
|
|
||||||
c.movaps(x86::oword_ptr(args[1], 16), x86::xmm9);
|
|
||||||
c.movaps(x86::oword_ptr(args[1], 32), x86::xmm10);
|
|
||||||
c.movaps(x86::oword_ptr(args[1], 48), x86::xmm11);
|
|
||||||
c.movaps(x86::oword_ptr(args[1], 64), x86::xmm12);
|
|
||||||
c.movaps(x86::oword_ptr(args[1], 80), x86::xmm13);
|
|
||||||
c.movaps(x86::oword_ptr(args[1], 96), x86::xmm14);
|
|
||||||
c.movaps(x86::oword_ptr(args[1], 112), x86::xmm15);
|
|
||||||
}
|
|
||||||
|
|
||||||
c.xend();
|
|
||||||
c.lock().add(x86::qword_ptr(x86::r11), 64);
|
|
||||||
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx) - ::offset32(&spu_thread::rdata)), 1);
|
|
||||||
build_get_tsc(c);
|
|
||||||
c.sub(x86::rax, stamp0);
|
|
||||||
c.jmp(_ret);
|
|
||||||
|
|
||||||
// XABORT is expensive so try to finish with xend instead
|
|
||||||
c.bind(fail);
|
|
||||||
|
|
||||||
// Load previous data to store back to rdata
|
|
||||||
if (s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.vmovaps(x86::ymm0, x86::ymmword_ptr(args[1], 0));
|
|
||||||
c.vmovaps(x86::ymm1, x86::ymmword_ptr(args[1], 32));
|
|
||||||
c.vmovaps(x86::ymm2, x86::ymmword_ptr(args[1], 64));
|
|
||||||
c.vmovaps(x86::ymm3, x86::ymmword_ptr(args[1], 96));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
c.movaps(x86::xmm0, x86::oword_ptr(args[1], 0));
|
|
||||||
c.movaps(x86::xmm1, x86::oword_ptr(args[1], 16));
|
|
||||||
c.movaps(x86::xmm2, x86::oword_ptr(args[1], 32));
|
|
||||||
c.movaps(x86::xmm3, x86::oword_ptr(args[1], 48));
|
|
||||||
c.movaps(x86::xmm4, x86::oword_ptr(args[1], 64));
|
|
||||||
c.movaps(x86::xmm5, x86::oword_ptr(args[1], 80));
|
|
||||||
c.movaps(x86::xmm6, x86::oword_ptr(args[1], 96));
|
|
||||||
c.movaps(x86::xmm7, x86::oword_ptr(args[1], 112));
|
|
||||||
}
|
|
||||||
|
|
||||||
c.xend();
|
|
||||||
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx) - ::offset32(&spu_thread::rdata)), 1);
|
|
||||||
c.jmp(fail2);
|
|
||||||
|
|
||||||
c.bind(fall);
|
|
||||||
c.mov(x86::rax, -1);
|
|
||||||
c.jmp(_ret);
|
|
||||||
|
|
||||||
c.bind(fail2);
|
|
||||||
c.lock().sub(x86::qword_ptr(x86::r11), 64);
|
|
||||||
c.bind(load);
|
|
||||||
|
|
||||||
// Store previous data back to rdata
|
|
||||||
if (s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.vmovaps(x86::ymmword_ptr(args[2], 0), x86::ymm0);
|
|
||||||
c.vmovaps(x86::ymmword_ptr(args[2], 32), x86::ymm1);
|
|
||||||
c.vmovaps(x86::ymmword_ptr(args[2], 64), x86::ymm2);
|
|
||||||
c.vmovaps(x86::ymmword_ptr(args[2], 96), x86::ymm3);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
c.movaps(x86::oword_ptr(args[2], 0), x86::xmm0);
|
|
||||||
c.movaps(x86::oword_ptr(args[2], 16), x86::xmm1);
|
|
||||||
c.movaps(x86::oword_ptr(args[2], 32), x86::xmm2);
|
|
||||||
c.movaps(x86::oword_ptr(args[2], 48), x86::xmm3);
|
|
||||||
c.movaps(x86::oword_ptr(args[2], 64), x86::xmm4);
|
|
||||||
c.movaps(x86::oword_ptr(args[2], 80), x86::xmm5);
|
|
||||||
c.movaps(x86::oword_ptr(args[2], 96), x86::xmm6);
|
|
||||||
c.movaps(x86::oword_ptr(args[2], 112), x86::xmm7);
|
|
||||||
}
|
|
||||||
|
|
||||||
c.mov(x86::rax, -1);
|
|
||||||
c.mov(x86::qword_ptr(args[2], ::offset32(&spu_thread::last_ftime) - ::offset32(&spu_thread::rdata)), x86::rax);
|
|
||||||
c.xor_(x86::eax, x86::eax);
|
|
||||||
//c.jmp(_ret);
|
|
||||||
|
|
||||||
c.bind(_ret);
|
|
||||||
|
|
||||||
#ifdef _WIN32
|
|
||||||
if (s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.vmovups(x86::xmm6, x86::oword_ptr(x86::rsp, 0));
|
|
||||||
c.vmovups(x86::xmm7, x86::oword_ptr(x86::rsp, 16));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
c.movups(x86::xmm6, x86::oword_ptr(x86::rsp, 0));
|
|
||||||
c.movups(x86::xmm7, x86::oword_ptr(x86::rsp, 16));
|
|
||||||
c.movups(x86::xmm8, x86::oword_ptr(x86::rsp, 32));
|
|
||||||
c.movups(x86::xmm9, x86::oword_ptr(x86::rsp, 48));
|
|
||||||
c.movups(x86::xmm10, x86::oword_ptr(x86::rsp, 64));
|
|
||||||
c.movups(x86::xmm11, x86::oword_ptr(x86::rsp, 80));
|
|
||||||
c.movups(x86::xmm12, x86::oword_ptr(x86::rsp, 96));
|
|
||||||
c.movups(x86::xmm13, x86::oword_ptr(x86::rsp, 112));
|
|
||||||
c.movups(x86::xmm14, x86::oword_ptr(x86::rsp, 128));
|
|
||||||
c.movups(x86::xmm15, x86::oword_ptr(x86::rsp, 144));
|
|
||||||
}
|
|
||||||
c.add(x86::rsp, 168);
|
|
||||||
#else
|
|
||||||
c.add(x86::rsp, 40);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
c.pop(x86::rbx);
|
|
||||||
c.pop(x86::rbp);
|
|
||||||
|
|
||||||
if (s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.vzeroupper();
|
|
||||||
}
|
|
||||||
|
|
||||||
maybe_flush_lbr(c);
|
|
||||||
c.ret();
|
|
||||||
#else
|
|
||||||
UNUSED(args);
|
|
||||||
|
|
||||||
c.brk(Imm(0x42));
|
|
||||||
c.ret(a64::x30);
|
|
||||||
#endif
|
|
||||||
});
|
|
||||||
|
|
||||||
const auto spu_putlluc_tx = build_function_asm<u64(*)(u32 raddr, const void* rdata, u64* _stx, u64* _ftx)>("spu_putlluc_tx", [](native_asm& c, auto& args)
|
|
||||||
{
|
|
||||||
using namespace asmjit;
|
|
||||||
|
|
||||||
#if defined(ARCH_X64)
|
|
||||||
Label fall = c.newLabel();
|
|
||||||
Label _ret = c.newLabel();
|
|
||||||
|
|
||||||
//if (utils::has_avx() && !s_tsx_avx)
|
|
||||||
//{
|
|
||||||
// c.vzeroupper();
|
|
||||||
//}
|
|
||||||
|
|
||||||
// Create stack frame if necessary (Windows ABI has only 6 volatile vector registers)
|
|
||||||
c.push(x86::rbp);
|
|
||||||
c.push(x86::rbx);
|
|
||||||
c.sub(x86::rsp, 40);
|
|
||||||
#ifdef _WIN32
|
|
||||||
if (!s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.movups(x86::oword_ptr(x86::rsp, 0), x86::xmm6);
|
|
||||||
c.movups(x86::oword_ptr(x86::rsp, 16), x86::xmm7);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
// Prepare registers
|
|
||||||
build_swap_rdx_with(c, args, x86::r10);
|
|
||||||
c.movabs(x86::r11, reinterpret_cast<u64>(&vm::g_sudo_addr));
|
|
||||||
c.mov(x86::r11, x86::qword_ptr(x86::r11));
|
|
||||||
c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
|
|
||||||
c.prefetchw(x86::byte_ptr(x86::r11, 0));
|
|
||||||
c.prefetchw(x86::byte_ptr(x86::r11, 64));
|
|
||||||
|
|
||||||
// Prepare data
|
|
||||||
if (s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.vmovups(x86::ymm0, x86::ymmword_ptr(args[1], 0));
|
|
||||||
c.vmovups(x86::ymm1, x86::ymmword_ptr(args[1], 32));
|
|
||||||
c.vmovups(x86::ymm2, x86::ymmword_ptr(args[1], 64));
|
|
||||||
c.vmovups(x86::ymm3, x86::ymmword_ptr(args[1], 96));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
c.movaps(x86::xmm0, x86::oword_ptr(args[1], 0));
|
|
||||||
c.movaps(x86::xmm1, x86::oword_ptr(args[1], 16));
|
|
||||||
c.movaps(x86::xmm2, x86::oword_ptr(args[1], 32));
|
|
||||||
c.movaps(x86::xmm3, x86::oword_ptr(args[1], 48));
|
|
||||||
c.movaps(x86::xmm4, x86::oword_ptr(args[1], 64));
|
|
||||||
c.movaps(x86::xmm5, x86::oword_ptr(args[1], 80));
|
|
||||||
c.movaps(x86::xmm6, x86::oword_ptr(args[1], 96));
|
|
||||||
c.movaps(x86::xmm7, x86::oword_ptr(args[1], 112));
|
|
||||||
}
|
|
||||||
|
|
||||||
c.and_(args[0].r32(), 0xff80);
|
|
||||||
c.shr(args[0].r32(), 1);
|
|
||||||
c.movabs(args[1], reinterpret_cast<u64>(+vm::g_reservations));
|
|
||||||
c.lea(args[1], x86::qword_ptr(args[1], args[0]));
|
|
||||||
|
|
||||||
// Alloc args[0] to stamp0
|
|
||||||
const auto stamp0 = args[0];
|
|
||||||
build_get_tsc(c, stamp0);
|
|
||||||
|
|
||||||
Label tx1 = build_transaction_enter(c, fall, [&]()
|
|
||||||
{
|
|
||||||
// ftx++;
|
|
||||||
c.add(x86::qword_ptr(args[3]), 1);
|
|
||||||
build_get_tsc(c);
|
|
||||||
c.sub(x86::rax, stamp0);
|
|
||||||
c.movabs(x86::rbx, reinterpret_cast<u64>(&g_rtm_tx_limit2));
|
|
||||||
c.cmp(x86::rax, x86::qword_ptr(x86::rbx));
|
|
||||||
c.jae(fall);
|
|
||||||
});
|
|
||||||
|
|
||||||
c.xbegin(tx1);
|
|
||||||
|
|
||||||
if (s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.vmovaps(x86::ymmword_ptr(x86::r11, 0), x86::ymm0);
|
|
||||||
c.vmovaps(x86::ymmword_ptr(x86::r11, 32), x86::ymm1);
|
|
||||||
c.vmovaps(x86::ymmword_ptr(x86::r11, 64), x86::ymm2);
|
|
||||||
c.vmovaps(x86::ymmword_ptr(x86::r11, 96), x86::ymm3);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
c.movaps(x86::oword_ptr(x86::r11, 0), x86::xmm0);
|
|
||||||
c.movaps(x86::oword_ptr(x86::r11, 16), x86::xmm1);
|
|
||||||
c.movaps(x86::oword_ptr(x86::r11, 32), x86::xmm2);
|
|
||||||
c.movaps(x86::oword_ptr(x86::r11, 48), x86::xmm3);
|
|
||||||
c.movaps(x86::oword_ptr(x86::r11, 64), x86::xmm4);
|
|
||||||
c.movaps(x86::oword_ptr(x86::r11, 80), x86::xmm5);
|
|
||||||
c.movaps(x86::oword_ptr(x86::r11, 96), x86::xmm6);
|
|
||||||
c.movaps(x86::oword_ptr(x86::r11, 112), x86::xmm7);
|
|
||||||
}
|
|
||||||
|
|
||||||
c.xend();
|
|
||||||
c.lock().add(x86::qword_ptr(args[1]), 32);
|
|
||||||
// stx++
|
|
||||||
c.add(x86::qword_ptr(args[2]), 1);
|
|
||||||
build_get_tsc(c);
|
|
||||||
c.sub(x86::rax, stamp0);
|
|
||||||
c.jmp(_ret);
|
|
||||||
|
|
||||||
c.bind(fall);
|
|
||||||
c.xor_(x86::eax, x86::eax);
|
|
||||||
//c.jmp(_ret);
|
|
||||||
|
|
||||||
c.bind(_ret);
|
|
||||||
|
|
||||||
#ifdef _WIN32
|
|
||||||
if (!s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.movups(x86::xmm6, x86::oword_ptr(x86::rsp, 0));
|
|
||||||
c.movups(x86::xmm7, x86::oword_ptr(x86::rsp, 16));
|
|
||||||
}
|
|
||||||
c.add(x86::rsp, 40);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.vzeroupper();
|
|
||||||
}
|
|
||||||
|
|
||||||
c.add(x86::rsp, 40);
|
|
||||||
c.pop(x86::rbx);
|
|
||||||
c.pop(x86::rbp);
|
|
||||||
|
|
||||||
maybe_flush_lbr(c);
|
|
||||||
c.ret();
|
|
||||||
#else
|
|
||||||
UNUSED(args);
|
|
||||||
|
|
||||||
c.brk(Imm(0x42));
|
|
||||||
c.ret(a64::x30);
|
|
||||||
#endif
|
|
||||||
});
|
|
||||||
|
|
||||||
const auto spu_getllar_tx = build_function_asm<u64(*)(u32 raddr, void* rdata, cpu_thread* _cpu, u64 rtime)>("spu_getllar_tx", [](native_asm& c, auto& args)
|
|
||||||
{
|
|
||||||
using namespace asmjit;
|
|
||||||
|
|
||||||
#if defined(ARCH_X64)
|
|
||||||
Label fall = c.newLabel();
|
|
||||||
Label _ret = c.newLabel();
|
|
||||||
|
|
||||||
//if (utils::has_avx() && !s_tsx_avx)
|
|
||||||
//{
|
|
||||||
// c.vzeroupper();
|
|
||||||
//}
|
|
||||||
|
|
||||||
// Create stack frame if necessary (Windows ABI has only 6 volatile vector registers)
|
|
||||||
c.push(x86::rbp);
|
|
||||||
c.push(x86::rbx);
|
|
||||||
c.sub(x86::rsp, 40);
|
|
||||||
#ifdef _WIN32
|
|
||||||
if (!s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.movups(x86::oword_ptr(x86::rsp, 0), x86::xmm6);
|
|
||||||
c.movups(x86::oword_ptr(x86::rsp, 16), x86::xmm7);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Prepare registers
|
|
||||||
build_swap_rdx_with(c, args, x86::r10);
|
|
||||||
c.movabs(x86::rbp, reinterpret_cast<u64>(&vm::g_sudo_addr));
|
|
||||||
c.mov(x86::rbp, x86::qword_ptr(x86::rbp));
|
|
||||||
c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
|
|
||||||
c.and_(args[0].r32(), 0xff80);
|
|
||||||
c.shr(args[0].r32(), 1);
|
|
||||||
c.movabs(x86::r11, reinterpret_cast<u64>(+vm::g_reservations));
|
|
||||||
c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
|
|
||||||
|
|
||||||
// Alloc args[0] to stamp0
|
|
||||||
const auto stamp0 = args[0];
|
|
||||||
build_get_tsc(c, stamp0);
|
|
||||||
|
|
||||||
// Begin transaction
|
|
||||||
Label tx0 = build_transaction_enter(c, fall, [&]()
|
|
||||||
{
|
|
||||||
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::ftx)), 1);
|
|
||||||
build_get_tsc(c);
|
|
||||||
c.sub(x86::rax, stamp0);
|
|
||||||
c.movabs(x86::rbx, reinterpret_cast<u64>(&g_rtm_tx_limit1));
|
|
||||||
c.cmp(x86::rax, x86::qword_ptr(x86::rbx));
|
|
||||||
c.jae(fall);
|
|
||||||
});
|
|
||||||
|
|
||||||
// Check pause flag
|
|
||||||
c.bt(x86::dword_ptr(args[2], ::offset32(&cpu_thread::state)), static_cast<u32>(cpu_flag::pause));
|
|
||||||
c.jc(fall);
|
|
||||||
c.mov(x86::rax, x86::qword_ptr(x86::r11));
|
|
||||||
c.and_(x86::rax, -128);
|
|
||||||
c.cmp(x86::rax, args[3]);
|
|
||||||
c.jne(fall);
|
|
||||||
c.xbegin(tx0);
|
|
||||||
|
|
||||||
// Just read data to registers
|
|
||||||
if (s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.vmovups(x86::ymm0, x86::ymmword_ptr(x86::rbp, 0));
|
|
||||||
c.vmovups(x86::ymm1, x86::ymmword_ptr(x86::rbp, 32));
|
|
||||||
c.vmovups(x86::ymm2, x86::ymmword_ptr(x86::rbp, 64));
|
|
||||||
c.vmovups(x86::ymm3, x86::ymmword_ptr(x86::rbp, 96));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
c.movaps(x86::xmm0, x86::oword_ptr(x86::rbp, 0));
|
|
||||||
c.movaps(x86::xmm1, x86::oword_ptr(x86::rbp, 16));
|
|
||||||
c.movaps(x86::xmm2, x86::oword_ptr(x86::rbp, 32));
|
|
||||||
c.movaps(x86::xmm3, x86::oword_ptr(x86::rbp, 48));
|
|
||||||
c.movaps(x86::xmm4, x86::oword_ptr(x86::rbp, 64));
|
|
||||||
c.movaps(x86::xmm5, x86::oword_ptr(x86::rbp, 80));
|
|
||||||
c.movaps(x86::xmm6, x86::oword_ptr(x86::rbp, 96));
|
|
||||||
c.movaps(x86::xmm7, x86::oword_ptr(x86::rbp, 112));
|
|
||||||
}
|
|
||||||
|
|
||||||
c.xend();
|
|
||||||
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx)), 1);
|
|
||||||
build_get_tsc(c);
|
|
||||||
c.sub(x86::rax, stamp0);
|
|
||||||
|
|
||||||
// Store data
|
|
||||||
if (s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.vmovaps(x86::ymmword_ptr(args[1], 0), x86::ymm0);
|
|
||||||
c.vmovaps(x86::ymmword_ptr(args[1], 32), x86::ymm1);
|
|
||||||
c.vmovaps(x86::ymmword_ptr(args[1], 64), x86::ymm2);
|
|
||||||
c.vmovaps(x86::ymmword_ptr(args[1], 96), x86::ymm3);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
c.movaps(x86::oword_ptr(args[1], 0), x86::xmm0);
|
|
||||||
c.movaps(x86::oword_ptr(args[1], 16), x86::xmm1);
|
|
||||||
c.movaps(x86::oword_ptr(args[1], 32), x86::xmm2);
|
|
||||||
c.movaps(x86::oword_ptr(args[1], 48), x86::xmm3);
|
|
||||||
c.movaps(x86::oword_ptr(args[1], 64), x86::xmm4);
|
|
||||||
c.movaps(x86::oword_ptr(args[1], 80), x86::xmm5);
|
|
||||||
c.movaps(x86::oword_ptr(args[1], 96), x86::xmm6);
|
|
||||||
c.movaps(x86::oword_ptr(args[1], 112), x86::xmm7);
|
|
||||||
}
|
|
||||||
|
|
||||||
c.jmp(_ret);
|
|
||||||
c.bind(fall);
|
|
||||||
c.xor_(x86::eax, x86::eax);
|
|
||||||
//c.jmp(_ret);
|
|
||||||
|
|
||||||
c.bind(_ret);
|
|
||||||
|
|
||||||
#ifdef _WIN32
|
|
||||||
if (!s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.movups(x86::xmm6, x86::oword_ptr(x86::rsp, 0));
|
|
||||||
c.movups(x86::xmm7, x86::oword_ptr(x86::rsp, 16));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (s_tsx_avx)
|
|
||||||
{
|
|
||||||
c.vzeroupper();
|
|
||||||
}
|
|
||||||
|
|
||||||
c.add(x86::rsp, 40);
|
|
||||||
c.pop(x86::rbx);
|
|
||||||
c.pop(x86::rbp);
|
|
||||||
|
|
||||||
maybe_flush_lbr(c);
|
|
||||||
c.ret();
|
|
||||||
#else
|
|
||||||
UNUSED(args);
|
|
||||||
|
|
||||||
c.brk(Imm(0x42));
|
|
||||||
c.ret(a64::x30);
|
|
||||||
#endif
|
|
||||||
});
|
|
||||||
|
|
||||||
void spu_int_ctrl_t::set(u64 ints)
|
void spu_int_ctrl_t::set(u64 ints)
|
||||||
{
|
{
|
||||||
// leave only enabled interrupts
|
// leave only enabled interrupts
|
||||||
|
|
@ -2396,60 +1853,6 @@ void spu_thread::push_snr(u32 number, u32 value)
|
||||||
const u32 event_bit = SPU_EVENT_S1 >> (number & 1);
|
const u32 event_bit = SPU_EVENT_S1 >> (number & 1);
|
||||||
const bool bitor_bit = !!((snr_config >> number) & 1);
|
const bool bitor_bit = !!((snr_config >> number) & 1);
|
||||||
|
|
||||||
// Redundant, g_use_rtm is checked inside tx_start now.
|
|
||||||
if (g_use_rtm && false)
|
|
||||||
{
|
|
||||||
bool channel_notify = false;
|
|
||||||
bool thread_notify = false;
|
|
||||||
|
|
||||||
const bool ok = utils::tx_start([&]
|
|
||||||
{
|
|
||||||
channel_notify = (channel->data.raw() == spu_channel::bit_wait);
|
|
||||||
thread_notify = (channel->data.raw() & spu_channel::bit_count) == 0;
|
|
||||||
|
|
||||||
if (channel_notify)
|
|
||||||
{
|
|
||||||
ensure(channel->jostling_value.raw() == spu_channel::bit_wait);
|
|
||||||
channel->jostling_value.raw() = value;
|
|
||||||
channel->data.raw() = 0;
|
|
||||||
}
|
|
||||||
else if (bitor_bit)
|
|
||||||
{
|
|
||||||
channel->data.raw() &= ~spu_channel::bit_wait;
|
|
||||||
channel->data.raw() |= spu_channel::bit_count | value;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
channel->data.raw() = spu_channel::bit_count | value;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (thread_notify)
|
|
||||||
{
|
|
||||||
ch_events.raw().events |= event_bit;
|
|
||||||
|
|
||||||
if (ch_events.raw().mask & event_bit)
|
|
||||||
{
|
|
||||||
ch_events.raw().count = 1;
|
|
||||||
thread_notify = ch_events.raw().waiting != 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
thread_notify = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
if (ok)
|
|
||||||
{
|
|
||||||
if (channel_notify)
|
|
||||||
channel->data.notify_one();
|
|
||||||
if (thread_notify)
|
|
||||||
this->notify();
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Lock event channel in case it needs event notification
|
// Lock event channel in case it needs event notification
|
||||||
ch_events.atomic_op([](ch_events_t& ev)
|
ch_events.atomic_op([](ch_events_t& ev)
|
||||||
{
|
{
|
||||||
|
|
@ -2590,7 +1993,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
|
||||||
|
|
||||||
rsx::reservation_lock<false, 1> rsx_lock(eal, args.size, !is_get && (g_cfg.video.strict_rendering_mode || (g_cfg.core.rsx_fifo_accuracy && !g_cfg.core.spu_accurate_dma && eal < rsx::constants::local_mem_base)));
|
rsx::reservation_lock<false, 1> rsx_lock(eal, args.size, !is_get && (g_cfg.video.strict_rendering_mode || (g_cfg.core.rsx_fifo_accuracy && !g_cfg.core.spu_accurate_dma && eal < rsx::constants::local_mem_base)));
|
||||||
|
|
||||||
if ((!g_use_rtm && !is_get) || g_cfg.core.spu_accurate_dma) [[unlikely]]
|
if (!is_get || g_cfg.core.spu_accurate_dma) [[unlikely]]
|
||||||
{
|
{
|
||||||
perf_meter<"ADMA_GET"_u64> perf_get = perf_;
|
perf_meter<"ADMA_GET"_u64> perf_get = perf_;
|
||||||
perf_meter<"ADMA_PUT"_u64> perf_put = perf_;
|
perf_meter<"ADMA_PUT"_u64> perf_put = perf_;
|
||||||
|
|
@ -3220,7 +2623,7 @@ plain_access:
|
||||||
|
|
||||||
bool spu_thread::do_dma_check(const spu_mfc_cmd& args)
|
bool spu_thread::do_dma_check(const spu_mfc_cmd& args)
|
||||||
{
|
{
|
||||||
const u32 mask = utils::rol32(1, args.tag);
|
const u32 mask = std::rotl<u32>(1, args.tag);
|
||||||
|
|
||||||
if (mfc_barrier & mask || (args.cmd & (MFC_BARRIER_MASK | MFC_FENCE_MASK) && mfc_fence & mask)) [[unlikely]]
|
if (mfc_barrier & mask || (args.cmd & (MFC_BARRIER_MASK | MFC_FENCE_MASK) && mfc_fence & mask)) [[unlikely]]
|
||||||
{
|
{
|
||||||
|
|
@ -3236,13 +2639,13 @@ bool spu_thread::do_dma_check(const spu_mfc_cmd& args)
|
||||||
if ((mfc_queue[i].cmd & ~0xc) == MFC_BARRIER_CMD)
|
if ((mfc_queue[i].cmd & ~0xc) == MFC_BARRIER_CMD)
|
||||||
{
|
{
|
||||||
mfc_barrier |= -1;
|
mfc_barrier |= -1;
|
||||||
mfc_fence |= utils::rol32(1, mfc_queue[i].tag);
|
mfc_fence |= std::rotl<u32>(1, mfc_queue[i].tag);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (true)
|
if (true)
|
||||||
{
|
{
|
||||||
const u32 _mask = utils::rol32(1u, mfc_queue[i].tag);
|
const u32 _mask = std::rotl<u32>(1u, mfc_queue[i].tag);
|
||||||
|
|
||||||
// A command with barrier hard blocks that tag until it's been dealt with
|
// A command with barrier hard blocks that tag until it's been dealt with
|
||||||
if (mfc_queue[i].cmd & MFC_BARRIER_MASK)
|
if (mfc_queue[i].cmd & MFC_BARRIER_MASK)
|
||||||
|
|
@ -3697,10 +3100,7 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
|
||||||
{
|
{
|
||||||
rsx_lock.update_if_enabled(addr, size, range_lock);
|
rsx_lock.update_if_enabled(addr, size, range_lock);
|
||||||
|
|
||||||
if (!g_use_rtm)
|
vm::range_lock(range_lock, addr & -128, utils::align<u32>(addr + size, 128) - (addr & -128));
|
||||||
{
|
|
||||||
vm::range_lock(range_lock, addr & -128, utils::align<u32>(addr + size, 128) - (addr & -128));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
@ -3805,14 +3205,14 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
|
||||||
{
|
{
|
||||||
range_lock->release(0);
|
range_lock->release(0);
|
||||||
|
|
||||||
ch_stall_mask |= utils::rol32(1, args.tag);
|
ch_stall_mask |= std::rotl<u32>(1, args.tag);
|
||||||
|
|
||||||
if (!ch_stall_stat.get_count())
|
if (!ch_stall_stat.get_count())
|
||||||
{
|
{
|
||||||
set_events(SPU_EVENT_SN);
|
set_events(SPU_EVENT_SN);
|
||||||
}
|
}
|
||||||
|
|
||||||
ch_stall_stat.set_value(utils::rol32(1, args.tag) | ch_stall_stat.get_value());
|
ch_stall_stat.set_value(std::rotl<u32>(1, args.tag) | ch_stall_stat.get_value());
|
||||||
|
|
||||||
args.tag |= 0x80; // Set stalled status
|
args.tag |= 0x80; // Set stalled status
|
||||||
args.eal = ::narrow<u32>(reinterpret_cast<const u8*>(item_ptr) - this->ls);
|
args.eal = ::narrow<u32>(reinterpret_cast<const u8*>(item_ptr) - this->ls);
|
||||||
|
|
@ -3912,90 +3312,9 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (!g_use_rtm)
|
else
|
||||||
{
|
{
|
||||||
*vm::_ptr<atomic_t<u32>>(addr) += 0;
|
utils::trigger_write_page_fault(vm::base(addr));
|
||||||
}
|
|
||||||
|
|
||||||
if (g_use_rtm) [[likely]]
|
|
||||||
{
|
|
||||||
switch (u64 count = spu_putllc_tx(addr, rtime, rdata, to_write))
|
|
||||||
{
|
|
||||||
case umax:
|
|
||||||
{
|
|
||||||
auto& data = *vm::get_super_ptr<spu_rdata_t>(addr);
|
|
||||||
|
|
||||||
const bool ok = cpu_thread::suspend_all<+3>(this, {data, data + 64, &res}, [&]()
|
|
||||||
{
|
|
||||||
if ((res & -128) == rtime)
|
|
||||||
{
|
|
||||||
if (cmp_rdata(rdata, data))
|
|
||||||
{
|
|
||||||
mov_rdata(data, to_write);
|
|
||||||
res += 64;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Save previous data
|
|
||||||
mov_rdata_nt(rdata, data);
|
|
||||||
res -= 64;
|
|
||||||
return false;
|
|
||||||
});
|
|
||||||
|
|
||||||
const u64 count2 = utils::get_tsc() - perf2.get();
|
|
||||||
|
|
||||||
if (count2 > 20000 && g_cfg.core.perf_report) [[unlikely]]
|
|
||||||
{
|
|
||||||
perf_log.warning("PUTLLC: took too long: %.3fus (%u c) (addr=0x%x) (S)", count2 / (utils::get_tsc_freq() / 1000'000.), count2, addr);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ok)
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
last_ftime = -1;
|
|
||||||
[[fallthrough]];
|
|
||||||
}
|
|
||||||
case 0:
|
|
||||||
{
|
|
||||||
if (addr == last_faddr)
|
|
||||||
{
|
|
||||||
last_fail++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (last_ftime != umax)
|
|
||||||
{
|
|
||||||
last_faddr = 0;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
utils::prefetch_read(rdata);
|
|
||||||
utils::prefetch_read(rdata + 64);
|
|
||||||
last_faddr = addr;
|
|
||||||
last_ftime = res.load() & -128;
|
|
||||||
last_ftsc = utils::get_tsc();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
if (count > 20000 && g_cfg.core.perf_report) [[unlikely]]
|
|
||||||
{
|
|
||||||
perf_log.warning("PUTLLC: took too long: %.3fus (%u c) (addr = 0x%x)", count / (utils::get_tsc_freq() / 1000'000.), count, addr);
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (addr == last_faddr)
|
|
||||||
{
|
|
||||||
last_succ++;
|
|
||||||
}
|
|
||||||
|
|
||||||
last_faddr = 0;
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
auto& super_data = *vm::get_super_ptr<spu_rdata_t>(addr);
|
auto& super_data = *vm::get_super_ptr<spu_rdata_t>(addr);
|
||||||
|
|
@ -4189,7 +3508,7 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
|
||||||
{
|
{
|
||||||
result = 0;
|
result = 0;
|
||||||
}
|
}
|
||||||
else if (!g_use_rtm)
|
else
|
||||||
{
|
{
|
||||||
// Provoke page fault
|
// Provoke page fault
|
||||||
utils::trigger_write_page_fault(vm::base(addr));
|
utils::trigger_write_page_fault(vm::base(addr));
|
||||||
|
|
@ -4200,16 +3519,6 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write)
|
||||||
mov_rdata(sdata, *static_cast<const spu_rdata_t*>(to_write));
|
mov_rdata(sdata, *static_cast<const spu_rdata_t*>(to_write));
|
||||||
vm::reservation_acquire(addr) += 32;
|
vm::reservation_acquire(addr) += 32;
|
||||||
}
|
}
|
||||||
else if (cpu->get_class() != thread_class::spu)
|
|
||||||
{
|
|
||||||
u64 stx, ftx;
|
|
||||||
result = spu_putlluc_tx(addr, to_write, &stx, &ftx);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
auto _spu = static_cast<spu_thread*>(cpu);
|
|
||||||
result = spu_putlluc_tx(addr, to_write, &_spu->stx, &_spu->ftx);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (result == 0)
|
if (result == 0)
|
||||||
{
|
{
|
||||||
|
|
@ -4271,7 +3580,7 @@ bool spu_thread::do_mfc(bool can_escape, bool must_finish)
|
||||||
auto process_command = [&](spu_mfc_cmd& args)
|
auto process_command = [&](spu_mfc_cmd& args)
|
||||||
{
|
{
|
||||||
// Select tag bit in the tag mask or the stall mask
|
// Select tag bit in the tag mask or the stall mask
|
||||||
const u32 mask = utils::rol32(1, args.tag);
|
const u32 mask = std::rotl<u32>(1, args.tag);
|
||||||
|
|
||||||
if ((args.cmd & ~0xc) == MFC_BARRIER_CMD)
|
if ((args.cmd & ~0xc) == MFC_BARRIER_CMD)
|
||||||
{
|
{
|
||||||
|
|
@ -4565,7 +3874,7 @@ bool spu_thread::is_exec_code(u32 addr, std::span<const u8> ls_ptr, u32 base_add
|
||||||
return is_range_limited;
|
return is_range_limited;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (type == spu_itype::BRSL)
|
if (type == spu_itype::BRSL && op.rt == 0)
|
||||||
{
|
{
|
||||||
// Insert a virtual return-to-next, because it is usually a call
|
// Insert a virtual return-to-next, because it is usually a call
|
||||||
results[1] = addr + 4;
|
results[1] = addr + 4;
|
||||||
|
|
@ -4816,7 +4125,7 @@ bool spu_thread::process_mfc_cmd()
|
||||||
if (raddr != addr)
|
if (raddr != addr)
|
||||||
{
|
{
|
||||||
// Last check for event before we replace the reservation with a new one
|
// Last check for event before we replace the reservation with a new one
|
||||||
if (reservation_check(raddr, rdata))
|
if (~ch_events.load().events & SPU_EVENT_LR && reservation_check(raddr, rdata, addr))
|
||||||
{
|
{
|
||||||
set_events(SPU_EVENT_LR);
|
set_events(SPU_EVENT_LR);
|
||||||
}
|
}
|
||||||
|
|
@ -5104,29 +4413,15 @@ bool spu_thread::process_mfc_cmd()
|
||||||
{
|
{
|
||||||
ntime = vm::reservation_acquire(addr);
|
ntime = vm::reservation_acquire(addr);
|
||||||
|
|
||||||
if (ntime & vm::rsrv_unique_lock)
|
if (ntime & 127)
|
||||||
{
|
{
|
||||||
// There's an on-going reservation store, wait
|
// There's an on-going reservation store, wait
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 test_mask = -1;
|
mov_rdata(rdata, data);
|
||||||
|
|
||||||
if (ntime & 127)
|
if (u64 time0 = vm::reservation_acquire(addr); ntime != time0)
|
||||||
{
|
|
||||||
// Try to use TSX to obtain data atomically
|
|
||||||
if (!g_use_rtm || !spu_getllar_tx(addr, rdata, this, ntime & -128))
|
|
||||||
{
|
|
||||||
// See previous ntime check.
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
mov_rdata(rdata, data);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (u64 time0 = vm::reservation_acquire(addr); (ntime & test_mask) != (time0 & test_mask))
|
|
||||||
{
|
{
|
||||||
// Reservation data has been modified recently
|
// Reservation data has been modified recently
|
||||||
if (time0 & vm::rsrv_unique_lock) i += 12;
|
if (time0 & vm::rsrv_unique_lock) i += 12;
|
||||||
|
|
@ -5373,7 +4668,7 @@ bool spu_thread::process_mfc_cmd()
|
||||||
std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.lsa & 0x3ff80), 128);
|
std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.lsa & 0x3ff80), 128);
|
||||||
}
|
}
|
||||||
|
|
||||||
const u32 mask = utils::rol32(1, ch_mfc_cmd.tag);
|
const u32 mask = std::rotl<u32>(1, ch_mfc_cmd.tag);
|
||||||
|
|
||||||
if ((mfc_barrier | mfc_fence) & mask) [[unlikely]]
|
if ((mfc_barrier | mfc_fence) & mask) [[unlikely]]
|
||||||
{
|
{
|
||||||
|
|
@ -5428,11 +4723,11 @@ bool spu_thread::process_mfc_cmd()
|
||||||
}
|
}
|
||||||
|
|
||||||
mfc_queue[mfc_size++] = ch_mfc_cmd;
|
mfc_queue[mfc_size++] = ch_mfc_cmd;
|
||||||
mfc_fence |= utils::rol32(1, ch_mfc_cmd.tag);
|
mfc_fence |= std::rotl<u32>(1, ch_mfc_cmd.tag);
|
||||||
|
|
||||||
if (ch_mfc_cmd.cmd & MFC_BARRIER_MASK)
|
if (ch_mfc_cmd.cmd & MFC_BARRIER_MASK)
|
||||||
{
|
{
|
||||||
mfc_barrier |= utils::rol32(1, ch_mfc_cmd.tag);
|
mfc_barrier |= std::rotl<u32>(1, ch_mfc_cmd.tag);
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
@ -5481,11 +4776,11 @@ bool spu_thread::process_mfc_cmd()
|
||||||
}
|
}
|
||||||
|
|
||||||
mfc_size++;
|
mfc_size++;
|
||||||
mfc_fence |= utils::rol32(1, cmd.tag);
|
mfc_fence |= std::rotl<u32>(1, cmd.tag);
|
||||||
|
|
||||||
if (cmd.cmd & MFC_BARRIER_MASK)
|
if (cmd.cmd & MFC_BARRIER_MASK)
|
||||||
{
|
{
|
||||||
mfc_barrier |= utils::rol32(1, cmd.tag);
|
mfc_barrier |= std::rotl<u32>(1, cmd.tag);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (check_mfc_interrupts(pc + 4))
|
if (check_mfc_interrupts(pc + 4))
|
||||||
|
|
@ -5511,7 +4806,7 @@ bool spu_thread::process_mfc_cmd()
|
||||||
{
|
{
|
||||||
mfc_queue[mfc_size++] = ch_mfc_cmd;
|
mfc_queue[mfc_size++] = ch_mfc_cmd;
|
||||||
mfc_barrier |= -1;
|
mfc_barrier |= -1;
|
||||||
mfc_fence |= utils::rol32(1, ch_mfc_cmd.tag);
|
mfc_fence |= std::rotl<u32>(1, ch_mfc_cmd.tag);
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
@ -5526,7 +4821,7 @@ bool spu_thread::process_mfc_cmd()
|
||||||
ch_mfc_cmd.cmd, ch_mfc_cmd.lsa, ch_mfc_cmd.eal, ch_mfc_cmd.tag, ch_mfc_cmd.size);
|
ch_mfc_cmd.cmd, ch_mfc_cmd.lsa, ch_mfc_cmd.eal, ch_mfc_cmd.tag, ch_mfc_cmd.size);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool spu_thread::reservation_check(u32 addr, const decltype(rdata)& data) const
|
bool spu_thread::reservation_check(u32 addr, const decltype(rdata)& data, u32 current_eal) const
|
||||||
{
|
{
|
||||||
if (!addr)
|
if (!addr)
|
||||||
{
|
{
|
||||||
|
|
@ -5545,9 +4840,24 @@ bool spu_thread::reservation_check(u32 addr, const decltype(rdata)& data) const
|
||||||
return !cmp_rdata(data, *vm::get_super_ptr<decltype(rdata)>(addr));
|
return !cmp_rdata(data, *vm::get_super_ptr<decltype(rdata)>(addr));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((addr >> 20) == (current_eal >> 20))
|
||||||
|
{
|
||||||
|
if (vm::check_addr(addr, vm::page_1m_size))
|
||||||
|
{
|
||||||
|
// Same random-access-memory page as the current MFC command, assume allocated
|
||||||
|
return !cmp_rdata(data, vm::_ref<decltype(rdata)>(addr));
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((addr >> 16) == (current_eal >> 16) && vm::check_addr(addr, vm::page_64k_size))
|
||||||
|
{
|
||||||
|
// Same random-access-memory page as the current MFC command, assume allocated
|
||||||
|
return !cmp_rdata(data, vm::_ref<decltype(rdata)>(addr));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Ensure data is allocated (HACK: would raise LR event if not)
|
// Ensure data is allocated (HACK: would raise LR event if not)
|
||||||
// Set range_lock first optimistically
|
// Set range_lock first optimistically
|
||||||
range_lock->store(u64{128} << 32 | addr);
|
range_lock->store(u64{128} << 32 | addr | vm::range_readable);
|
||||||
|
|
||||||
u64 lock_val = *std::prev(std::end(vm::g_range_lock_set));
|
u64 lock_val = *std::prev(std::end(vm::g_range_lock_set));
|
||||||
u64 old_lock = 0;
|
u64 old_lock = 0;
|
||||||
|
|
@ -5628,7 +4938,7 @@ bool spu_thread::reservation_check(u32 addr, u32 hash, atomic_t<u64, 64>* range_
|
||||||
|
|
||||||
// Ensure data is allocated (HACK: would raise LR event if not)
|
// Ensure data is allocated (HACK: would raise LR event if not)
|
||||||
// Set range_lock first optimistically
|
// Set range_lock first optimistically
|
||||||
range_lock->store(u64{128} << 32 | addr);
|
range_lock->store(u64{128} << 32 | addr | vm::range_readable);
|
||||||
|
|
||||||
u64 lock_val = *std::prev(std::end(vm::g_range_lock_set));
|
u64 lock_val = *std::prev(std::end(vm::g_range_lock_set));
|
||||||
u64 old_lock = 0;
|
u64 old_lock = 0;
|
||||||
|
|
@ -6209,7 +5519,7 @@ s64 spu_thread::get_ch_value(u32 ch)
|
||||||
|
|
||||||
eventstat_busy_waiting_switch = value ? 1 : 0;
|
eventstat_busy_waiting_switch = value ? 1 : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (bool is_first = true; !events.count; events = get_events(mask1 & ~SPU_EVENT_LR, true, true), is_first = false)
|
for (bool is_first = true; !events.count; events = get_events(mask1 & ~SPU_EVENT_LR, true, true), is_first = false)
|
||||||
{
|
{
|
||||||
const auto old = +state;
|
const auto old = +state;
|
||||||
|
|
@ -6872,7 +6182,7 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
|
||||||
value &= 0x1f;
|
value &= 0x1f;
|
||||||
|
|
||||||
// Reset stall status for specified tag
|
// Reset stall status for specified tag
|
||||||
const u32 tag_mask = utils::rol32(1, value);
|
const u32 tag_mask = std::rotl<u32>(1, value);
|
||||||
|
|
||||||
if (ch_stall_mask & tag_mask)
|
if (ch_stall_mask & tag_mask)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -901,7 +901,8 @@ public:
|
||||||
|
|
||||||
// Returns true if reservation existed but was just discovered to be lost
|
// Returns true if reservation existed but was just discovered to be lost
|
||||||
// It is safe to use on any address, even if not directly accessed by SPU (so it's slower)
|
// It is safe to use on any address, even if not directly accessed by SPU (so it's slower)
|
||||||
bool reservation_check(u32 addr, const decltype(rdata)& data) const;
|
// Optionally pass a known allocated address for internal optimization (the current Effective-Address of the MFC command)
|
||||||
|
bool reservation_check(u32 addr, const decltype(rdata)& data, u32 current_eal = 0) const;
|
||||||
static bool reservation_check(u32 addr, u32 hash, atomic_t<u64, 64>* range_lock);
|
static bool reservation_check(u32 addr, u32 hash, atomic_t<u64, 64>* range_lock);
|
||||||
usz register_cache_line_waiter(u32 addr);
|
usz register_cache_line_waiter(u32 addr);
|
||||||
void deregister_cache_line_waiter(usz index);
|
void deregister_cache_line_waiter(usz index);
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@
|
||||||
#include "Emu/CPU/CPUThread.h"
|
#include "Emu/CPU/CPUThread.h"
|
||||||
#include "Emu/Cell/ErrorCodes.h"
|
#include "Emu/Cell/ErrorCodes.h"
|
||||||
#include "Emu/Cell/SPUThread.h"
|
#include "Emu/Cell/SPUThread.h"
|
||||||
|
#include "Emu/Cell/PPUThread.h"
|
||||||
#include "Emu/IdManager.h"
|
#include "Emu/IdManager.h"
|
||||||
|
|
||||||
#include "util/asm.hpp"
|
#include "util/asm.hpp"
|
||||||
|
|
@ -249,17 +250,37 @@ error_code sys_memory_free(cpu_thread& cpu, u32 addr)
|
||||||
return CELL_OK;
|
return CELL_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
error_code sys_memory_get_page_attribute(cpu_thread& cpu, u32 addr, vm::ptr<sys_page_attr_t> attr)
|
error_code sys_memory_get_page_attribute(ppu_thread& ppu, u32 addr, vm::ptr<sys_page_attr_t> attr)
|
||||||
{
|
{
|
||||||
cpu.state += cpu_flag::wait;
|
ppu.state += cpu_flag::wait;
|
||||||
|
|
||||||
sys_memory.trace("sys_memory_get_page_attribute(addr=0x%x, attr=*0x%x)", addr, attr);
|
sys_memory.trace("sys_memory_get_page_attribute(addr=0x%x, attr=*0x%x)", addr, attr);
|
||||||
|
|
||||||
vm::writer_lock rlock;
|
if ((addr >> 28) == (ppu.stack_addr >> 28))
|
||||||
|
|
||||||
if (!vm::check_addr(addr) || addr >= SPU_FAKE_BASE_ADDR)
|
|
||||||
{
|
{
|
||||||
return CELL_EINVAL;
|
// Stack address: fast path
|
||||||
|
if (!(addr >= ppu.stack_addr && addr < ppu.stack_addr + ppu.stack_size) && !vm::check_addr(addr))
|
||||||
|
{
|
||||||
|
return { CELL_EINVAL, addr };
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!vm::check_addr(attr.addr(), vm::page_readable, attr.size()))
|
||||||
|
{
|
||||||
|
return CELL_EFAULT;
|
||||||
|
}
|
||||||
|
|
||||||
|
attr->attribute = 0x40000ull; // SYS_MEMORY_PROT_READ_WRITE
|
||||||
|
attr->access_right = SYS_MEMORY_ACCESS_RIGHT_PPU_THR;
|
||||||
|
attr->page_size = 4096;
|
||||||
|
attr->pad = 0; // Always write 0
|
||||||
|
return CELL_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto [ok, vm_flags] = vm::get_addr_flags(addr);
|
||||||
|
|
||||||
|
if (!ok || addr >= SPU_FAKE_BASE_ADDR)
|
||||||
|
{
|
||||||
|
return { CELL_EINVAL, addr };
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!vm::check_addr(attr.addr(), vm::page_readable, attr.size()))
|
if (!vm::check_addr(attr.addr(), vm::page_readable, attr.size()))
|
||||||
|
|
@ -268,19 +289,20 @@ error_code sys_memory_get_page_attribute(cpu_thread& cpu, u32 addr, vm::ptr<sys_
|
||||||
}
|
}
|
||||||
|
|
||||||
attr->attribute = 0x40000ull; // SYS_MEMORY_PROT_READ_WRITE (TODO)
|
attr->attribute = 0x40000ull; // SYS_MEMORY_PROT_READ_WRITE (TODO)
|
||||||
attr->access_right = addr >> 28 == 0xdu ? SYS_MEMORY_ACCESS_RIGHT_PPU_THR : SYS_MEMORY_ACCESS_RIGHT_ANY;// (TODO)
|
attr->access_right = SYS_MEMORY_ACCESS_RIGHT_ANY; // TODO: Report accurately
|
||||||
|
|
||||||
if (vm::check_addr(addr, vm::page_1m_size))
|
if (vm_flags & vm::page_1m_size)
|
||||||
{
|
{
|
||||||
attr->page_size = 0x100000;
|
attr->page_size = 0x100000;
|
||||||
}
|
}
|
||||||
else if (vm::check_addr(addr, vm::page_64k_size))
|
else if (vm_flags & vm::page_64k_size)
|
||||||
{
|
{
|
||||||
attr->page_size = 0x10000;
|
attr->page_size = 0x10000;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
attr->page_size = 4096;
|
//attr->page_size = 4096;
|
||||||
|
fmt::throw_exception("Unreachable");
|
||||||
}
|
}
|
||||||
|
|
||||||
attr->pad = 0; // Always write 0
|
attr->pad = 0; // Always write 0
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@
|
||||||
#include "Emu/Cell/ErrorCodes.h"
|
#include "Emu/Cell/ErrorCodes.h"
|
||||||
|
|
||||||
class cpu_thread;
|
class cpu_thread;
|
||||||
|
class ppu_thread;
|
||||||
|
|
||||||
enum lv2_mem_container_id : u32
|
enum lv2_mem_container_id : u32
|
||||||
{
|
{
|
||||||
|
|
@ -131,7 +132,7 @@ struct sys_memory_user_memory_stat_t
|
||||||
error_code sys_memory_allocate(cpu_thread& cpu, u64 size, u64 flags, vm::ptr<u32> alloc_addr);
|
error_code sys_memory_allocate(cpu_thread& cpu, u64 size, u64 flags, vm::ptr<u32> alloc_addr);
|
||||||
error_code sys_memory_allocate_from_container(cpu_thread& cpu, u64 size, u32 cid, u64 flags, vm::ptr<u32> alloc_addr);
|
error_code sys_memory_allocate_from_container(cpu_thread& cpu, u64 size, u32 cid, u64 flags, vm::ptr<u32> alloc_addr);
|
||||||
error_code sys_memory_free(cpu_thread& cpu, u32 start_addr);
|
error_code sys_memory_free(cpu_thread& cpu, u32 start_addr);
|
||||||
error_code sys_memory_get_page_attribute(cpu_thread& cpu, u32 addr, vm::ptr<sys_page_attr_t> attr);
|
error_code sys_memory_get_page_attribute(ppu_thread& cpu, u32 addr, vm::ptr<sys_page_attr_t> attr);
|
||||||
error_code sys_memory_get_user_memory_size(cpu_thread& cpu, vm::ptr<sys_memory_info_t> mem_info);
|
error_code sys_memory_get_user_memory_size(cpu_thread& cpu, vm::ptr<sys_memory_info_t> mem_info);
|
||||||
error_code sys_memory_get_user_memory_stat(cpu_thread& cpu, vm::ptr<sys_memory_user_memory_stat_t> mem_stat);
|
error_code sys_memory_get_user_memory_stat(cpu_thread& cpu, vm::ptr<sys_memory_user_memory_stat_t> mem_stat);
|
||||||
error_code sys_memory_container_create(cpu_thread& cpu, vm::ptr<u32> cid, u64 size);
|
error_code sys_memory_container_create(cpu_thread& cpu, vm::ptr<u32> cid, u64 size);
|
||||||
|
|
|
||||||
|
|
@ -547,6 +547,13 @@ namespace vm
|
||||||
{
|
{
|
||||||
to_clear = for_all_range_locks(to_clear & ~get_range_lock_bits(true), [&](u64 addr2, u32 size2)
|
to_clear = for_all_range_locks(to_clear & ~get_range_lock_bits(true), [&](u64 addr2, u32 size2)
|
||||||
{
|
{
|
||||||
|
constexpr u32 range_size_loc = vm::range_pos - 32;
|
||||||
|
|
||||||
|
if ((size2 >> range_size_loc) == (vm::range_readable >> vm::range_pos))
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
// Split and check every 64K page separately
|
// Split and check every 64K page separately
|
||||||
for (u64 hi = addr2 >> 16, max = (addr2 + size2 - 1) >> 16; hi <= max; hi++)
|
for (u64 hi = addr2 >> 16, max = (addr2 + size2 - 1) >> 16; hi <= max; hi++)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -81,7 +81,7 @@ namespace vm
|
||||||
bool check_addr(u32 addr, u8 flags, u32 size);
|
bool check_addr(u32 addr, u8 flags, u32 size);
|
||||||
|
|
||||||
template <u32 Size = 1>
|
template <u32 Size = 1>
|
||||||
bool check_addr(u32 addr, u8 flags = page_readable)
|
inline bool check_addr(u32 addr, u8 flags = page_readable)
|
||||||
{
|
{
|
||||||
extern std::array<memory_page, 0x100000000 / 4096> g_pages;
|
extern std::array<memory_page, 0x100000000 / 4096> g_pages;
|
||||||
|
|
||||||
|
|
@ -94,6 +94,16 @@ namespace vm
|
||||||
return !(~g_pages[addr / 4096] & (flags | page_allocated));
|
return !(~g_pages[addr / 4096] & (flags | page_allocated));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Like check_addr but should only be used in lock-free context with care
|
||||||
|
inline std::pair<bool, u8> get_addr_flags(u32 addr) noexcept
|
||||||
|
{
|
||||||
|
extern std::array<memory_page, 0x100000000 / 4096> g_pages;
|
||||||
|
|
||||||
|
const u8 flags = g_pages[addr / 4096].load();
|
||||||
|
|
||||||
|
return std::make_pair(!!(flags & page_allocated), flags);
|
||||||
|
}
|
||||||
|
|
||||||
// Read string in a safe manner (page aware) (bool true = if null-termination)
|
// Read string in a safe manner (page aware) (bool true = if null-termination)
|
||||||
bool read_string(u32 addr, u32 max_size, std::string& out_string, bool check_pages = true) noexcept;
|
bool read_string(u32 addr, u32 max_size, std::string& out_string, bool check_pages = true) noexcept;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,9 +6,6 @@
|
||||||
#include "util/tsc.hpp"
|
#include "util/tsc.hpp"
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
|
||||||
extern bool g_use_rtm;
|
|
||||||
extern u64 g_rtm_tx_limit2;
|
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
extern "C"
|
extern "C"
|
||||||
{
|
{
|
||||||
|
|
@ -143,7 +140,7 @@ namespace vm
|
||||||
void reservation_op_internal(u32 addr, std::function<bool()> func);
|
void reservation_op_internal(u32 addr, std::function<bool()> func);
|
||||||
|
|
||||||
template <bool Ack = false, typename CPU, typename T, typename AT = u32, typename F>
|
template <bool Ack = false, typename CPU, typename T, typename AT = u32, typename F>
|
||||||
inline SAFE_BUFFERS(auto) reservation_op(CPU& cpu, _ptr_base<T, AT> ptr, F op)
|
inline SAFE_BUFFERS(auto) reservation_op(CPU& /*cpu*/, _ptr_base<T, AT> ptr, F op)
|
||||||
{
|
{
|
||||||
// Atomic operation will be performed on aligned 128 bytes of data, so the data size and alignment must comply
|
// Atomic operation will be performed on aligned 128 bytes of data, so the data size and alignment must comply
|
||||||
static_assert(sizeof(T) <= 128 && alignof(T) == sizeof(T), "vm::reservation_op: unsupported type");
|
static_assert(sizeof(T) <= 128 && alignof(T) == sizeof(T), "vm::reservation_op: unsupported type");
|
||||||
|
|
@ -162,188 +159,6 @@ namespace vm
|
||||||
auto& res = vm::reservation_acquire(addr);
|
auto& res = vm::reservation_acquire(addr);
|
||||||
//_m_prefetchw(&res);
|
//_m_prefetchw(&res);
|
||||||
|
|
||||||
#if defined(ARCH_X64)
|
|
||||||
if (g_use_rtm)
|
|
||||||
{
|
|
||||||
// Stage 1: single optimistic transaction attempt
|
|
||||||
unsigned status = -1;
|
|
||||||
u64 _old = 0;
|
|
||||||
|
|
||||||
auto stamp0 = utils::get_tsc(), stamp1 = stamp0, stamp2 = stamp0;
|
|
||||||
|
|
||||||
#if !defined(_MSC_VER) || (defined(__clang__) && defined(_MSC_VER))
|
|
||||||
__asm__ goto ("xbegin %l[stage2];" ::: "memory" : stage2);
|
|
||||||
#else
|
|
||||||
status = _xbegin();
|
|
||||||
if (status == umax)
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
if (res & rsrv_unique_lock)
|
|
||||||
{
|
|
||||||
#if !defined(_MSC_VER) || (defined(__clang__) && defined(_MSC_VER))
|
|
||||||
__asm__ volatile ("xend; mov $-1, %%eax;" ::: "memory");
|
|
||||||
#else
|
|
||||||
_xend();
|
|
||||||
#endif
|
|
||||||
goto stage2;
|
|
||||||
}
|
|
||||||
|
|
||||||
if constexpr (std::is_void_v<std::invoke_result_t<F, T&>>)
|
|
||||||
{
|
|
||||||
std::invoke(op, *sptr);
|
|
||||||
const u64 old_time = res.fetch_add(128);
|
|
||||||
#if !defined(_MSC_VER) || (defined(__clang__) && defined(_MSC_VER))
|
|
||||||
__asm__ volatile ("xend;" ::: "memory");
|
|
||||||
#else
|
|
||||||
_xend();
|
|
||||||
#endif
|
|
||||||
if constexpr (Ack)
|
|
||||||
reservation_notifier_notify(addr, old_time);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (auto result = std::invoke(op, *sptr))
|
|
||||||
{
|
|
||||||
const u64 old_time = res.fetch_add(128);
|
|
||||||
#if !defined(_MSC_VER) || (defined(__clang__) && defined(_MSC_VER))
|
|
||||||
__asm__ volatile ("xend;" ::: "memory");
|
|
||||||
#else
|
|
||||||
_xend();
|
|
||||||
#endif
|
|
||||||
if constexpr (Ack)
|
|
||||||
reservation_notifier_notify(addr, old_time);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
#if !defined(_MSC_VER) || (defined(__clang__) && defined(_MSC_VER))
|
|
||||||
__asm__ volatile ("xend;" ::: "memory");
|
|
||||||
#else
|
|
||||||
_xend();
|
|
||||||
#endif
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
stage2:
|
|
||||||
#ifndef _MSC_VER
|
|
||||||
__asm__ volatile ("mov %%eax, %0;" : "=r" (status) :: "memory");
|
|
||||||
#endif
|
|
||||||
stamp1 = utils::get_tsc();
|
|
||||||
|
|
||||||
// Stage 2: try to lock reservation first
|
|
||||||
_old = res.fetch_add(1);
|
|
||||||
|
|
||||||
// Compute stamps excluding memory touch
|
|
||||||
stamp2 = utils::get_tsc() - (stamp1 - stamp0);
|
|
||||||
|
|
||||||
// Start lightened transaction
|
|
||||||
for (; !(_old & vm::rsrv_unique_lock) && stamp2 - stamp0 <= g_rtm_tx_limit2; stamp2 = utils::get_tsc())
|
|
||||||
{
|
|
||||||
if (cpu.has_pause_flag())
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if !defined(_MSC_VER) || (defined(__clang__) && defined(_MSC_VER))
|
|
||||||
__asm__ goto ("xbegin %l[retry];" ::: "memory" : retry);
|
|
||||||
#else
|
|
||||||
status = _xbegin();
|
|
||||||
|
|
||||||
if (status != umax) [[unlikely]]
|
|
||||||
{
|
|
||||||
goto retry;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
if constexpr (std::is_void_v<std::invoke_result_t<F, T&>>)
|
|
||||||
{
|
|
||||||
std::invoke(op, *sptr);
|
|
||||||
#if !defined(_MSC_VER) || (defined(__clang__) && defined(_MSC_VER))
|
|
||||||
__asm__ volatile ("xend;" ::: "memory");
|
|
||||||
#else
|
|
||||||
_xend();
|
|
||||||
#endif
|
|
||||||
res += 127;
|
|
||||||
if (Ack)
|
|
||||||
reservation_notifier_notify(addr, _old);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (auto result = std::invoke(op, *sptr))
|
|
||||||
{
|
|
||||||
#if !defined(_MSC_VER) || (defined(__clang__) && defined(_MSC_VER))
|
|
||||||
__asm__ volatile ("xend;" ::: "memory");
|
|
||||||
#else
|
|
||||||
_xend();
|
|
||||||
#endif
|
|
||||||
res += 127;
|
|
||||||
if (Ack)
|
|
||||||
reservation_notifier_notify(addr, _old);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
#if !defined(_MSC_VER) || (defined(__clang__) && defined(_MSC_VER))
|
|
||||||
__asm__ volatile ("xend;" ::: "memory");
|
|
||||||
#else
|
|
||||||
_xend();
|
|
||||||
#endif
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
retry:
|
|
||||||
#ifndef _MSC_VER
|
|
||||||
__asm__ volatile ("mov %%eax, %0;" : "=r" (status) :: "memory");
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (!status)
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Stage 3: all failed, heavyweight fallback (see comments at the bottom)
|
|
||||||
if constexpr (std::is_void_v<std::invoke_result_t<F, T&>>)
|
|
||||||
{
|
|
||||||
vm::reservation_op_internal(addr, [&]
|
|
||||||
{
|
|
||||||
std::invoke(op, *sptr);
|
|
||||||
return true;
|
|
||||||
});
|
|
||||||
|
|
||||||
if constexpr (Ack)
|
|
||||||
reservation_notifier_notify(addr, _old);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
auto result = std::invoke_result_t<F, T&>();
|
|
||||||
|
|
||||||
vm::reservation_op_internal(addr, [&]
|
|
||||||
{
|
|
||||||
if ((result = std::invoke(op, *sptr)))
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
if (Ack && result)
|
|
||||||
reservation_notifier_notify(addr, _old);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
static_cast<void>(cpu);
|
|
||||||
#endif /* ARCH_X64 */
|
|
||||||
|
|
||||||
// Lock reservation and perform heavyweight lock
|
// Lock reservation and perform heavyweight lock
|
||||||
reservation_shared_lock_internal(res);
|
reservation_shared_lock_internal(res);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1096,80 +1096,65 @@ namespace rsx
|
||||||
fmt::throw_exception("Wrong format 0x%x", format);
|
fmt::throw_exception("Wrong format 0x%x", format);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (word_size)
|
if (!word_size)
|
||||||
{
|
{
|
||||||
if (word_size == 1)
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
result.element_size = word_size;
|
||||||
|
result.block_length = words_per_block;
|
||||||
|
|
||||||
|
bool require_cpu_swizzle = !caps.supports_hw_deswizzle && is_swizzled;
|
||||||
|
bool require_cpu_byteswap = word_size > 1 && !caps.supports_byteswap;
|
||||||
|
|
||||||
|
if (is_swizzled && caps.supports_hw_deswizzle)
|
||||||
|
{
|
||||||
|
result.require_deswizzle = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!require_cpu_byteswap && !require_cpu_swizzle)
|
||||||
|
{
|
||||||
|
result.require_swap = (word_size > 1);
|
||||||
|
|
||||||
|
if (caps.supports_zero_copy)
|
||||||
{
|
{
|
||||||
if (is_swizzled)
|
result.require_upload = true;
|
||||||
{
|
result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), word_size * words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
||||||
copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span<u8>(), src_layout.data.as_span<const u8>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
|
|
||||||
}
|
|
||||||
else if (caps.supports_zero_copy)
|
|
||||||
{
|
|
||||||
result.require_upload = true;
|
|
||||||
result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u8>(), src_layout.data.as_span<const u8>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
else if (word_size == 1)
|
||||||
|
{
|
||||||
|
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u8>(), src_layout.data.as_span<const u8>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
||||||
|
}
|
||||||
|
else if (word_size == 2)
|
||||||
|
{
|
||||||
|
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u16>(), src_layout.data.as_span<const u16>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
||||||
|
}
|
||||||
|
else if (word_size == 4)
|
||||||
|
{
|
||||||
|
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const u32>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (word_size == 1)
|
||||||
|
{
|
||||||
|
ensure(is_swizzled);
|
||||||
|
copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span<u8>(), src_layout.data.as_span<const u8>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
|
||||||
|
}
|
||||||
|
else if (word_size == 2)
|
||||||
|
{
|
||||||
|
if (is_swizzled)
|
||||||
|
copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span<u16>(), src_layout.data.as_span<const be_t<u16>>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
|
||||||
else
|
else
|
||||||
{
|
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u16>(), src_layout.data.as_span<const be_t<u16>>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
||||||
result.element_size = word_size;
|
}
|
||||||
result.block_length = words_per_block;
|
else if (word_size == 4)
|
||||||
|
{
|
||||||
bool require_cpu_swizzle = !caps.supports_hw_deswizzle && is_swizzled;
|
if (is_swizzled)
|
||||||
bool require_cpu_byteswap = !caps.supports_byteswap;
|
copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const be_t<u32>>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
|
||||||
|
else
|
||||||
if (is_swizzled && caps.supports_hw_deswizzle)
|
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const be_t<u32>>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
||||||
{
|
|
||||||
if (word_size == 4 || (((word_size * words_per_block) & 3) == 0))
|
|
||||||
{
|
|
||||||
result.require_deswizzle = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
require_cpu_swizzle = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!require_cpu_byteswap && !require_cpu_swizzle)
|
|
||||||
{
|
|
||||||
result.require_swap = true;
|
|
||||||
|
|
||||||
if (caps.supports_zero_copy)
|
|
||||||
{
|
|
||||||
result.require_upload = true;
|
|
||||||
result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), word_size * words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
|
||||||
}
|
|
||||||
else if (word_size == 2)
|
|
||||||
{
|
|
||||||
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u16>(), src_layout.data.as_span<const u16>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
|
||||||
}
|
|
||||||
else if (word_size == 4)
|
|
||||||
{
|
|
||||||
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const u32>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (word_size == 2)
|
|
||||||
{
|
|
||||||
if (is_swizzled)
|
|
||||||
copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span<u16>(), src_layout.data.as_span<const be_t<u16>>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
|
|
||||||
else
|
|
||||||
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u16>(), src_layout.data.as_span<const be_t<u16>>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
|
||||||
}
|
|
||||||
else if (word_size == 4)
|
|
||||||
{
|
|
||||||
if (is_swizzled)
|
|
||||||
copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const be_t<u32>>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
|
|
||||||
else
|
|
||||||
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const be_t<u32>>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
|
|
||||||
|
|
@ -80,6 +80,7 @@ namespace rsx
|
||||||
std::span<T> as_span() const
|
std::span<T> as_span() const
|
||||||
{
|
{
|
||||||
auto bytes = data();
|
auto bytes = data();
|
||||||
|
ensure((reinterpret_cast<uintptr_t>(bytes) & (sizeof(T) - 1)) == 0, "IO buffer span cast requires naturally aligned pointers.");
|
||||||
return { utils::bless<T>(bytes), m_size / sizeof(T) };
|
return { utils::bless<T>(bytes), m_size / sizeof(T) };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3,11 +3,46 @@
|
||||||
#include <util/types.hpp>
|
#include <util/types.hpp>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <cstdlib>
|
||||||
|
|
||||||
#include "reverse_ptr.hpp"
|
#include "reverse_ptr.hpp"
|
||||||
|
|
||||||
namespace rsx
|
namespace rsx
|
||||||
{
|
{
|
||||||
|
namespace aligned_allocator
|
||||||
|
{
|
||||||
|
template <size_t Align>
|
||||||
|
void* malloc(size_t size)
|
||||||
|
{
|
||||||
|
#ifdef _WIN32
|
||||||
|
return _aligned_malloc(size, Align);
|
||||||
|
#else
|
||||||
|
return std::aligned_alloc(Align, size);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template <size_t Align>
|
||||||
|
void* realloc(void* prev_ptr, [[maybe_unused]] size_t prev_size, size_t new_size)
|
||||||
|
{
|
||||||
|
#ifdef _WIN32
|
||||||
|
return _aligned_realloc(prev_ptr, new_size, Align);
|
||||||
|
#else
|
||||||
|
void* ret = std::aligned_alloc(Align, new_size);
|
||||||
|
std::memcpy(ret, prev_ptr, std::min(prev_size, new_size));
|
||||||
|
return ret;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void free(void* ptr)
|
||||||
|
{
|
||||||
|
#ifdef _WIN32
|
||||||
|
_aligned_free(ptr);
|
||||||
|
#else
|
||||||
|
std::free(ptr);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <typename C, typename T>
|
template <typename C, typename T>
|
||||||
concept span_like =
|
concept span_like =
|
||||||
requires(C& c) {
|
requires(C& c) {
|
||||||
|
|
@ -15,7 +50,7 @@ namespace rsx
|
||||||
{ c.size() } -> std::integral;
|
{ c.size() } -> std::integral;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Ty>
|
template <typename Ty, size_t Align=alignof(Ty)>
|
||||||
requires std::is_trivially_destructible_v<Ty> && std::is_trivially_copyable_v<Ty>
|
requires std::is_trivially_destructible_v<Ty> && std::is_trivially_copyable_v<Ty>
|
||||||
struct simple_array
|
struct simple_array
|
||||||
{
|
{
|
||||||
|
|
@ -28,7 +63,7 @@ namespace rsx
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static constexpr u32 _local_capacity = std::max<u32>(64u / sizeof(Ty), 1u);
|
static constexpr u32 _local_capacity = std::max<u32>(64u / sizeof(Ty), 1u);
|
||||||
char _local_storage[_local_capacity * sizeof(Ty)];
|
alignas(Align) char _local_storage[_local_capacity * sizeof(Ty)];
|
||||||
|
|
||||||
u32 _capacity = _local_capacity;
|
u32 _capacity = _local_capacity;
|
||||||
Ty* _data = _local_capacity ? reinterpret_cast<Ty*>(_local_storage) : nullptr;
|
Ty* _data = _local_capacity ? reinterpret_cast<Ty*>(_local_storage) : nullptr;
|
||||||
|
|
@ -128,7 +163,7 @@ namespace rsx
|
||||||
{
|
{
|
||||||
if (!is_local_storage())
|
if (!is_local_storage())
|
||||||
{
|
{
|
||||||
free(_data);
|
aligned_allocator::free(_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
_data = nullptr;
|
_data = nullptr;
|
||||||
|
|
@ -196,13 +231,13 @@ namespace rsx
|
||||||
if (is_local_storage())
|
if (is_local_storage())
|
||||||
{
|
{
|
||||||
// Switch to heap storage
|
// Switch to heap storage
|
||||||
ensure(_data = static_cast<Ty*>(std::malloc(sizeof(Ty) * size)));
|
ensure(_data = static_cast<Ty*>(aligned_allocator::malloc<Align>(sizeof(Ty) * size)));
|
||||||
std::memcpy(static_cast<void*>(_data), _local_storage, size_bytes());
|
std::memcpy(static_cast<void*>(_data), _local_storage, size_bytes());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Extend heap storage
|
// Extend heap storage
|
||||||
ensure(_data = static_cast<Ty*>(std::realloc(_data, sizeof(Ty) * size))); // "realloc() failed!"
|
ensure(_data = static_cast<Ty*>(aligned_allocator::realloc<Align>(_data, size_bytes(), sizeof(Ty) * size))); // "realloc() failed!"
|
||||||
}
|
}
|
||||||
|
|
||||||
_capacity = size;
|
_capacity = size;
|
||||||
|
|
|
||||||
|
|
@ -1219,7 +1219,7 @@ namespace rsx
|
||||||
|
|
||||||
if (result.size() > 1)
|
if (result.size() > 1)
|
||||||
{
|
{
|
||||||
std::sort(result.begin(), result.end(), [](const auto &a, const auto &b)
|
result.sort([](const auto &a, const auto &b)
|
||||||
{
|
{
|
||||||
if (a.surface->last_use_tag == b.surface->last_use_tag)
|
if (a.surface->last_use_tag == b.surface->last_use_tag)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -1496,7 +1496,7 @@ namespace rsx
|
||||||
|
|
||||||
void on_miss()
|
void on_miss()
|
||||||
{
|
{
|
||||||
rsx_log.warning("Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
|
// rsx_log.trace("Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
|
||||||
m_tex_cache->on_miss(*derived());
|
m_tex_cache->on_miss(*derived());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -263,8 +263,6 @@ namespace gl
|
||||||
|
|
||||||
cs_deswizzle_3d()
|
cs_deswizzle_3d()
|
||||||
{
|
{
|
||||||
ensure((sizeof(_BlockType) & 3) == 0); // "Unsupported block type"
|
|
||||||
|
|
||||||
initialize();
|
initialize();
|
||||||
|
|
||||||
m_src =
|
m_src =
|
||||||
|
|
@ -294,8 +292,10 @@ namespace gl
|
||||||
{ "%loc", std::to_string(GL_COMPUTE_BUFFER_SLOT(0))},
|
{ "%loc", std::to_string(GL_COMPUTE_BUFFER_SLOT(0))},
|
||||||
{ "%push_block", fmt::format("binding=%d, std140", GL_COMPUTE_BUFFER_SLOT(2)) },
|
{ "%push_block", fmt::format("binding=%d, std140", GL_COMPUTE_BUFFER_SLOT(2)) },
|
||||||
{ "%ws", std::to_string(optimal_group_size) },
|
{ "%ws", std::to_string(optimal_group_size) },
|
||||||
{ "%_wordcount", std::to_string(sizeof(_BlockType) / 4) },
|
{ "%_wordcount", std::to_string(std::max<u32>(sizeof(_BlockType) / 4u, 1u)) },
|
||||||
{ "%f", transform }
|
{ "%f", transform },
|
||||||
|
{ "%_8bit", sizeof(_BlockType) == 1 ? "1" : "0" },
|
||||||
|
{ "%_16bit", sizeof(_BlockType) == 2 ? "1" : "0" },
|
||||||
};
|
};
|
||||||
|
|
||||||
m_src = fmt::replace_all(m_src, syntax_replace);
|
m_src = fmt::replace_all(m_src, syntax_replace);
|
||||||
|
|
@ -338,9 +338,10 @@ namespace gl
|
||||||
params.logd = rsx::ceil_log2(depth);
|
params.logd = rsx::ceil_log2(depth);
|
||||||
set_parameters(cmd);
|
set_parameters(cmd);
|
||||||
|
|
||||||
const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size);
|
const u32 word_count_per_invocation = std::max<u32>(sizeof(_BlockType) / 4u, 1u);
|
||||||
const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation);
|
const u32 num_bytes_per_invocation = (word_count_per_invocation * 4u * optimal_group_size);
|
||||||
compute_task::run(cmd, linear_invocations);
|
const u32 workgroup_invocations = utils::aligned_div(data_length, num_bytes_per_invocation);
|
||||||
|
compute_task::run(cmd, workgroup_invocations);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -36,6 +36,16 @@ namespace gl
|
||||||
{
|
{
|
||||||
switch (block_size)
|
switch (block_size)
|
||||||
{
|
{
|
||||||
|
case 1:
|
||||||
|
gl::get_compute_task<gl::cs_deswizzle_3d<u8, WordType, SwapBytes>>()->run(
|
||||||
|
cmd, dst, dst_offset, src, src_offset,
|
||||||
|
data_length, width, height, depth, 1);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
gl::get_compute_task<gl::cs_deswizzle_3d<u16, WordType, SwapBytes>>()->run(
|
||||||
|
cmd, dst, dst_offset, src, src_offset,
|
||||||
|
data_length, width, height, depth, 1);
|
||||||
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
gl::get_compute_task<gl::cs_deswizzle_3d<u32, WordType, SwapBytes>>()->run(
|
gl::get_compute_task<gl::cs_deswizzle_3d<u32, WordType, SwapBytes>>()->run(
|
||||||
cmd, dst, dst_offset, src, src_offset,
|
cmd, dst, dst_offset, src, src_offset,
|
||||||
|
|
@ -580,7 +590,7 @@ namespace gl
|
||||||
|
|
||||||
void fill_texture(gl::command_context& cmd, texture* dst, int format,
|
void fill_texture(gl::command_context& cmd, texture* dst, int format,
|
||||||
const std::vector<rsx::subresource_layout> &input_layouts,
|
const std::vector<rsx::subresource_layout> &input_layouts,
|
||||||
bool is_swizzled, GLenum gl_format, GLenum gl_type, rsx::simple_array<std::byte>& staging_buffer)
|
bool is_swizzled, GLenum gl_format, GLenum gl_type, std::span<std::byte> staging_buffer)
|
||||||
{
|
{
|
||||||
const auto& driver_caps = gl::get_driver_caps();
|
const auto& driver_caps = gl::get_driver_caps();
|
||||||
rsx::texture_uploader_capabilities caps
|
rsx::texture_uploader_capabilities caps
|
||||||
|
|
@ -707,7 +717,7 @@ namespace gl
|
||||||
}
|
}
|
||||||
|
|
||||||
rsx::io_buffer io_buf = dst_buffer;
|
rsx::io_buffer io_buf = dst_buffer;
|
||||||
caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 4096);
|
caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 1024);
|
||||||
auto op = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps);
|
auto op = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps);
|
||||||
|
|
||||||
// Define upload region
|
// Define upload region
|
||||||
|
|
@ -748,39 +758,54 @@ namespace gl
|
||||||
g_upload_transfer_buffer.copy_to(&g_deswizzle_scratch_buffer.get(), upload_scratch_mem.second, deswizzle_data_offset, static_cast<u32>(image_linear_size));
|
g_upload_transfer_buffer.copy_to(&g_deswizzle_scratch_buffer.get(), upload_scratch_mem.second, deswizzle_data_offset, static_cast<u32>(image_linear_size));
|
||||||
|
|
||||||
// 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem
|
// 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem
|
||||||
ensure(op.element_size == 2 || op.element_size == 4);
|
|
||||||
const auto block_size = op.element_size * op.block_length;
|
const auto block_size = op.element_size * op.block_length;
|
||||||
|
|
||||||
if (op.require_swap)
|
if (op.require_swap)
|
||||||
{
|
{
|
||||||
mem_layout.swap_bytes = false;
|
mem_layout.swap_bytes = false;
|
||||||
|
|
||||||
if (op.element_size == 4) [[ likely ]]
|
switch (op.element_size)
|
||||||
{
|
{
|
||||||
do_deswizzle_transformation<u32, true>(cmd, block_size,
|
case 1:
|
||||||
|
do_deswizzle_transformation<u8, true>(cmd, block_size,
|
||||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||||
}
|
break;
|
||||||
else
|
case 2:
|
||||||
{
|
|
||||||
do_deswizzle_transformation<u16, true>(cmd, block_size,
|
do_deswizzle_transformation<u16, true>(cmd, block_size,
|
||||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
do_deswizzle_transformation<u32, true>(cmd, block_size,
|
||||||
|
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||||
|
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fmt::throw_exception("Unimplemented element size deswizzle");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (op.element_size == 4) [[ likely ]]
|
switch (op.element_size)
|
||||||
{
|
{
|
||||||
do_deswizzle_transformation<u32, false>(cmd, block_size,
|
case 1:
|
||||||
|
do_deswizzle_transformation<u8, false>(cmd, block_size,
|
||||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||||
}
|
break;
|
||||||
else
|
case 2:
|
||||||
{
|
|
||||||
do_deswizzle_transformation<u16, false>(cmd, block_size,
|
do_deswizzle_transformation<u16, false>(cmd, block_size,
|
||||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
do_deswizzle_transformation<u32, false>(cmd, block_size,
|
||||||
|
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||||
|
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fmt::throw_exception("Unimplemented element size deswizzle");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -816,7 +841,7 @@ namespace gl
|
||||||
void upload_texture(gl::command_context& cmd, texture* dst, u32 gcm_format, bool is_swizzled, const std::vector<rsx::subresource_layout>& subresources_layout)
|
void upload_texture(gl::command_context& cmd, texture* dst, u32 gcm_format, bool is_swizzled, const std::vector<rsx::subresource_layout>& subresources_layout)
|
||||||
{
|
{
|
||||||
// Calculate staging buffer size
|
// Calculate staging buffer size
|
||||||
rsx::simple_array<std::byte> data_upload_buf;
|
rsx::simple_array<std::byte, sizeof(u128)> data_upload_buf;
|
||||||
|
|
||||||
rsx::texture_uploader_capabilities caps { .supports_dxt = gl::get_driver_caps().EXT_texture_compression_s3tc_supported };
|
rsx::texture_uploader_capabilities caps { .supports_dxt = gl::get_driver_caps().EXT_texture_compression_s3tc_supported };
|
||||||
if (rsx::is_compressed_host_format(caps, gcm_format))
|
if (rsx::is_compressed_host_format(caps, gcm_format))
|
||||||
|
|
|
||||||
|
|
@ -607,7 +607,7 @@ namespace rsx
|
||||||
compiled_resources_temp.clear();
|
compiled_resources_temp.clear();
|
||||||
auto& cmd_text = compiled_resources_temp.append({});
|
auto& cmd_text = compiled_resources_temp.append({});
|
||||||
|
|
||||||
cmd_text.config.set_font(font_ref ? font_ref : fontmgr::get("Arial", 12));
|
cmd_text.config.set_font(get_font());
|
||||||
cmd_text.config.color = fore_color;
|
cmd_text.config.color = fore_color;
|
||||||
cmd_text.verts = render_text(text.c_str(), static_cast<f32>(x), static_cast<f32>(y));
|
cmd_text.verts = render_text(text.c_str(), static_cast<f32>(x), static_cast<f32>(y));
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -199,7 +199,7 @@ namespace rsx
|
||||||
if (!is_compiled())
|
if (!is_compiled())
|
||||||
{
|
{
|
||||||
auto renderer = get_font();
|
auto renderer = get_font();
|
||||||
const auto [caret_x, caret_y] = renderer->get_char_offset(text.c_str(), caret_position, clip_text ? w : -1, wrap_text);
|
const auto& [caret_x, caret_y] = renderer->get_char_offset(text.c_str(), caret_position, clip_text ? w : -1, wrap_text);
|
||||||
|
|
||||||
overlay_element caret;
|
overlay_element caret;
|
||||||
caret.set_pos(static_cast<u16>(caret_x) + padding_left + x, static_cast<u16>(caret_y) + padding_top + y);
|
caret.set_pos(static_cast<u16>(caret_x) + padding_left + x, static_cast<u16>(caret_y) + padding_top + y);
|
||||||
|
|
|
||||||
|
|
@ -52,13 +52,13 @@ namespace rsx
|
||||||
return quad;
|
return quad;
|
||||||
}
|
}
|
||||||
|
|
||||||
font::font(const char* ttf_name, f32 size)
|
font::font(std::string_view ttf_name, f32 size)
|
||||||
{
|
{
|
||||||
// Convert pt to px
|
// Convert pt to px
|
||||||
size_px = ceilf(size * 96.f / 72.f);
|
size_px = ceilf(size * 96.f / 72.f);
|
||||||
size_pt = size;
|
size_pt = size;
|
||||||
|
|
||||||
font_name = ttf_name;
|
font_name = std::string(ttf_name);
|
||||||
initialized = true;
|
initialized = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -135,10 +135,17 @@ namespace rsx
|
||||||
|
|
||||||
// Attempt to load a font from dev_flash before any other source
|
// Attempt to load a font from dev_flash before any other source
|
||||||
result.font_names.emplace_back("SCE-PS3-SR-R-JPN.TTF");
|
result.font_names.emplace_back("SCE-PS3-SR-R-JPN.TTF");
|
||||||
|
result.font_names.emplace_back("SCE-PS3-DH-R-CGB.TTF");
|
||||||
|
|
||||||
// Known system font as last fallback
|
// Known system font as last fallback
|
||||||
result.font_names.emplace_back("Yu Gothic.ttf");
|
result.font_names.emplace_back("Yu Gothic.ttf");
|
||||||
result.font_names.emplace_back("YuGothR.ttc");
|
result.font_names.emplace_back("YuGothR.ttc");
|
||||||
|
#ifdef _WIN32
|
||||||
|
result.font_names.emplace_back("msyh.ttc");
|
||||||
|
result.font_names.emplace_back("simsunb.ttc");
|
||||||
|
result.font_names.emplace_back("simsun.ttc");
|
||||||
|
result.font_names.emplace_back("SimsunExtG.ttf");
|
||||||
|
#endif
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case language_class::hangul:
|
case language_class::hangul:
|
||||||
|
|
@ -159,25 +166,58 @@ namespace rsx
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
codepage* font::initialize_codepage(char32_t codepage_id)
|
codepage* font::initialize_codepage(char32_t c)
|
||||||
{
|
{
|
||||||
// Init glyph
|
// Init glyph
|
||||||
|
const auto codepage_id = get_page_id(c);
|
||||||
const auto class_ = classify(codepage_id);
|
const auto class_ = classify(codepage_id);
|
||||||
const auto fs_settings = get_glyph_files(class_);
|
const auto fs_settings = get_glyph_files(class_);
|
||||||
|
|
||||||
// Attemt to load requested font
|
// Attemt to load requested font
|
||||||
std::vector<u8> bytes;
|
std::vector<u8> bytes;
|
||||||
std::string file_path;
|
std::vector<u8> fallback_bytes;
|
||||||
|
std::string fallback_file;
|
||||||
bool font_found = false;
|
bool font_found = false;
|
||||||
|
|
||||||
|
const auto get_font = [&](const std::string& file_path) -> bool
|
||||||
|
{
|
||||||
|
// Read font
|
||||||
|
fs::file f(file_path);
|
||||||
|
f.read(bytes, f.size());
|
||||||
|
|
||||||
|
// Check if the character exists in the font
|
||||||
|
stbtt_fontinfo info;
|
||||||
|
if (stbtt_InitFont(&info, bytes.data(), stbtt_GetFontOffsetForIndex(bytes.data(), 0)) != 0)
|
||||||
|
{
|
||||||
|
font_found = stbtt_FindGlyphIndex(&info, c) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!font_found)
|
||||||
|
{
|
||||||
|
if (fallback_bytes.empty())
|
||||||
|
{
|
||||||
|
// Save this font as a fallback so we don't get a segfault or exception
|
||||||
|
fallback_bytes = std::move(bytes);
|
||||||
|
fallback_file = file_path;
|
||||||
|
}
|
||||||
|
|
||||||
|
bytes.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
return font_found;
|
||||||
|
};
|
||||||
|
|
||||||
for (const auto& font_file : fs_settings.font_names)
|
for (const auto& font_file : fs_settings.font_names)
|
||||||
{
|
{
|
||||||
if (fs::is_file(font_file))
|
if (fs::is_file(font_file))
|
||||||
{
|
{
|
||||||
// Check for absolute paths or fonts 'installed' to executable folder
|
// Check for absolute paths or fonts 'installed' to executable folder
|
||||||
file_path = font_file;
|
if (get_font(font_file))
|
||||||
font_found = true;
|
{
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string extension;
|
std::string extension;
|
||||||
|
|
@ -196,11 +236,13 @@ namespace rsx
|
||||||
|
|
||||||
for (const auto& font_dir : fs_settings.lookup_font_dirs)
|
for (const auto& font_dir : fs_settings.lookup_font_dirs)
|
||||||
{
|
{
|
||||||
file_path = font_dir + file_name;
|
const std::string file_path = font_dir + file_name;
|
||||||
if (fs::is_file(file_path))
|
if (fs::is_file(file_path))
|
||||||
{
|
{
|
||||||
font_found = true;
|
if (get_font(file_path))
|
||||||
break;
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -210,16 +252,15 @@ namespace rsx
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read font
|
if (!font_found)
|
||||||
if (font_found)
|
|
||||||
{
|
{
|
||||||
fs::file f(file_path);
|
if (fallback_bytes.empty())
|
||||||
f.read(bytes, f.size());
|
{
|
||||||
}
|
fmt::throw_exception("Failed to initialize font for character 0x%x on codepage %d.", static_cast<u32>(c), static_cast<u32>(codepage_id));
|
||||||
else
|
}
|
||||||
{
|
|
||||||
rsx_log.error("Failed to initialize font '%s.ttf' on codepage %d", font_name, static_cast<u32>(codepage_id));
|
rsx_log.error("Failed to initialize font for character 0x%x on codepage %d. Falling back to font '%s'", static_cast<u32>(c), static_cast<u32>(codepage_id), fallback_file);
|
||||||
return nullptr;
|
bytes = std::move(fallback_bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
codepage_cache.page = nullptr;
|
codepage_cache.page = nullptr;
|
||||||
|
|
@ -245,7 +286,8 @@ namespace rsx
|
||||||
if (!initialized)
|
if (!initialized)
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
const auto page_id = (c >> 8);
|
const auto page_id = get_page_id(c);
|
||||||
|
|
||||||
if (codepage_cache.codepage_id == page_id && codepage_cache.page) [[likely]]
|
if (codepage_cache.codepage_id == page_id && codepage_cache.page) [[likely]]
|
||||||
{
|
{
|
||||||
return codepage_cache.page->get_char(c, x_advance, y_advance);
|
return codepage_cache.page->get_char(c, x_advance, y_advance);
|
||||||
|
|
@ -257,7 +299,7 @@ namespace rsx
|
||||||
|
|
||||||
for (const auto& e : m_glyph_map)
|
for (const auto& e : m_glyph_map)
|
||||||
{
|
{
|
||||||
if (e.first == unsigned(page_id))
|
if (e.first == page_id)
|
||||||
{
|
{
|
||||||
codepage_cache.page = e.second.get();
|
codepage_cache.page = e.second.get();
|
||||||
break;
|
break;
|
||||||
|
|
@ -266,7 +308,7 @@ namespace rsx
|
||||||
|
|
||||||
if (!codepage_cache.page) [[unlikely]]
|
if (!codepage_cache.page) [[unlikely]]
|
||||||
{
|
{
|
||||||
codepage_cache.page = initialize_codepage(page_id);
|
codepage_cache.page = initialize_codepage(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
return codepage_cache.page->get_char(c, x_advance, y_advance);
|
return codepage_cache.page->get_char(c, x_advance, y_advance);
|
||||||
|
|
|
||||||
|
|
@ -64,12 +64,13 @@ namespace rsx
|
||||||
}
|
}
|
||||||
codepage_cache;
|
codepage_cache;
|
||||||
|
|
||||||
|
static char32_t get_page_id(char32_t c) { return c >> 8; }
|
||||||
static language_class classify(char32_t codepage_id);
|
static language_class classify(char32_t codepage_id);
|
||||||
glyph_load_setup get_glyph_files(language_class class_) const;
|
glyph_load_setup get_glyph_files(language_class class_) const;
|
||||||
codepage* initialize_codepage(char32_t codepage_id);
|
codepage* initialize_codepage(char32_t c);
|
||||||
public:
|
public:
|
||||||
|
|
||||||
font(const char* ttf_name, f32 size);
|
font(std::string_view ttf_name, f32 size);
|
||||||
|
|
||||||
stbtt_aligned_quad get_char(char32_t c, f32& x_advance, f32& y_advance);
|
stbtt_aligned_quad get_char(char32_t c, f32& x_advance, f32& y_advance);
|
||||||
|
|
||||||
|
|
@ -79,7 +80,7 @@ namespace rsx
|
||||||
|
|
||||||
std::pair<f32, f32> get_char_offset(const char32_t* text, usz max_length, u16 max_width = -1, bool wrap = false);
|
std::pair<f32, f32> get_char_offset(const char32_t* text, usz max_length, u16 max_width = -1, bool wrap = false);
|
||||||
|
|
||||||
bool matches(const char* name, int size) const { return static_cast<int>(size_pt) == size && font_name == name; }
|
bool matches(std::string_view name, int size) const { return static_cast<int>(size_pt) == size && font_name == name; }
|
||||||
std::string_view get_name() const { return font_name; }
|
std::string_view get_name() const { return font_name; }
|
||||||
f32 get_size_pt() const { return size_pt; }
|
f32 get_size_pt() const { return size_pt; }
|
||||||
f32 get_size_px() const { return size_px; }
|
f32 get_size_px() const { return size_px; }
|
||||||
|
|
@ -97,7 +98,7 @@ namespace rsx
|
||||||
std::vector<std::unique_ptr<font>> fonts;
|
std::vector<std::unique_ptr<font>> fonts;
|
||||||
static fontmgr* m_instance;
|
static fontmgr* m_instance;
|
||||||
|
|
||||||
font* find(const char* name, int size)
|
font* find(std::string_view name, int size)
|
||||||
{
|
{
|
||||||
for (const auto& f : fonts)
|
for (const auto& f : fonts)
|
||||||
{
|
{
|
||||||
|
|
@ -121,7 +122,7 @@ namespace rsx
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static font* get(const char* name, int size)
|
static font* get(std::string_view name, int size)
|
||||||
{
|
{
|
||||||
if (m_instance == nullptr)
|
if (m_instance == nullptr)
|
||||||
m_instance = new fontmgr;
|
m_instance = new fontmgr;
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,9 @@ R"(
|
||||||
|
|
||||||
#define SSBO_LOCATION(x) (x + %loc)
|
#define SSBO_LOCATION(x) (x + %loc)
|
||||||
|
|
||||||
|
#define USE_8BIT_ADDRESSING %_8bit
|
||||||
|
#define USE_16BIT_ADDRESSING %_16bit
|
||||||
|
|
||||||
layout(local_size_x = %ws, local_size_y = 1, local_size_z = 1) in;
|
layout(local_size_x = %ws, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
|
||||||
layout(%set, binding=SSBO_LOCATION(0), std430) buffer ssbo0{ uint data_in[]; };
|
layout(%set, binding=SSBO_LOCATION(0), std430) buffer ssbo0{ uint data_in[]; };
|
||||||
|
|
@ -98,12 +101,71 @@ uint get_z_index(const in uint x_, const in uint y_, const in uint z_)
|
||||||
return offset;
|
return offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if USE_16BIT_ADDRESSING
|
||||||
|
|
||||||
|
void decode_16b(const in uint texel_id, in uint x, const in uint y, const in uint z)
|
||||||
|
{
|
||||||
|
const uint masks[] = { 0x0000FFFF, 0xFFFF0000 };
|
||||||
|
uint accumulator = 0;
|
||||||
|
|
||||||
|
const uint subword_count = min(invocation.size.x, 2);
|
||||||
|
for (uint subword = 0; subword < subword_count; ++subword, ++x)
|
||||||
|
{
|
||||||
|
uint src_texel_id = get_z_index(x, y, z);
|
||||||
|
uint src_id = (src_texel_id + invocation.data_offset);
|
||||||
|
accumulator |= data_in[src_id / 2] & masks[subword];
|
||||||
|
}
|
||||||
|
|
||||||
|
data_out[texel_id / 2] = %f(accumulator);
|
||||||
|
}
|
||||||
|
|
||||||
|
#elif USE_8BIT_ADDRESSING
|
||||||
|
|
||||||
|
void decode_8b(const in uint texel_id, in uint x, const in uint y, const in uint z)
|
||||||
|
{
|
||||||
|
const uint masks[] = { 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000 };
|
||||||
|
uint accumulator = 0;
|
||||||
|
|
||||||
|
const uint subword_count = min(invocation.size.x, 4);
|
||||||
|
for (uint subword = 0; subword < subword_count; ++subword, ++x)
|
||||||
|
{
|
||||||
|
uint src_texel_id = get_z_index(x, y, z);
|
||||||
|
uint src_id = (src_texel_id + invocation.data_offset);
|
||||||
|
accumulator |= data_in[src_id / 4] & masks[subword];
|
||||||
|
}
|
||||||
|
|
||||||
|
data_out[texel_id / 4] = accumulator;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
void decode_32b(const in uint texel_id, const in uint word_count, const in uint x, const in uint y, const in uint z)
|
||||||
|
{
|
||||||
|
uint src_texel_id = get_z_index(x, y, z);
|
||||||
|
uint dst_id = (texel_id * word_count);
|
||||||
|
uint src_id = (src_texel_id + invocation.data_offset) * word_count;
|
||||||
|
|
||||||
|
for (uint i = 0; i < word_count; ++i)
|
||||||
|
{
|
||||||
|
uint value = data_in[src_id++];
|
||||||
|
data_out[dst_id++] = %f(value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
void main()
|
void main()
|
||||||
{
|
{
|
||||||
uint invocations_x = (gl_NumWorkGroups.x * gl_WorkGroupSize.x);
|
uint invocations_x = (gl_NumWorkGroups.x * gl_WorkGroupSize.x);
|
||||||
uint texel_id = (gl_GlobalInvocationID.y * invocations_x) + gl_GlobalInvocationID.x;
|
uint texel_id = (gl_GlobalInvocationID.y * invocations_x) + gl_GlobalInvocationID.x;
|
||||||
uint word_count = %_wordcount;
|
uint word_count = %_wordcount;
|
||||||
|
|
||||||
|
#if USE_8BIT_ADDRESSING
|
||||||
|
texel_id *= 4; // Each invocation consumes 4 texels
|
||||||
|
#elif USE_16BIT_ADDRESSING
|
||||||
|
texel_id *= 2; // Each invocation consumes 2 texels
|
||||||
|
#endif
|
||||||
|
|
||||||
if (!init_invocation_properties(texel_id))
|
if (!init_invocation_properties(texel_id))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
|
@ -116,14 +178,13 @@ void main()
|
||||||
uint y = (slice_offset / row_length);
|
uint y = (slice_offset / row_length);
|
||||||
uint x = (slice_offset % row_length);
|
uint x = (slice_offset % row_length);
|
||||||
|
|
||||||
uint src_texel_id = get_z_index(x, y, z);
|
#if USE_8BIT_ADDRESSING
|
||||||
uint dst_id = (texel_id * word_count);
|
decode_8b(texel_id, x, y, z);
|
||||||
uint src_id = (src_texel_id + invocation.data_offset) * word_count;
|
#elif USE_16BIT_ADDRESSING
|
||||||
|
decode_16b(texel_id, x, y, z);
|
||||||
|
#else
|
||||||
|
decode_32b(texel_id, word_count, x, y, z);
|
||||||
|
#endif
|
||||||
|
|
||||||
for (uint i = 0; i < word_count; ++i)
|
|
||||||
{
|
|
||||||
uint value = data_in[src_id++];
|
|
||||||
data_out[dst_id++] = %f(value);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
)"
|
)"
|
||||||
|
|
|
||||||
|
|
@ -139,7 +139,7 @@ namespace rsx
|
||||||
u32 bytes_read = 0;
|
u32 bytes_read = 0;
|
||||||
|
|
||||||
// Find the next set bit after every iteration
|
// Find the next set bit after every iteration
|
||||||
for (int i = 0;; i = (std::countr_zero<u32>(utils::rol8(to_fetch, 0 - i - 1)) + i + 1) % 8)
|
for (int i = 0;; i = (std::countr_zero<u32>(std::rotl<u8>(to_fetch, 0 - i - 1)) + i + 1) % 8)
|
||||||
{
|
{
|
||||||
// If a reservation is being updated, try to load another
|
// If a reservation is being updated, try to load another
|
||||||
const auto& res = vm::reservation_acquire(addr1 + i * 128);
|
const auto& res = vm::reservation_acquire(addr1 + i * 128);
|
||||||
|
|
|
||||||
|
|
@ -2856,7 +2856,7 @@ namespace rsx
|
||||||
|
|
||||||
for (u32 ea = address >> 20, end = ea + (size >> 20); ea < end; ea++)
|
for (u32 ea = address >> 20, end = ea + (size >> 20); ea < end; ea++)
|
||||||
{
|
{
|
||||||
const u32 io = utils::rol32(iomap_table.io[ea], 32 - 20);
|
const u32 io = std::rotl<u32>(iomap_table.io[ea], 32 - 20);
|
||||||
|
|
||||||
if (io + 1)
|
if (io + 1)
|
||||||
{
|
{
|
||||||
|
|
@ -2886,7 +2886,7 @@ namespace rsx
|
||||||
|
|
||||||
while (to_unmap)
|
while (to_unmap)
|
||||||
{
|
{
|
||||||
bit = (std::countr_zero<u64>(utils::rol64(to_unmap, 0 - bit)) + bit);
|
bit = (std::countr_zero<u64>(std::rotl<u64>(to_unmap, 0 - bit)) + bit);
|
||||||
to_unmap &= ~(1ull << bit);
|
to_unmap &= ~(1ull << bit);
|
||||||
|
|
||||||
constexpr u16 null_entry = 0xFFFF;
|
constexpr u16 null_entry = 0xFFFF;
|
||||||
|
|
|
||||||
|
|
@ -111,12 +111,6 @@ namespace vk
|
||||||
m_shader.create(::glsl::program_domain::glsl_compute_program, m_src);
|
m_shader.create(::glsl::program_domain::glsl_compute_program, m_src);
|
||||||
auto handle = m_shader.compile();
|
auto handle = m_shader.compile();
|
||||||
|
|
||||||
VkPipelineShaderStageCreateInfo shader_stage{};
|
|
||||||
shader_stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
|
|
||||||
shader_stage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
|
|
||||||
shader_stage.module = handle;
|
|
||||||
shader_stage.pName = "main";
|
|
||||||
|
|
||||||
VkComputePipelineCreateInfo create_info
|
VkComputePipelineCreateInfo create_info
|
||||||
{
|
{
|
||||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||||
|
|
|
||||||
|
|
@ -403,8 +403,6 @@ namespace vk
|
||||||
|
|
||||||
cs_deswizzle_3d()
|
cs_deswizzle_3d()
|
||||||
{
|
{
|
||||||
ensure((sizeof(_BlockType) & 3) == 0); // "Unsupported block type"
|
|
||||||
|
|
||||||
ssbo_count = 2;
|
ssbo_count = 2;
|
||||||
use_push_constants = true;
|
use_push_constants = true;
|
||||||
push_constants_size = 28;
|
push_constants_size = 28;
|
||||||
|
|
@ -438,8 +436,10 @@ namespace vk
|
||||||
{ "%set", "set = 0" },
|
{ "%set", "set = 0" },
|
||||||
{ "%push_block", "push_constant" },
|
{ "%push_block", "push_constant" },
|
||||||
{ "%ws", std::to_string(optimal_group_size) },
|
{ "%ws", std::to_string(optimal_group_size) },
|
||||||
{ "%_wordcount", std::to_string(sizeof(_BlockType) / 4) },
|
{ "%_wordcount", std::to_string(std::max<u32>(sizeof(_BlockType) / 4u, 1u)) },
|
||||||
{ "%f", transform }
|
{ "%f", transform },
|
||||||
|
{ "%_8bit", sizeof(_BlockType) == 1 ? "1" : "0" },
|
||||||
|
{ "%_16bit", sizeof(_BlockType) == 2 ? "1" : "0" },
|
||||||
};
|
};
|
||||||
|
|
||||||
m_src = fmt::replace_all(m_src, syntax_replace);
|
m_src = fmt::replace_all(m_src, syntax_replace);
|
||||||
|
|
@ -475,9 +475,10 @@ namespace vk
|
||||||
params.logh = rsx::ceil_log2(height);
|
params.logh = rsx::ceil_log2(height);
|
||||||
params.logd = rsx::ceil_log2(depth);
|
params.logd = rsx::ceil_log2(depth);
|
||||||
|
|
||||||
const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size);
|
const u32 word_count_per_invocation = std::max<u32>(sizeof(_BlockType) / 4u, 1u);
|
||||||
const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation);
|
const u32 num_bytes_per_invocation = (word_count_per_invocation * 4u * optimal_group_size);
|
||||||
compute_task::run(cmd, linear_invocations);
|
const u32 workgroup_invocations = utils::aligned_div(data_length, num_bytes_per_invocation);
|
||||||
|
compute_task::run(cmd, workgroup_invocations);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -69,8 +69,9 @@ namespace vk
|
||||||
|
|
||||||
void on_query_pool_released(std::unique_ptr<vk::query_pool>& pool);
|
void on_query_pool_released(std::unique_ptr<vk::query_pool>& pool);
|
||||||
|
|
||||||
template<template<class> class _List>
|
template<typename T>
|
||||||
void free_queries(vk::command_buffer& cmd, _List<u32>& list)
|
requires std::ranges::range<T> && std::same_as<std::ranges::range_value_t<T>, u32> // List of u32
|
||||||
|
void free_queries(vk::command_buffer& cmd, T& list)
|
||||||
{
|
{
|
||||||
for (const auto index : list)
|
for (const auto index : list)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -759,6 +759,10 @@ namespace vk
|
||||||
{
|
{
|
||||||
switch (block_size)
|
switch (block_size)
|
||||||
{
|
{
|
||||||
|
case 1:
|
||||||
|
return vk::get_compute_task<cs_deswizzle_3d<u8, u8, false>>();
|
||||||
|
case 2:
|
||||||
|
return vk::get_compute_task<cs_deswizzle_3d<u16, WordType, SwapBytes>>();
|
||||||
case 4:
|
case 4:
|
||||||
return vk::get_compute_task<cs_deswizzle_3d<u32, WordType, SwapBytes>>();
|
return vk::get_compute_task<cs_deswizzle_3d<u32, WordType, SwapBytes>>();
|
||||||
case 8:
|
case 8:
|
||||||
|
|
@ -776,21 +780,27 @@ namespace vk
|
||||||
vk::cs_deswizzle_base* job = nullptr;
|
vk::cs_deswizzle_base* job = nullptr;
|
||||||
const auto block_size = (word_size * word_count);
|
const auto block_size = (word_size * word_count);
|
||||||
|
|
||||||
ensure(word_size == 4 || word_size == 2);
|
|
||||||
|
|
||||||
if (!swap_bytes)
|
if (!swap_bytes)
|
||||||
{
|
{
|
||||||
if (word_size == 4)
|
switch (word_size)
|
||||||
{
|
|
||||||
job = get_deswizzle_transformation<u32, false>(block_size);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
|
case 1:
|
||||||
|
job = get_deswizzle_transformation<u8, false>(block_size);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
job = get_deswizzle_transformation<u16, false>(block_size);
|
job = get_deswizzle_transformation<u16, false>(block_size);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
job = get_deswizzle_transformation<u32, false>(block_size);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fmt::throw_exception("Unimplemented deswizzle for format.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
ensure(word_size == 2 || word_size == 4);
|
||||||
|
|
||||||
if (word_size == 4)
|
if (word_size == 4)
|
||||||
{
|
{
|
||||||
job = get_deswizzle_transformation<u32, true>(block_size);
|
job = get_deswizzle_transformation<u32, true>(block_size);
|
||||||
|
|
|
||||||
|
|
@ -68,10 +68,6 @@ LOG_CHANNEL(sys_log, "SYS");
|
||||||
// Preallocate 32 MiB
|
// Preallocate 32 MiB
|
||||||
stx::manual_typemap<void, 0x20'00000, 128> g_fixed_typemap;
|
stx::manual_typemap<void, 0x20'00000, 128> g_fixed_typemap;
|
||||||
|
|
||||||
bool g_use_rtm = false;
|
|
||||||
u64 g_rtm_tx_limit1 = 0;
|
|
||||||
u64 g_rtm_tx_limit2 = 0;
|
|
||||||
|
|
||||||
std::string g_cfg_defaults;
|
std::string g_cfg_defaults;
|
||||||
|
|
||||||
atomic_t<u64> g_watchdog_hold_ctr{0};
|
atomic_t<u64> g_watchdog_hold_ctr{0};
|
||||||
|
|
@ -1540,9 +1536,6 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch,
|
||||||
m_localized_title = std::string(psf::get_string(_psf, fmt::format("TITLE_%02d", static_cast<s32>(g_cfg.sys.language.get())), m_title));
|
m_localized_title = std::string(psf::get_string(_psf, fmt::format("TITLE_%02d", static_cast<s32>(g_cfg.sys.language.get())), m_title));
|
||||||
sys_log.notice("Localized Title: %s", GetLocalizedTitle());
|
sys_log.notice("Localized Title: %s", GetLocalizedTitle());
|
||||||
|
|
||||||
// Set RTM usage
|
|
||||||
g_use_rtm = utils::has_rtm() && (((utils::has_mpx() && !utils::has_tsx_force_abort()) && g_cfg.core.enable_TSX == tsx_usage::enabled) || g_cfg.core.enable_TSX == tsx_usage::forced);
|
|
||||||
|
|
||||||
{
|
{
|
||||||
// Log some extra info in case of boot
|
// Log some extra info in case of boot
|
||||||
#if defined(HAVE_VULKAN)
|
#if defined(HAVE_VULKAN)
|
||||||
|
|
@ -1553,11 +1546,6 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch,
|
||||||
#endif
|
#endif
|
||||||
sys_log.notice("Used configuration:\n%s\n", g_cfg.to_string());
|
sys_log.notice("Used configuration:\n%s\n", g_cfg.to_string());
|
||||||
|
|
||||||
if (g_use_rtm && (!utils::has_mpx() || utils::has_tsx_force_abort()))
|
|
||||||
{
|
|
||||||
sys_log.warning("TSX forced by User");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialize patch engine
|
// Initialize patch engine
|
||||||
g_fxo->need<patch_engine>();
|
g_fxo->need<patch_engine>();
|
||||||
|
|
||||||
|
|
@ -1566,14 +1554,6 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch,
|
||||||
g_fxo->get<patch_engine>().append_title_patches(m_title_id);
|
g_fxo->get<patch_engine>().append_title_patches(m_title_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (g_use_rtm)
|
|
||||||
{
|
|
||||||
// Update supplementary settings
|
|
||||||
const f64 _1ns = utils::get_tsc_freq() / 1000'000'000.;
|
|
||||||
g_rtm_tx_limit1 = static_cast<u64>(g_cfg.core.tx_limit1_ns * _1ns);
|
|
||||||
g_rtm_tx_limit2 = static_cast<u64>(g_cfg.core.tx_limit2_ns * _1ns);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set bdvd_dir
|
// Set bdvd_dir
|
||||||
std::string bdvd_dir = g_cfg_vfs.get(g_cfg_vfs.dev_bdvd, rpcs3::utils::get_emu_dir());
|
std::string bdvd_dir = g_cfg_vfs.get(g_cfg_vfs.dev_bdvd, rpcs3::utils::get_emu_dir());
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -478,7 +478,3 @@ public:
|
||||||
};
|
};
|
||||||
|
|
||||||
extern Emulator Emu;
|
extern Emulator Emu;
|
||||||
|
|
||||||
extern bool g_use_rtm;
|
|
||||||
extern u64 g_rtm_tx_limit1;
|
|
||||||
extern u64 g_rtm_tx_limit2;
|
|
||||||
|
|
|
||||||
|
|
@ -8,11 +8,6 @@
|
||||||
cfg_root g_cfg{};
|
cfg_root g_cfg{};
|
||||||
cfg_root g_backup_cfg{};
|
cfg_root g_backup_cfg{};
|
||||||
|
|
||||||
bool cfg_root::node_core::enable_tsx_by_default()
|
|
||||||
{
|
|
||||||
return utils::has_rtm() && utils::has_mpx() && !utils::has_tsx_force_abort();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string cfg_root::node_sys::get_random_system_name()
|
std::string cfg_root::node_sys::get_random_system_name()
|
||||||
{
|
{
|
||||||
std::srand(static_cast<u32>(std::time(nullptr)));
|
std::srand(static_cast<u32>(std::time(nullptr)));
|
||||||
|
|
|
||||||
|
|
@ -12,11 +12,6 @@ struct cfg_root : cfg::node
|
||||||
{
|
{
|
||||||
struct node_core : cfg::node
|
struct node_core : cfg::node
|
||||||
{
|
{
|
||||||
private:
|
|
||||||
/** We don't wanna include the sysinfo header here */
|
|
||||||
static bool enable_tsx_by_default();
|
|
||||||
|
|
||||||
public:
|
|
||||||
node_core(cfg::node* _this) : cfg::node(_this, "Core") {}
|
node_core(cfg::node* _this) : cfg::node(_this, "Core") {}
|
||||||
|
|
||||||
cfg::_enum<ppu_decoder_type> ppu_decoder{ this, "PPU Decoder", ppu_decoder_type::llvm };
|
cfg::_enum<ppu_decoder_type> ppu_decoder{ this, "PPU Decoder", ppu_decoder_type::llvm };
|
||||||
|
|
@ -62,10 +57,10 @@ struct cfg_root : cfg::node
|
||||||
cfg::_bool spu_verification{ this, "SPU Verification", true }; // Should be enabled
|
cfg::_bool spu_verification{ this, "SPU Verification", true }; // Should be enabled
|
||||||
cfg::_bool spu_cache{ this, "SPU Cache", true };
|
cfg::_bool spu_cache{ this, "SPU Cache", true };
|
||||||
cfg::_bool spu_prof{ this, "SPU Profiler", false };
|
cfg::_bool spu_prof{ this, "SPU Profiler", false };
|
||||||
|
cfg::_bool ppu_prof{ this, "PPU Profiler", false };
|
||||||
cfg::uint<0, 16> mfc_transfers_shuffling{ this, "MFC Commands Shuffling Limit", 0 };
|
cfg::uint<0, 16> mfc_transfers_shuffling{ this, "MFC Commands Shuffling Limit", 0 };
|
||||||
cfg::uint<0, 10000> mfc_transfers_timeout{ this, "MFC Commands Timeout", 0, true };
|
cfg::uint<0, 10000> mfc_transfers_timeout{ this, "MFC Commands Timeout", 0, true };
|
||||||
cfg::_bool mfc_shuffling_in_steps{ this, "MFC Commands Shuffling In Steps", false, true };
|
cfg::_bool mfc_shuffling_in_steps{ this, "MFC Commands Shuffling In Steps", false, true };
|
||||||
cfg::_enum<tsx_usage> enable_TSX{ this, "Enable TSX", enable_tsx_by_default() ? tsx_usage::enabled : tsx_usage::disabled }; // Enable TSX. Forcing this on Haswell/Broadwell CPUs should be used carefully
|
|
||||||
cfg::_enum<xfloat_accuracy> spu_xfloat_accuracy{ this, "XFloat Accuracy", xfloat_accuracy::approximate, false };
|
cfg::_enum<xfloat_accuracy> spu_xfloat_accuracy{ this, "XFloat Accuracy", xfloat_accuracy::approximate, false };
|
||||||
cfg::_int<-1, 14> ppu_128_reservations_loop_max_length{ this, "Accurate PPU 128-byte Reservation Op Max Length", 0, true }; // -1: Always accurate, 0: Never accurate, 1-14: max accurate loop length
|
cfg::_int<-1, 14> ppu_128_reservations_loop_max_length{ this, "Accurate PPU 128-byte Reservation Op Max Length", 0, true }; // -1: Always accurate, 0: Never accurate, 1-14: max accurate loop length
|
||||||
cfg::_int<-64, 64> stub_ppu_traps{ this, "Stub PPU Traps", 0, true }; // Hack, skip PPU traps for rare cases where the trap is continueable (specify relative instructions to skip)
|
cfg::_int<-64, 64> stub_ppu_traps{ this, "Stub PPU Traps", 0, true }; // Hack, skip PPU traps for rare cases where the trap is continueable (specify relative instructions to skip)
|
||||||
|
|
@ -84,8 +79,6 @@ struct cfg_root : cfg::node
|
||||||
cfg::_bool hle_lwmutex{ this, "HLE lwmutex" }; // Force alternative lwmutex/lwcond implementation
|
cfg::_bool hle_lwmutex{ this, "HLE lwmutex" }; // Force alternative lwmutex/lwcond implementation
|
||||||
cfg::uint64 spu_llvm_lower_bound{ this, "SPU LLVM Lower Bound" };
|
cfg::uint64 spu_llvm_lower_bound{ this, "SPU LLVM Lower Bound" };
|
||||||
cfg::uint64 spu_llvm_upper_bound{ this, "SPU LLVM Upper Bound", 0xffffffffffffffff };
|
cfg::uint64 spu_llvm_upper_bound{ this, "SPU LLVM Upper Bound", 0xffffffffffffffff };
|
||||||
cfg::uint64 tx_limit1_ns{this, "TSX Transaction First Limit", 800}; // In nanoseconds
|
|
||||||
cfg::uint64 tx_limit2_ns{this, "TSX Transaction Second Limit", 2000}; // In nanoseconds
|
|
||||||
|
|
||||||
cfg::_int<10, 3000> clocks_scale{ this, "Clocks scale", 100 }; // Changing this from 100 (percentage) may affect game speed in unexpected ways
|
cfg::_int<10, 3000> clocks_scale{ this, "Clocks scale", 100 }; // Changing this from 100 (percentage) may affect game speed in unexpected ways
|
||||||
cfg::uint<0, 3000> spu_wakeup_delay{ this, "SPU Wake-Up Delay", 0, true };
|
cfg::uint<0, 3000> spu_wakeup_delay{ this, "SPU Wake-Up Delay", 0, true };
|
||||||
|
|
|
||||||
|
|
@ -196,22 +196,6 @@ void fmt_class_string<screen_quadrant>::format(std::string& out, u64 arg)
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
|
||||||
void fmt_class_string<tsx_usage>::format(std::string& out, u64 arg)
|
|
||||||
{
|
|
||||||
format_enum(out, arg, [](tsx_usage value)
|
|
||||||
{
|
|
||||||
switch (value)
|
|
||||||
{
|
|
||||||
case tsx_usage::disabled: return "Disabled";
|
|
||||||
case tsx_usage::enabled: return "Enabled";
|
|
||||||
case tsx_usage::forced: return "Forced";
|
|
||||||
}
|
|
||||||
|
|
||||||
return unknown;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
void fmt_class_string<rsx_fifo_mode>::format(std::string& out, u64 arg)
|
void fmt_class_string<rsx_fifo_mode>::format(std::string& out, u64 arg)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -248,13 +248,6 @@ enum class rsx_fifo_mode : unsigned
|
||||||
as_ps3,
|
as_ps3,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class tsx_usage
|
|
||||||
{
|
|
||||||
disabled,
|
|
||||||
enabled,
|
|
||||||
forced,
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class enter_button_assign
|
enum class enter_button_assign
|
||||||
{
|
{
|
||||||
circle, // CELL_SYSUTIL_ENTER_BUTTON_ASSIGN_CIRCLE
|
circle, // CELL_SYSUTIL_ENTER_BUTTON_ASSIGN_CIRCLE
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,9 @@
|
||||||
#include "elf_memory_dumping_dialog.h"
|
#include "elf_memory_dumping_dialog.h"
|
||||||
#include "Emu/Cell/SPUThread.h"
|
#include "gui_settings.h"
|
||||||
|
|
||||||
#include "qt_utils.h"
|
#include "qt_utils.h"
|
||||||
|
|
||||||
|
#include "Emu/Cell/SPUThread.h"
|
||||||
|
|
||||||
#include <QFileDialog>
|
#include <QFileDialog>
|
||||||
#include <QCoreApplication>
|
#include <QCoreApplication>
|
||||||
#include <QFontDatabase>
|
#include <QFontDatabase>
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "util/types.hpp"
|
#include "util/types.hpp"
|
||||||
#include "gui_settings.h"
|
|
||||||
|
|
||||||
#include <QListWidget>
|
#include <QListWidget>
|
||||||
#include <QLineEdit>
|
#include <QLineEdit>
|
||||||
|
|
@ -9,6 +8,8 @@
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
|
class gui_settings;
|
||||||
|
|
||||||
class elf_memory_dumping_dialog : public QDialog
|
class elf_memory_dumping_dialog : public QDialog
|
||||||
{
|
{
|
||||||
Q_OBJECT
|
Q_OBJECT
|
||||||
|
|
|
||||||
|
|
@ -988,14 +988,6 @@ QString emu_settings::GetLocalizedSetting(const QString& original, emu_settings_
|
||||||
case thread_scheduler_mode::os: return tr("Operating System", "Thread Scheduler Mode");
|
case thread_scheduler_mode::os: return tr("Operating System", "Thread Scheduler Mode");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case emu_settings_type::EnableTSX:
|
|
||||||
switch (static_cast<tsx_usage>(index))
|
|
||||||
{
|
|
||||||
case tsx_usage::disabled: return tr("Disabled", "Enable TSX");
|
|
||||||
case tsx_usage::enabled: return tr("Enabled", "Enable TSX");
|
|
||||||
case tsx_usage::forced: return tr("Forced", "Enable TSX");
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case emu_settings_type::Renderer:
|
case emu_settings_type::Renderer:
|
||||||
switch (static_cast<video_renderer>(index))
|
switch (static_cast<video_renderer>(index))
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,6 @@ enum class emu_settings_type
|
||||||
MFCDebug,
|
MFCDebug,
|
||||||
MaxLLVMThreads,
|
MaxLLVMThreads,
|
||||||
LLVMPrecompilation,
|
LLVMPrecompilation,
|
||||||
EnableTSX,
|
|
||||||
AccurateSpuDMA,
|
AccurateSpuDMA,
|
||||||
AccurateClineStores,
|
AccurateClineStores,
|
||||||
AccurateRSXAccess,
|
AccurateRSXAccess,
|
||||||
|
|
@ -233,7 +232,6 @@ inline static const std::map<emu_settings_type, cfg_location> settings_location
|
||||||
{ emu_settings_type::MFCDebug, { "Core", "MFC Debug"}},
|
{ emu_settings_type::MFCDebug, { "Core", "MFC Debug"}},
|
||||||
{ emu_settings_type::MaxLLVMThreads, { "Core", "Max LLVM Compile Threads"}},
|
{ emu_settings_type::MaxLLVMThreads, { "Core", "Max LLVM Compile Threads"}},
|
||||||
{ emu_settings_type::LLVMPrecompilation, { "Core", "LLVM Precompilation"}},
|
{ emu_settings_type::LLVMPrecompilation, { "Core", "LLVM Precompilation"}},
|
||||||
{ emu_settings_type::EnableTSX, { "Core", "Enable TSX"}},
|
|
||||||
{ emu_settings_type::AccurateSpuDMA, { "Core", "Accurate SPU DMA"}},
|
{ emu_settings_type::AccurateSpuDMA, { "Core", "Accurate SPU DMA"}},
|
||||||
{ emu_settings_type::AccurateClineStores, { "Core", "Accurate Cache Line Stores"}},
|
{ emu_settings_type::AccurateClineStores, { "Core", "Accurate Cache Line Stores"}},
|
||||||
{ emu_settings_type::AccurateRSXAccess, { "Core", "Accurate RSX reservation access"}},
|
{ emu_settings_type::AccurateRSXAccess, { "Core", "Accurate RSX reservation access"}},
|
||||||
|
|
|
||||||
|
|
@ -35,37 +35,43 @@ void gl_gs_frame::reset()
|
||||||
|
|
||||||
draw_context_t gl_gs_frame::make_context()
|
draw_context_t gl_gs_frame::make_context()
|
||||||
{
|
{
|
||||||
|
// This whole function should run in the main GUI thread.
|
||||||
|
// This really matters on Windows where a lot of wgl internals are stashed in the TEB.
|
||||||
|
|
||||||
auto context = new GLContext();
|
auto context = new GLContext();
|
||||||
context->handle = new QOpenGLContext();
|
context->handle = new QOpenGLContext();
|
||||||
|
bool success = true;
|
||||||
|
|
||||||
if (m_primary_context)
|
Emu.BlockingCallFromMainThread([&]()
|
||||||
{
|
{
|
||||||
QOffscreenSurface* surface = nullptr;
|
if (m_primary_context)
|
||||||
|
|
||||||
// Workaround for the Qt warning: "Attempting to create QWindow-based QOffscreenSurface outside the gui thread. Expect failures."
|
|
||||||
Emu.BlockingCallFromMainThread([&]()
|
|
||||||
{
|
{
|
||||||
surface = new QOffscreenSurface();
|
QOffscreenSurface* surface = new QOffscreenSurface();
|
||||||
surface->setFormat(m_format);
|
surface->setFormat(m_format);
|
||||||
surface->create();
|
surface->create();
|
||||||
});
|
|
||||||
|
|
||||||
// Share resources with the first created context
|
// Share resources with the first created context
|
||||||
context->handle->setShareContext(m_primary_context->handle);
|
context->handle->setShareContext(m_primary_context->handle);
|
||||||
context->surface = surface;
|
context->surface = surface;
|
||||||
context->owner = true;
|
context->owner = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// This is the first created context, all others will share resources with this one
|
// This is the first created context, all others will share resources with this one
|
||||||
m_primary_context = context;
|
m_primary_context = context;
|
||||||
context->surface = this;
|
context->surface = this;
|
||||||
context->owner = false;
|
context->owner = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
context->handle->setFormat(m_format);
|
context->handle->setFormat(m_format);
|
||||||
|
|
||||||
if (!context->handle->create())
|
if (!context->handle->create())
|
||||||
|
{
|
||||||
|
success = false;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!success)
|
||||||
{
|
{
|
||||||
fmt::throw_exception("Failed to create OpenGL context");
|
fmt::throw_exception("Failed to create OpenGL context");
|
||||||
}
|
}
|
||||||
|
|
@ -110,8 +116,8 @@ void gl_gs_frame::delete_context(draw_context_t ctx)
|
||||||
gl_ctx->handle->doneCurrent();
|
gl_ctx->handle->doneCurrent();
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
//AMD driver crashes when executing wglDeleteContext
|
// AMD driver crashes when executing wglDeleteContext, probably because the current thread does not own the context.
|
||||||
//Catch with SEH
|
// Catch with SEH
|
||||||
__try
|
__try
|
||||||
{
|
{
|
||||||
delete gl_ctx->handle;
|
delete gl_ctx->handle;
|
||||||
|
|
|
||||||
|
|
@ -651,15 +651,29 @@ void kernel_explorer::update()
|
||||||
return fmt::format(" (%.1fs)", wait_time);
|
return fmt::format(" (%.1fs)", wait_time);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
std::vector<std::pair<s32, std::string>> ppu_threads;
|
||||||
|
|
||||||
idm::select<named_thread<ppu_thread>>([&](u32 id, ppu_thread& ppu)
|
idm::select<named_thread<ppu_thread>>([&](u32 id, ppu_thread& ppu)
|
||||||
{
|
{
|
||||||
const auto func = ppu.last_function;
|
const auto func = ppu.last_function;
|
||||||
const ppu_thread_status status = lv2_obj::ppu_state(&ppu, false, false).first;
|
const ppu_thread_status status = lv2_obj::ppu_state(&ppu, false, false).first;
|
||||||
|
|
||||||
add_leaf(find_node(root, additional_nodes::ppu_threads), QString::fromStdString(fmt::format(u8"PPU 0x%07x: “%s”, PRIO: %d, Joiner: %s, Status: %s, State: %s, %s func: “%s”%s", id, *ppu.ppu_tname.load(), ppu.prio.load().prio, ppu.joiner.load(), status, ppu.state.load()
|
const s32 prio = ppu.prio.load().prio;
|
||||||
, ppu.ack_suspend ? "After" : (ppu.current_function ? "In" : "Last"), func ? func : "", get_wait_time_str(ppu.start_time))));
|
std::string prio_text = fmt::format("%4d", prio);
|
||||||
|
prio_text = fmt::replace_all(prio_text, " ", " ");
|
||||||
|
|
||||||
|
ppu_threads.emplace_back(prio, fmt::format(u8"PPU 0x%07x: PRIO: %s, “%s”Joiner: %s, Status: %s, State: %s, %s func: “%s”%s", id, prio_text, *ppu.ppu_tname.load(), ppu.joiner.load(), status, ppu.state.load()
|
||||||
|
, ppu.ack_suspend ? "After" : (ppu.current_function ? "In" : "Last"), func ? func : "", get_wait_time_str(ppu.start_time)));
|
||||||
}, idm::unlocked);
|
}, idm::unlocked);
|
||||||
|
|
||||||
|
// Sort by priority
|
||||||
|
std::stable_sort(ppu_threads.begin(), ppu_threads.end(), FN(x.first < y.first));
|
||||||
|
|
||||||
|
for (const auto& [prio, text] : ppu_threads)
|
||||||
|
{
|
||||||
|
add_leaf(find_node(root, additional_nodes::ppu_threads), QString::fromStdString(text));
|
||||||
|
}
|
||||||
|
|
||||||
lock_idm_lv2.reset();
|
lock_idm_lv2.reset();
|
||||||
|
|
||||||
idm::select<named_thread<spu_thread>>([&](u32 /*id*/, spu_thread& spu)
|
idm::select<named_thread<spu_thread>>([&](u32 /*id*/, spu_thread& spu)
|
||||||
|
|
|
||||||
|
|
@ -71,7 +71,14 @@ bool qt_camera_video_sink::present(const QVideoFrame& frame)
|
||||||
// Flip image if necessary
|
// Flip image if necessary
|
||||||
if (flip_horizontally || flip_vertically)
|
if (flip_horizontally || flip_vertically)
|
||||||
{
|
{
|
||||||
|
#if QT_VERSION >= QT_VERSION_CHECK(6, 9, 0)
|
||||||
|
Qt::Orientations orientation {};
|
||||||
|
orientation.setFlag(Qt::Orientation::Horizontal, flip_horizontally);
|
||||||
|
orientation.setFlag(Qt::Orientation::Vertical, flip_vertically);
|
||||||
|
image.flip(orientation);
|
||||||
|
#else
|
||||||
image.mirror(flip_horizontally, flip_vertically);
|
image.mirror(flip_horizontally, flip_vertically);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
if (image.format() != QImage::Format_RGBA8888)
|
if (image.format() != QImage::Format_RGBA8888)
|
||||||
|
|
|
||||||
|
|
@ -1252,8 +1252,8 @@ void rsx_debugger::GetVertexProgram() const
|
||||||
rsx::method_registers.clip_planes_mask()
|
rsx::method_registers.clip_planes_mask()
|
||||||
};
|
};
|
||||||
|
|
||||||
vp_blob.resize(vp_blob.size() + vp.data.size());
|
vp_blob.reserve(vp_blob.size() + vp.data.size());
|
||||||
std::copy(vp.data.begin(), vp.data.end(), vp_blob.begin() + 14);
|
vp_blob.insert(vp_blob.end(), vp.data.begin(), vp.data.end());
|
||||||
|
|
||||||
std::span<u32> vp_binary(vp_blob);
|
std::span<u32> vp_binary(vp_blob);
|
||||||
CgBinaryDisasm vp_disasm(vp_binary);
|
CgBinaryDisasm vp_disasm(vp_binary);
|
||||||
|
|
|
||||||
|
|
@ -288,79 +288,6 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
|
||||||
SubscribeTooltip(ui->gb_spu_threads, tooltips.settings.preferred_spu_threads);
|
SubscribeTooltip(ui->gb_spu_threads, tooltips.settings.preferred_spu_threads);
|
||||||
ui->preferredSPUThreads->setItemText(ui->preferredSPUThreads->findData(0), tr("Auto", "Preferred SPU threads"));
|
ui->preferredSPUThreads->setItemText(ui->preferredSPUThreads->findData(0), tr("Auto", "Preferred SPU threads"));
|
||||||
|
|
||||||
if (utils::has_rtm())
|
|
||||||
{
|
|
||||||
m_emu_settings->EnhanceComboBox(ui->enableTSX, emu_settings_type::EnableTSX);
|
|
||||||
SubscribeTooltip(ui->gb_tsx, tooltips.settings.enable_tsx);
|
|
||||||
|
|
||||||
if (!utils::has_mpx() || utils::has_tsx_force_abort())
|
|
||||||
{
|
|
||||||
remove_item(ui->enableTSX, static_cast<int>(tsx_usage::enabled), static_cast<int>(g_cfg.core.enable_TSX.def));
|
|
||||||
}
|
|
||||||
|
|
||||||
connect(ui->enableTSX, QOverload<int>::of(&QComboBox::currentIndexChanged), this, [this](int index)
|
|
||||||
{
|
|
||||||
if (index < 0) return;
|
|
||||||
if (const auto [text, value] = get_data(ui->enableTSX, index); value == static_cast<int>(tsx_usage::forced) &&
|
|
||||||
(!utils::has_mpx() || utils::has_tsx_force_abort()))
|
|
||||||
{
|
|
||||||
QString title;
|
|
||||||
QString message;
|
|
||||||
if (!utils::has_mpx())
|
|
||||||
{
|
|
||||||
title = tr("Haswell/Broadwell TSX Warning");
|
|
||||||
message = gui::utils::make_paragraph(tr(
|
|
||||||
"RPCS3 has detected that you are using TSX functions on a Haswell or Broadwell CPU.\n"
|
|
||||||
"Intel has deactivated these functions in newer Microcode revisions, since they can lead to unpredicted behaviour.\n"
|
|
||||||
"That means using TSX may break games or even <font color=\"red\"><b>damage</b></font> your data.\n"
|
|
||||||
"We recommend to disable this feature and update your computer BIOS.\n"
|
|
||||||
"\n"
|
|
||||||
"Do you wish to use TSX anyway?"
|
|
||||||
));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
title = tr("TSX-FA Warning");
|
|
||||||
message = gui::utils::make_paragraph(tr(
|
|
||||||
"RPCS3 has detected your CPU only supports TSX-FA.\n"
|
|
||||||
"That means using TSX may break games or even <font color=\"red\"><b>damage</b></font> your data.\n"
|
|
||||||
"We recommend to disable this feature.\n"
|
|
||||||
"\n"
|
|
||||||
"Do you wish to use TSX anyway?"
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
QMessageBox mb;
|
|
||||||
mb.setWindowModality(Qt::WindowModal);
|
|
||||||
mb.setWindowTitle(title);
|
|
||||||
mb.setIcon(QMessageBox::Critical);
|
|
||||||
mb.setTextFormat(Qt::RichText);
|
|
||||||
mb.setStandardButtons(QMessageBox::Yes | QMessageBox::No);
|
|
||||||
mb.setDefaultButton(QMessageBox::No);
|
|
||||||
mb.setText(message);
|
|
||||||
mb.layout()->setSizeConstraint(QLayout::SetFixedSize);
|
|
||||||
|
|
||||||
if (mb.exec() == QMessageBox::No)
|
|
||||||
{
|
|
||||||
// Reset if the messagebox was answered with no. This prevents the currentIndexChanged signal in EnhanceComboBox
|
|
||||||
ui->enableTSX->setCurrentIndex(find_item(ui->enableTSX, static_cast<int>(g_cfg.core.enable_TSX.def)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
ui->enableTSX->setEnabled(false);
|
|
||||||
ui->enableTSX->setPlaceholderText(tr("Not supported", "Enable TSX"));
|
|
||||||
SubscribeTooltip(ui->enableTSX, tr("Unfortunately, your CPU model does not support this instruction set.", "Enable TSX"));
|
|
||||||
|
|
||||||
m_emu_settings->SetSetting(emu_settings_type::EnableTSX, fmt::format("%s", tsx_usage::disabled));
|
|
||||||
connect(this, &settings_dialog::signal_restore_dependant_defaults, [this]()
|
|
||||||
{
|
|
||||||
m_emu_settings->SetSetting(emu_settings_type::EnableTSX, fmt::format("%s", tsx_usage::disabled));
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// PPU tool tips
|
// PPU tool tips
|
||||||
SubscribeTooltip(ui->ppu__static, tooltips.settings.ppu__static);
|
SubscribeTooltip(ui->ppu__static, tooltips.settings.ppu__static);
|
||||||
SubscribeTooltip(ui->ppu_llvm, tooltips.settings.ppu_llvm);
|
SubscribeTooltip(ui->ppu_llvm, tooltips.settings.ppu_llvm);
|
||||||
|
|
|
||||||
|
|
@ -266,24 +266,6 @@
|
||||||
</item>
|
</item>
|
||||||
<item>
|
<item>
|
||||||
<layout class="QVBoxLayout" name="coreTabRightLayout" stretch="0,0,0,0">
|
<layout class="QVBoxLayout" name="coreTabRightLayout" stretch="0,0,0,0">
|
||||||
<item>
|
|
||||||
<widget class="QGroupBox" name="gb_tsx">
|
|
||||||
<property name="sizePolicy">
|
|
||||||
<sizepolicy hsizetype="Preferred" vsizetype="Minimum">
|
|
||||||
<horstretch>0</horstretch>
|
|
||||||
<verstretch>0</verstretch>
|
|
||||||
</sizepolicy>
|
|
||||||
</property>
|
|
||||||
<property name="title">
|
|
||||||
<string>TSX Instructions</string>
|
|
||||||
</property>
|
|
||||||
<layout class="QVBoxLayout" name="gb_tsx_layout">
|
|
||||||
<item>
|
|
||||||
<widget class="QComboBox" name="enableTSX"/>
|
|
||||||
</item>
|
|
||||||
</layout>
|
|
||||||
</widget>
|
|
||||||
</item>
|
|
||||||
<item>
|
<item>
|
||||||
<widget class="QGroupBox" name="gb_spu_threads">
|
<widget class="QGroupBox" name="gb_spu_threads">
|
||||||
<property name="sizePolicy">
|
<property name="sizePolicy">
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
#include "shortcut_dialog.h"
|
#include "shortcut_dialog.h"
|
||||||
#include "ui_shortcut_dialog.h"
|
#include "ui_shortcut_dialog.h"
|
||||||
#include "shortcut_settings.h"
|
#include "shortcut_settings.h"
|
||||||
|
#include "gui_settings.h"
|
||||||
|
|
||||||
#include <QDialogButtonBox>
|
#include <QDialogButtonBox>
|
||||||
#include <QPushButton>
|
#include <QPushButton>
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "gui_settings.h"
|
|
||||||
|
|
||||||
#include <QDialog>
|
#include <QDialog>
|
||||||
|
|
||||||
|
class gui_settings;
|
||||||
|
|
||||||
namespace Ui
|
namespace Ui
|
||||||
{
|
{
|
||||||
class shortcut_dialog;
|
class shortcut_dialog;
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
#include "stdafx.h"
|
#include "stdafx.h"
|
||||||
#include "shortcut_handler.h"
|
#include "shortcut_handler.h"
|
||||||
|
#include "gui_settings.h"
|
||||||
|
|
||||||
LOG_CHANNEL(shortcut_log, "Shortcuts");
|
LOG_CHANNEL(shortcut_log, "Shortcuts");
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "gui_settings.h"
|
|
||||||
#include "shortcut_settings.h"
|
#include "shortcut_settings.h"
|
||||||
|
|
||||||
#include <QShortcut>
|
#include <QShortcut>
|
||||||
|
|
@ -8,6 +7,8 @@
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
|
|
||||||
|
class gui_settings;
|
||||||
|
|
||||||
class shortcut_handler : public QObject
|
class shortcut_handler : public QObject
|
||||||
{
|
{
|
||||||
Q_OBJECT
|
Q_OBJECT
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
#include "shortcut_settings.h"
|
#include "shortcut_settings.h"
|
||||||
|
#include "gui_settings.h"
|
||||||
|
|
||||||
using namespace gui::shortcuts;
|
using namespace gui::shortcuts;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,11 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "gui_settings.h"
|
#include "gui_save.h"
|
||||||
|
|
||||||
#include <QKeySequence>
|
#include <QKeySequence>
|
||||||
|
|
||||||
|
class gui_settings;
|
||||||
|
|
||||||
namespace gui
|
namespace gui
|
||||||
{
|
{
|
||||||
namespace shortcuts
|
namespace shortcuts
|
||||||
|
|
|
||||||
|
|
@ -91,7 +91,6 @@ public:
|
||||||
const QString xfloat = tr("Control accuracy to SPU float vectors processing.\nFixes bugs in various games at the cost of performance.\nThis setting is only applied when SPU Decoder is set to Dynamic or LLVM.");
|
const QString xfloat = tr("Control accuracy to SPU float vectors processing.\nFixes bugs in various games at the cost of performance.\nThis setting is only applied when SPU Decoder is set to Dynamic or LLVM.");
|
||||||
const QString enable_thread_scheduler = tr("Control how RPCS3 utilizes the threads of your system.\nEach option heavily depends on the game and on your CPU. It's recommended to try each option to find out which performs the best.\nChanging the thread scheduler is not supported on CPUs with less than 12 threads.");
|
const QString enable_thread_scheduler = tr("Control how RPCS3 utilizes the threads of your system.\nEach option heavily depends on the game and on your CPU. It's recommended to try each option to find out which performs the best.\nChanging the thread scheduler is not supported on CPUs with less than 12 threads.");
|
||||||
const QString spu_loop_detection = tr("Try to detect loop conditions in SPU kernels and use them as scheduling hints.\nImproves performance and reduces CPU usage.\nMay cause severe audio stuttering in rare cases.");
|
const QString spu_loop_detection = tr("Try to detect loop conditions in SPU kernels and use them as scheduling hints.\nImproves performance and reduces CPU usage.\nMay cause severe audio stuttering in rare cases.");
|
||||||
const QString enable_tsx = tr("Enable usage of TSX instructions.\nNeeds to be forced on some Haswell or Broadwell CPUs or CPUs with the TSX-FA instruction set.\nForcing TSX in these cases may lead to system and performance instability, use it with caution.");
|
|
||||||
const QString spu_block_size = tr("This option controls the SPU analyser, particularly the size of compiled units. The Mega and Giga modes may improve performance by tying smaller units together, decreasing the number of compiled units but increasing their size.\nUse the Safe mode for maximum compatibility.");
|
const QString spu_block_size = tr("This option controls the SPU analyser, particularly the size of compiled units. The Mega and Giga modes may improve performance by tying smaller units together, decreasing the number of compiled units but increasing their size.\nUse the Safe mode for maximum compatibility.");
|
||||||
const QString preferred_spu_threads = tr("Some SPU stages are sensitive to race conditions and allowing a limited number at a time helps alleviate performance stalls.\nSetting this to a smaller value might improve performance and reduce stuttering in some games.\nLeave this on auto if performance is negatively affected when setting a small value.");
|
const QString preferred_spu_threads = tr("Some SPU stages are sensitive to race conditions and allowing a limited number at a time helps alleviate performance stalls.\nSetting this to a smaller value might improve performance and reduce stuttering in some games.\nLeave this on auto if performance is negatively affected when setting a small value.");
|
||||||
const QString max_cpu_preempt = tr("Reduces CPU usage and power consumption, improving battery life on mobile devices. (0 means disabled)\nHigher values cause a more pronounced effect, but may cause audio or performance issues. A value of 50 or less is recommended.\nThis option forces an FPS limit because it's active when framerate is stable.\nThe lighter the game is on the hardware, the more power is saved by it. (until the preemption count barrier is reached)");
|
const QString max_cpu_preempt = tr("Reduces CPU usage and power consumption, improving battery life on mobile devices. (0 means disabled)\nHigher values cause a more pronounced effect, but may cause audio or performance issues. A value of 50 or less is recommended.\nThis option forces an FPS limit because it's active when framerate is stable.\nThe lighter the game is on the hardware, the more power is saved by it. (until the preemption count barrier is reached)");
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
#include "vfs_dialog_path_widget.h"
|
#include "vfs_dialog_path_widget.h"
|
||||||
|
#include "gui_settings.h"
|
||||||
|
|
||||||
#include <QFileDialog>
|
#include <QFileDialog>
|
||||||
#include <QCoreApplication>
|
#include <QCoreApplication>
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,14 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "gui_settings.h"
|
#include "gui_save.h"
|
||||||
|
|
||||||
#include <QListWidget>
|
#include <QListWidget>
|
||||||
#include <QLabel>
|
#include <QLabel>
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
|
class gui_settings;
|
||||||
|
|
||||||
namespace cfg
|
namespace cfg
|
||||||
{
|
{
|
||||||
class string;
|
class string;
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
#include "vfs_dialog_tab.h"
|
#include "vfs_dialog_tab.h"
|
||||||
|
#include "gui_settings.h"
|
||||||
#include "Utilities/Config.h"
|
#include "Utilities/Config.h"
|
||||||
|
|
||||||
vfs_dialog_tab::vfs_dialog_tab(const QString& name, gui_save list_location, cfg::string* cfg_node, std::shared_ptr<gui_settings> _gui_settings, QWidget* parent)
|
vfs_dialog_tab::vfs_dialog_tab(const QString& name, gui_save list_location, cfg::string* cfg_node, std::shared_ptr<gui_settings> _gui_settings, QWidget* parent)
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
#include "vfs_dialog_usb_tab.h"
|
#include "vfs_dialog_usb_tab.h"
|
||||||
#include "vfs_dialog_usb_input.h"
|
#include "vfs_dialog_usb_input.h"
|
||||||
#include "table_item_delegate.h"
|
#include "table_item_delegate.h"
|
||||||
|
#include "gui_settings.h"
|
||||||
#include "Utilities/Config.h"
|
#include "Utilities/Config.h"
|
||||||
|
|
||||||
#include <QVBoxLayout>
|
#include <QVBoxLayout>
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,12 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "gui_settings.h"
|
|
||||||
|
|
||||||
#include <QTableWidget>
|
#include <QTableWidget>
|
||||||
#include <QLabel>
|
#include <QLabel>
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
|
class gui_settings;
|
||||||
|
|
||||||
namespace cfg
|
namespace cfg
|
||||||
{
|
{
|
||||||
class device_entry;
|
class device_entry;
|
||||||
|
|
|
||||||
|
|
@ -3,44 +3,47 @@
|
||||||
#include "util/types.hpp"
|
#include "util/types.hpp"
|
||||||
#include "util/pair.hpp"
|
#include "util/pair.hpp"
|
||||||
|
|
||||||
struct some_struct
|
namespace utils
|
||||||
{
|
{
|
||||||
u64 v {};
|
struct some_struct
|
||||||
char s[12] = "Hello World";
|
|
||||||
|
|
||||||
bool operator == (const some_struct& r) const
|
|
||||||
{
|
{
|
||||||
return v == r.v && std::memcmp(s, r.s, sizeof(s)) == 0;
|
u64 v{};
|
||||||
|
char s[12] = "Hello World";
|
||||||
|
|
||||||
|
bool operator == (const some_struct& r) const
|
||||||
|
{
|
||||||
|
return v == r.v && std::memcmp(s, r.s, sizeof(s)) == 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST(Pair, General)
|
||||||
|
{
|
||||||
|
some_struct s{};
|
||||||
|
s.v = 1234;
|
||||||
|
|
||||||
|
utils::pair<int, some_struct> p;
|
||||||
|
EXPECT_EQ(sizeof(p), 32);
|
||||||
|
EXPECT_EQ(p.first, 0);
|
||||||
|
EXPECT_EQ(p.second, some_struct{});
|
||||||
|
|
||||||
|
p = { 666, s };
|
||||||
|
EXPECT_EQ(p.first, 666);
|
||||||
|
EXPECT_EQ(p.second, s);
|
||||||
|
|
||||||
|
const utils::pair<int, some_struct> p1 = p;
|
||||||
|
EXPECT_EQ(p.first, 666);
|
||||||
|
EXPECT_EQ(p.second, s);
|
||||||
|
EXPECT_EQ(p1.first, 666);
|
||||||
|
EXPECT_EQ(p1.second, s);
|
||||||
|
|
||||||
|
utils::pair<int, some_struct> p2 = p1;
|
||||||
|
EXPECT_EQ(p1.first, 666);
|
||||||
|
EXPECT_EQ(p1.second, s);
|
||||||
|
EXPECT_EQ(p2.first, 666);
|
||||||
|
EXPECT_EQ(p2.second, s);
|
||||||
|
|
||||||
|
utils::pair<int, some_struct> p3 = std::move(p);
|
||||||
|
EXPECT_EQ(p3.first, 666);
|
||||||
|
EXPECT_EQ(p3.second, s);
|
||||||
}
|
}
|
||||||
};
|
|
||||||
|
|
||||||
TEST(Utils, Pair)
|
|
||||||
{
|
|
||||||
some_struct s {};
|
|
||||||
s.v = 1234;
|
|
||||||
|
|
||||||
utils::pair<int, some_struct> p;
|
|
||||||
EXPECT_EQ(sizeof(p), 32);
|
|
||||||
EXPECT_EQ(p.first, 0);
|
|
||||||
EXPECT_EQ(p.second, some_struct{});
|
|
||||||
|
|
||||||
p = { 666, s };
|
|
||||||
EXPECT_EQ(p.first, 666);
|
|
||||||
EXPECT_EQ(p.second, s);
|
|
||||||
|
|
||||||
const utils::pair<int, some_struct> p1 = p;
|
|
||||||
EXPECT_EQ(p.first, 666);
|
|
||||||
EXPECT_EQ(p.second, s);
|
|
||||||
EXPECT_EQ(p1.first, 666);
|
|
||||||
EXPECT_EQ(p1.second, s);
|
|
||||||
|
|
||||||
utils::pair<int, some_struct> p2 = p1;
|
|
||||||
EXPECT_EQ(p1.first, 666);
|
|
||||||
EXPECT_EQ(p1.second, s);
|
|
||||||
EXPECT_EQ(p2.first, 666);
|
|
||||||
EXPECT_EQ(p2.second, s);
|
|
||||||
|
|
||||||
utils::pair<int, some_struct> p3 = std::move(p);
|
|
||||||
EXPECT_EQ(p3.first, 666);
|
|
||||||
EXPECT_EQ(p3.second, s);
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -267,4 +267,40 @@ namespace rsx
|
||||||
EXPECT_EQ(std::memcmp(arr[i].second.s, "Hello World", sizeof(arr[i].second.s)), 0);
|
EXPECT_EQ(std::memcmp(arr[i].second.s, "Hello World", sizeof(arr[i].second.s)), 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(SimpleArray, DataAlignment_SmallVector)
|
||||||
|
{
|
||||||
|
struct alignas(16) some_struct {
|
||||||
|
char data[16];
|
||||||
|
};
|
||||||
|
|
||||||
|
rsx::simple_array<some_struct> arr(2);
|
||||||
|
const auto data_ptr = reinterpret_cast<uintptr_t>(arr.data());
|
||||||
|
|
||||||
|
EXPECT_EQ(data_ptr & 15, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(SimpleArray, DataAlignment_HeapAlloc)
|
||||||
|
{
|
||||||
|
struct alignas(16) some_struct {
|
||||||
|
char data[16];
|
||||||
|
};
|
||||||
|
|
||||||
|
rsx::simple_array<some_struct> arr(128);
|
||||||
|
const auto data_ptr = reinterpret_cast<uintptr_t>(arr.data());
|
||||||
|
|
||||||
|
EXPECT_EQ(data_ptr & 15, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(SimpleArray, DataAlignment_Overrides)
|
||||||
|
{
|
||||||
|
rsx::simple_array<std::byte, 16> arr(4);
|
||||||
|
rsx::simple_array<std::byte, 128> arr2(4);
|
||||||
|
|
||||||
|
const auto data_ptr1 = reinterpret_cast<uintptr_t>(arr.data());
|
||||||
|
const auto data_ptr2 = reinterpret_cast<uintptr_t>(arr2.data());
|
||||||
|
|
||||||
|
EXPECT_EQ(data_ptr1 & 15, 0);
|
||||||
|
EXPECT_EQ(data_ptr2 & 127, 0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,113 +2,116 @@
|
||||||
|
|
||||||
#include "util/tuple.hpp"
|
#include "util/tuple.hpp"
|
||||||
|
|
||||||
struct some_struct
|
namespace utils
|
||||||
{
|
{
|
||||||
u64 v {};
|
struct some_struct
|
||||||
char s[12] = "Hello World";
|
|
||||||
|
|
||||||
bool operator == (const some_struct& r) const
|
|
||||||
{
|
{
|
||||||
return v == r.v && std::memcmp(s, r.s, sizeof(s)) == 0;
|
u64 v{};
|
||||||
|
char s[12] = "Hello World";
|
||||||
|
|
||||||
|
bool operator == (const some_struct& r) const
|
||||||
|
{
|
||||||
|
return v == r.v && std::memcmp(s, r.s, sizeof(s)) == 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST(Tuple, General)
|
||||||
|
{
|
||||||
|
some_struct s{};
|
||||||
|
s.v = 1234;
|
||||||
|
|
||||||
|
utils::tuple t0 = {};
|
||||||
|
EXPECT_EQ(t0.size(), 0);
|
||||||
|
|
||||||
|
utils::tuple<int> t;
|
||||||
|
EXPECT_EQ(sizeof(t), sizeof(int));
|
||||||
|
EXPECT_TRUE((std::is_same_v<decltype(t.get<0>()), int&>));
|
||||||
|
EXPECT_EQ(t.size(), 1);
|
||||||
|
EXPECT_EQ(t.get<0>(), 0);
|
||||||
|
|
||||||
|
utils::tuple<int> t1 = 2;
|
||||||
|
EXPECT_EQ(sizeof(t1), sizeof(int));
|
||||||
|
EXPECT_TRUE((std::is_same_v<decltype(t1.get<0>()), int&>));
|
||||||
|
EXPECT_EQ(t1.size(), 1);
|
||||||
|
EXPECT_EQ(t1.get<0>(), 2);
|
||||||
|
t1 = {};
|
||||||
|
EXPECT_EQ(t1.size(), 1);
|
||||||
|
EXPECT_EQ(t1.get<0>(), 0);
|
||||||
|
|
||||||
|
utils::tuple<int, some_struct> t2 = { 2, s };
|
||||||
|
EXPECT_EQ(sizeof(t2), 32);
|
||||||
|
EXPECT_EQ(t2.size(), 2);
|
||||||
|
EXPECT_TRUE((std::is_same_v<decltype(t2.get<0>()), int&>));
|
||||||
|
EXPECT_TRUE((std::is_same_v<decltype(t2.get<1>()), some_struct&>));
|
||||||
|
EXPECT_EQ(t2.get<0>(), 2);
|
||||||
|
EXPECT_EQ(t2.get<1>(), s);
|
||||||
|
t2 = {};
|
||||||
|
EXPECT_EQ(t2.size(), 2);
|
||||||
|
EXPECT_EQ(t2.get<0>(), 0);
|
||||||
|
EXPECT_EQ(t2.get<1>(), some_struct{});
|
||||||
|
|
||||||
|
t2.get<0>() = 666;
|
||||||
|
t2.get<1>() = s;
|
||||||
|
EXPECT_EQ(t2.get<0>(), 666);
|
||||||
|
EXPECT_EQ(t2.get<1>(), s);
|
||||||
|
|
||||||
|
utils::tuple<int, some_struct, double> t3 = { 2, s, 1234.0 };
|
||||||
|
EXPECT_EQ(sizeof(t3), 40);
|
||||||
|
EXPECT_EQ(t3.size(), 3);
|
||||||
|
EXPECT_TRUE((std::is_same_v<decltype(t3.get<0>()), int&>));
|
||||||
|
EXPECT_TRUE((std::is_same_v<decltype(t3.get<1>()), some_struct&>));
|
||||||
|
EXPECT_TRUE((std::is_same_v<decltype(t3.get<2>()), double&>));
|
||||||
|
EXPECT_EQ(t3.get<0>(), 2);
|
||||||
|
EXPECT_EQ(t3.get<1>(), s);
|
||||||
|
EXPECT_EQ(t3.get<2>(), 1234.0);
|
||||||
|
t3 = {};
|
||||||
|
EXPECT_EQ(t3.size(), 3);
|
||||||
|
EXPECT_EQ(t3.get<0>(), 0);
|
||||||
|
EXPECT_EQ(t3.get<1>(), some_struct{});
|
||||||
|
EXPECT_EQ(t3.get<2>(), 0.0);
|
||||||
|
|
||||||
|
t3.get<0>() = 666;
|
||||||
|
t3.get<1>() = s;
|
||||||
|
t3.get<2>() = 7.0;
|
||||||
|
EXPECT_EQ(t3.get<0>(), 666);
|
||||||
|
EXPECT_EQ(t3.get<1>(), s);
|
||||||
|
EXPECT_EQ(t3.get<2>(), 7.0);
|
||||||
|
|
||||||
|
// const
|
||||||
|
const utils::tuple<int, some_struct> tc = { 2, s };
|
||||||
|
EXPECT_EQ(tc.size(), 2);
|
||||||
|
EXPECT_TRUE((std::is_same_v<decltype(tc.get<0>()), const int&>));
|
||||||
|
EXPECT_TRUE((std::is_same_v<decltype(tc.get<1>()), const some_struct&>));
|
||||||
|
EXPECT_EQ(tc.get<0>(), 2);
|
||||||
|
EXPECT_EQ(tc.get<1>(), s);
|
||||||
|
|
||||||
|
// assignment
|
||||||
|
const utils::tuple<int, some_struct> ta1 = { 2, s };
|
||||||
|
utils::tuple<int, some_struct> ta = ta1;
|
||||||
|
EXPECT_EQ(ta.size(), 2);
|
||||||
|
EXPECT_TRUE((std::is_same_v<decltype(ta.get<0>()), int&>));
|
||||||
|
EXPECT_TRUE((std::is_same_v<decltype(ta.get<1>()), some_struct&>));
|
||||||
|
EXPECT_EQ(ta.get<0>(), 2);
|
||||||
|
EXPECT_EQ(ta.get<1>(), s);
|
||||||
|
|
||||||
|
utils::tuple<int, some_struct> ta2 = { 2, s };
|
||||||
|
ta = ta2;
|
||||||
|
EXPECT_EQ(ta.size(), 2);
|
||||||
|
EXPECT_TRUE((std::is_same_v<decltype(ta.get<0>()), int&>));
|
||||||
|
EXPECT_TRUE((std::is_same_v<decltype(ta.get<1>()), some_struct&>));
|
||||||
|
EXPECT_EQ(ta.get<0>(), 2);
|
||||||
|
EXPECT_EQ(ta.get<1>(), s);
|
||||||
|
EXPECT_EQ(ta2.size(), 2);
|
||||||
|
EXPECT_TRUE((std::is_same_v<decltype(ta2.get<0>()), int&>));
|
||||||
|
EXPECT_TRUE((std::is_same_v<decltype(ta2.get<1>()), some_struct&>));
|
||||||
|
EXPECT_EQ(ta2.get<0>(), 2);
|
||||||
|
EXPECT_EQ(ta2.get<1>(), s);
|
||||||
|
|
||||||
|
ta = std::move(ta2);
|
||||||
|
EXPECT_EQ(ta.size(), 2);
|
||||||
|
EXPECT_TRUE((std::is_same_v<decltype(ta.get<0>()), int&>));
|
||||||
|
EXPECT_TRUE((std::is_same_v<decltype(ta.get<1>()), some_struct&>));
|
||||||
|
EXPECT_EQ(ta.get<0>(), 2);
|
||||||
|
EXPECT_EQ(ta.get<1>(), s);
|
||||||
}
|
}
|
||||||
};
|
|
||||||
|
|
||||||
TEST(Utils, Tuple)
|
|
||||||
{
|
|
||||||
some_struct s {};
|
|
||||||
s.v = 1234;
|
|
||||||
|
|
||||||
utils::tuple t0 = {};
|
|
||||||
EXPECT_EQ(t0.size(), 0);
|
|
||||||
|
|
||||||
utils::tuple<int> t;
|
|
||||||
EXPECT_EQ(sizeof(t), sizeof(int));
|
|
||||||
EXPECT_TRUE((std::is_same_v<decltype(t.get<0>()), int&>));
|
|
||||||
EXPECT_EQ(t.size(), 1);
|
|
||||||
EXPECT_EQ(t.get<0>(), 0);
|
|
||||||
|
|
||||||
utils::tuple<int> t1 = 2;
|
|
||||||
EXPECT_EQ(sizeof(t1), sizeof(int));
|
|
||||||
EXPECT_TRUE((std::is_same_v<decltype(t1.get<0>()), int&>));
|
|
||||||
EXPECT_EQ(t1.size(), 1);
|
|
||||||
EXPECT_EQ(t1.get<0>(), 2);
|
|
||||||
t1 = {};
|
|
||||||
EXPECT_EQ(t1.size(), 1);
|
|
||||||
EXPECT_EQ(t1.get<0>(), 0);
|
|
||||||
|
|
||||||
utils::tuple<int, some_struct> t2 = { 2, s };
|
|
||||||
EXPECT_EQ(sizeof(t2), 32);
|
|
||||||
EXPECT_EQ(t2.size(), 2);
|
|
||||||
EXPECT_TRUE((std::is_same_v<decltype(t2.get<0>()), int&>));
|
|
||||||
EXPECT_TRUE((std::is_same_v<decltype(t2.get<1>()), some_struct&>));
|
|
||||||
EXPECT_EQ(t2.get<0>(), 2);
|
|
||||||
EXPECT_EQ(t2.get<1>(), s);
|
|
||||||
t2 = {};
|
|
||||||
EXPECT_EQ(t2.size(), 2);
|
|
||||||
EXPECT_EQ(t2.get<0>(), 0);
|
|
||||||
EXPECT_EQ(t2.get<1>(), some_struct{});
|
|
||||||
|
|
||||||
t2.get<0>() = 666;
|
|
||||||
t2.get<1>() = s;
|
|
||||||
EXPECT_EQ(t2.get<0>(), 666);
|
|
||||||
EXPECT_EQ(t2.get<1>(), s);
|
|
||||||
|
|
||||||
utils::tuple<int, some_struct, double> t3 = { 2, s, 1234.0 };
|
|
||||||
EXPECT_EQ(sizeof(t3), 40);
|
|
||||||
EXPECT_EQ(t3.size(), 3);
|
|
||||||
EXPECT_TRUE((std::is_same_v<decltype(t3.get<0>()), int&>));
|
|
||||||
EXPECT_TRUE((std::is_same_v<decltype(t3.get<1>()), some_struct&>));
|
|
||||||
EXPECT_TRUE((std::is_same_v<decltype(t3.get<2>()), double&>));
|
|
||||||
EXPECT_EQ(t3.get<0>(), 2);
|
|
||||||
EXPECT_EQ(t3.get<1>(), s);
|
|
||||||
EXPECT_EQ(t3.get<2>(), 1234.0);
|
|
||||||
t3 = {};
|
|
||||||
EXPECT_EQ(t3.size(), 3);
|
|
||||||
EXPECT_EQ(t3.get<0>(), 0);
|
|
||||||
EXPECT_EQ(t3.get<1>(), some_struct{});
|
|
||||||
EXPECT_EQ(t3.get<2>(), 0.0);
|
|
||||||
|
|
||||||
t3.get<0>() = 666;
|
|
||||||
t3.get<1>() = s;
|
|
||||||
t3.get<2>() = 7.0;
|
|
||||||
EXPECT_EQ(t3.get<0>(), 666);
|
|
||||||
EXPECT_EQ(t3.get<1>(), s);
|
|
||||||
EXPECT_EQ(t3.get<2>(), 7.0);
|
|
||||||
|
|
||||||
// const
|
|
||||||
const utils::tuple<int, some_struct> tc = { 2, s };
|
|
||||||
EXPECT_EQ(tc.size(), 2);
|
|
||||||
EXPECT_TRUE((std::is_same_v<decltype(tc.get<0>()), const int&>));
|
|
||||||
EXPECT_TRUE((std::is_same_v<decltype(tc.get<1>()), const some_struct&>));
|
|
||||||
EXPECT_EQ(tc.get<0>(), 2);
|
|
||||||
EXPECT_EQ(tc.get<1>(), s);
|
|
||||||
|
|
||||||
// assignment
|
|
||||||
const utils::tuple<int, some_struct> ta1 = { 2, s };
|
|
||||||
utils::tuple<int, some_struct> ta = ta1;
|
|
||||||
EXPECT_EQ(ta.size(), 2);
|
|
||||||
EXPECT_TRUE((std::is_same_v<decltype(ta.get<0>()), int&>));
|
|
||||||
EXPECT_TRUE((std::is_same_v<decltype(ta.get<1>()), some_struct&>));
|
|
||||||
EXPECT_EQ(ta.get<0>(), 2);
|
|
||||||
EXPECT_EQ(ta.get<1>(), s);
|
|
||||||
|
|
||||||
utils::tuple<int, some_struct> ta2 = { 2, s };
|
|
||||||
ta = ta2;
|
|
||||||
EXPECT_EQ(ta.size(), 2);
|
|
||||||
EXPECT_TRUE((std::is_same_v<decltype(ta.get<0>()), int&>));
|
|
||||||
EXPECT_TRUE((std::is_same_v<decltype(ta.get<1>()), some_struct&>));
|
|
||||||
EXPECT_EQ(ta.get<0>(), 2);
|
|
||||||
EXPECT_EQ(ta.get<1>(), s);
|
|
||||||
EXPECT_EQ(ta2.size(), 2);
|
|
||||||
EXPECT_TRUE((std::is_same_v<decltype(ta2.get<0>()), int&>));
|
|
||||||
EXPECT_TRUE((std::is_same_v<decltype(ta2.get<1>()), some_struct&>));
|
|
||||||
EXPECT_EQ(ta2.get<0>(), 2);
|
|
||||||
EXPECT_EQ(ta2.get<1>(), s);
|
|
||||||
|
|
||||||
ta = std::move(ta2);
|
|
||||||
EXPECT_EQ(ta.size(), 2);
|
|
||||||
EXPECT_TRUE((std::is_same_v<decltype(ta.get<0>()), int&>));
|
|
||||||
EXPECT_TRUE((std::is_same_v<decltype(ta.get<1>()), some_struct&>));
|
|
||||||
EXPECT_EQ(ta.get<0>(), 2);
|
|
||||||
EXPECT_EQ(ta.get<1>(), s);
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,102 +5,17 @@
|
||||||
#include "util/atomic.hpp"
|
#include "util/atomic.hpp"
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
|
||||||
extern bool g_use_rtm;
|
#ifdef ARCH_X64
|
||||||
extern u64 g_rtm_tx_limit1;
|
|
||||||
|
|
||||||
#ifdef _M_X64
|
|
||||||
#if defined(_MSC_VER) && !defined(__clang__)
|
#if defined(_MSC_VER) && !defined(__clang__)
|
||||||
extern "C"
|
|
||||||
{
|
|
||||||
u32 _xbegin();
|
|
||||||
void _xend();
|
|
||||||
void _mm_pause();
|
|
||||||
void _mm_prefetch(const char*, int);
|
|
||||||
void _m_prefetchw(const volatile void*);
|
|
||||||
|
|
||||||
uchar _rotl8(uchar, uchar);
|
|
||||||
ushort _rotl16(ushort, uchar);
|
|
||||||
u64 __popcnt64(u64);
|
|
||||||
|
|
||||||
s64 __mulh(s64, s64);
|
|
||||||
u64 __umulh(u64, u64);
|
|
||||||
|
|
||||||
s64 _div128(s64, s64, s64, s64*);
|
|
||||||
u64 _udiv128(u64, u64, u64, u64*);
|
|
||||||
void __debugbreak();
|
|
||||||
}
|
|
||||||
#include <intrin.h>
|
#include <intrin.h>
|
||||||
#else
|
#else
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
|
#include <x86intrin.h>
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace utils
|
namespace utils
|
||||||
{
|
{
|
||||||
// Transaction helper (result = pair of success and op result, or just bool)
|
|
||||||
template <typename F, typename R = std::invoke_result_t<F>>
|
|
||||||
inline auto tx_start(F op)
|
|
||||||
{
|
|
||||||
#if defined(ARCH_X64)
|
|
||||||
uint status = -1;
|
|
||||||
|
|
||||||
for (auto stamp0 = get_tsc(), stamp1 = stamp0; g_use_rtm && stamp1 - stamp0 <= g_rtm_tx_limit1; stamp1 = get_tsc())
|
|
||||||
{
|
|
||||||
#if !defined(_MSC_VER) || (defined(__clang__) && defined(_MSC_VER))
|
|
||||||
__asm__ goto ("xbegin %l[retry];" ::: "memory" : retry);
|
|
||||||
#else
|
|
||||||
status = _xbegin();
|
|
||||||
|
|
||||||
if (status != _XBEGIN_STARTED) [[unlikely]]
|
|
||||||
{
|
|
||||||
goto retry;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if constexpr (std::is_void_v<R>)
|
|
||||||
{
|
|
||||||
std::invoke(op);
|
|
||||||
#if !defined(_MSC_VER) || (defined(__clang__) && defined(_MSC_VER))
|
|
||||||
__asm__ volatile ("xend;" ::: "memory");
|
|
||||||
#else
|
|
||||||
_xend();
|
|
||||||
#endif
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
auto result = std::invoke(op);
|
|
||||||
#if !defined(_MSC_VER) || (defined(__clang__) && defined(_MSC_VER))
|
|
||||||
__asm__ volatile ("xend;" ::: "memory");
|
|
||||||
#else
|
|
||||||
_xend();
|
|
||||||
#endif
|
|
||||||
return std::make_pair(true, std::move(result));
|
|
||||||
}
|
|
||||||
|
|
||||||
retry:
|
|
||||||
#ifndef _MSC_VER
|
|
||||||
__asm__ volatile ("movl %%eax, %0;" : "=r" (status) :: "memory");
|
|
||||||
#endif
|
|
||||||
if (!status) [[unlikely]]
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
static_cast<void>(op);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if constexpr (std::is_void_v<R>)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return std::make_pair(false, R());
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Try to prefetch to Level 2 cache since it's not split to data/code on most processors
|
// Try to prefetch to Level 2 cache since it's not split to data/code on most processors
|
||||||
template <typename T>
|
template <typename T>
|
||||||
constexpr void prefetch_exec(T func)
|
constexpr void prefetch_exec(T func)
|
||||||
|
|
@ -113,7 +28,7 @@ namespace utils
|
||||||
const u64 value = reinterpret_cast<u64>(func);
|
const u64 value = reinterpret_cast<u64>(func);
|
||||||
const void* ptr = reinterpret_cast<const void*>(value);
|
const void* ptr = reinterpret_cast<const void*>(value);
|
||||||
|
|
||||||
#ifdef _M_X64
|
#ifdef ARCH_X64
|
||||||
return _mm_prefetch(static_cast<const char*>(ptr), _MM_HINT_T1);
|
return _mm_prefetch(static_cast<const char*>(ptr), _MM_HINT_T1);
|
||||||
#else
|
#else
|
||||||
return __builtin_prefetch(ptr, 0, 2);
|
return __builtin_prefetch(ptr, 0, 2);
|
||||||
|
|
@ -128,7 +43,7 @@ namespace utils
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _M_X64
|
#ifdef ARCH_X64
|
||||||
return _mm_prefetch(static_cast<const char*>(ptr), _MM_HINT_T0);
|
return _mm_prefetch(static_cast<const char*>(ptr), _MM_HINT_T0);
|
||||||
#else
|
#else
|
||||||
return __builtin_prefetch(ptr, 0, 3);
|
return __builtin_prefetch(ptr, 0, 3);
|
||||||
|
|
@ -142,110 +57,19 @@ namespace utils
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(_M_X64) && !defined(__clang__)
|
#if defined(ARCH_X64)
|
||||||
return _m_prefetchw(ptr);
|
return _m_prefetchw(const_cast<void*>(ptr));
|
||||||
#else
|
#else
|
||||||
return __builtin_prefetch(ptr, 1, 0);
|
return __builtin_prefetch(ptr, 1, 0);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr u8 rol8(u8 x, u8 n)
|
|
||||||
{
|
|
||||||
if (std::is_constant_evaluated())
|
|
||||||
{
|
|
||||||
return (x << (n & 7)) | (x >> ((-n & 7)));
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
return _rotl8(x, n);
|
|
||||||
#elif defined(__clang__)
|
|
||||||
return __builtin_rotateleft8(x, n);
|
|
||||||
#elif defined(ARCH_X64)
|
|
||||||
return __builtin_ia32_rolqi(x, n);
|
|
||||||
#else
|
|
||||||
return (x << (n & 7)) | (x >> ((-n & 7)));
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
constexpr u16 rol16(u16 x, u16 n)
|
|
||||||
{
|
|
||||||
if (std::is_constant_evaluated())
|
|
||||||
{
|
|
||||||
return (x << (n & 15)) | (x >> ((-n & 15)));
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
return _rotl16(x, static_cast<uchar>(n));
|
|
||||||
#elif defined(__clang__)
|
|
||||||
return __builtin_rotateleft16(x, n);
|
|
||||||
#elif defined(ARCH_X64)
|
|
||||||
return __builtin_ia32_rolhi(x, n);
|
|
||||||
#else
|
|
||||||
return (x << (n & 15)) | (x >> ((-n & 15)));
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
constexpr u32 rol32(u32 x, u32 n)
|
|
||||||
{
|
|
||||||
if (std::is_constant_evaluated())
|
|
||||||
{
|
|
||||||
return (x << (n & 31)) | (x >> (((0 - n) & 31)));
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
return _rotl(x, n);
|
|
||||||
#elif defined(__clang__)
|
|
||||||
return __builtin_rotateleft32(x, n);
|
|
||||||
#else
|
|
||||||
return (x << (n & 31)) | (x >> (((0 - n) & 31)));
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
constexpr u64 rol64(u64 x, u64 n)
|
|
||||||
{
|
|
||||||
if (std::is_constant_evaluated())
|
|
||||||
{
|
|
||||||
return (x << (n & 63)) | (x >> (((0 - n) & 63)));
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
return _rotl64(x, static_cast<int>(n));
|
|
||||||
#elif defined(__clang__)
|
|
||||||
return __builtin_rotateleft64(x, n);
|
|
||||||
#else
|
|
||||||
return (x << (n & 63)) | (x >> (((0 - n) & 63)));
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
constexpr u32 popcnt64(u64 v)
|
|
||||||
{
|
|
||||||
#if !defined(_MSC_VER) || defined(__SSE4_2__)
|
|
||||||
if (std::is_constant_evaluated())
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
v = (v & 0xaaaaaaaaaaaaaaaa) / 2 + (v & 0x5555555555555555);
|
|
||||||
v = (v & 0xcccccccccccccccc) / 4 + (v & 0x3333333333333333);
|
|
||||||
v = (v & 0xf0f0f0f0f0f0f0f0) / 16 + (v & 0x0f0f0f0f0f0f0f0f);
|
|
||||||
v = (v & 0xff00ff00ff00ff00) / 256 + (v & 0x00ff00ff00ff00ff);
|
|
||||||
v = ((v & 0xffff0000ffff0000) >> 16) + (v & 0x0000ffff0000ffff);
|
|
||||||
return static_cast<u32>((v >> 32) + v);
|
|
||||||
}
|
|
||||||
|
|
||||||
#if !defined(_MSC_VER) || defined(__SSE4_2__)
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
return static_cast<u32>(__popcnt64(v));
|
|
||||||
#else
|
|
||||||
return __builtin_popcountll(v);
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
constexpr u32 popcnt128(const u128& v)
|
constexpr u32 popcnt128(const u128& v)
|
||||||
{
|
{
|
||||||
#if defined(_MSC_VER) && !defined(__clang__)
|
#if defined(_MSC_VER) && !defined(__clang__)
|
||||||
return popcnt64(v.lo) + popcnt64(v.hi);
|
return std::popcount(v.lo) + std::popcount(v.hi);
|
||||||
#else
|
#else
|
||||||
return popcnt64(v) + popcnt64(v >> 64);
|
return std::popcount(v);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -332,10 +156,7 @@ namespace utils
|
||||||
else
|
else
|
||||||
return std::countr_zero(arg.lo);
|
return std::countr_zero(arg.lo);
|
||||||
#else
|
#else
|
||||||
if (u64 lo = static_cast<u64>(arg))
|
return std::countr_zero(arg);
|
||||||
return std::countr_zero<u64>(lo);
|
|
||||||
else
|
|
||||||
return std::countr_zero<u64>(arg >> 64) + 64;
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -347,10 +168,7 @@ namespace utils
|
||||||
else
|
else
|
||||||
return std::countl_zero(arg.lo) + 64;
|
return std::countl_zero(arg.lo) + 64;
|
||||||
#else
|
#else
|
||||||
if (u64 hi = static_cast<u64>(arg >> 64))
|
return std::countl_zero(arg);
|
||||||
return std::countl_zero<u64>(hi);
|
|
||||||
else
|
|
||||||
return std::countl_zero<u64>(arg) + 64;
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -358,10 +176,8 @@ namespace utils
|
||||||
{
|
{
|
||||||
#if defined(ARCH_ARM64)
|
#if defined(ARCH_ARM64)
|
||||||
__asm__ volatile("yield");
|
__asm__ volatile("yield");
|
||||||
#elif defined(_M_X64)
|
|
||||||
_mm_pause();
|
|
||||||
#elif defined(ARCH_X64)
|
#elif defined(ARCH_X64)
|
||||||
__builtin_ia32_pause();
|
_mm_pause();
|
||||||
#else
|
#else
|
||||||
#error "Missing utils::pause() implementation"
|
#error "Missing utils::pause() implementation"
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -2986,7 +2986,7 @@ inline v128 gv_rol16(const v128& a, const v128& b)
|
||||||
#else
|
#else
|
||||||
v128 r;
|
v128 r;
|
||||||
for (u32 i = 0; i < 8; i++)
|
for (u32 i = 0; i < 8; i++)
|
||||||
r._u16[i] = utils::rol16(a._u16[i], b._u16[i]);
|
r._u16[i] = std::rotl<u16>(a._u16[i], b._u16[i]);
|
||||||
return r;
|
return r;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
@ -3020,7 +3020,7 @@ inline v128 gv_rol32(const v128& a, const v128& b)
|
||||||
#else
|
#else
|
||||||
v128 r;
|
v128 r;
|
||||||
for (u32 i = 0; i < 4; i++)
|
for (u32 i = 0; i < 4; i++)
|
||||||
r._u32[i] = utils::rol32(a._u32[i], b._u32[i]);
|
r._u32[i] = std::rotl<u32>(a._u32[i], b._u32[i]);
|
||||||
return r;
|
return r;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
@ -3039,7 +3039,7 @@ inline v128 gv_rol32(const v128& a)
|
||||||
#else
|
#else
|
||||||
v128 r;
|
v128 r;
|
||||||
for (u32 i = 0; i < 4; i++)
|
for (u32 i = 0; i < 4; i++)
|
||||||
r._u32[i] = utils::rol32(a._u32[i], count);
|
r._u32[i] = std::rotl<u32>(a._u32[i], count);
|
||||||
return r;
|
return r;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -71,9 +71,10 @@ namespace Darwin_ProcessInfo
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#if !defined(ARCH_X64)
|
||||||
namespace utils
|
namespace utils
|
||||||
{
|
{
|
||||||
#ifdef _WIN32
|
|
||||||
// Some helpers for sanity
|
// Some helpers for sanity
|
||||||
const auto read_reg_dword = [](HKEY hKey, std::string_view value_name) -> std::pair<bool, DWORD>
|
const auto read_reg_dword = [](HKEY hKey, std::string_view value_name) -> std::pair<bool, DWORD>
|
||||||
{
|
{
|
||||||
|
|
@ -110,7 +111,6 @@ namespace utils
|
||||||
return { true, sz };
|
return { true, sz };
|
||||||
};
|
};
|
||||||
|
|
||||||
#if !defined(ARCH_X64)
|
|
||||||
// Alternative way to read OS version using the registry.
|
// Alternative way to read OS version using the registry.
|
||||||
static std::string get_fallback_windows_version()
|
static std::string get_fallback_windows_version()
|
||||||
{
|
{
|
||||||
|
|
@ -152,9 +152,9 @@ namespace utils
|
||||||
|
|
||||||
return fmt::format("Operating system: %s, Version %s", product_name, version_id);
|
return fmt::format("Operating system: %s, Version %s", product_name, version_id);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
bool utils::has_ssse3()
|
bool utils::has_ssse3()
|
||||||
{
|
{
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue