rsx: implement async shader cache store

cleanup
This commit is contained in:
DH 2025-04-09 06:14:48 +03:00
parent a6e36dbe30
commit 8234d1b470

View file

@ -11,7 +11,11 @@
#include "Emu/RSX/Program/RSXFragmentProgram.h" #include "Emu/RSX/Program/RSXFragmentProgram.h"
#include "Overlays/Shaders/shader_loading_dialog.h" #include "Overlays/Shaders/shader_loading_dialog.h"
#include <atomic>
#include <chrono> #include <chrono>
#include <condition_variable>
#include <mutex>
#include <thread>
#include "util/sysinfo.hpp" #include "util/sysinfo.hpp"
#include "util/fnv_hash.hpp" #include "util/fnv_hash.hpp"
@ -21,13 +25,14 @@ namespace rsx
template <typename pipeline_storage_type, typename backend_storage> template <typename pipeline_storage_type, typename backend_storage>
class shaders_cache class shaders_cache
{ {
using unpacked_type = lf_fifo<std::tuple<pipeline_storage_type, RSXVertexProgram, RSXFragmentProgram>, struct unpacked_shader
#ifdef ANDROID {
200 pipeline_storage_type props;
#else RSXVertexProgram vp;
1000 // TODO: Determine best size RSXFragmentProgram fp;
#endif };
>;
using unpacked_type = lf_fifo<unpacked_shader, 500>;
struct pipeline_data struct pipeline_data
{ {
@ -73,6 +78,69 @@ namespace rsx
backend_storage& m_storage; backend_storage& m_storage;
std::atomic<bool> m_shader_storage_exit{false};
std::condition_variable m_shader_storage_cv;
std::mutex m_shader_storage_mtx;
std::vector<unpacked_shader> m_shader_storage_worker_queue;
std::thread m_shader_storage_worker_thread = std::thread([this]
{
while (!m_shader_storage_exit.load())
{
unpacked_shader item;
{
std::unique_lock lock(m_shader_storage_mtx);
m_shader_storage_cv.wait(lock);
if (m_shader_storage_worker_queue.empty())
{
continue;
}
item = std::move(m_shader_storage_worker_queue.back());
m_shader_storage_worker_queue.pop_back();
}
pipeline_data data = pack(item.props, item.vp, item.fp);
std::string fp_name = root_path + "/raw/" + fmt::format("%llX.fp", data.fragment_program_hash);
std::string vp_name = root_path + "/raw/" + fmt::format("%llX.vp", data.vertex_program_hash);
if (fs::stat_t s{}; !fs::get_stat(fp_name, s) || s.size != item.fp.ucode_length)
{
fs::write_pending_file(fp_name, item.fp.get_data(), item.fp.ucode_length);
}
if (fs::stat_t s{}; !fs::get_stat(vp_name, s) || s.size != item.vp.data.size() * sizeof(u32))
{
fs::write_pending_file(vp_name, item.vp.data);
}
const u32 state_params[] =
{
data.vp_ctrl0,
data.vp_ctrl1,
data.fp_ctrl,
data.vp_texture_dimensions,
data.fp_texture_dimensions,
data.fp_texcoord_control,
data.fp_height,
data.fp_pixel_layout,
data.fp_lighting_flags,
data.fp_shadow_textures,
data.fp_redirected_textures,
data.vp_multisampled_textures,
data.fp_multisampled_textures,
data.fp_mrt_count,
};
const usz state_hash = rpcs3::hash_array(state_params);
const std::string pipeline_file_name = fmt::format("%llX+%llX+%llX+%llX.bin", data.vertex_program_hash, data.fragment_program_hash, data.pipeline_storage_hash, state_hash);
const std::string pipeline_path = root_path + "/pipelines/" + pipeline_class_name + "/" + version_prefix + "/" + pipeline_file_name;
fs::write_pending_file(pipeline_path, &data, sizeof(data));
}
});
static std::string get_message(u32 index, u32 processed, u32 entry_count) static std::string get_message(u32 index, u32 processed, u32 entry_count)
{ {
return fmt::format("%s pipeline object %u of %u", index == 0 ? "Loading" : "Compiling", processed, entry_count); return fmt::format("%s pipeline object %u of %u", index == 0 ? "Loading" : "Compiling", processed, entry_count);
@ -83,20 +151,32 @@ namespace rsx
{ {
atomic_t<u32> processed(0); atomic_t<u32> processed(0);
std::function<void(u32)> shader_load_worker = [&](u32 stop_at) std::function<void(u32, u32)> shader_load_worker = [&](u32 start_at, u32 stop_at)
{ {
u32 pos; u32 thread_processed = 0;
// Processed is incremented before work starts in order to avoid two workers working on the same shader auto update_stats = [&]
while (((pos = processed++) < stop_at) && !Emu.IsStopped())
{ {
fs::dir_entry tmp = entries[pos]; if (thread_processed == 0)
{
return true;
}
processed += thread_processed;
thread_processed = 0;
return !Emu.IsStopped();
};
for (u32 pos = start_at; pos < stop_at; ++pos)
{
const fs::dir_entry& tmp = entries[pos];
thread_processed++;
const auto filename = directory_path + "/" + tmp.name; const auto filename = directory_path + "/" + tmp.name;
fs::file f(filename); fs::file f(filename);
if (!f) if (!f)
{ {
// Unexpected error, but avoid crash fs::remove_file(filename);
continue; continue;
} }
@ -112,17 +192,22 @@ namespace rsx
auto entry = unpack(pdata); auto entry = unpack(pdata);
if (std::get<1>(entry).data.empty() || !std::get<2>(entry).ucode_length) if (entry.vp.data.empty() || !entry.fp.ucode_length)
{ {
continue; continue;
} }
m_storage.preload_programs(nullptr, std::get<1>(entry), std::get<2>(entry)); m_storage.preload_programs(nullptr, entry.vp, entry.fp);
unpacked[unpacked.push_begin()] = std::move(entry); unpacked[unpacked.push_begin()] = std::move(entry);
if (thread_processed >= 10 && !update_stats())
{
return;
} }
// Do not account for an extra shader that was never processed }
processed--;
update_stats();
}; };
await_workers(nb_workers, 0, shader_load_worker, processed, entry_count, dlg); await_workers(nb_workers, 0, shader_load_worker, processed, entry_count, dlg);
@ -133,35 +218,60 @@ namespace rsx
{ {
atomic_t<u32> processed(0); atomic_t<u32> processed(0);
std::function<void(u32)> shader_comp_worker = [&](u32 stop_at) std::function<void(u32, u32)> shader_comp_worker = [&](u32 start_at, u32 stop_at)
{ {
u32 pos; u32 thread_processed = 0;
// Processed is incremented before work starts in order to avoid two workers working on the same shader auto update_stats = [&]
while (((pos = processed++) < stop_at) && !Emu.IsStopped())
{ {
auto& entry = unpacked[pos]; if (thread_processed == 0)
m_storage.add_pipeline_entry(std::get<1>(entry), std::get<2>(entry), std::get<0>(entry), std::forward<Args>(args)...); {
return true;
} }
// Do not account for an extra shader that was never processed
processed--; processed += thread_processed;
thread_processed = 0;
return !Emu.IsStopped();
};
for (u32 pos = start_at; pos < stop_at; ++pos)
{
unpacked_shader& entry = unpacked[pos];
m_storage.add_pipeline_entry(entry.vp, entry.fp, entry.props, std::forward<Args>(args)...);
thread_processed++;
if (thread_processed >= 3 && !update_stats())
{
return;
}
}
update_stats();
}; };
await_workers(nb_workers, 1, shader_comp_worker, processed, entry_count, dlg); await_workers(nb_workers, 1, shader_comp_worker, processed, entry_count, dlg);
} }
void await_workers(uint nb_workers, u8 step, std::function<void(u32)>& worker, atomic_t<u32>& processed, u32 entry_count, shader_loading_dialog* dlg) void await_workers(uint nb_workers, u8 step, std::function<void(u32, u32)>& worker, atomic_t<u32>& processed, u32 entry_count, shader_loading_dialog* dlg)
{ {
if (nb_workers > entry_count)
{
nb_workers = entry_count;
}
if (nb_workers == 1) if (nb_workers == 1)
{ {
steady_clock::time_point last_update; steady_clock::time_point last_update;
// Call the worker function directly, stopping it prematurely to be able update the screen // Call the worker function directly, stopping it prematurely to be able update the screen
u32 stop_at = 0; u32 stop_at = 0;
u32 start_at = 0;
do do
{ {
stop_at = std::min(stop_at + 10, entry_count); stop_at = std::min(start_at + 10, entry_count);
worker(stop_at); worker(start_at, stop_at);
start_at = stop_at;
// Only update the screen at about 60fps since updating it everytime slows down the process // Only update the screen at about 60fps since updating it everytime slows down the process
steady_clock::time_point now = steady_clock::now(); steady_clock::time_point now = steady_clock::now();
@ -175,9 +285,18 @@ namespace rsx
} }
else else
{ {
named_thread_group workers("RSX Worker ", nb_workers, [&]() named_thread_group workers("RSX Worker ", nb_workers, [&](u32 thread_index)
{ {
worker(entry_count); if (nb_workers == entry_count)
{
worker(thread_index, thread_index + 1);
return;
}
auto per_thread_entries = entry_count / nb_workers;
auto start_at = per_thread_entries * thread_index;
auto stop_at = thread_index == nb_workers - 1 ? entry_count : start_at + per_thread_entries;
worker(start_at, stop_at);
}); });
u32 current_progress = 0; u32 current_progress = 0;
@ -219,6 +338,17 @@ namespace rsx
} }
} }
// Shut down the async store worker. The exit flag is raised and the
// notification is sent while holding the mutex so the worker cannot check
// the flag, decide to wait, and then miss the wakeup.
~shaders_cache()
{
	{
		std::lock_guard lock(m_shader_storage_mtx);
		m_shader_storage_exit = true;
		m_shader_storage_cv.notify_one();
	}
	// NOTE(review): entries still sitting in m_shader_storage_worker_queue are
	// not explicitly flushed here — confirm callers invoke wait_stores() before
	// destruction when persistence of the last stores matters.
	m_shader_storage_worker_thread.join();
}
template <typename... Args> template <typename... Args>
void load(shader_loading_dialog* dlg, Args&&... args) void load(shader_loading_dialog* dlg, Args&&... args)
{ {
@ -240,12 +370,15 @@ namespace rsx
std::vector<fs::dir_entry> entries; std::vector<fs::dir_entry> entries;
for (auto&& tmp : root) for (auto&& entry : root)
{ {
if (tmp.is_directory) if (entry.is_directory)
continue; continue;
entries.push_back(tmp); if (entry.name.ends_with(".bin"))
{
entries.push_back(std::move(entry));
}
} }
u32 entry_count = ::size32(entries); u32 entry_count = ::size32(entries);
@ -271,7 +404,7 @@ namespace rsx
// Preload everything needed to compile the shaders // Preload everything needed to compile the shaders
unpacked_type unpacked; unpacked_type unpacked;
uint nb_workers = g_cfg.video.renderer == video_renderer::vulkan ? utils::get_thread_count() : 1; uint nb_workers = g_cfg.video.renderer == video_renderer::vulkan ? utils::get_thread_count() * 2 : 1;
load_shaders(nb_workers, unpacked, directory_path, entries, entry_count, dlg); load_shaders(nb_workers, unpacked, directory_path, entries, entry_count, dlg);
@ -297,45 +430,27 @@ namespace rsx
return; return;
} }
pipeline_data data = pack(pipeline, vp, fp); auto item = unpacked_shader{pipeline, vp, RSXFragmentProgram::clone(fp) /* ???? */};
std::string fp_name = root_path + "/raw/" + fmt::format("%llX.fp", data.fragment_program_hash); std::lock_guard lock(m_shader_storage_mtx);
std::string vp_name = root_path + "/raw/" + fmt::format("%llX.vp", data.vertex_program_hash); m_shader_storage_worker_queue.push_back(std::move(item));
m_shader_storage_cv.notify_one();
// Writeback to cache either if file does not exist or it is invalid (unexpected size)
// Note: fs::write_file is not atomic, if the process is terminated in the middle an empty file is created
if (fs::stat_t s{}; !fs::get_stat(fp_name, s) || s.size != fp.ucode_length)
{
fs::write_file(fp_name, fs::rewrite, fp.get_data(), fp.ucode_length);
} }
if (fs::stat_t s{}; !fs::get_stat(vp_name, s) || s.size != vp.data.size() * sizeof(u32)) void wait_stores()
{ {
fs::write_file(vp_name, fs::rewrite, vp.data); while (true)
{
{
std::lock_guard lock(m_shader_storage_mtx);
if (m_shader_storage_worker_queue.empty())
{
return;
}
} }
const u32 state_params[] = std::this_thread::sleep_for(std::chrono::milliseconds(50));
{ }
data.vp_ctrl0,
data.vp_ctrl1,
data.fp_ctrl,
data.vp_texture_dimensions,
data.fp_texture_dimensions,
data.fp_texcoord_control,
data.fp_height,
data.fp_pixel_layout,
data.fp_lighting_flags,
data.fp_shadow_textures,
data.fp_redirected_textures,
data.vp_multisampled_textures,
data.fp_multisampled_textures,
data.fp_mrt_count,
};
const usz state_hash = rpcs3::hash_array(state_params);
const std::string pipeline_file_name = fmt::format("%llX+%llX+%llX+%llX.bin", data.vertex_program_hash, data.fragment_program_hash, data.pipeline_storage_hash, state_hash);
const std::string pipeline_path = root_path + "/pipelines/" + pipeline_class_name + "/" + version_prefix + "/" + pipeline_file_name;
fs::write_file(pipeline_path, fs::rewrite, &data, sizeof(data));
} }
RSXVertexProgram load_vp_raw(u64 program_hash) const RSXVertexProgram load_vp_raw(u64 program_hash) const
@ -369,23 +484,21 @@ namespace rsx
return fp; return fp;
} }
std::tuple<pipeline_storage_type, RSXVertexProgram, RSXFragmentProgram> unpack(pipeline_data& data) unpacked_shader unpack(pipeline_data& data)
{ {
std::tuple<pipeline_storage_type, RSXVertexProgram, RSXFragmentProgram> result; unpacked_shader result;
auto& [pipeline, vp, fp] = result; result.vp = load_vp_raw(data.vertex_program_hash);
result.fp = load_fp_raw(data.fragment_program_hash);
result.props = data.pipeline_properties;
vp = load_vp_raw(data.vertex_program_hash); result.vp.ctrl = data.vp_ctrl0;
fp = load_fp_raw(data.fragment_program_hash); result.vp.output_mask = data.vp_ctrl1;
pipeline = data.pipeline_properties; result.vp.texture_state.texture_dimensions = data.vp_texture_dimensions;
result.vp.texture_state.multisampled_textures = data.vp_multisampled_textures;
result.vp.base_address = data.vp_base_address;
result.vp.entry = data.vp_entry;
vp.ctrl = data.vp_ctrl0; pack_bitset<max_vertex_program_instructions>(result.vp.instruction_mask, data.vp_instruction_mask);
vp.output_mask = data.vp_ctrl1;
vp.texture_state.texture_dimensions = data.vp_texture_dimensions;
vp.texture_state.multisampled_textures = data.vp_multisampled_textures;
vp.base_address = data.vp_base_address;
vp.entry = data.vp_entry;
pack_bitset<max_vertex_program_instructions>(vp.instruction_mask, data.vp_instruction_mask);
for (u8 index = 0; index < 32; ++index) for (u8 index = 0; index < 32; ++index)
{ {
@ -396,17 +509,17 @@ namespace rsx
break; break;
} }
vp.jump_table.emplace(address); result.vp.jump_table.emplace(address);
} }
fp.ctrl = data.fp_ctrl; result.fp.ctrl = data.fp_ctrl;
fp.texture_state.texture_dimensions = data.fp_texture_dimensions; result.fp.texture_state.texture_dimensions = data.fp_texture_dimensions;
fp.texture_state.shadow_textures = data.fp_shadow_textures; result.fp.texture_state.shadow_textures = data.fp_shadow_textures;
fp.texture_state.redirected_textures = data.fp_redirected_textures; result.fp.texture_state.redirected_textures = data.fp_redirected_textures;
fp.texture_state.multisampled_textures = data.fp_multisampled_textures; result.fp.texture_state.multisampled_textures = data.fp_multisampled_textures;
fp.texcoord_control_mask = data.fp_texcoord_control; result.fp.texcoord_control_mask = data.fp_texcoord_control;
fp.two_sided_lighting = !!(data.fp_lighting_flags & 0x1); result.fp.two_sided_lighting = !!(data.fp_lighting_flags & 0x1);
fp.mrt_buffers_count = data.fp_mrt_count; result.fp.mrt_buffers_count = data.fp_mrt_count;
return result; return result;
} }