rsx: implement async shader cache store

cleanup
This commit is contained in:
DH 2025-04-09 06:14:48 +03:00
parent a6e36dbe30
commit 8234d1b470

View file

@ -11,7 +11,11 @@
#include "Emu/RSX/Program/RSXFragmentProgram.h"
#include "Overlays/Shaders/shader_loading_dialog.h"
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <thread>
#include "util/sysinfo.hpp"
#include "util/fnv_hash.hpp"
@ -21,13 +25,14 @@ namespace rsx
template <typename pipeline_storage_type, typename backend_storage>
class shaders_cache
{
using unpacked_type = lf_fifo<std::tuple<pipeline_storage_type, RSXVertexProgram, RSXFragmentProgram>,
#ifdef ANDROID
200
#else
1000 // TODO: Determine best size
#endif
>;
// One shader cache entry in unpacked form: the backend pipeline properties
// together with the vertex/fragment program pair they were built for.
// Member order is significant: the type is brace-initialized positionally
// as {props, vp, fp} elsewhere in this class.
struct unpacked_shader
{
// Backend-specific pipeline properties (filled from pipeline_data::pipeline_properties when unpacking)
pipeline_storage_type props;
// Vertex program belonging to this pipeline
RSXVertexProgram vp;
// Fragment program belonging to this pipeline
RSXFragmentProgram fp;
};
using unpacked_type = lf_fifo<unpacked_shader, 500>;
struct pipeline_data
{
@ -73,6 +78,69 @@ namespace rsx
backend_storage& m_storage;
std::atomic<bool> m_shader_storage_exit{false};
std::condition_variable m_shader_storage_cv;
std::mutex m_shader_storage_mtx;
std::vector<unpacked_shader> m_shader_storage_worker_queue;
std::thread m_shader_storage_worker_thread = std::thread([this]
{
while (!m_shader_storage_exit.load())
{
unpacked_shader item;
{
std::unique_lock lock(m_shader_storage_mtx);
m_shader_storage_cv.wait(lock);
if (m_shader_storage_worker_queue.empty())
{
continue;
}
item = std::move(m_shader_storage_worker_queue.back());
m_shader_storage_worker_queue.pop_back();
}
pipeline_data data = pack(item.props, item.vp, item.fp);
std::string fp_name = root_path + "/raw/" + fmt::format("%llX.fp", data.fragment_program_hash);
std::string vp_name = root_path + "/raw/" + fmt::format("%llX.vp", data.vertex_program_hash);
if (fs::stat_t s{}; !fs::get_stat(fp_name, s) || s.size != item.fp.ucode_length)
{
fs::write_pending_file(fp_name, item.fp.get_data(), item.fp.ucode_length);
}
if (fs::stat_t s{}; !fs::get_stat(vp_name, s) || s.size != item.vp.data.size() * sizeof(u32))
{
fs::write_pending_file(vp_name, item.vp.data);
}
const u32 state_params[] =
{
data.vp_ctrl0,
data.vp_ctrl1,
data.fp_ctrl,
data.vp_texture_dimensions,
data.fp_texture_dimensions,
data.fp_texcoord_control,
data.fp_height,
data.fp_pixel_layout,
data.fp_lighting_flags,
data.fp_shadow_textures,
data.fp_redirected_textures,
data.vp_multisampled_textures,
data.fp_multisampled_textures,
data.fp_mrt_count,
};
const usz state_hash = rpcs3::hash_array(state_params);
const std::string pipeline_file_name = fmt::format("%llX+%llX+%llX+%llX.bin", data.vertex_program_hash, data.fragment_program_hash, data.pipeline_storage_hash, state_hash);
const std::string pipeline_path = root_path + "/pipelines/" + pipeline_class_name + "/" + version_prefix + "/" + pipeline_file_name;
fs::write_pending_file(pipeline_path, &data, sizeof(data));
}
});
static std::string get_message(u32 index, u32 processed, u32 entry_count)
{
return fmt::format("%s pipeline object %u of %u", index == 0 ? "Loading" : "Compiling", processed, entry_count);
@ -83,20 +151,32 @@ namespace rsx
{
atomic_t<u32> processed(0);
std::function<void(u32)> shader_load_worker = [&](u32 stop_at)
std::function<void(u32, u32)> shader_load_worker = [&](u32 start_at, u32 stop_at)
{
u32 pos;
// Processed is incremented before work starts in order to avoid two workers working on the same shader
while (((pos = processed++) < stop_at) && !Emu.IsStopped())
u32 thread_processed = 0;
auto update_stats = [&]
{
fs::dir_entry tmp = entries[pos];
if (thread_processed == 0)
{
return true;
}
processed += thread_processed;
thread_processed = 0;
return !Emu.IsStopped();
};
for (u32 pos = start_at; pos < stop_at; ++pos)
{
const fs::dir_entry& tmp = entries[pos];
thread_processed++;
const auto filename = directory_path + "/" + tmp.name;
fs::file f(filename);
if (!f)
{
// Unexpected error, but avoid crash
fs::remove_file(filename);
continue;
}
@ -112,17 +192,22 @@ namespace rsx
auto entry = unpack(pdata);
if (std::get<1>(entry).data.empty() || !std::get<2>(entry).ucode_length)
if (entry.vp.data.empty() || !entry.fp.ucode_length)
{
continue;
}
m_storage.preload_programs(nullptr, std::get<1>(entry), std::get<2>(entry));
m_storage.preload_programs(nullptr, entry.vp, entry.fp);
unpacked[unpacked.push_begin()] = std::move(entry);
if (thread_processed >= 10 && !update_stats())
{
return;
}
// Do not account for an extra shader that was never processed
processed--;
}
update_stats();
};
await_workers(nb_workers, 0, shader_load_worker, processed, entry_count, dlg);
@ -133,35 +218,60 @@ namespace rsx
{
atomic_t<u32> processed(0);
std::function<void(u32)> shader_comp_worker = [&](u32 stop_at)
std::function<void(u32, u32)> shader_comp_worker = [&](u32 start_at, u32 stop_at)
{
u32 pos;
// Processed is incremented before work starts in order to avoid two workers working on the same shader
while (((pos = processed++) < stop_at) && !Emu.IsStopped())
u32 thread_processed = 0;
auto update_stats = [&]
{
auto& entry = unpacked[pos];
m_storage.add_pipeline_entry(std::get<1>(entry), std::get<2>(entry), std::get<0>(entry), std::forward<Args>(args)...);
if (thread_processed == 0)
{
return true;
}
// Do not account for an extra shader that was never processed
processed--;
processed += thread_processed;
thread_processed = 0;
return !Emu.IsStopped();
};
for (u32 pos = start_at; pos < stop_at; ++pos)
{
unpacked_shader& entry = unpacked[pos];
m_storage.add_pipeline_entry(entry.vp, entry.fp, entry.props, std::forward<Args>(args)...);
thread_processed++;
if (thread_processed >= 3 && !update_stats())
{
return;
}
}
update_stats();
};
await_workers(nb_workers, 1, shader_comp_worker, processed, entry_count, dlg);
}
void await_workers(uint nb_workers, u8 step, std::function<void(u32)>& worker, atomic_t<u32>& processed, u32 entry_count, shader_loading_dialog* dlg)
void await_workers(uint nb_workers, u8 step, std::function<void(u32, u32)>& worker, atomic_t<u32>& processed, u32 entry_count, shader_loading_dialog* dlg)
{
if (nb_workers > entry_count)
{
nb_workers = entry_count;
}
if (nb_workers == 1)
{
steady_clock::time_point last_update;
// Call the worker function directly, stopping it prematurely to be able to update the screen
u32 stop_at = 0;
u32 start_at = 0;
do
{
stop_at = std::min(stop_at + 10, entry_count);
stop_at = std::min(start_at + 10, entry_count);
worker(stop_at);
worker(start_at, stop_at);
start_at = stop_at;
// Only update the screen at about 60fps since updating it every time slows down the process
steady_clock::time_point now = steady_clock::now();
@ -175,9 +285,18 @@ namespace rsx
}
else
{
named_thread_group workers("RSX Worker ", nb_workers, [&]()
named_thread_group workers("RSX Worker ", nb_workers, [&](u32 thread_index)
{
worker(entry_count);
if (nb_workers == entry_count)
{
worker(thread_index, thread_index + 1);
return;
}
auto per_thread_entries = entry_count / nb_workers;
auto start_at = per_thread_entries * thread_index;
auto stop_at = thread_index == nb_workers - 1 ? entry_count : start_at + per_thread_entries;
worker(start_at, stop_at);
});
u32 current_progress = 0;
@ -219,6 +338,17 @@ namespace rsx
}
}
// Stops the background store thread: raises the exit flag, wakes the thread
// and then blocks until it has terminated.
~shaders_cache()
{
	{
		// The flag is raised and the wakeup is sent while holding the queue
		// mutex, the same mutex the store thread uses for its wait
		const std::lock_guard<std::mutex> guard(m_shader_storage_mtx);
		m_shader_storage_exit.store(true);
		m_shader_storage_cv.notify_one();
	}

	// The mutex is released before joining so the thread can reacquire it
	m_shader_storage_worker_thread.join();
}
template <typename... Args>
void load(shader_loading_dialog* dlg, Args&&... args)
{
@ -240,12 +370,15 @@ namespace rsx
std::vector<fs::dir_entry> entries;
for (auto&& tmp : root)
for (auto&& entry : root)
{
if (tmp.is_directory)
if (entry.is_directory)
continue;
entries.push_back(tmp);
if (entry.name.ends_with(".bin"))
{
entries.push_back(std::move(entry));
}
}
u32 entry_count = ::size32(entries);
@ -271,7 +404,7 @@ namespace rsx
// Preload everything needed to compile the shaders
unpacked_type unpacked;
uint nb_workers = g_cfg.video.renderer == video_renderer::vulkan ? utils::get_thread_count() : 1;
uint nb_workers = g_cfg.video.renderer == video_renderer::vulkan ? utils::get_thread_count() * 2 : 1;
load_shaders(nb_workers, unpacked, directory_path, entries, entry_count, dlg);
@ -297,45 +430,27 @@ namespace rsx
return;
}
pipeline_data data = pack(pipeline, vp, fp);
auto item = unpacked_shader{pipeline, vp, RSXFragmentProgram::clone(fp) /* ???? */};
std::string fp_name = root_path + "/raw/" + fmt::format("%llX.fp", data.fragment_program_hash);
std::string vp_name = root_path + "/raw/" + fmt::format("%llX.vp", data.vertex_program_hash);
// Writeback to cache either if file does not exist or it is invalid (unexpected size)
// Note: fs::write_file is not atomic, if the process is terminated in the middle an empty file is created
if (fs::stat_t s{}; !fs::get_stat(fp_name, s) || s.size != fp.ucode_length)
{
fs::write_file(fp_name, fs::rewrite, fp.get_data(), fp.ucode_length);
std::lock_guard lock(m_shader_storage_mtx);
m_shader_storage_worker_queue.push_back(std::move(item));
m_shader_storage_cv.notify_one();
}
if (fs::stat_t s{}; !fs::get_stat(vp_name, s) || s.size != vp.data.size() * sizeof(u32))
void wait_stores()
{
fs::write_file(vp_name, fs::rewrite, vp.data);
while (true)
{
{
std::lock_guard lock(m_shader_storage_mtx);
if (m_shader_storage_worker_queue.empty())
{
return;
}
}
const u32 state_params[] =
{
data.vp_ctrl0,
data.vp_ctrl1,
data.fp_ctrl,
data.vp_texture_dimensions,
data.fp_texture_dimensions,
data.fp_texcoord_control,
data.fp_height,
data.fp_pixel_layout,
data.fp_lighting_flags,
data.fp_shadow_textures,
data.fp_redirected_textures,
data.vp_multisampled_textures,
data.fp_multisampled_textures,
data.fp_mrt_count,
};
const usz state_hash = rpcs3::hash_array(state_params);
const std::string pipeline_file_name = fmt::format("%llX+%llX+%llX+%llX.bin", data.vertex_program_hash, data.fragment_program_hash, data.pipeline_storage_hash, state_hash);
const std::string pipeline_path = root_path + "/pipelines/" + pipeline_class_name + "/" + version_prefix + "/" + pipeline_file_name;
fs::write_file(pipeline_path, fs::rewrite, &data, sizeof(data));
std::this_thread::sleep_for(std::chrono::milliseconds(50));
}
}
RSXVertexProgram load_vp_raw(u64 program_hash) const
@ -369,23 +484,21 @@ namespace rsx
return fp;
}
std::tuple<pipeline_storage_type, RSXVertexProgram, RSXFragmentProgram> unpack(pipeline_data& data)
unpacked_shader unpack(pipeline_data& data)
{
std::tuple<pipeline_storage_type, RSXVertexProgram, RSXFragmentProgram> result;
auto& [pipeline, vp, fp] = result;
unpacked_shader result;
result.vp = load_vp_raw(data.vertex_program_hash);
result.fp = load_fp_raw(data.fragment_program_hash);
result.props = data.pipeline_properties;
vp = load_vp_raw(data.vertex_program_hash);
fp = load_fp_raw(data.fragment_program_hash);
pipeline = data.pipeline_properties;
result.vp.ctrl = data.vp_ctrl0;
result.vp.output_mask = data.vp_ctrl1;
result.vp.texture_state.texture_dimensions = data.vp_texture_dimensions;
result.vp.texture_state.multisampled_textures = data.vp_multisampled_textures;
result.vp.base_address = data.vp_base_address;
result.vp.entry = data.vp_entry;
vp.ctrl = data.vp_ctrl0;
vp.output_mask = data.vp_ctrl1;
vp.texture_state.texture_dimensions = data.vp_texture_dimensions;
vp.texture_state.multisampled_textures = data.vp_multisampled_textures;
vp.base_address = data.vp_base_address;
vp.entry = data.vp_entry;
pack_bitset<max_vertex_program_instructions>(vp.instruction_mask, data.vp_instruction_mask);
pack_bitset<max_vertex_program_instructions>(result.vp.instruction_mask, data.vp_instruction_mask);
for (u8 index = 0; index < 32; ++index)
{
@ -396,17 +509,17 @@ namespace rsx
break;
}
vp.jump_table.emplace(address);
result.vp.jump_table.emplace(address);
}
fp.ctrl = data.fp_ctrl;
fp.texture_state.texture_dimensions = data.fp_texture_dimensions;
fp.texture_state.shadow_textures = data.fp_shadow_textures;
fp.texture_state.redirected_textures = data.fp_redirected_textures;
fp.texture_state.multisampled_textures = data.fp_multisampled_textures;
fp.texcoord_control_mask = data.fp_texcoord_control;
fp.two_sided_lighting = !!(data.fp_lighting_flags & 0x1);
fp.mrt_buffers_count = data.fp_mrt_count;
result.fp.ctrl = data.fp_ctrl;
result.fp.texture_state.texture_dimensions = data.fp_texture_dimensions;
result.fp.texture_state.shadow_textures = data.fp_shadow_textures;
result.fp.texture_state.redirected_textures = data.fp_redirected_textures;
result.fp.texture_state.multisampled_textures = data.fp_multisampled_textures;
result.fp.texcoord_control_mask = data.fp_texcoord_control;
result.fp.two_sided_lighting = !!(data.fp_lighting_flags & 0x1);
result.fp.mrt_buffers_count = data.fp_mrt_count;
return result;
}