mirror of
https://github.com/RPCSX/rpcsx.git
synced 2025-12-06 07:12:14 +01:00
rsx: implement async shader cache store
cleanup
This commit is contained in:
parent
a6e36dbe30
commit
8234d1b470
|
|
@ -11,7 +11,11 @@
|
|||
#include "Emu/RSX/Program/RSXFragmentProgram.h"
|
||||
#include "Overlays/Shaders/shader_loading_dialog.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
|
||||
#include "util/sysinfo.hpp"
|
||||
#include "util/fnv_hash.hpp"
|
||||
|
|
@ -21,13 +25,14 @@ namespace rsx
|
|||
template <typename pipeline_storage_type, typename backend_storage>
|
||||
class shaders_cache
|
||||
{
|
||||
using unpacked_type = lf_fifo<std::tuple<pipeline_storage_type, RSXVertexProgram, RSXFragmentProgram>,
|
||||
#ifdef ANDROID
|
||||
200
|
||||
#else
|
||||
1000 // TODO: Determine best size
|
||||
#endif
|
||||
>;
|
||||
struct unpacked_shader
|
||||
{
|
||||
pipeline_storage_type props;
|
||||
RSXVertexProgram vp;
|
||||
RSXFragmentProgram fp;
|
||||
};
|
||||
|
||||
using unpacked_type = lf_fifo<unpacked_shader, 500>;
|
||||
|
||||
struct pipeline_data
|
||||
{
|
||||
|
|
@ -73,6 +78,69 @@ namespace rsx
|
|||
|
||||
backend_storage& m_storage;
|
||||
|
||||
std::atomic<bool> m_shader_storage_exit{false};
|
||||
std::condition_variable m_shader_storage_cv;
|
||||
std::mutex m_shader_storage_mtx;
|
||||
std::vector<unpacked_shader> m_shader_storage_worker_queue;
|
||||
|
||||
std::thread m_shader_storage_worker_thread = std::thread([this]
|
||||
{
|
||||
while (!m_shader_storage_exit.load())
|
||||
{
|
||||
unpacked_shader item;
|
||||
|
||||
{
|
||||
std::unique_lock lock(m_shader_storage_mtx);
|
||||
m_shader_storage_cv.wait(lock);
|
||||
if (m_shader_storage_worker_queue.empty())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
item = std::move(m_shader_storage_worker_queue.back());
|
||||
m_shader_storage_worker_queue.pop_back();
|
||||
}
|
||||
|
||||
pipeline_data data = pack(item.props, item.vp, item.fp);
|
||||
|
||||
std::string fp_name = root_path + "/raw/" + fmt::format("%llX.fp", data.fragment_program_hash);
|
||||
std::string vp_name = root_path + "/raw/" + fmt::format("%llX.vp", data.vertex_program_hash);
|
||||
|
||||
if (fs::stat_t s{}; !fs::get_stat(fp_name, s) || s.size != item.fp.ucode_length)
|
||||
{
|
||||
fs::write_pending_file(fp_name, item.fp.get_data(), item.fp.ucode_length);
|
||||
}
|
||||
|
||||
if (fs::stat_t s{}; !fs::get_stat(vp_name, s) || s.size != item.vp.data.size() * sizeof(u32))
|
||||
{
|
||||
fs::write_pending_file(vp_name, item.vp.data);
|
||||
}
|
||||
|
||||
const u32 state_params[] =
|
||||
{
|
||||
data.vp_ctrl0,
|
||||
data.vp_ctrl1,
|
||||
data.fp_ctrl,
|
||||
data.vp_texture_dimensions,
|
||||
data.fp_texture_dimensions,
|
||||
data.fp_texcoord_control,
|
||||
data.fp_height,
|
||||
data.fp_pixel_layout,
|
||||
data.fp_lighting_flags,
|
||||
data.fp_shadow_textures,
|
||||
data.fp_redirected_textures,
|
||||
data.vp_multisampled_textures,
|
||||
data.fp_multisampled_textures,
|
||||
data.fp_mrt_count,
|
||||
};
|
||||
const usz state_hash = rpcs3::hash_array(state_params);
|
||||
|
||||
const std::string pipeline_file_name = fmt::format("%llX+%llX+%llX+%llX.bin", data.vertex_program_hash, data.fragment_program_hash, data.pipeline_storage_hash, state_hash);
|
||||
const std::string pipeline_path = root_path + "/pipelines/" + pipeline_class_name + "/" + version_prefix + "/" + pipeline_file_name;
|
||||
fs::write_pending_file(pipeline_path, &data, sizeof(data));
|
||||
}
|
||||
});
|
||||
|
||||
static std::string get_message(u32 index, u32 processed, u32 entry_count)
|
||||
{
|
||||
return fmt::format("%s pipeline object %u of %u", index == 0 ? "Loading" : "Compiling", processed, entry_count);
|
||||
|
|
@ -83,20 +151,32 @@ namespace rsx
|
|||
{
|
||||
atomic_t<u32> processed(0);
|
||||
|
||||
std::function<void(u32)> shader_load_worker = [&](u32 stop_at)
|
||||
std::function<void(u32, u32)> shader_load_worker = [&](u32 start_at, u32 stop_at)
|
||||
{
|
||||
u32 pos;
|
||||
// Processed is incremented before work starts in order to avoid two workers working on the same shader
|
||||
while (((pos = processed++) < stop_at) && !Emu.IsStopped())
|
||||
u32 thread_processed = 0;
|
||||
auto update_stats = [&]
|
||||
{
|
||||
fs::dir_entry tmp = entries[pos];
|
||||
if (thread_processed == 0)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
processed += thread_processed;
|
||||
thread_processed = 0;
|
||||
return !Emu.IsStopped();
|
||||
};
|
||||
|
||||
for (u32 pos = start_at; pos < stop_at; ++pos)
|
||||
{
|
||||
const fs::dir_entry& tmp = entries[pos];
|
||||
thread_processed++;
|
||||
|
||||
const auto filename = directory_path + "/" + tmp.name;
|
||||
fs::file f(filename);
|
||||
|
||||
if (!f)
|
||||
{
|
||||
// Unexpected error, but avoid crash
|
||||
fs::remove_file(filename);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -112,17 +192,22 @@ namespace rsx
|
|||
|
||||
auto entry = unpack(pdata);
|
||||
|
||||
if (std::get<1>(entry).data.empty() || !std::get<2>(entry).ucode_length)
|
||||
if (entry.vp.data.empty() || !entry.fp.ucode_length)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
m_storage.preload_programs(nullptr, std::get<1>(entry), std::get<2>(entry));
|
||||
m_storage.preload_programs(nullptr, entry.vp, entry.fp);
|
||||
|
||||
unpacked[unpacked.push_begin()] = std::move(entry);
|
||||
|
||||
if (thread_processed >= 10 && !update_stats())
|
||||
{
|
||||
return;
|
||||
}
|
||||
// Do not account for an extra shader that was never processed
|
||||
processed--;
|
||||
}
|
||||
|
||||
update_stats();
|
||||
};
|
||||
|
||||
await_workers(nb_workers, 0, shader_load_worker, processed, entry_count, dlg);
|
||||
|
|
@ -133,35 +218,60 @@ namespace rsx
|
|||
{
|
||||
atomic_t<u32> processed(0);
|
||||
|
||||
std::function<void(u32)> shader_comp_worker = [&](u32 stop_at)
|
||||
std::function<void(u32, u32)> shader_comp_worker = [&](u32 start_at, u32 stop_at)
|
||||
{
|
||||
u32 pos;
|
||||
// Processed is incremented before work starts in order to avoid two workers working on the same shader
|
||||
while (((pos = processed++) < stop_at) && !Emu.IsStopped())
|
||||
u32 thread_processed = 0;
|
||||
auto update_stats = [&]
|
||||
{
|
||||
auto& entry = unpacked[pos];
|
||||
m_storage.add_pipeline_entry(std::get<1>(entry), std::get<2>(entry), std::get<0>(entry), std::forward<Args>(args)...);
|
||||
if (thread_processed == 0)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
// Do not account for an extra shader that was never processed
|
||||
processed--;
|
||||
|
||||
processed += thread_processed;
|
||||
thread_processed = 0;
|
||||
return !Emu.IsStopped();
|
||||
};
|
||||
|
||||
for (u32 pos = start_at; pos < stop_at; ++pos)
|
||||
{
|
||||
unpacked_shader& entry = unpacked[pos];
|
||||
m_storage.add_pipeline_entry(entry.vp, entry.fp, entry.props, std::forward<Args>(args)...);
|
||||
thread_processed++;
|
||||
|
||||
if (thread_processed >= 3 && !update_stats())
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
update_stats();
|
||||
};
|
||||
|
||||
await_workers(nb_workers, 1, shader_comp_worker, processed, entry_count, dlg);
|
||||
}
|
||||
|
||||
void await_workers(uint nb_workers, u8 step, std::function<void(u32)>& worker, atomic_t<u32>& processed, u32 entry_count, shader_loading_dialog* dlg)
|
||||
void await_workers(uint nb_workers, u8 step, std::function<void(u32, u32)>& worker, atomic_t<u32>& processed, u32 entry_count, shader_loading_dialog* dlg)
|
||||
{
|
||||
if (nb_workers > entry_count)
|
||||
{
|
||||
nb_workers = entry_count;
|
||||
}
|
||||
|
||||
if (nb_workers == 1)
|
||||
{
|
||||
steady_clock::time_point last_update;
|
||||
|
||||
// Call the worker function directly, stopping it prematurely to be able to update the screen
|
||||
u32 stop_at = 0;
|
||||
u32 start_at = 0;
|
||||
do
|
||||
{
|
||||
stop_at = std::min(stop_at + 10, entry_count);
|
||||
stop_at = std::min(start_at + 10, entry_count);
|
||||
|
||||
worker(stop_at);
|
||||
worker(start_at, stop_at);
|
||||
|
||||
start_at = stop_at;
|
||||
|
||||
// Only update the screen at about 60fps since updating it every time slows down the process
|
||||
steady_clock::time_point now = steady_clock::now();
|
||||
|
|
@ -175,9 +285,18 @@ namespace rsx
|
|||
}
|
||||
else
|
||||
{
|
||||
named_thread_group workers("RSX Worker ", nb_workers, [&]()
|
||||
named_thread_group workers("RSX Worker ", nb_workers, [&](u32 thread_index)
|
||||
{
|
||||
worker(entry_count);
|
||||
if (nb_workers == entry_count)
|
||||
{
|
||||
worker(thread_index, thread_index + 1);
|
||||
return;
|
||||
}
|
||||
|
||||
auto per_thread_entries = entry_count / nb_workers;
|
||||
auto start_at = per_thread_entries * thread_index;
|
||||
auto stop_at = thread_index == nb_workers - 1 ? entry_count : start_at + per_thread_entries;
|
||||
worker(start_at, stop_at);
|
||||
});
|
||||
|
||||
u32 current_progress = 0;
|
||||
|
|
@ -219,6 +338,17 @@ namespace rsx
|
|||
}
|
||||
}
|
||||
|
||||
// Stops the background store worker: raises the exit flag and signals the
// condition variable while holding the queue mutex, then blocks until the
// worker thread has finished.
~shaders_cache()
{
	{
		std::scoped_lock guard(m_shader_storage_mtx);
		m_shader_storage_exit.store(true);
		m_shader_storage_cv.notify_one();
	}

	m_shader_storage_worker_thread.join();
}
|
||||
|
||||
template <typename... Args>
|
||||
void load(shader_loading_dialog* dlg, Args&&... args)
|
||||
{
|
||||
|
|
@ -240,12 +370,15 @@ namespace rsx
|
|||
|
||||
std::vector<fs::dir_entry> entries;
|
||||
|
||||
for (auto&& tmp : root)
|
||||
for (auto&& entry : root)
|
||||
{
|
||||
if (tmp.is_directory)
|
||||
if (entry.is_directory)
|
||||
continue;
|
||||
|
||||
entries.push_back(tmp);
|
||||
if (entry.name.ends_with(".bin"))
|
||||
{
|
||||
entries.push_back(std::move(entry));
|
||||
}
|
||||
}
|
||||
|
||||
u32 entry_count = ::size32(entries);
|
||||
|
|
@ -271,7 +404,7 @@ namespace rsx
|
|||
|
||||
// Preload everything needed to compile the shaders
|
||||
unpacked_type unpacked;
|
||||
uint nb_workers = g_cfg.video.renderer == video_renderer::vulkan ? utils::get_thread_count() : 1;
|
||||
uint nb_workers = g_cfg.video.renderer == video_renderer::vulkan ? utils::get_thread_count() * 2 : 1;
|
||||
|
||||
load_shaders(nb_workers, unpacked, directory_path, entries, entry_count, dlg);
|
||||
|
||||
|
|
@ -297,45 +430,27 @@ namespace rsx
|
|||
return;
|
||||
}
|
||||
|
||||
pipeline_data data = pack(pipeline, vp, fp);
|
||||
auto item = unpacked_shader{pipeline, vp, RSXFragmentProgram::clone(fp) /* ???? */};
|
||||
|
||||
std::string fp_name = root_path + "/raw/" + fmt::format("%llX.fp", data.fragment_program_hash);
|
||||
std::string vp_name = root_path + "/raw/" + fmt::format("%llX.vp", data.vertex_program_hash);
|
||||
|
||||
// Writeback to cache either if file does not exist or it is invalid (unexpected size)
|
||||
// Note: fs::write_file is not atomic, if the process is terminated in the middle an empty file is created
|
||||
if (fs::stat_t s{}; !fs::get_stat(fp_name, s) || s.size != fp.ucode_length)
|
||||
{
|
||||
fs::write_file(fp_name, fs::rewrite, fp.get_data(), fp.ucode_length);
|
||||
std::lock_guard lock(m_shader_storage_mtx);
|
||||
m_shader_storage_worker_queue.push_back(std::move(item));
|
||||
m_shader_storage_cv.notify_one();
|
||||
}
|
||||
|
||||
if (fs::stat_t s{}; !fs::get_stat(vp_name, s) || s.size != vp.data.size() * sizeof(u32))
|
||||
void wait_stores()
|
||||
{
|
||||
fs::write_file(vp_name, fs::rewrite, vp.data);
|
||||
while (true)
|
||||
{
|
||||
{
|
||||
std::lock_guard lock(m_shader_storage_mtx);
|
||||
if (m_shader_storage_worker_queue.empty())
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const u32 state_params[] =
|
||||
{
|
||||
data.vp_ctrl0,
|
||||
data.vp_ctrl1,
|
||||
data.fp_ctrl,
|
||||
data.vp_texture_dimensions,
|
||||
data.fp_texture_dimensions,
|
||||
data.fp_texcoord_control,
|
||||
data.fp_height,
|
||||
data.fp_pixel_layout,
|
||||
data.fp_lighting_flags,
|
||||
data.fp_shadow_textures,
|
||||
data.fp_redirected_textures,
|
||||
data.vp_multisampled_textures,
|
||||
data.fp_multisampled_textures,
|
||||
data.fp_mrt_count,
|
||||
};
|
||||
const usz state_hash = rpcs3::hash_array(state_params);
|
||||
|
||||
const std::string pipeline_file_name = fmt::format("%llX+%llX+%llX+%llX.bin", data.vertex_program_hash, data.fragment_program_hash, data.pipeline_storage_hash, state_hash);
|
||||
const std::string pipeline_path = root_path + "/pipelines/" + pipeline_class_name + "/" + version_prefix + "/" + pipeline_file_name;
|
||||
fs::write_file(pipeline_path, fs::rewrite, &data, sizeof(data));
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(50));
|
||||
}
|
||||
}
|
||||
|
||||
RSXVertexProgram load_vp_raw(u64 program_hash) const
|
||||
|
|
@ -369,23 +484,21 @@ namespace rsx
|
|||
return fp;
|
||||
}
|
||||
|
||||
std::tuple<pipeline_storage_type, RSXVertexProgram, RSXFragmentProgram> unpack(pipeline_data& data)
|
||||
unpacked_shader unpack(pipeline_data& data)
|
||||
{
|
||||
std::tuple<pipeline_storage_type, RSXVertexProgram, RSXFragmentProgram> result;
|
||||
auto& [pipeline, vp, fp] = result;
|
||||
unpacked_shader result;
|
||||
result.vp = load_vp_raw(data.vertex_program_hash);
|
||||
result.fp = load_fp_raw(data.fragment_program_hash);
|
||||
result.props = data.pipeline_properties;
|
||||
|
||||
vp = load_vp_raw(data.vertex_program_hash);
|
||||
fp = load_fp_raw(data.fragment_program_hash);
|
||||
pipeline = data.pipeline_properties;
|
||||
result.vp.ctrl = data.vp_ctrl0;
|
||||
result.vp.output_mask = data.vp_ctrl1;
|
||||
result.vp.texture_state.texture_dimensions = data.vp_texture_dimensions;
|
||||
result.vp.texture_state.multisampled_textures = data.vp_multisampled_textures;
|
||||
result.vp.base_address = data.vp_base_address;
|
||||
result.vp.entry = data.vp_entry;
|
||||
|
||||
vp.ctrl = data.vp_ctrl0;
|
||||
vp.output_mask = data.vp_ctrl1;
|
||||
vp.texture_state.texture_dimensions = data.vp_texture_dimensions;
|
||||
vp.texture_state.multisampled_textures = data.vp_multisampled_textures;
|
||||
vp.base_address = data.vp_base_address;
|
||||
vp.entry = data.vp_entry;
|
||||
|
||||
pack_bitset<max_vertex_program_instructions>(vp.instruction_mask, data.vp_instruction_mask);
|
||||
pack_bitset<max_vertex_program_instructions>(result.vp.instruction_mask, data.vp_instruction_mask);
|
||||
|
||||
for (u8 index = 0; index < 32; ++index)
|
||||
{
|
||||
|
|
@ -396,17 +509,17 @@ namespace rsx
|
|||
break;
|
||||
}
|
||||
|
||||
vp.jump_table.emplace(address);
|
||||
result.vp.jump_table.emplace(address);
|
||||
}
|
||||
|
||||
fp.ctrl = data.fp_ctrl;
|
||||
fp.texture_state.texture_dimensions = data.fp_texture_dimensions;
|
||||
fp.texture_state.shadow_textures = data.fp_shadow_textures;
|
||||
fp.texture_state.redirected_textures = data.fp_redirected_textures;
|
||||
fp.texture_state.multisampled_textures = data.fp_multisampled_textures;
|
||||
fp.texcoord_control_mask = data.fp_texcoord_control;
|
||||
fp.two_sided_lighting = !!(data.fp_lighting_flags & 0x1);
|
||||
fp.mrt_buffers_count = data.fp_mrt_count;
|
||||
result.fp.ctrl = data.fp_ctrl;
|
||||
result.fp.texture_state.texture_dimensions = data.fp_texture_dimensions;
|
||||
result.fp.texture_state.shadow_textures = data.fp_shadow_textures;
|
||||
result.fp.texture_state.redirected_textures = data.fp_redirected_textures;
|
||||
result.fp.texture_state.multisampled_textures = data.fp_multisampled_textures;
|
||||
result.fp.texcoord_control_mask = data.fp_texcoord_control;
|
||||
result.fp.two_sided_lighting = !!(data.fp_lighting_flags & 0x1);
|
||||
result.fp.mrt_buffers_count = data.fp_mrt_count;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue