mirror of
https://github.com/RPCSX/rpcsx.git
synced 2025-12-06 07:12:14 +01:00
rsx: implement async shader cache store
cleanup
This commit is contained in:
parent
a6e36dbe30
commit
8234d1b470
|
|
@ -11,7 +11,11 @@
|
||||||
#include "Emu/RSX/Program/RSXFragmentProgram.h"
|
#include "Emu/RSX/Program/RSXFragmentProgram.h"
|
||||||
#include "Overlays/Shaders/shader_loading_dialog.h"
|
#include "Overlays/Shaders/shader_loading_dialog.h"
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
#include <condition_variable>
|
||||||
|
#include <mutex>
|
||||||
|
#include <thread>
|
||||||
|
|
||||||
#include "util/sysinfo.hpp"
|
#include "util/sysinfo.hpp"
|
||||||
#include "util/fnv_hash.hpp"
|
#include "util/fnv_hash.hpp"
|
||||||
|
|
@ -21,13 +25,14 @@ namespace rsx
|
||||||
template <typename pipeline_storage_type, typename backend_storage>
|
template <typename pipeline_storage_type, typename backend_storage>
|
||||||
class shaders_cache
|
class shaders_cache
|
||||||
{
|
{
|
||||||
using unpacked_type = lf_fifo<std::tuple<pipeline_storage_type, RSXVertexProgram, RSXFragmentProgram>,
|
struct unpacked_shader
|
||||||
#ifdef ANDROID
|
{
|
||||||
200
|
pipeline_storage_type props;
|
||||||
#else
|
RSXVertexProgram vp;
|
||||||
1000 // TODO: Determine best size
|
RSXFragmentProgram fp;
|
||||||
#endif
|
};
|
||||||
>;
|
|
||||||
|
using unpacked_type = lf_fifo<unpacked_shader, 500>;
|
||||||
|
|
||||||
struct pipeline_data
|
struct pipeline_data
|
||||||
{
|
{
|
||||||
|
|
@ -73,6 +78,69 @@ namespace rsx
|
||||||
|
|
||||||
backend_storage& m_storage;
|
backend_storage& m_storage;
|
||||||
|
|
||||||
|
std::atomic<bool> m_shader_storage_exit{false};
|
||||||
|
std::condition_variable m_shader_storage_cv;
|
||||||
|
std::mutex m_shader_storage_mtx;
|
||||||
|
std::vector<unpacked_shader> m_shader_storage_worker_queue;
|
||||||
|
|
||||||
|
std::thread m_shader_storage_worker_thread = std::thread([this]
|
||||||
|
{
|
||||||
|
while (!m_shader_storage_exit.load())
|
||||||
|
{
|
||||||
|
unpacked_shader item;
|
||||||
|
|
||||||
|
{
|
||||||
|
std::unique_lock lock(m_shader_storage_mtx);
|
||||||
|
m_shader_storage_cv.wait(lock);
|
||||||
|
if (m_shader_storage_worker_queue.empty())
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
item = std::move(m_shader_storage_worker_queue.back());
|
||||||
|
m_shader_storage_worker_queue.pop_back();
|
||||||
|
}
|
||||||
|
|
||||||
|
pipeline_data data = pack(item.props, item.vp, item.fp);
|
||||||
|
|
||||||
|
std::string fp_name = root_path + "/raw/" + fmt::format("%llX.fp", data.fragment_program_hash);
|
||||||
|
std::string vp_name = root_path + "/raw/" + fmt::format("%llX.vp", data.vertex_program_hash);
|
||||||
|
|
||||||
|
if (fs::stat_t s{}; !fs::get_stat(fp_name, s) || s.size != item.fp.ucode_length)
|
||||||
|
{
|
||||||
|
fs::write_pending_file(fp_name, item.fp.get_data(), item.fp.ucode_length);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fs::stat_t s{}; !fs::get_stat(vp_name, s) || s.size != item.vp.data.size() * sizeof(u32))
|
||||||
|
{
|
||||||
|
fs::write_pending_file(vp_name, item.vp.data);
|
||||||
|
}
|
||||||
|
|
||||||
|
const u32 state_params[] =
|
||||||
|
{
|
||||||
|
data.vp_ctrl0,
|
||||||
|
data.vp_ctrl1,
|
||||||
|
data.fp_ctrl,
|
||||||
|
data.vp_texture_dimensions,
|
||||||
|
data.fp_texture_dimensions,
|
||||||
|
data.fp_texcoord_control,
|
||||||
|
data.fp_height,
|
||||||
|
data.fp_pixel_layout,
|
||||||
|
data.fp_lighting_flags,
|
||||||
|
data.fp_shadow_textures,
|
||||||
|
data.fp_redirected_textures,
|
||||||
|
data.vp_multisampled_textures,
|
||||||
|
data.fp_multisampled_textures,
|
||||||
|
data.fp_mrt_count,
|
||||||
|
};
|
||||||
|
const usz state_hash = rpcs3::hash_array(state_params);
|
||||||
|
|
||||||
|
const std::string pipeline_file_name = fmt::format("%llX+%llX+%llX+%llX.bin", data.vertex_program_hash, data.fragment_program_hash, data.pipeline_storage_hash, state_hash);
|
||||||
|
const std::string pipeline_path = root_path + "/pipelines/" + pipeline_class_name + "/" + version_prefix + "/" + pipeline_file_name;
|
||||||
|
fs::write_pending_file(pipeline_path, &data, sizeof(data));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
static std::string get_message(u32 index, u32 processed, u32 entry_count)
|
static std::string get_message(u32 index, u32 processed, u32 entry_count)
|
||||||
{
|
{
|
||||||
return fmt::format("%s pipeline object %u of %u", index == 0 ? "Loading" : "Compiling", processed, entry_count);
|
return fmt::format("%s pipeline object %u of %u", index == 0 ? "Loading" : "Compiling", processed, entry_count);
|
||||||
|
|
@ -83,20 +151,32 @@ namespace rsx
|
||||||
{
|
{
|
||||||
atomic_t<u32> processed(0);
|
atomic_t<u32> processed(0);
|
||||||
|
|
||||||
std::function<void(u32)> shader_load_worker = [&](u32 stop_at)
|
std::function<void(u32, u32)> shader_load_worker = [&](u32 start_at, u32 stop_at)
|
||||||
{
|
{
|
||||||
u32 pos;
|
u32 thread_processed = 0;
|
||||||
// Processed is incremented before work starts in order to avoid two workers working on the same shader
|
auto update_stats = [&]
|
||||||
while (((pos = processed++) < stop_at) && !Emu.IsStopped())
|
|
||||||
{
|
{
|
||||||
fs::dir_entry tmp = entries[pos];
|
if (thread_processed == 0)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
processed += thread_processed;
|
||||||
|
thread_processed = 0;
|
||||||
|
return !Emu.IsStopped();
|
||||||
|
};
|
||||||
|
|
||||||
|
for (u32 pos = start_at; pos < stop_at; ++pos)
|
||||||
|
{
|
||||||
|
const fs::dir_entry& tmp = entries[pos];
|
||||||
|
thread_processed++;
|
||||||
|
|
||||||
const auto filename = directory_path + "/" + tmp.name;
|
const auto filename = directory_path + "/" + tmp.name;
|
||||||
fs::file f(filename);
|
fs::file f(filename);
|
||||||
|
|
||||||
if (!f)
|
if (!f)
|
||||||
{
|
{
|
||||||
// Unexpected error, but avoid crash
|
fs::remove_file(filename);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -112,17 +192,22 @@ namespace rsx
|
||||||
|
|
||||||
auto entry = unpack(pdata);
|
auto entry = unpack(pdata);
|
||||||
|
|
||||||
if (std::get<1>(entry).data.empty() || !std::get<2>(entry).ucode_length)
|
if (entry.vp.data.empty() || !entry.fp.ucode_length)
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_storage.preload_programs(nullptr, std::get<1>(entry), std::get<2>(entry));
|
m_storage.preload_programs(nullptr, entry.vp, entry.fp);
|
||||||
|
|
||||||
unpacked[unpacked.push_begin()] = std::move(entry);
|
unpacked[unpacked.push_begin()] = std::move(entry);
|
||||||
|
|
||||||
|
if (thread_processed >= 10 && !update_stats())
|
||||||
|
{
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
// Do not account for an extra shader that was never processed
|
}
|
||||||
processed--;
|
|
||||||
|
update_stats();
|
||||||
};
|
};
|
||||||
|
|
||||||
await_workers(nb_workers, 0, shader_load_worker, processed, entry_count, dlg);
|
await_workers(nb_workers, 0, shader_load_worker, processed, entry_count, dlg);
|
||||||
|
|
@ -133,35 +218,60 @@ namespace rsx
|
||||||
{
|
{
|
||||||
atomic_t<u32> processed(0);
|
atomic_t<u32> processed(0);
|
||||||
|
|
||||||
std::function<void(u32)> shader_comp_worker = [&](u32 stop_at)
|
std::function<void(u32, u32)> shader_comp_worker = [&](u32 start_at, u32 stop_at)
|
||||||
{
|
{
|
||||||
u32 pos;
|
u32 thread_processed = 0;
|
||||||
// Processed is incremented before work starts in order to avoid two workers working on the same shader
|
auto update_stats = [&]
|
||||||
while (((pos = processed++) < stop_at) && !Emu.IsStopped())
|
|
||||||
{
|
{
|
||||||
auto& entry = unpacked[pos];
|
if (thread_processed == 0)
|
||||||
m_storage.add_pipeline_entry(std::get<1>(entry), std::get<2>(entry), std::get<0>(entry), std::forward<Args>(args)...);
|
{
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
// Do not account for an extra shader that was never processed
|
|
||||||
processed--;
|
processed += thread_processed;
|
||||||
|
thread_processed = 0;
|
||||||
|
return !Emu.IsStopped();
|
||||||
|
};
|
||||||
|
|
||||||
|
for (u32 pos = start_at; pos < stop_at; ++pos)
|
||||||
|
{
|
||||||
|
unpacked_shader& entry = unpacked[pos];
|
||||||
|
m_storage.add_pipeline_entry(entry.vp, entry.fp, entry.props, std::forward<Args>(args)...);
|
||||||
|
thread_processed++;
|
||||||
|
|
||||||
|
if (thread_processed >= 3 && !update_stats())
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
update_stats();
|
||||||
};
|
};
|
||||||
|
|
||||||
await_workers(nb_workers, 1, shader_comp_worker, processed, entry_count, dlg);
|
await_workers(nb_workers, 1, shader_comp_worker, processed, entry_count, dlg);
|
||||||
}
|
}
|
||||||
|
|
||||||
void await_workers(uint nb_workers, u8 step, std::function<void(u32)>& worker, atomic_t<u32>& processed, u32 entry_count, shader_loading_dialog* dlg)
|
void await_workers(uint nb_workers, u8 step, std::function<void(u32, u32)>& worker, atomic_t<u32>& processed, u32 entry_count, shader_loading_dialog* dlg)
|
||||||
{
|
{
|
||||||
|
if (nb_workers > entry_count)
|
||||||
|
{
|
||||||
|
nb_workers = entry_count;
|
||||||
|
}
|
||||||
|
|
||||||
if (nb_workers == 1)
|
if (nb_workers == 1)
|
||||||
{
|
{
|
||||||
steady_clock::time_point last_update;
|
steady_clock::time_point last_update;
|
||||||
|
|
||||||
// Call the worker function directly, stopping it prematurely to be able to update the screen
|
// Call the worker function directly, stopping it prematurely to be able to update the screen
|
||||||
u32 stop_at = 0;
|
u32 stop_at = 0;
|
||||||
|
u32 start_at = 0;
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
stop_at = std::min(stop_at + 10, entry_count);
|
stop_at = std::min(start_at + 10, entry_count);
|
||||||
|
|
||||||
worker(stop_at);
|
worker(start_at, stop_at);
|
||||||
|
|
||||||
|
start_at = stop_at;
|
||||||
|
|
||||||
// Only update the screen at about 60fps since updating it every time slows down the process
|
// Only update the screen at about 60fps since updating it every time slows down the process
|
||||||
steady_clock::time_point now = steady_clock::now();
|
steady_clock::time_point now = steady_clock::now();
|
||||||
|
|
@ -175,9 +285,18 @@ namespace rsx
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
named_thread_group workers("RSX Worker ", nb_workers, [&]()
|
named_thread_group workers("RSX Worker ", nb_workers, [&](u32 thread_index)
|
||||||
{
|
{
|
||||||
worker(entry_count);
|
if (nb_workers == entry_count)
|
||||||
|
{
|
||||||
|
worker(thread_index, thread_index + 1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto per_thread_entries = entry_count / nb_workers;
|
||||||
|
auto start_at = per_thread_entries * thread_index;
|
||||||
|
auto stop_at = thread_index == nb_workers - 1 ? entry_count : start_at + per_thread_entries;
|
||||||
|
worker(start_at, stop_at);
|
||||||
});
|
});
|
||||||
|
|
||||||
u32 current_progress = 0;
|
u32 current_progress = 0;
|
||||||
|
|
@ -219,6 +338,17 @@ namespace rsx
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stops the background shader-storage worker and blocks until it has finished.
~shaders_cache()
{
	// The exit flag is raised and the worker is signalled while m_shader_storage_mtx is held;
	// the lock is released before join() so the worker can reacquire it when it wakes up.
	{
		const std::lock_guard guard(m_shader_storage_mtx);
		m_shader_storage_exit.store(true);
		m_shader_storage_cv.notify_one();
	}

	m_shader_storage_worker_thread.join();
}
|
||||||
|
|
||||||
template <typename... Args>
|
template <typename... Args>
|
||||||
void load(shader_loading_dialog* dlg, Args&&... args)
|
void load(shader_loading_dialog* dlg, Args&&... args)
|
||||||
{
|
{
|
||||||
|
|
@ -240,12 +370,15 @@ namespace rsx
|
||||||
|
|
||||||
std::vector<fs::dir_entry> entries;
|
std::vector<fs::dir_entry> entries;
|
||||||
|
|
||||||
for (auto&& tmp : root)
|
for (auto&& entry : root)
|
||||||
{
|
{
|
||||||
if (tmp.is_directory)
|
if (entry.is_directory)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
entries.push_back(tmp);
|
if (entry.name.ends_with(".bin"))
|
||||||
|
{
|
||||||
|
entries.push_back(std::move(entry));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 entry_count = ::size32(entries);
|
u32 entry_count = ::size32(entries);
|
||||||
|
|
@ -271,7 +404,7 @@ namespace rsx
|
||||||
|
|
||||||
// Preload everything needed to compile the shaders
|
// Preload everything needed to compile the shaders
|
||||||
unpacked_type unpacked;
|
unpacked_type unpacked;
|
||||||
uint nb_workers = g_cfg.video.renderer == video_renderer::vulkan ? utils::get_thread_count() : 1;
|
uint nb_workers = g_cfg.video.renderer == video_renderer::vulkan ? utils::get_thread_count() * 2 : 1;
|
||||||
|
|
||||||
load_shaders(nb_workers, unpacked, directory_path, entries, entry_count, dlg);
|
load_shaders(nb_workers, unpacked, directory_path, entries, entry_count, dlg);
|
||||||
|
|
||||||
|
|
@ -297,45 +430,27 @@ namespace rsx
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
pipeline_data data = pack(pipeline, vp, fp);
|
auto item = unpacked_shader{pipeline, vp, RSXFragmentProgram::clone(fp) /* ???? */};
|
||||||
|
|
||||||
std::string fp_name = root_path + "/raw/" + fmt::format("%llX.fp", data.fragment_program_hash);
|
std::lock_guard lock(m_shader_storage_mtx);
|
||||||
std::string vp_name = root_path + "/raw/" + fmt::format("%llX.vp", data.vertex_program_hash);
|
m_shader_storage_worker_queue.push_back(std::move(item));
|
||||||
|
m_shader_storage_cv.notify_one();
|
||||||
// Writeback to cache either if file does not exist or it is invalid (unexpected size)
|
|
||||||
// Note: fs::write_file is not atomic, if the process is terminated in the middle an empty file is created
|
|
||||||
if (fs::stat_t s{}; !fs::get_stat(fp_name, s) || s.size != fp.ucode_length)
|
|
||||||
{
|
|
||||||
fs::write_file(fp_name, fs::rewrite, fp.get_data(), fp.ucode_length);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fs::stat_t s{}; !fs::get_stat(vp_name, s) || s.size != vp.data.size() * sizeof(u32))
|
void wait_stores()
|
||||||
{
|
{
|
||||||
fs::write_file(vp_name, fs::rewrite, vp.data);
|
while (true)
|
||||||
|
{
|
||||||
|
{
|
||||||
|
std::lock_guard lock(m_shader_storage_mtx);
|
||||||
|
if (m_shader_storage_worker_queue.empty())
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const u32 state_params[] =
|
std::this_thread::sleep_for(std::chrono::milliseconds(50));
|
||||||
{
|
}
|
||||||
data.vp_ctrl0,
|
|
||||||
data.vp_ctrl1,
|
|
||||||
data.fp_ctrl,
|
|
||||||
data.vp_texture_dimensions,
|
|
||||||
data.fp_texture_dimensions,
|
|
||||||
data.fp_texcoord_control,
|
|
||||||
data.fp_height,
|
|
||||||
data.fp_pixel_layout,
|
|
||||||
data.fp_lighting_flags,
|
|
||||||
data.fp_shadow_textures,
|
|
||||||
data.fp_redirected_textures,
|
|
||||||
data.vp_multisampled_textures,
|
|
||||||
data.fp_multisampled_textures,
|
|
||||||
data.fp_mrt_count,
|
|
||||||
};
|
|
||||||
const usz state_hash = rpcs3::hash_array(state_params);
|
|
||||||
|
|
||||||
const std::string pipeline_file_name = fmt::format("%llX+%llX+%llX+%llX.bin", data.vertex_program_hash, data.fragment_program_hash, data.pipeline_storage_hash, state_hash);
|
|
||||||
const std::string pipeline_path = root_path + "/pipelines/" + pipeline_class_name + "/" + version_prefix + "/" + pipeline_file_name;
|
|
||||||
fs::write_file(pipeline_path, fs::rewrite, &data, sizeof(data));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
RSXVertexProgram load_vp_raw(u64 program_hash) const
|
RSXVertexProgram load_vp_raw(u64 program_hash) const
|
||||||
|
|
@ -369,23 +484,21 @@ namespace rsx
|
||||||
return fp;
|
return fp;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::tuple<pipeline_storage_type, RSXVertexProgram, RSXFragmentProgram> unpack(pipeline_data& data)
|
unpacked_shader unpack(pipeline_data& data)
|
||||||
{
|
{
|
||||||
std::tuple<pipeline_storage_type, RSXVertexProgram, RSXFragmentProgram> result;
|
unpacked_shader result;
|
||||||
auto& [pipeline, vp, fp] = result;
|
result.vp = load_vp_raw(data.vertex_program_hash);
|
||||||
|
result.fp = load_fp_raw(data.fragment_program_hash);
|
||||||
|
result.props = data.pipeline_properties;
|
||||||
|
|
||||||
vp = load_vp_raw(data.vertex_program_hash);
|
result.vp.ctrl = data.vp_ctrl0;
|
||||||
fp = load_fp_raw(data.fragment_program_hash);
|
result.vp.output_mask = data.vp_ctrl1;
|
||||||
pipeline = data.pipeline_properties;
|
result.vp.texture_state.texture_dimensions = data.vp_texture_dimensions;
|
||||||
|
result.vp.texture_state.multisampled_textures = data.vp_multisampled_textures;
|
||||||
|
result.vp.base_address = data.vp_base_address;
|
||||||
|
result.vp.entry = data.vp_entry;
|
||||||
|
|
||||||
vp.ctrl = data.vp_ctrl0;
|
pack_bitset<max_vertex_program_instructions>(result.vp.instruction_mask, data.vp_instruction_mask);
|
||||||
vp.output_mask = data.vp_ctrl1;
|
|
||||||
vp.texture_state.texture_dimensions = data.vp_texture_dimensions;
|
|
||||||
vp.texture_state.multisampled_textures = data.vp_multisampled_textures;
|
|
||||||
vp.base_address = data.vp_base_address;
|
|
||||||
vp.entry = data.vp_entry;
|
|
||||||
|
|
||||||
pack_bitset<max_vertex_program_instructions>(vp.instruction_mask, data.vp_instruction_mask);
|
|
||||||
|
|
||||||
for (u8 index = 0; index < 32; ++index)
|
for (u8 index = 0; index < 32; ++index)
|
||||||
{
|
{
|
||||||
|
|
@ -396,17 +509,17 @@ namespace rsx
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
vp.jump_table.emplace(address);
|
result.vp.jump_table.emplace(address);
|
||||||
}
|
}
|
||||||
|
|
||||||
fp.ctrl = data.fp_ctrl;
|
result.fp.ctrl = data.fp_ctrl;
|
||||||
fp.texture_state.texture_dimensions = data.fp_texture_dimensions;
|
result.fp.texture_state.texture_dimensions = data.fp_texture_dimensions;
|
||||||
fp.texture_state.shadow_textures = data.fp_shadow_textures;
|
result.fp.texture_state.shadow_textures = data.fp_shadow_textures;
|
||||||
fp.texture_state.redirected_textures = data.fp_redirected_textures;
|
result.fp.texture_state.redirected_textures = data.fp_redirected_textures;
|
||||||
fp.texture_state.multisampled_textures = data.fp_multisampled_textures;
|
result.fp.texture_state.multisampled_textures = data.fp_multisampled_textures;
|
||||||
fp.texcoord_control_mask = data.fp_texcoord_control;
|
result.fp.texcoord_control_mask = data.fp_texcoord_control;
|
||||||
fp.two_sided_lighting = !!(data.fp_lighting_flags & 0x1);
|
result.fp.two_sided_lighting = !!(data.fp_lighting_flags & 0x1);
|
||||||
fp.mrt_buffers_count = data.fp_mrt_count;
|
result.fp.mrt_buffers_count = data.fp_mrt_count;
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue