diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index 3d3298532..27f300ce6 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -11,7 +11,11 @@ #include "Emu/RSX/Program/RSXFragmentProgram.h" #include "Overlays/Shaders/shader_loading_dialog.h" +#include #include +#include +#include +#include #include "util/sysinfo.hpp" #include "util/fnv_hash.hpp" @@ -21,13 +25,14 @@ namespace rsx template class shaders_cache { - using unpacked_type = lf_fifo, -#ifdef ANDROID - 200 -#else - 1000 // TODO: Determine best size -#endif - >; + struct unpacked_shader + { + pipeline_storage_type props; + RSXVertexProgram vp; + RSXFragmentProgram fp; + }; + + using unpacked_type = lf_fifo; struct pipeline_data { @@ -73,6 +78,69 @@ namespace rsx backend_storage& m_storage; + std::atomic m_shader_storage_exit{false}; + std::condition_variable m_shader_storage_cv; + std::mutex m_shader_storage_mtx; + std::vector m_shader_storage_worker_queue; + + std::thread m_shader_storage_worker_thread = std::thread([this] + { + while (!m_shader_storage_exit.load()) + { + unpacked_shader item; + + { + std::unique_lock lock(m_shader_storage_mtx); + m_shader_storage_cv.wait(lock); + if (m_shader_storage_worker_queue.empty()) + { + continue; + } + + item = std::move(m_shader_storage_worker_queue.back()); + m_shader_storage_worker_queue.pop_back(); + } + + pipeline_data data = pack(item.props, item.vp, item.fp); + + std::string fp_name = root_path + "/raw/" + fmt::format("%llX.fp", data.fragment_program_hash); + std::string vp_name = root_path + "/raw/" + fmt::format("%llX.vp", data.vertex_program_hash); + + if (fs::stat_t s{}; !fs::get_stat(fp_name, s) || s.size != item.fp.ucode_length) + { + fs::write_pending_file(fp_name, item.fp.get_data(), item.fp.ucode_length); + } + + if (fs::stat_t s{}; !fs::get_stat(vp_name, s) || s.size != item.vp.data.size() * sizeof(u32)) + { + fs::write_pending_file(vp_name, item.vp.data); + } + + const u32 state_params[] = + { + data.vp_ctrl0, + data.vp_ctrl1, + data.fp_ctrl, + data.vp_texture_dimensions, + data.fp_texture_dimensions, + data.fp_texcoord_control, + data.fp_height, + data.fp_pixel_layout, + data.fp_lighting_flags, + data.fp_shadow_textures, + data.fp_redirected_textures, + data.vp_multisampled_textures, + data.fp_multisampled_textures, + data.fp_mrt_count, + }; + const usz state_hash = rpcs3::hash_array(state_params); + + const std::string pipeline_file_name = fmt::format("%llX+%llX+%llX+%llX.bin", data.vertex_program_hash, data.fragment_program_hash, data.pipeline_storage_hash, state_hash); + const std::string pipeline_path = root_path + "/pipelines/" + pipeline_class_name + "/" + version_prefix + "/" + pipeline_file_name; + fs::write_pending_file(pipeline_path, &data, sizeof(data)); + } + }); + static std::string get_message(u32 index, u32 processed, u32 entry_count) { return fmt::format("%s pipeline object %u of %u", index == 0 ? "Loading" : "Compiling", processed, entry_count); @@ -83,20 +151,32 @@ namespace rsx { atomic_t processed(0); - std::function shader_load_worker = [&](u32 stop_at) + std::function shader_load_worker = [&](u32 start_at, u32 stop_at) { - u32 pos; - // Processed is incremented before work starts in order to avoid two workers working on the same shader - while (((pos = processed++) < stop_at) && !Emu.IsStopped()) + u32 thread_processed = 0; + auto update_stats = [&] { - fs::dir_entry tmp = entries[pos]; + if (thread_processed == 0) + { + return true; + } + + processed += thread_processed; + thread_processed = 0; + return !Emu.IsStopped(); + }; + + for (u32 pos = start_at; pos < stop_at; ++pos) + { + const fs::dir_entry& tmp = entries[pos]; + thread_processed++; const auto filename = directory_path + "/" + tmp.name; fs::file f(filename); if (!f) { - // Unexpected error, but avoid crash + fs::remove_file(filename); continue; } @@ -112,17 +192,22 @@ namespace rsx auto entry = unpack(pdata); - if (std::get<1>(entry).data.empty() || !std::get<2>(entry).ucode_length) + if (entry.vp.data.empty() || !entry.fp.ucode_length) { continue; } - m_storage.preload_programs(nullptr, std::get<1>(entry), std::get<2>(entry)); + m_storage.preload_programs(nullptr, entry.vp, entry.fp); unpacked[unpacked.push_begin()] = std::move(entry); + + if (thread_processed >= 10 && !update_stats()) + { + return; + } } - // Do not account for an extra shader that was never processed - processed--; + + update_stats(); }; await_workers(nb_workers, 0, shader_load_worker, processed, entry_count, dlg); @@ -133,35 +218,60 @@ namespace rsx { atomic_t processed(0); - std::function shader_comp_worker = [&](u32 stop_at) + std::function shader_comp_worker = [&](u32 start_at, u32 stop_at) { - u32 pos; - // Processed is incremented before work starts in order to avoid two workers working on the same shader - while (((pos = processed++) < stop_at) && !Emu.IsStopped()) + u32 thread_processed = 0; + auto update_stats = [&] { - auto& entry = unpacked[pos]; - m_storage.add_pipeline_entry(std::get<1>(entry), std::get<2>(entry), std::get<0>(entry), std::forward(args)...); + if (thread_processed == 0) + { + return true; + } + + processed += thread_processed; + thread_processed = 0; + return !Emu.IsStopped(); + }; + + for (u32 pos = start_at; pos < stop_at; ++pos) + { + unpacked_shader& entry = unpacked[pos]; + m_storage.add_pipeline_entry(entry.vp, entry.fp, entry.props, std::forward(args)...); + thread_processed++; + + if (thread_processed >= 3 && !update_stats()) + { + return; + } } - // Do not account for an extra shader that was never processed - processed--; + + update_stats(); }; await_workers(nb_workers, 1, shader_comp_worker, processed, entry_count, dlg); } - void await_workers(uint nb_workers, u8 step, std::function& worker, atomic_t& processed, u32 entry_count, shader_loading_dialog* dlg) + void await_workers(uint nb_workers, u8 step, std::function& worker, atomic_t& processed, u32 entry_count, shader_loading_dialog* dlg) { + if (nb_workers > entry_count) + { + nb_workers = entry_count; + } + if (nb_workers == 1) { steady_clock::time_point last_update; // Call the worker function directly, stopping it prematurely to be able update the screen u32 stop_at = 0; + u32 start_at = 0; do { - stop_at = std::min(stop_at + 10, entry_count); + stop_at = std::min(start_at + 10, entry_count); - worker(stop_at); + worker(start_at, stop_at); + + start_at = stop_at; // Only update the screen at about 60fps since updating it everytime slows down the process steady_clock::time_point now = steady_clock::now(); @@ -175,9 +285,18 @@ namespace rsx } else { - named_thread_group workers("RSX Worker ", nb_workers, [&]() + named_thread_group workers("RSX Worker ", nb_workers, [&](u32 thread_index) { - worker(entry_count); + if (nb_workers == entry_count) + { + worker(thread_index, thread_index + 1); + return; + } + + auto per_thread_entries = entry_count / nb_workers; + auto start_at = per_thread_entries * thread_index; + auto stop_at = thread_index == nb_workers - 1 ? entry_count : start_at + per_thread_entries; + worker(start_at, stop_at); }); u32 current_progress = 0; @@ -219,6 +338,17 @@ namespace rsx } } + ~shaders_cache() + { + { + std::lock_guard lock(m_shader_storage_mtx); + m_shader_storage_exit = true; + m_shader_storage_cv.notify_one(); + } + + m_shader_storage_worker_thread.join(); + } + template void load(shader_loading_dialog* dlg, Args&&... args) { @@ -240,12 +370,15 @@ namespace rsx std::vector entries; - for (auto&& tmp : root) + for (auto&& entry : root) { - if (tmp.is_directory) + if (entry.is_directory) continue; - entries.push_back(tmp); + if (entry.name.ends_with(".bin")) + { + entries.push_back(std::move(entry)); + } } u32 entry_count = ::size32(entries); @@ -271,7 +404,7 @@ namespace rsx // Preload everything needed to compile the shaders unpacked_type unpacked; - uint nb_workers = g_cfg.video.renderer == video_renderer::vulkan ? utils::get_thread_count() : 1; + uint nb_workers = g_cfg.video.renderer == video_renderer::vulkan ? utils::get_thread_count() * 2 : 1; load_shaders(nb_workers, unpacked, directory_path, entries, entry_count, dlg); @@ -297,45 +430,27 @@ namespace rsx return; } - pipeline_data data = pack(pipeline, vp, fp); + auto item = unpacked_shader{pipeline, vp, RSXFragmentProgram::clone(fp) /* ???? */}; - std::string fp_name = root_path + "/raw/" + fmt::format("%llX.fp", data.fragment_program_hash); - std::string vp_name = root_path + "/raw/" + fmt::format("%llX.vp", data.vertex_program_hash); + std::lock_guard lock(m_shader_storage_mtx); + m_shader_storage_worker_queue.push_back(std::move(item)); + m_shader_storage_cv.notify_one(); + } - // Writeback to cache either if file does not exist or it is invalid (unexpected size) - // Note: fs::write_file is not atomic, if the process is terminated in the middle an empty file is created - if (fs::stat_t s{}; !fs::get_stat(fp_name, s) || s.size != fp.ucode_length) + void wait_stores() + { + while (true) { - fs::write_file(fp_name, fs::rewrite, fp.get_data(), fp.ucode_length); - } - - if (fs::stat_t s{}; !fs::get_stat(vp_name, s) || s.size != vp.data.size() * sizeof(u32)) - { - fs::write_file(vp_name, fs::rewrite, vp.data); - } - - const u32 state_params[] = { - data.vp_ctrl0, - data.vp_ctrl1, - data.fp_ctrl, - data.vp_texture_dimensions, - data.fp_texture_dimensions, - data.fp_texcoord_control, - data.fp_height, - data.fp_pixel_layout, - data.fp_lighting_flags, - data.fp_shadow_textures, - data.fp_redirected_textures, - data.vp_multisampled_textures, - data.fp_multisampled_textures, - data.fp_mrt_count, - }; - const usz state_hash = rpcs3::hash_array(state_params); + std::lock_guard lock(m_shader_storage_mtx); + if (m_shader_storage_worker_queue.empty()) + { + return; + } + } - const std::string pipeline_file_name = fmt::format("%llX+%llX+%llX+%llX.bin", data.vertex_program_hash, data.fragment_program_hash, data.pipeline_storage_hash, state_hash); - const std::string pipeline_path = root_path + "/pipelines/" + pipeline_class_name + "/" + version_prefix + "/" + pipeline_file_name; - fs::write_file(pipeline_path, fs::rewrite, &data, sizeof(data)); + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + } } RSXVertexProgram load_vp_raw(u64 program_hash) const @@ -369,23 +484,21 @@ namespace rsx return fp; } - std::tuple unpack(pipeline_data& data) + unpacked_shader unpack(pipeline_data& data) { - std::tuple result; - auto& [pipeline, vp, fp] = result; + unpacked_shader result; + result.vp = load_vp_raw(data.vertex_program_hash); + result.fp = load_fp_raw(data.fragment_program_hash); + result.props = data.pipeline_properties; - vp = load_vp_raw(data.vertex_program_hash); - fp = load_fp_raw(data.fragment_program_hash); - pipeline = data.pipeline_properties; + result.vp.ctrl = data.vp_ctrl0; + result.vp.output_mask = data.vp_ctrl1; + result.vp.texture_state.texture_dimensions = data.vp_texture_dimensions; + result.vp.texture_state.multisampled_textures = data.vp_multisampled_textures; + result.vp.base_address = data.vp_base_address; + result.vp.entry = data.vp_entry; - vp.ctrl = data.vp_ctrl0; - vp.output_mask = data.vp_ctrl1; - vp.texture_state.texture_dimensions = data.vp_texture_dimensions; - vp.texture_state.multisampled_textures = data.vp_multisampled_textures; - vp.base_address = data.vp_base_address; - vp.entry = data.vp_entry; - - pack_bitset(vp.instruction_mask, data.vp_instruction_mask); + pack_bitset(result.vp.instruction_mask, data.vp_instruction_mask); for (u8 index = 0; index < 32; ++index) { @@ -396,17 +509,17 @@ namespace rsx break; } - vp.jump_table.emplace(address); + result.vp.jump_table.emplace(address); } - fp.ctrl = data.fp_ctrl; - fp.texture_state.texture_dimensions = data.fp_texture_dimensions; - fp.texture_state.shadow_textures = data.fp_shadow_textures; - fp.texture_state.redirected_textures = data.fp_redirected_textures; - fp.texture_state.multisampled_textures = data.fp_multisampled_textures; - fp.texcoord_control_mask = data.fp_texcoord_control; - fp.two_sided_lighting = !!(data.fp_lighting_flags & 0x1); - fp.mrt_buffers_count = data.fp_mrt_count; + result.fp.ctrl = data.fp_ctrl; + result.fp.texture_state.texture_dimensions = data.fp_texture_dimensions; + result.fp.texture_state.shadow_textures = data.fp_shadow_textures; + result.fp.texture_state.redirected_textures = data.fp_redirected_textures; + result.fp.texture_state.multisampled_textures = data.fp_multisampled_textures; + result.fp.texcoord_control_mask = data.fp_texcoord_control; + result.fp.two_sided_lighting = !!(data.fp_lighting_flags & 0x1); + result.fp.mrt_buffers_count = data.fp_mrt_count; return result; }