diff --git a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp index ed77bdb07b..eb52798140 100644 --- a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp @@ -13,6 +13,7 @@ namespace gl { using glsl::shader; + using enum program_common::interpreter::compiler_option; namespace interpreter { @@ -52,14 +53,25 @@ namespace gl dlg->create("Precompiling interpreter variants.\nPlease wait...", "Shader Compilation"); const auto variants = program_common::interpreter::get_interpreter_variants(); - const u32 limit = ::size32(variants); - dlg->set_limit(0, limit); - dlg->set_limit(1, 1); + const u32 limit1 = ::size32(variants.base_pipelines); + const u32 limit2 = ::size32(variants.pipelines); + dlg->set_limit(0, limit1); + dlg->set_limit(1, limit2); atomic_t<u32> ctr = 0; auto progress_hook = [&](interpreter::cached_program*) { ctr++; }; - for (auto& variant : variants) + auto update_progress = [&](u32 stage) + { + const auto completed = ctr.load(); + const auto limit = stage ? limit2 : limit1; + const auto message = fmt::format("%s variant %u of %u...", stage ? "Linking" : "Building", completed, limit); // Reuse the snapshot so the message and the progress bar always agree + dlg->update_msg(stage, message); + dlg->set_value(stage, completed); + }; + + // We only need to build the base "compatible pipeline" pairs. + for (const auto& variant : variants.base_pipelines) { build_program_async(variant.first | variant.second, progress_hook); } @@ -67,14 +79,47 @@ namespace gl do { std::this_thread::sleep_for(16ms); + update_progress(0); + } + while (ctr < limit1); - const u32 completed = ctr.load(); - dlg->update_msg(0, fmt::format("Building variant %u of %u...", completed, limit)); - dlg->set_value(0, completed); - } while (ctr < limit); + // Show final progress + update_progress(0); - dlg->inc_value(1, 1); - dlg->refresh(); + // Second stage. 
Propagate base pipelines to all compatible + ctr = 0; + std::lock_guard lock(m_program_cache_lock); + + for (const auto& variant : variants.pipelines) + { + const u64 compiler_options = variant.vs_opts.shader_opt | variant.fs_opts.shader_opt; + if (m_program_cache.find(compiler_options) != m_program_cache.end()) + { + // Base variant + continue; + } + + const u64 compatible_options = variant.vs_opts.compatible_shader_opts | variant.fs_opts.compatible_shader_opts; + auto base_pipeline = m_program_cache.find(compatible_options); + if (base_pipeline == m_program_cache.end()) + { + fmt::throw_exception("Base variant was not found in the cache."); + } + + auto data = new interpreter::cached_program(); + data->flags |= interpreter::CACHED_PIPE_UNOPTIMIZED; + data->allocator = base_pipeline->second->allocator; + data->vertex_shader = base_pipeline->second->vertex_shader; + data->fragment_shader = base_pipeline->second->fragment_shader; + data->prog = base_pipeline->second->prog; + m_program_cache[compiler_options].reset(data); + } + + ctr = limit2; + update_progress(1); + + // Minor stall to avoid visual flashing + std::this_thread::sleep_for(16ms); } void shader_interpreter::destroy() @@ -85,6 +130,16 @@ namespace gl prog.second->fragment_shader->remove(); prog.second->prog->remove(); } + + for (auto& shader : m_vs_cache) + { + shader.second->remove(); + } + + for (auto& shader : m_fs_cache) + { + shader.second->remove(); + } } glsl::program* shader_interpreter::get(const interpreter::program_metadata& metadata, u32 vp_ctrl, u32 fp_ctrl) @@ -100,49 +155,51 @@ namespace gl case rsx::comparison_function::never: return nullptr; case rsx::comparison_function::greater_or_equal: - opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_GE; + opt |= COMPILER_OPT_ENABLE_ALPHA_TEST_GE; break; case rsx::comparison_function::greater: - opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_G; + opt |= COMPILER_OPT_ENABLE_ALPHA_TEST_G; break; case 
rsx::comparison_function::less_or_equal: - opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_LE; + opt |= COMPILER_OPT_ENABLE_ALPHA_TEST_LE; break; case rsx::comparison_function::less: - opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_L; + opt |= COMPILER_OPT_ENABLE_ALPHA_TEST_L; break; case rsx::comparison_function::equal: - opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_EQ; + opt |= COMPILER_OPT_ENABLE_ALPHA_TEST_EQ; break; case rsx::comparison_function::not_equal: - opt |= program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_NE; + opt |= COMPILER_OPT_ENABLE_ALPHA_TEST_NE; break; } } - if (fp_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_DEPTH_EXPORT; - if (fp_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_F32_EXPORT; - if (fp_ctrl & RSX_SHADER_CONTROL_USES_KIL) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_KIL; - if (metadata.referenced_textures_mask) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES; - if (metadata.has_branch_instructions) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_FLOW_CTRL; - if (metadata.has_pack_instructions) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_PACKING; - if (rsx::method_registers.polygon_stipple_enabled()) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_STIPPLING; - if (vp_ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS) opt |= program_common::interpreter::COMPILER_OPT_ENABLE_INSTANCING; + if (fp_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) opt |= COMPILER_OPT_ENABLE_DEPTH_EXPORT; + if (fp_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) opt |= COMPILER_OPT_ENABLE_F32_EXPORT; + if (fp_ctrl & RSX_SHADER_CONTROL_USES_KIL) opt |= COMPILER_OPT_ENABLE_KIL; + if (metadata.referenced_textures_mask) opt |= COMPILER_OPT_ENABLE_TEXTURES; + if (metadata.has_branch_instructions) opt |= COMPILER_OPT_ENABLE_FLOW_CTRL; + 
if (metadata.has_pack_instructions) opt |= COMPILER_OPT_ENABLE_PACKING; + if (rsx::method_registers.polygon_stipple_enabled()) opt |= COMPILER_OPT_ENABLE_STIPPLING; + if (vp_ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS) opt |= COMPILER_OPT_ENABLE_INSTANCING; - if (auto it = m_program_cache.find(opt); it != m_program_cache.end()) [[likely]] { - m_current_interpreter = it->second.get(); - } - else - { - m_current_interpreter = build_program(opt); + std::lock_guard lock(m_program_cache_lock); + if (auto it = m_program_cache.find(opt); it != m_program_cache.end()) [[likely]] + { + m_current_interpreter = it->second.get(); + return m_current_interpreter->prog.get(); // Cache hit only; a miss leaves this scope (releasing the lock) and builds the program below + } } + m_current_interpreter = build_program(opt); return m_current_interpreter->prog.get(); } void shader_interpreter::build_vs(u64 compiler_options, interpreter::cached_program& prog_data) { + compiler_options &= COMPILER_OPT_ALL_VS_MASK; { std::lock_guard lock(m_vs_cache_lock); @@ -165,7 +222,7 @@ namespace gl std::string shader_str; ParamArray arr; - null_prog.ctrl = (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_INSTANCING) + null_prog.ctrl = (compiler_options & COMPILER_OPT_ENABLE_INSTANCING) ? 
RSX_SHADER_CONTROL_INSTANCED_CONSTANTS : 0; GLVertexDecompilerThread comp(null_prog, shader_str, arr); @@ -195,7 +252,7 @@ namespace gl " uvec4 vp_instructions[];\n" "};\n\n"; - if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_INSTANCING) + if (compiler_options & COMPILER_OPT_ENABLE_INSTANCING) { builder << "#define _ENABLE_INSTANCED_CONSTANTS\n"; } @@ -230,20 +287,9 @@ namespace gl void shader_interpreter::build_fs(u64 compiler_options, interpreter::cached_program& prog_data) { - { - std::lock_guard lock(m_fs_cache_lock); - - if (auto found = m_fs_cache.find(compiler_options); - found != m_fs_cache.end()) - { - prog_data.fragment_shader = found->second; - return; - } - } - // Allocate TIUs auto& allocator = prog_data.allocator; - if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES) + if (compiler_options & COMPILER_OPT_ENABLE_TEXTURES) { allocator.create(::glsl::program_domain::glsl_fragment_program); if (allocator.max_image_units >= 32) @@ -277,6 +323,19 @@ namespace gl } } + // Cache lookup + compiler_options &= COMPILER_OPT_ALL_FS_MASK; + { + std::lock_guard lock(m_fs_cache_lock); + + if (auto found = m_fs_cache.find(compiler_options); + found != m_fs_cache.end()) + { + prog_data.fragment_shader = found->second; + return; + } + } + u32 len; ParamArray arr; std::string shader_str; @@ -291,67 +350,67 @@ namespace gl ::glsl::insert_subheader_block(builder); comp.insertConstants(builder); - if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_GE) + if (compiler_options & COMPILER_OPT_ENABLE_ALPHA_TEST_GE) { builder << "#define ALPHA_TEST_GEQUAL\n"; } - if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_G) + if (compiler_options & COMPILER_OPT_ENABLE_ALPHA_TEST_G) { builder << "#define ALPHA_TEST_GREATER\n"; } - if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_LE) + if (compiler_options & 
COMPILER_OPT_ENABLE_ALPHA_TEST_LE) { builder << "#define ALPHA_TEST_LEQUAL\n"; } - if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_L) + if (compiler_options & COMPILER_OPT_ENABLE_ALPHA_TEST_L) { builder << "#define ALPHA_TEST_LESS\n"; } - if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_EQ) + if (compiler_options & COMPILER_OPT_ENABLE_ALPHA_TEST_EQ) { builder << "#define ALPHA_TEST_EQUAL\n"; } - if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_ALPHA_TEST_NE) + if (compiler_options & COMPILER_OPT_ENABLE_ALPHA_TEST_NE) { builder << "#define ALPHA_TEST_NEQUAL\n"; } - if (!(compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_F32_EXPORT)) + if (!(compiler_options & COMPILER_OPT_ENABLE_F32_EXPORT)) { builder << "#define WITH_HALF_OUTPUT_REGISTER\n"; } - if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_DEPTH_EXPORT) + if (compiler_options & COMPILER_OPT_ENABLE_DEPTH_EXPORT) { builder << "#define WITH_DEPTH_EXPORT\n"; } - if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_FLOW_CTRL) + if (compiler_options & COMPILER_OPT_ENABLE_FLOW_CTRL) { builder << "#define WITH_FLOW_CTRL\n"; } - if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_PACKING) + if (compiler_options & COMPILER_OPT_ENABLE_PACKING) { builder << "#define WITH_PACKING\n"; } - if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_KIL) + if (compiler_options & COMPILER_OPT_ENABLE_KIL) { builder << "#define WITH_KIL\n"; } - if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_STIPPLING) + if (compiler_options & COMPILER_OPT_ENABLE_STIPPLING) { builder << "#define WITH_STIPPLING\n"; } - if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES) + if (compiler_options & COMPILER_OPT_ENABLE_TEXTURES) { builder << "#define WITH_TEXTURES\n\n"; @@ -460,7 +519,7 @@ namespace gl 
data->prog->uniforms[0] = GL_STREAM_BUFFER_START + 0; data->prog->uniforms[1] = GL_STREAM_BUFFER_START + 1; - if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES) + if (compiler_options & COMPILER_OPT_ENABLE_TEXTURES) { // Initialize texture bindings int assigned = 0; @@ -478,12 +537,13 @@ namespace gl } } + std::lock_guard lock(m_program_cache_lock); m_program_cache[compiler_options].reset(data); } bool shader_interpreter::is_interpreter(const glsl::program* program) const { - return (program == m_current_interpreter->prog.get()); + return (m_current_interpreter && program == m_current_interpreter->prog.get()); } void shader_interpreter::update_fragment_textures( diff --git a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.h b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.h index b431377318..d6fafba388 100644 --- a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.h +++ b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.h @@ -85,10 +85,10 @@ namespace gl shared_mutex m_vs_cache_lock; shared_mutex m_fs_cache_lock; + shared_mutex m_program_cache_lock; shader_cache_t m_vs_cache; shader_cache_t m_fs_cache; - pipeline_cache_t m_program_cache; void build_vs(u64 compiler_options, interpreter::cached_program& prog_data); diff --git a/rpcs3/Emu/RSX/Program/ShaderInterpreter.cpp b/rpcs3/Emu/RSX/Program/ShaderInterpreter.cpp index 54ba46b28a..de150b3b15 100644 --- a/rpcs3/Emu/RSX/Program/ShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/Program/ShaderInterpreter.cpp @@ -5,6 +5,20 @@ namespace program_common::interpreter { + struct vs_variants_metadata + { + u32 vs_base_mask = 0; + std::unordered_set<u32> vs_base_opts; // Set of options that covers all possible variants regardless of optimization + std::unordered_set<u32> vs_opts; // Full set of options that covers all variants with full optimization + }; + + struct fs_variants_metadata + { + u32 fs_base_mask = 0; + std::unordered_set<u32> fs_base_opts; // Set of options that covers all possible variants regardless of optimization + std::unordered_set<u32> fs_opts; // 
Full set of options that covers all variants with full optimization + }; + void bitrange_foreach(u32 min, u32 max, std::function<void(u32)> func) { if (max <= min) @@ -22,50 +36,118 @@ namespace program_common::interpreter } } - std::vector<interpreter_variant_t> get_interpreter_variants() + vs_variants_metadata prepare_vs_variants_data() { - // Separable passes to fetch all possible variants - std::unordered_set<u32> fs_masks; - fs_masks.insert(0); + vs_variants_metadata result; + result.vs_opts.insert(0); + result.vs_base_opts.insert(0); + + const u32 base_vs_mask = COMPILER_OPT_ALL_VS_MASK & ~(COMPILER_OPT_VS_EXCL_MASK); bitrange_foreach(COMPILER_OPT_VS_MIN, COMPILER_OPT_VS_MAX, [&](u32 vs_opt) + { + result.vs_opts.insert(vs_opt); + + if (const auto excl_mask = (vs_opt & COMPILER_OPT_VS_EXCL_MASK); + excl_mask != 0) + { + result.vs_base_opts.insert(base_vs_mask | excl_mask); + } + }); + result.vs_opts.insert(base_vs_mask); + result.vs_base_opts.insert(base_vs_mask); + + result.vs_base_mask = base_vs_mask; + return result; + } + + fs_variants_metadata prepare_fs_variants_data() + { + fs_variants_metadata result; + result.fs_opts.insert(0); + result.fs_base_opts.insert(0); + + const u32 base_fs_mask = COMPILER_OPT_BASE_FS_MASK & ~(COMPILER_OPT_FS_EXCL_MASK); bitrange_foreach(COMPILER_OPT_FS_MIN, COMPILER_OPT_FS_MAX, [&](u32 fs_opt) { - fs_masks.insert(fs_opt); + result.fs_opts.insert(fs_opt); + if (const auto excl_mask = (fs_opt & COMPILER_OPT_FS_EXCL_MASK); + excl_mask != 0) + { + result.fs_base_opts.insert(base_fs_mask | excl_mask); + } }); + result.fs_opts.insert(base_fs_mask); + result.fs_base_opts.insert(base_fs_mask); // Now we add in the alpha testing variants for all fs variants. 
// Only one alpha test type is usable at once std::unordered_set<u32> fs_alpha_test_masks; + std::unordered_set<u32> fs_alpha_test_base_masks; + for (u32 alpha_test_bit = COMPILER_OPT_ENABLE_ALPHA_TEST_GE; alpha_test_bit <= COMPILER_OPT_ENABLE_ALPHA_TEST_NE; alpha_test_bit <<= 1) { - for (const auto& mask : fs_masks) + for (const auto& mask : result.fs_opts) { fs_alpha_test_masks.insert(mask | alpha_test_bit); } + + for (const auto& mask : result.fs_base_opts) + { + fs_alpha_test_base_masks.insert(mask | alpha_test_bit); + } } - // VS - std::unordered_set<u32> vs_masks; - vs_masks.insert(0); - bitrange_foreach(COMPILER_OPT_VS_MIN, COMPILER_OPT_VS_MAX, [&](u32 vs_opt) - { - vs_masks.insert(vs_opt); - }); - // Merge all FS variants - fs_masks.merge(fs_alpha_test_masks); + result.fs_opts.merge(fs_alpha_test_masks); + result.fs_base_opts.merge(fs_alpha_test_base_masks); + + result.fs_base_mask = base_fs_mask; + return result; + } + + interpreter_variants_t get_interpreter_variants() + { + const auto vs_metadata = prepare_vs_variants_data(); + const auto fs_metadata = prepare_fs_variants_data(); + + const auto& vs_masks = vs_metadata.vs_opts; + const auto& fs_masks = fs_metadata.fs_opts; + + const auto base_vs_mask = vs_metadata.vs_base_mask; + const auto base_fs_mask = fs_metadata.fs_base_mask; // Prepare outputs - std::vector<interpreter_variant_t> results; + interpreter_variants_t result; for (const auto& vs_opt : vs_masks) { for (const auto& fs_opt : fs_masks) { - results.push_back({ vs_opt, fs_opt }); + interpreter_pipeline_variant_t variant{}; + variant.vs_opts.shader_opt = vs_opt; + variant.vs_opts.compatible_shader_opts = (vs_opt & ~base_vs_mask) | base_vs_mask; + + variant.fs_opts.shader_opt = fs_opt; + variant.fs_opts.compatible_shader_opts = (fs_opt & ~base_fs_mask) | base_fs_mask; + + result.pipelines.emplace_back(std::move(variant)); } } - return results; + // Calculate base pipelines (minimal set) + const auto& vs_base_masks = vs_metadata.vs_base_opts; + const auto& fs_base_masks = 
fs_metadata.fs_base_opts; + + result.base_pipelines.reserve(vs_base_masks.size() * fs_base_masks.size()); // { base_vs_mask, base_fs_mask } is already produced by the cross product below; adding it separately would queue that pipeline twice + for (const u32 vs_opt : vs_base_masks) + { + for (const u32 fs_opt : fs_base_masks) + { + result.base_pipelines.push_back({ vs_opt, fs_opt }); + } + } + + return result; } } diff --git a/rpcs3/Emu/RSX/Program/ShaderInterpreter.h b/rpcs3/Emu/RSX/Program/ShaderInterpreter.h index 0090a5978e..b80f871769 100644 --- a/rpcs3/Emu/RSX/Program/ShaderInterpreter.h +++ b/rpcs3/Emu/RSX/Program/ShaderInterpreter.h @@ -32,6 +32,11 @@ namespace program_common // Meta COMPILER_OPT_MAX = COMPILER_OPT_ENABLE_ALPHA_TEST_NE, COMPILER_OPT_ALPHA_TEST_MASK = (0b111111 << 9), + COMPILER_OPT_ALL_VS_MASK = COMPILER_OPT_ENABLE_INSTANCING | COMPILER_OPT_ENABLE_VTX_TEXTURES, + COMPILER_OPT_BASE_FS_MASK = 0b1111111, + COMPILER_OPT_ALL_FS_MASK = COMPILER_OPT_BASE_FS_MASK | COMPILER_OPT_ALPHA_TEST_MASK, + COMPILER_OPT_VS_EXCL_MASK = COMPILER_OPT_ENABLE_INSTANCING, + COMPILER_OPT_FS_EXCL_MASK = COMPILER_OPT_ALPHA_TEST_MASK | COMPILER_OPT_ENABLE_STIPPLING | COMPILER_OPT_ENABLE_DEPTH_EXPORT | COMPILER_OPT_ENABLE_F32_EXPORT, // Bounds COMPILER_OPT_FS_MAX = COMPILER_OPT_ENABLE_FLOW_CTRL, @@ -56,7 +61,24 @@ namespace program_common return s; } - using interpreter_variant_t = std::pair<u32, u32>; - std::vector<interpreter_variant_t> get_interpreter_variants(); + struct interpreter_shader_variant_t + { + u32 shader_opt = 0; + u32 compatible_shader_opts = 0; + }; + + struct interpreter_pipeline_variant_t + { + interpreter_shader_variant_t vs_opts; + interpreter_shader_variant_t fs_opts; + }; + + struct interpreter_variants_t + { + std::vector<interpreter_pipeline_variant_t> pipelines; + std::vector<std::pair<u32, u32>> base_pipelines; + }; + + interpreter_variants_t get_interpreter_variants(); } } diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp index 39b654b2ad..3332697420 100644 --- a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp @@ -579,6 +579,7 @@ namespace vk void 
shader_interpreter::preload(rsx::shader_loading_dialog* dlg) { +#if 0 dlg->create("Precompiling interpreter variants.\nPlease wait...", "Shader Compilation"); const auto variants = program_common::interpreter::get_interpreter_variants(); @@ -589,13 +590,14 @@ namespace vk u32 ctr = 0; for (auto& variant : variants) { - build_fs(variant.first | variant.second); - build_vs(variant.first | variant.second); + //build_fs(variant.first | variant.second); + //build_vs(variant.first | variant.second); dlg->update_msg(0, fmt::format("Building variant %u of %u...", ++ctr, limit)); dlg->inc_value(0, 1); } dlg->inc_value(1, 1); dlg->refresh(); +#endif } };